def receive_data_chunk(self, raw_data, start):
    """
    Called to write 1 MB chunks of data (except for the last chunk, which
    may be smaller).
    """
    print u"Received {} bytes - {}".format(len(raw_data), self.seq_number)
    if not self.uuid:
        # First chunk: create the DataObject and remember its uuid.
        data_object = DataObject.create(raw_data, settings.COMPRESS_UPLOADS)
        self.uuid = data_object.uuid
    else:
        # Subsequent chunks are appended to the existing DataObject.
        DataObject.append_chunk(self.uuid, raw_data, self.seq_number,
                                settings.COMPRESS_UPLOADS)
    self.seq_number += 1
    self.hasher.update(raw_data)
    return None
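# Sketch only, not part of the original source: receive_data_chunk() above
# assumes per-upload state (self.uuid, self.seq_number, self.hasher). If this
# handler follows the Django FileUploadHandler API (the (raw_data, start)
# signature suggests so), that state would typically be reset in new_file().
# The class name and the choice of SHA-1 below are assumptions.
import hashlib

from django.core.files.uploadhandler import FileUploadHandler


class ChunkedUploadHandlerSketch(FileUploadHandler):

    def new_file(self, *args, **kwargs):
        super(ChunkedUploadHandlerSketch, self).new_file(*args, **kwargs)
        self.uuid = None              # uuid of the DataObject once the first chunk is stored
        self.seq_number = 0           # index of the next chunk to append
        self.hasher = hashlib.sha1()  # running checksum of the uploaded content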
def process_create_entry_work(self, resc_dict, context, is_reference):
    # Most of the time the resource will not exist, so start by calculating
    # the URL and trying to insert the entire record.
    if is_reference:
        url = "file://{}{}/{}".format(context['local_ip'],
                                      context['path'],
                                      context['entry'])
    else:
        with open(context['fullpath'], 'rb') as f:
            seq_number = 0
            data_uuid = None
            for chk in read_in_chunks(f):
                if seq_number == 0:
                    # First chunk creates the DataObject.
                    data_object = DataObject.create(chk,
                                                    resc_dict['compress'])
                    data_uuid = data_object.uuid
                else:
                    # Later chunks are appended to it.
                    DataObject.append_chunk(data_uuid, chk, seq_number,
                                            resc_dict['compress'])
                seq_number += 1
        if data_uuid:
            url = "cassandra://{}".format(data_uuid)
        else:
            # Empty file: nothing was stored, so there is no resource to create.
            return None

    try:
        # Try to insert (create) the record.
        t1 = time.time()
        resource = Resource.create(container=resc_dict['container'],
                                   name=resc_dict['name'],
                                   url=url,
                                   mimetype=resc_dict['mimetype'],
                                   username=context['user'],
                                   size=resc_dict['size'])
        resource.create_acl_list(resc_dict['read_access'],
                                 resc_dict['write_access'])
        msg = 'Resource {} created --> {}'.format(resource.get_name(),
                                                  time.time() - t1)
        logger.info(msg)
    except ResourceConflictError:
        # If the create fails, the record already exists, so retrieve it.
        t1 = time.time()
        resource = Resource.find(merge(resc_dict['container'],
                                       resc_dict['name']))
        msg = "{} ::: Fetch Object -> {}".format(resource.get_name(),
                                                 time.time() - t1)
        logger.info(msg)
        # If the stored URL is not correct then update it.
        # TODO: if the url is a block set that is stored internally then
        # reduce its count so that it can be GC'd.
        if resource.url != url:
            t2 = time.time()
            # if url.startswith('cassandra://'): tidy up the stored block count...
            resource.update(url=url)
            t3 = time.time()
            msg = "{} ::: update -> {}".format(resource.get_name(), t3 - t2)
            logger.info(msg)

    SearchIndex.reset(resource.uuid)
    SearchIndex.index(resource, ['name', 'metadata'])
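# Sketch only (an assumption, not the project's actual helper): read_in_chunks(),
# used by process_create_entry_work() above, is assumed to be a generator that
# yields fixed-size blocks from an open file. The 1 MB default mirrors the chunk
# size mentioned in receive_data_chunk(), but the real value may differ.
def read_in_chunks(file_object, chunk_size=1024 * 1024):
    """Yield successive chunk_size blocks from file_object until EOF."""
    while True:
        chunk = file_object.read(chunk_size)
        if not chunk:
            break
        yield chunk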
def create_empty_data_object(self):
    data_object = DataObject.create(None)
    return data_object.uuid
def create_data_object(self, raw_data, metadata=None, create_ts=None, acl=None):
    data_object = DataObject.create(raw_data,
                                    settings.COMPRESS_UPLOADS,
                                    metadata=metadata,
                                    create_ts=create_ts,
                                    acl=acl)
    return data_object.uuid