Esempio n. 1
0
    def receive_data_chunk(self, raw_data, start):
        """
        Store one uploaded chunk and fold it into the running hash.

        Called with chunks of 1Mb (except for the last chunk).  The first
        chunk, received while ``self.uuid`` is still unset, creates a new
        DataObject whose uuid is remembered on the handler; every later
        chunk is appended to that object under the current
        ``self.seq_number``.  The sequence number is incremented after
        each chunk and ``self.hasher`` is updated with the chunk content.

        :param raw_data: content of the chunk.
        :param start: not used by this implementation.
        :return: always None.  NOTE(review): the signature matches Django's
            ``FileUploadHandler.receive_data_chunk``, where returning None
            keeps later handlers from seeing the chunk -- confirm that this
            is the intent.
        """
        # Call form of print: valid on Python 3 and prints the same text on
        # Python 2 (single argument), unlike the former print statement.
        print(u"Received {} bytes - {}".format(len(raw_data), self.seq_number))
        if not self.uuid:
            data_object = DataObject.create(raw_data,
                                            settings.COMPRESS_UPLOADS)
            self.uuid = data_object.uuid
        else:
            DataObject.append_chunk(self.uuid, raw_data, self.seq_number,
                                    settings.COMPRESS_UPLOADS)
        self.seq_number += 1

        self.hasher.update(raw_data)

        return None
Esempio n. 2
0
    def process_create_entry_work(self, resc_dict, context, is_reference):
        """
        Create (or fetch) the Resource described by ``resc_dict`` and index it.

        For a reference entry the resource URL points at the original file
        (``file://<local_ip><path>/<entry>``).  Otherwise the file at
        ``context['fullpath']`` is read chunk by chunk into a DataObject and
        the URL becomes ``cassandra://<data object uuid>``.

        The Resource is then created; if it already exists
        (``ResourceConflictError``) the existing record is fetched instead
        and its url is updated when it differs.  Finally the search index
        entry for the resource is reset and rebuilt.

        :param resc_dict: dict providing 'container', 'name', 'mimetype',
            'size', 'read_access', 'write_access' and, for non-reference
            entries, 'compress'.
        :param context: dict providing 'user' plus either 'local_ip', 'path'
            and 'entry' (reference) or 'fullpath' (non-reference).
        :param is_reference: when true, link to the file instead of copying
            its content.
        :return: always None.  Returns early, without creating a Resource,
            when a non-reference file yields no data chunk.
        """
        # MOSTLY the resource will not exist... so start by calculating the
        # URL and trying to insert the entire record.
        if is_reference:
            url = "file://{}{}/{}".format(context['local_ip'], context['path'],
                                          context['entry'])
        else:
            data_uuid = None
            # Binary mode: the content is stored verbatim, so it must not go
            # through newline translation or text decoding.
            with open(context['fullpath'], 'rb') as f:
                for seq_number, chk in enumerate(read_in_chunks(f)):
                    if seq_number == 0:
                        # The first chunk creates the object...
                        data_object = DataObject.create(
                            chk, resc_dict['compress'])
                        data_uuid = data_object.uuid
                    else:
                        # ...the following ones are appended in order.
                        DataObject.append_chunk(data_uuid, chk, seq_number,
                                                resc_dict['compress'])
            if not data_uuid:
                # Nothing was stored (e.g. empty file): no resource to create.
                return None
            url = "cassandra://{}".format(data_uuid)

        try:
            # OK -- try to insert ( create ) the record...
            t1 = time.time()

            resource = Resource.create(container=resc_dict['container'],
                                       name=resc_dict['name'],
                                       url=url,
                                       mimetype=resc_dict['mimetype'],
                                       username=context['user'],
                                       size=resc_dict['size'])
            resource.create_acl_list(resc_dict['read_access'],
                                     resc_dict['write_access'])

            msg = 'Resource {} created --> {}'.format(resource.get_name(),
                                                      time.time() - t1)
            logger.info(msg)
        except ResourceConflictError:
            # If the create fails, the record already exists... so retrieve it.
            # NOTE(review): assumes Resource.find always returns a resource
            # here -- confirm it cannot return None.
            t1 = time.time()
            resource = Resource.find(
                merge(resc_dict['container'], resc_dict['name']))
            msg = "{} ::: Fetch Object -> {}".format(resource.get_name(),
                                                     time.time() - t1)
            logger.info(msg)

        # If the url is not correct then update.
        # TODO: if the url is a block set that is stored internally then
        # reduce its count so that it can be GC'd.
        if resource.url != url:
            t2 = time.time()
            resource.update(url=url)
            t3 = time.time()
            msg = "{} ::: update -> {}".format(resource.get_name(), t3 - t2)
            logger.info(msg)

        # Rebuild the search index entry for this resource.
        SearchIndex.reset(resource.uuid)
        SearchIndex.index(resource, ['name', 'metadata'])
Esempio n. 3
0
 def create_empty_data_object(self):
     """Create a DataObject from ``None`` and return its uuid."""
     return DataObject.create(None).uuid
Esempio n. 4
0
 def create_data_object(self, raw_data, metadata=None, create_ts=None, acl=None):
     """
     Create a DataObject holding ``raw_data`` and return its uuid.

     ``settings.COMPRESS_UPLOADS`` is passed as the second positional
     argument; ``metadata``, ``create_ts`` and ``acl`` are forwarded
     unchanged as keyword arguments.
     """
     extra_fields = {
         'metadata': metadata,
         'create_ts': create_ts,
         'acl': acl,
     }
     created = DataObject.create(raw_data, settings.COMPRESS_UPLOADS,
                                 **extra_fields)
     return created.uuid