class Checkpoint(CustomDjangoCassandraModel):
    """Checkpoint row identified by ``source`` and ``key``.

    The payload is kept in ``data`` as JSON text; use ``get_data`` and
    ``set_data`` to read and write it as Python objects.
    """

    __table_name__ = "davinci_checkpoint"

    source = columns.Text(partition_key=True)
    key = columns.Text(primary_key=True)

    # Creation date of the entity and date of its last modification
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Controls if the entity is active or has been deleted
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    # JSON document serialized as text. Optional, so it may be None.
    data = columns.Text(required=False)

    class Meta:
        get_pk_field = "source"

    def get_data(self):
        """Return the decoded JSON payload.

        Returns:
            The object decoded from ``data``, or ``None`` when no payload
            has been stored (``data`` is an optional column).
        """
        # json.loads(None) raises TypeError; an unset payload is a valid
        # state for this model, so report it as "no data" instead.
        if self.data is None:
            return None
        return json.loads(self.data)

    def set_data(self, json_data):
        """Serialize ``json_data`` to JSON text and store it in ``data``.

        Keys are sorted and the output is indented by 4 spaces. Values the
        json module cannot encode are passed to the module-level
        ``default`` callable.
        """
        self.data = json.dumps(
            json_data, sort_keys=True, indent=4, default=default)
class BovespaAccount(CustomDjangoCassandraModel):
    """One account line of a financial statement presented by a company."""

    __table_name__ = "bovespa_account"

    # ID of the company in B3
    ccvm = columns.Text(partition_key=True)

    # Date of the account value
    period = columns.Date(primary_key=True, clustering_order="DESC")

    # The version of the account. The company could present different
    # versions of the files
    version = columns.Text(primary_key=True, clustering_order="DESC")

    # The account number. Ex. "1.01.01"
    number = columns.Text(primary_key=True, max_length=15)

    # Financial type account (instant/individual or consolidated)
    financial_info_type = columns.Text(primary_key=True, max_length=15)

    # Type of financial statement
    balance_type = columns.Text(max_length=15, required=True)

    # The account name. Ex. "Receita de Venda de Bens e/ou Serviços"
    name = columns.Text(max_length=200, required=True)

    # Company sector
    sector = columns.Integer(default=0, required=True)

    # The amount of the account.
    # NOTE(review): ``Decimal`` is not ``columns.Decimal``; presumably a
    # project column type accepting max_digits/decimal_places - confirm.
    amount = Decimal(required=True, max_digits=20, decimal_places=2)

    # The comments. Used for "DFP_BALANCE_DMPL" accounts, explaining the
    # meaning of the account: Shareholder's Equity, Accrued Profit/Loss, etc.
    comments = columns.Text()

    # Creation date of the entity and date of its last modification
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Controls if the entity is active or has been deleted
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    class Meta:
        get_pk_field = "ccvm"

    def validate(self):
        """Run the base validation, then check the two enumerated fields.

        Raises:
            ValidationError: if ``financial_info_type`` is not one of
                FINANCIAL_INFO_TYPES or ``balance_type`` is not one of
                BALANCE_TYPES.
        """
        super().validate()

        if self.financial_info_type not in FINANCIAL_INFO_TYPES:
            message = (
                "Invalid financial type [{0}] for account "
                "[{1} {2}]. Valid types are: {3}."
            ).format(
                self.financial_info_type,
                self.number,
                self.name,
                FINANCIAL_INFO_TYPES,
            )
            raise ValidationError(message)

        if self.balance_type not in BALANCE_TYPES:
            message = "Invalid balance type [{0}]. Valid types are: {1}.".format(
                self.balance_type, BALANCE_TYPES)
            raise ValidationError(message)
class UserVideosModel(Model):
    """Model class that maps to the user_videos table.

    Rows are partitioned by user and clustered by ``added_date``
    (descending) and then ``video_id`` (ascending).
    """

    __table_name__ = "user_videos"

    # Partition key; stored in the "userid" column
    user_id = columns.UUID(primary_key=True, db_field="userid")

    # Clustering keys, in declaration order
    added_date = columns.DateTime(primary_key=True, clustering_order="DESC")
    video_id = columns.UUID(
        primary_key=True,
        clustering_order="ASC",
        db_field="videoid",
    )

    name = columns.Text()
    preview_image_location = columns.Text()
class TaskMoreInfo(UserType):
    """User-defined type carrying extra information attached to a task."""

    __type_name__ = "task_more_info"

    # Where the extra information was created from
    source = columns.Text()

    created_at = columns.DateTime()

    # Details about the error
    details = columns.Text()
class BaseEntity(CustomDjangoCassandraModel):
    """Abstract base holding the fields shared by all managed entities."""

    __abstract__ = True

    # A unique identifier of the entity (random UUID by default)
    _id = columns.UUID(primary_key=True, default=uuid.uuid4)

    # Creation date of the entity and date of its last modification
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Controls if the entity is active or has been deleted
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    class Meta:
        get_pk_field = "_id"
class VideosModel(Model):
    """Model class that maps to the videos table."""

    __table_name__ = "videos"

    # Primary key; stored in the "videoid" column
    video_id = columns.UUID(primary_key=True, db_field="videoid")

    # Stored in the "userid" column
    user_id = columns.UUID(db_field="userid")

    name = columns.Text()
    description = columns.Text()
    location = columns.Text()
    location_type = columns.Integer()
    preview_image_location = columns.Text()
    tags = columns.Set(columns.Text)
    added_date = columns.DateTime()
class BovespaCompany(CustomDjangoCassandraModel):
    """A company registered in B3, keyed by its ``ccvm`` identifier."""

    __table_name__ = "bovespa_company"

    # Constant partition key ("company" by default) that forces all the
    # values to reside in the same node of the cluster
    entity_type = columns.Text(partition_key=True, default="company")

    # ID of the company in B3
    ccvm = columns.Text(primary_key=True)

    # Creation date of the entity and date of its last modification
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Controls if the entity is active or has been deleted
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    company_name = columns.Text(required=True)
    cnpj = columns.Text()
    company_type = columns.Text()
    situation = columns.Text(required=True)
    granted_date = columns.Date()
    canceled_date = columns.Date()

    class Meta:
        get_pk_field = "entity_type"

    def validate(self):
        """Run the base validation, then check ``situation``.

        Raises:
            ValidationError: if ``situation`` is not one of SITUATIONS.
        """
        super().validate()

        if self.situation in SITUATIONS:
            return

        message = "Invalid situation [{0}]. Valid situations are: {1}.".format(
            self.situation, SITUATIONS)
        raise ValidationError(message)
def test_mintimeuuid_function(self):
    """Check the CQL and context generated for a MinTimeUUID helper.

    The clause must render the function call around the placeholder, and
    the context must hold the datetime converted to its database form.
    """
    moment = datetime.now()
    clause = WhereClause(
        'time', EqualsOperator(), functions.MinTimeUUID(moment))
    clause.set_context_id(5)

    rendered = str(clause)
    self.assertEqual(rendered, '"time" = MinTimeUUID(%(5)s)')

    context = {}
    clause.update_context(context)
    expected = {'5': columns.DateTime().to_database(moment)}
    self.assertEqual(context, expected)
class AllDatatypes(UserType):
    """User-defined type with one field per column type listed below."""

    # Text-like and binary types
    a = columns.Ascii()
    k = columns.Text()
    c = columns.Blob()
    i = columns.Inet()

    # Numeric types
    b = columns.BigInt()
    f = columns.Decimal()
    g = columns.Double()
    h = columns.Float()
    j = columns.Integer()
    n = columns.VarInt()

    # Boolean, temporal and identifier types
    d = columns.Boolean()
    e = columns.DateTime()
    l = columns.TimeUUID()
    m = columns.UUID()
class AllDatatypesModel(Model):
    """Model with an integer key and one column per listed column type."""

    # Sole primary key column
    id = columns.Integer(primary_key=True)

    # One non-key column for each type
    a = columns.Ascii()
    b = columns.BigInt()
    c = columns.Blob()
    d = columns.Boolean()
    e = columns.DateTime()
    f = columns.Decimal()
    g = columns.Double()
    h = columns.Float()
    i = columns.Inet()
    j = columns.Integer()
    k = columns.Text()
    l = columns.TimeUUID()
    m = columns.UUID()
    n = columns.VarInt()
class TreeEntry(Model):
    """Entry of a container (directory) in the tree.

    Rows are partitioned by ``container`` and clustered by ``name``. The
    ``container_*`` columns are static, i.e. shared by every entry of the
    same container.
    """

    # Partitioned by container, clustered by name, so all files for a
    # directory are in the same bucket and share the single instance of the
    # static container data
    container = columns.Text(partition_key=True)
    name = columns.Text(primary_key=True, partition_key=False)

    # The following set of columns are shared between all entries with the
    # same container name. i.e. it removes the need for a separate container
    # table, removes the need for extra lookups and avoids the container /
    # objects getting out of sync
    #
    # It also facilitates _some_ directory operations, e.g. removal.
    #
    # Renaming is still slow because the container and the name are primary
    # keys, so you have to create a new record and delete the old one...
    # It is suggested to use the batch system to make such an operation (more
    # or less) atomic.
    container_metadata = columns.Map(columns.Text, columns.Text, static=True)
    container_uuid = columns.Text(default=default_cdmi_id, static=True)
    container_create_ts = columns.DateTime(static=True)
    container_modified_ts = columns.DateTime(static=True)
    container_acl = columns.Map(columns.Text, columns.UserDefinedType(Ace),
                                static=True)

    # This is the actual directory entry per-se, i.e. unique per name....
    # As with a conventional filesystem this is simply a reference to the
    # 'real' data where ACLs, system metadata &c are held.
    # per-record, but only for externals (see DataObject)
    metadata = columns.Map(columns.Text, columns.Text)
    create_ts = columns.DateTime(default=datetime.now)
    modified_ts = columns.DateTime()
    acl = columns.Map(columns.Text, columns.UserDefinedType(Ace))
    mimetype = columns.Text()
    # Use the url schema (file:// , cdmi:// &c ) to route the request...
    # Only cdmi:// does anything everything else results in a redirect
    url = columns.Text()
    uuid = columns.Text()

    @staticmethod
    def _keyspace_session():
        """Return the driver session switched to the configured keyspace.

        The keyspace is read from the 'KEYSPACE' configuration entry and
        falls back to 'indigo'.
        """
        cfg = get_config(None)
        session = connection.get_session()
        session.set_keyspace(cfg.get('KEYSPACE', 'indigo'))
        return session

    def add_default_acl(self):
        """Add read access to all authenticated users"""
        self.create_container_acl_list(["AUTHENTICATED@"], [])

    @classmethod
    def create(cls, **kwargs):
        """Create a new entry from the given column values.

        NOTE: an earlier revision moved ``mimetype`` into the metadata as
        ``cdmi_mimetype`` before saving; that logic is disabled, so this
        simply delegates to the base implementation.
        """
        return super(TreeEntry, cls).create(**kwargs)

    def create_container_acl(self, acl_cql):
        """Replace the static acl with the given cql string"""
        session = self._keyspace_session()
        # acl_cql is a CQL map literal built by the acl_*_to_cql helpers; it
        # is spliced into the statement text, not bound as a parameter.
        query = SimpleStatement(u"""UPDATE tree_entry SET container_acl={} WHERE container=%s""".format(acl_cql))
        session.execute(query, (self.container, ))

    def create_container_acl_cdmi(self, cdmi_acl):
        """Create static ACL from a cdmi object (list of dict)"""
        cql_string = acl_cdmi_to_cql(cdmi_acl)
        self.create_container_acl(cql_string)

    def create_container_acl_list(self, read_access, write_access):
        """Create static ACL from lists of group uuids"""
        cql_string = acl_list_to_cql(read_access, write_access)
        self.create_container_acl(cql_string)

    def create_entry_acl(self, acl_cql):
        """Replace the acl with the given cql string"""
        session = self._keyspace_session()
        query = SimpleStatement(u"""UPDATE tree_entry SET acl={} WHERE container=%s and name=%s""".format(acl_cql))
        session.execute(query, (self.container, self.name, ))

    def create_entry_acl_list(self, read_access, write_access):
        """Create entry ACL from lists of group uuids"""
        cql_string = acl_list_to_cql(read_access, write_access)
        self.create_entry_acl(cql_string)

    def create_entry_acl_cdmi(self, cdmi_acl):
        """Create entry ACL from a cdmi object (list of dict)"""
        cql_string = acl_cdmi_to_cql(cdmi_acl)
        self.create_entry_acl(cql_string)

    def path(self):
        """Get the full path of the specific entry"""
        return merge(self.container, self.name)

    def update(self, **kwargs):
        """Update columns of this entry, one UPDATE statement per column.

        Args:
            **kwargs: column name -> new value.

        Returns:
            This instance. Its in-memory attributes are not modified.
        """
        session = self._keyspace_session()
        for column, value in kwargs.items():
            # NOTE(review): the column name is interpolated into the CQL
            # text (identifiers cannot be bound); callers must only pass
            # trusted column names.
            if column in static_fields:
                # For static fields we can't use the name in the where
                # condition
                query = SimpleStatement(u"""UPDATE tree_entry SET {}=%s WHERE container=%s""".format(column))
                session.execute(query, (value, self.container))
            else:
                query = SimpleStatement(u"""UPDATE tree_entry SET {}=%s WHERE container=%s and name=%s""".format(column))
                session.execute(query, (value, self.container, self.name))
        return self

    def update_container_acl(self, acl_cql):
        """Update the static acl with the given cql string"""
        session = self._keyspace_session()
        query = SimpleStatement(u"""UPDATE tree_entry SET container_acl=container_acl+{} WHERE container=%s""".format(acl_cql))
        session.execute(query, (self.container, ))

    def update_container_acl_cdmi(self, cdmi_acl):
        """Update static ACL from a cdmi object (list of dict)"""
        cql_string = acl_cdmi_to_cql(cdmi_acl)
        self.update_container_acl(cql_string)

    def update_container_acl_list(self, read_access, write_access):
        """Update static ACL from lists of group uuids"""
        cql_string = acl_list_to_cql(read_access, write_access)
        self.update_container_acl(cql_string)

    def update_entry_acl(self, acl_cql):
        """Update the acl with the given cql string"""
        session = self._keyspace_session()
        query = SimpleStatement(u"""UPDATE tree_entry SET acl=acl+{} WHERE container=%s and name=%s""".format(acl_cql))
        session.execute(query, (self.container, self.name, ))

    def update_entry_acl_list(self, read_access, write_access):
        """Update entry ACL from lists of group uuids"""
        cql_string = acl_list_to_cql(read_access, write_access)
        self.update_entry_acl(cql_string)

    def update_entry_acl_cdmi(self, cdmi_acl):
        """Update entry ACL from a cdmi object (list of dict)"""
        cql_string = acl_cdmi_to_cql(cdmi_acl)
        self.update_entry_acl(cql_string)
class BovespaCompanyFile(CustomDjangoCassandraModel):
    """A file delivered by a company for a fiscal date, per doc type/version."""

    __table_name__ = "bovespa_company_file"

    # ID of the company in B3
    ccvm = columns.Text(partition_key=True)

    # The type of document
    doc_type = columns.Text(max_length=3, primary_key=True)

    # The fiscal date the file refers to.
    fiscal_date = columns.Date(primary_key=True, clustering_order="DESC")

    # The file version. The company could present different versions of
    # the files for a specific fiscal period
    version = columns.Text(primary_key=True, clustering_order="DESC")

    status = columns.Text(default=FILE_STATUS_NOT_PROCESSED)

    # Creation date of the entity and date of its last modification
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Controls if the entity is active or has been deleted
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    # The protocol code associated with the file
    protocol = columns.Text(required=True)

    # When the documents were delivered
    delivery_date = columns.DateTime(required=True)

    # Why the files were delivered
    delivery_type = columns.Text(required=True)

    # The official name of the company
    company_name = columns.Text(required=True)

    # The company CNPJ
    company_cnpj = columns.Text(required=True)

    # The fiscal period decomposed into year, quarter, month, ...

    # The year of the balance sheet. Ex. 2015
    fiscal_date_y = columns.SmallInt()

    # The day of the year of the balance sheet.
    # NOTE(review): the original example here was "2015", apparently copied
    # from the year field.
    fiscal_date_yd = columns.SmallInt()

    # The quarter of the balance sheet. Ex. 1
    fiscal_date_q = columns.SmallInt()

    # The month of the balance sheet. Ex. 1
    fiscal_date_m = columns.SmallInt()

    # The day of the month of the balance sheet. Ex. 1
    fiscal_date_md = columns.SmallInt()

    # The week of the year. Ex. 1
    fiscal_date_w = columns.SmallInt()

    # The day of the week. Ex. 1
    fiscal_date_wd = columns.SmallInt()

    # Combination of YEAR-QUARTER in the form of 2018-Q1
    # That allows us to facet results per quarter
    fiscal_date_yq = columns.Text()

    # Combination of YEAR-MONTH in the form of 2018-01
    # That allows us to facet results per month
    fiscal_date_ym = columns.Text()

    # The url to the file that contains the information in bovespa. This
    # will be the url we will use to download the file from the source
    source_url = columns.Text(required=True)

    # The url to the file that contains the information. It is a url to a
    # repository of our own. The file has already been downloaded and
    # persisted into a custom repository. We do not need to access the source
    file_url = columns.Text()

    # The internal name of the file
    file_name = columns.Text()

    # The extension of the filename
    file_extension = columns.Text()

    # Each key represents the name of the file in the ENER archive.
    # The value is the original content converted into JSON - when possible -
    # and persisted as Text
    # content = KeyEncodedMap(
    #     key_type=columns.Text, value_type=columns.Text)

    class Meta:
        get_pk_field = "ccvm"

    def validate(self):
        """Run the base validation, then check ``doc_type`` and ``status``.

        Raises:
            ValidationError: if ``doc_type`` is not one of DOC_TYPES or
                ``status`` is not one of FILE_STATUSES.
        """
        super().validate()

        if self.doc_type not in DOC_TYPES:
            message = "Invalid doc type [{0}]. Valid types are: {1}.".format(
                self.doc_type, DOC_TYPES)
            raise ValidationError(message)

        if self.status not in FILE_STATUSES:
            message = (
                "Invalid file status [{0}]. Valid statuses are: {1}."
            ).format(self.status, FILE_STATUSES)
            raise ValidationError(message)
# Values accepted for the ``situation`` field
SITUATIONS = [SITUATION_CANCELLED, SITUATION_GRANTED]


# NOTE: this is a code-generation template. The ``{{ app_name | ... }}``
# placeholders are substituted when the template is rendered, so this is not
# valid Python until then.
class {{ app_name | capfirst }}Resource(CustomDjangoCassandraModel):

    __table_name__ = "{{ app_name | lower }}_resource"

    # A unique identifier of the entity; a random UUID (uuid4) by default and
    # the partition key of the table
    _id = columns.UUID(partition_key=True, default=uuid.uuid4)

    # The owner of the data. Who owns the company data persisted
    user = columns.Text(primary_key=True)

    # Creation date of the entity and date of its last modification
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Controls if the entity is active or has been deleted
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    crawl_param = columns.Integer(required=True)

    name = columns.Text(required=True)

    # presumably one of SITUATIONS; no validation is visible here - confirm
    situation = columns.Text(required=True)

    short_description = columns.Text()

    long_description = columns.Text()
class ApiAccess(CustomDjangoCassandraModel):
    """A model to persist all the accesses made through the API."""

    __table_name__ = "caravaggio_api_access"

    # The combination of year and month for the timestamp associated with
    # the request. Ex. 201901. Used as the partition key: each partition
    # contains the access logs made during the month.
    year_month = columns.Text(partition_key=True)

    # Time component used to sort data within one partition (descending).
    # NOTE(review): the original note says microseconds while the field
    # name suggests milliseconds - confirm the unit.
    time_ms = columns.Integer(primary_key=True, clustering_order="DESC")

    # Random UUID (uuid4 by default), NOT a time-based UUID1
    id = columns.UUID(primary_key=True, default=uuid.uuid4)

    # The user that made the request.
    user = columns.UUID(required=True)

    # When the entity was created
    created_at = columns.DateTime(default=timezone.now)

    # The IP address of the user doing the request
    remote_address = InetAddress(required=True, index=True)

    # The name of the host that is processing the request
    server_hostname = columns.Text(required=True)

    # The method of the request
    request_method = columns.Text(required=True)

    # The absolute path of the request
    request_path = columns.Text(required=True)

    # We save all the query params informed in the request as a map.
    # We use caravaggio KeyEncodedMap that appends the field name
    # to each of the keys in order to make them indexable by the
    # Search Indexer.
    request_query_params = KeyEncodedMap(
        key_type=columns.Text,
        value_type=columns.Text,
    )

    # The body of the request made by the user
    request_body = columns.Bytes(required=True)

    response_status = columns.SmallInt(required=True)

    # The JSON the server responded to the client. If the response is not a
    # JSON response, the body will be replaced by a <<<Streaming>>> text if
    # the request is in streaming, or <<<Not JSON>>> in other case.
    response_body = columns.Text(required=True)

    run_time = columns.Integer(required=True)

    latitude = columns.Float()
    longitude = columns.Float()
    coordinates = columns.Text()

    class Meta:
        get_pk_field = "year_month"

    def validate(self):
        """Run the base class validation; no extra checks are added."""
        super().validate()
class DateTimeQueryTestModel(Model):
    """Model keyed by ``user`` and ``day`` for datetime query tests."""

    # Primary key columns, in declaration order
    user = columns.Integer(primary_key=True)
    day = columns.DateTime(primary_key=True)

    data = columns.Text()
class Task(CustomDjangoCassandraModel):
    """
    Represents a task that could be an on demand task or a batch task.

    Args:
        task_id: the task id, which is the partition key.
        user: the user that asked for the task, if it is an on demand task.
        created_at: the date of the creation of the task.
        updated_at: the date of the last update of the task.
        is_deleted: controls if the data is deleted.
        status: represents the current status of the task, could be:
            - 0 (Created)
            - 1 (Queued)
            - 2 (In Progress)
            - 3 (Finished)
            - 4 (Faulty)
            - 5 (Unknown)
        kind: the name of the crawler that will execute the task.
        params: the set of params used to execute the crawler command,
            saved as Text.
        params_map: exactly the same content as `params` but saved in a way
            that we can search using solr (KeyEncodedMap).
        options: the set of options used to guide the crawler during the
            execution, saved as Text.
        options_map: exactly the same content as `options` but saved in a
            way that we can search using solr (KeyEncodedMap).
        times_performed: keeps track of how many times the task was run.
        type: the type of the task, could be OnDemand(1) or Batch(2)
    """

    __table_name__ = "davinci_task"

    _cassandra_consistency_level_read = ConsistencyLevel.ONE
    _cassandra_consistency_level_write = ConsistencyLevel.ALL

    # Partition key; a random UUID (uuid4) by default
    task_id = columns.UUID(partition_key=True, default=uuid.uuid4)

    # The owner of the data. Who owns the company data persisted
    user = columns.Text()

    # Creation date (clustering key, newest first) and last modification date
    created_at = columns.DateTime(
        default=timezone.now,
        primary_key=True,
        clustering_order="DESC",
    )
    updated_at = columns.DateTime(default=timezone.now)

    # Controls if the entity is active or has been deleted
    is_deleted = columns.Boolean(default=False)

    status = columns.SmallInt(default=STATUS_CREATED)

    kind = columns.Text(required=True)

    params_map = KeyEncodedMap(
        key_type=columns.Text, value_type=columns.Text)
    params = columns.Text(required=True)

    options_map = KeyEncodedMap(
        key_type=columns.Text, value_type=columns.Text)
    options = columns.Text(required=False)

    times_performed = columns.SmallInt(default=0)

    type = columns.SmallInt(default=ON_DEMAND_TASK)

    more_info = columns.List(value_type=UserDefinedType(TaskMoreInfo))

    differences_from_last_version = columns.Text()

    inserted_fields = columns.List(value_type=columns.Text)
    updated_fields = columns.List(value_type=columns.Text)
    deleted_fields = columns.List(value_type=columns.Text)
    changed_fields = columns.List(value_type=columns.Text)

    logging_task = columns.Boolean(default=False)

    class Meta:
        get_pk_field = "task_id"

    def validate(self):
        """Run the base validation, then check ``type`` and ``status``.

        Raises:
            ValidationError: if ``type`` is not one of ALL_TASK_TYPES or
                ``status`` is not one of ALL_STATUS.
        """
        super().validate()

        if self.type not in ALL_TASK_TYPES:
            message = "Invalid task type [{0}]. Valid types are: {1}.".format(
                self.type, ALL_TASK_TYPES)
            raise ValidationError(message)

        if self.status not in ALL_STATUS:
            message = (
                "Invalid task status [{0}]. Valid status are: {1}."
            ).format(self.status, ALL_STATUS)
            raise ValidationError(message)
class Company(CustomDjangoCassandraModel):
    """A publicly traded company."""

    __table_name__ = "company"

    # A unique identifier of the entity
    _id = columns.UUID(partition_key=True, default=uuid.uuid4)

    # The owner of the data. Who owns the company data persisted
    user = columns.Text(primary_key=True)

    # Creation date of the entity and date of its last modification
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Controls if the entity is active or has been deleted
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    # The name of the company
    name = columns.Text(required=True)

    # A short description about the company
    short_description = columns.Text()

    # The company domain (e.g. preseries.com)
    domain = columns.Text(max_length=50)

    # The date when the company was founded
    foundation_date = columns.Date()

    # The date of the latest funding round
    last_round = columns.Date()

    # NOTE(review): the original comment read "The total number of funding
    # rounds", which does not match a Text column named round_notes -
    # presumably free-text notes about the rounds; confirm.
    round_notes = columns.Text()

    # Country of the company
    # ISO 3166-1 alpha 3 code
    country_code = columns.Text(min_length=3, max_length=3)

    # The stock trading symbol
    stock_symbol = columns.Text()

    # Contact email of the company
    contact_email = columns.Text()

    # The IDs of the founders of the company
    founders = columns.List(value_type=columns.UUID)

    # Address of the headquarters of the company
    address = UserDefinedType(Address)

    # A list of specialties of the company
    specialties = columns.List(value_type=columns.Text)

    # The counters of the latest followers in twitter
    # (example of list of integers)
    latest_twitter_followers = columns.List(value_type=columns.Integer)

    # A field that represents a map of key-value pairs.
    # We use caravaggio KeyEncodedMap that appends the field name
    # to each of the keys in order to make them indexable by the
    # Search Indexer.
    websites = KeyEncodedMap(
        key_type=columns.Text,
        value_type=columns.Text,
    )

    # A field that represents a raw JSON with the crawler configurations,
    # each key is a reference to a crawler
    crawler_config = columns.Text()

    # A field that represents a raw JSON content
    extra_data = columns.Text()

    latitude = columns.Float()
    longitude = columns.Float()
    coordinates = columns.Text()

    class Meta:
        get_pk_field = "_id"

    def validate(self):
        """Run the base validation and reject the reserved name "test".

        Raises:
            ValidationError: if ``name`` equals "test".
        """
        super().validate()

        name_is_reserved = self.name == "test"
        if name_is_reserved:
            raise ValidationError('The company name cannot be test')
class DataObject(Model):
    """ The DataObject represents actual data objects, the tree structure
    merely references it.

    Each partition key gathers together all the data under one partition (the
    CDMI ID) and the object properties are represented using static columns
    (one instance per partition).
    It has a similar effect to a join to a properties table, except the
    properties are stored with the rest of the partition.

    This is an 'efficient' model optimised for Cassandra's quirks.

    N.B. Cassandra compresses its table data by default (LZ4 in current
    releases), so we get that for free."""

    # The 'name' of the object
    uuid = columns.Text(default=default_cdmi_id, required=True,
                        partition_key=True)

    #####################
    # These columns are the same (shared) between all entries with same id
    # (they use the static attribute , [ like an inode or a header ])
    #####################
    checksum = columns.Text(static=True)
    size = columns.BigInt(default=0, static=True)
    metadata = columns.Map(columns.Text, columns.Text, static=True)
    mimetype = columns.Text(static=True)
    alt_url = columns.Set(columns.Text, static=True)
    create_ts = columns.DateTime(default=datetime.now, static=True)
    modified_ts = columns.DateTime(default=datetime.now, static=True)
    type = columns.Text(required=False, static=True, default='UNKNOWN')
    acl = columns.Map(columns.Text, columns.UserDefinedType(Ace), static=True)
    # A general aid to integrity ...
    treepath = columns.Text(static=True, required=False)

    #####################
    # And 'clever' bit -- 'here' data, These will be the only
    # per-record-fields in the partition (i.e. object)
    # So the datastructure looks like a header , with an ordered list of blobs
    #####################
    # This is the 'clustering' key...
    sequence_number = columns.Integer(primary_key=True, partition_key=False)
    blob = columns.Blob(required=False)
    compressed = columns.Boolean(default=False)
    #####################

    @staticmethod
    def _keyspace_session():
        """Return the driver session switched to the configured keyspace.

        The keyspace is read from the 'KEYSPACE' configuration entry and
        falls back to 'indigo'.
        """
        cfg = get_config(None)
        session = connection.get_session()
        session.set_keyspace(cfg.get('KEYSPACE', 'indigo'))
        return session

    @staticmethod
    def _deflate(raw_data):
        """Return raw_data packed as a zip archive with one member, "data"."""
        from io import BytesIO

        # A zip archive is binary, so the buffer must be a bytes buffer.
        buf = BytesIO()
        with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as archive:
            archive.writestr("data", raw_data)
        # The archive is only complete once closed, so read the buffer after
        # leaving the ``with`` block.
        return buf.getvalue()

    @staticmethod
    def _inflate(blob):
        """Return the "data" member of a zip archive produced by _deflate."""
        from io import BytesIO

        with zipfile.ZipFile(BytesIO(blob), "r") as archive:
            return archive.read("data")

    @classmethod
    def append_chunk(cls, uuid, raw_data, sequence_number, compressed=False):
        """Create a new blob for an existing data_object.

        Args:
            uuid: id of the data object the chunk belongs to.
            raw_data: content of the chunk.
            sequence_number: position of the chunk within the object.
            compressed: when true the content is zipped before being stored.

        Returns:
            The saved DataObject row.
        """
        data = cls._deflate(raw_data) if compressed else raw_data
        data_object = cls(uuid=uuid,
                          sequence_number=sequence_number,
                          blob=data,
                          compressed=compressed)
        data_object.save()
        return data_object

    def chunk_content(self):
        """Yield the content of this object one chunk at a time.

        Chunks flagged as compressed are unzipped first; only the content
        itself is yielded.
        """
        for entry in DataObject.objects.filter(uuid=self.uuid):
            if entry.compressed:
                yield self._inflate(entry.blob)
            else:
                yield entry.blob

    @classmethod
    def create(cls, raw_data, compressed=False, metadata=None, create_ts=None,
               acl=None):
        """Create a new data object whose first chunk holds raw_data.

        Args:
            raw_data: initial data, stored with sequence number 0.
            compressed: when true the data is zipped before being stored.
            metadata: optional metadata map.
            create_ts: optional creation time; defaults to the current time.
            acl: optional ACL map.

        Returns:
            The created DataObject row.
        """
        new_id = default_cdmi_id()
        now = datetime.now()
        data = cls._deflate(raw_data) if compressed else raw_data

        kwargs = {
            "uuid": new_id,
            "sequence_number": 0,
            "blob": data,
            "compressed": compressed,
            "modified_ts": now,
            "create_ts": create_ts if create_ts else now,
        }
        if metadata:
            kwargs['metadata'] = metadata
        if acl:
            kwargs['acl'] = acl
        return super(DataObject, cls).create(**kwargs)

    def create_acl(self, acl_cql):
        """Replace the static acl with the given cql string"""
        session = self._keyspace_session()
        # acl_cql is a CQL map literal built by the acl_*_to_cql helpers; it
        # is spliced into the statement text, not bound as a parameter.
        query = SimpleStatement(u"""UPDATE data_object SET acl = {} WHERE uuid=%s""".format(acl_cql))
        session.execute(query, (self.uuid,))

    def create_acl_cdmi(self, cdmi_acl):
        """Create ACL from a cdmi object (list of dict), replacing the
        existing one"""
        cql_string = acl_cdmi_to_cql(cdmi_acl)
        self.create_acl(cql_string)

    def create_acl_list(self, read_access, write_access):
        """Create ACL from two lists of groups id, existing ACL are replaced"""
        cql_string = acl_list_to_cql(read_access, write_access)
        self.create_acl(cql_string)

    @classmethod
    def delete_id(cls, uuid):
        """Delete all blobs for the specified uuid"""
        session = cls._keyspace_session()
        query = SimpleStatement("""DELETE FROM data_object WHERE uuid=%s""")
        session.execute(query, (uuid,))

    @classmethod
    def find(cls, uuid):
        """Find an object by uuid.

        Returns:
            The first row stored for ``uuid``, or None when there is none.
        """
        # first() already returns None for an empty result, so there is no
        # need to evaluate the queryset beforehand.
        return cls.objects.filter(uuid=uuid).first()

    def update(self, **kwargs):
        """Update columns of this object, one UPDATE statement per column.

        Args:
            **kwargs: column name -> new value.

        Returns:
            This instance. Its in-memory attributes are not modified.
        """
        session = self._keyspace_session()
        for column, value in kwargs.items():
            # NOTE(review): the column name is interpolated into the CQL
            # text (identifiers cannot be bound); callers must only pass
            # trusted column names.
            if column in static_fields:
                # For static fields we can't use the name in the where
                # condition
                query = SimpleStatement("""UPDATE data_object SET {}=%s WHERE uuid=%s""".format(column))
                session.execute(query, (value, self.uuid))
            else:
                query = SimpleStatement("""UPDATE data_object SET {}=%s WHERE uuid=%s and sequence_number=%s""".format(column))
                session.execute(query, (value, self.uuid,
                                        self.sequence_number))
        return self

    def update_acl(self, acl_cql):
        """Update the static acl with the given cql string"""
        session = self._keyspace_session()
        query = SimpleStatement(u"""UPDATE data_object SET acl = acl + {} WHERE uuid=%s""".format(acl_cql))
        session.execute(query, (self.uuid,))

    def update_acl_cdmi(self, cdmi_acl):
        """Update ACL from a cdmi object (list of dict)"""
        cql_string = acl_cdmi_to_cql(cdmi_acl)
        self.update_acl(cql_string)

    def update_acl_list(self, read_access, write_access):
        """Update ACL from two lists of groups id; the entries are added to
        the existing ACL"""
        cql_string = acl_list_to_cql(read_access, write_access)
        self.update_acl(cql_string)