Exemple #1
0
class Checkpoint(CustomDjangoCassandraModel):
    """Persisted checkpoint: a JSON payload stored per (source, key) pair.

    The payload is kept in the ``data`` text column as a JSON document; use
    ``get_data`` / ``set_data`` to read and write it as Python objects.
    """

    __table_name__ = "davinci_checkpoint"

    # Partition key
    source = columns.Text(partition_key=True)

    # Clustering key inside the source partition
    key = columns.Text(primary_key=True)

    # Creation date of the entity and date of its last modification
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Controls if the entity is active or has been deleted
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    # JSON-serialized payload; optional, so it may be None
    data = columns.Text(required=False)

    class Meta:
        get_pk_field = "source"

    def get_data(self):
        """Return the decoded JSON payload, or None when no payload is stored.

        Raises:
            ValueError: if ``data`` holds text that is not valid JSON.
        """
        # ``data`` is not required, and json.loads(None) raises TypeError.
        if self.data is None:
            return None
        return json.loads(self.data)

    def set_data(self, json_data):
        """Serialize ``json_data`` to JSON text and store it in ``data``.

        Keys are sorted and the output is indented by 4 spaces; values the
        json module cannot encode are passed to the module-level ``default``
        callable. The instance is not saved by this method.
        """
        self.data = json.dumps(json_data, sort_keys=True, indent=4, default=default)
class BovespaAccount(CustomDjangoCassandraModel):
    """One account line of a financial statement filed by a company in B3."""

    __table_name__ = "bovespa_account"

    # Company identifier in B3 (partition key)
    ccvm = columns.Text(partition_key=True)

    # Date the account value refers to
    period = columns.Date(primary_key=True, clustering_order="DESC")

    # Version of the account; a company may file several versions
    # of the same documents
    version = columns.Text(primary_key=True, clustering_order="DESC")

    # Account number, e.g. "1.01.01"
    number = columns.Text(primary_key=True, max_length=15)

    # Kind of financial information (instant/individual or consolidated)
    financial_info_type = columns.Text(primary_key=True, max_length=15)

    # Kind of financial statement the account belongs to
    balance_type = columns.Text(max_length=15, required=True)

    # Account name, e.g. "Receita de Venda de Bens e/ou Serviços"
    name = columns.Text(max_length=200, required=True)

    # Sector of the company
    sector = columns.Integer(default=0, required=True)

    # Value of the account
    amount = Decimal(required=True, max_digits=20, decimal_places=2)

    # Free-text comments. Used for "DFP_BALANCE_DMPL" accounts to explain the
    # meaning of the account: Shareholder's Equity, Accrued Profit/Loss, etc.
    comments = columns.Text()

    # Creation date of the entity and date of its last modification
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Soft-delete flag and the reason for the deletion
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    class Meta:
        get_pk_field = "ccvm"

    def validate(self):
        """Run the base validation, then check the two enumerated fields.

        Raises:
            ValidationError: if ``financial_info_type`` is not one of
                FINANCIAL_INFO_TYPES or ``balance_type`` is not one of
                BALANCE_TYPES.
        """
        super().validate()

        info_type = self.financial_info_type
        if info_type not in FINANCIAL_INFO_TYPES:
            template = ("Invalid financial type [{0}] for account "
                        "[{1} {2}]. Valid types are: {3}.")
            raise ValidationError(template.format(
                info_type, self.number, self.name, FINANCIAL_INFO_TYPES))

        balance = self.balance_type
        if balance not in BALANCE_TYPES:
            template = "Invalid balance type [{0}]. Valid types are: {1}."
            raise ValidationError(template.format(balance, BALANCE_TYPES))
class UserVideosModel(Model):
    """Model class that maps to the user_videos table.

    The primary key is (user_id, added_date, video_id); the clustering
    orders declared below sort a user's rows by ``added_date`` descending
    and then ``video_id`` ascending.
    """
    __table_name__ = 'user_videos'
    # First primary-key column; stored in the table as "userid".
    user_id = columns.UUID(primary_key=True, db_field='userid')
    # Clustering column, newest first.
    added_date = columns.DateTime(primary_key=True, clustering_order='DESC')
    # Clustering column; stored in the table as "videoid".
    video_id = columns.UUID(primary_key=True,
                            clustering_order='ASC',
                            db_field='videoid')
    name = columns.Text()
    preview_image_location = columns.Text()
class TaskMoreInfo(UserType):
    """User-defined type ``task_more_info``.

    One piece of additional information: where it came from, when it was
    created and a free-text description.
    """
    __type_name__ = "task_more_info"

    # from where the more info was created
    source = columns.Text()

    # when this piece of information was created
    created_at = columns.DateTime()

    # details about the error
    details = columns.Text()
class BaseEntity(CustomDjangoCassandraModel):
    """
    Abstract base model with the common fields shared between all the
    managed entities: a UUID identifier, creation/modification timestamps
    and soft-delete bookkeeping.
    """
    # Abstract: this class only contributes its columns to the subclasses
    __abstract__ = True

    # A unique identifier of the entity (random UUID generated by default)
    _id = columns.UUID(primary_key=True, default=uuid.uuid4)

    # Creation date of the entity and date of its last modification
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Controls if the entity is active or has been deleted
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    class Meta:
        get_pk_field = '_id'
class VideosModel(Model):
    """Model class that maps to the videos table.

    Each row is keyed by ``video_id`` alone.
    """
    __table_name__ = 'videos'
    # Primary key; stored in the table as "videoid".
    video_id = columns.UUID(primary_key=True, db_field='videoid')
    # Stored in the table as "userid".
    user_id = columns.UUID(db_field='userid')
    name = columns.Text()
    description = columns.Text()
    location = columns.Text()
    # NOTE(review): integer code whose meaning is not defined here -- confirm
    # the valid values against the code that writes this column.
    location_type = columns.Integer()
    preview_image_location = columns.Text()
    # Set of text tags
    tags = columns.Set(columns.Text)
    added_date = columns.DateTime()
Exemple #7
0
class BovespaCompany(CustomDjangoCassandraModel):
    """A company listed in B3, identified by its CCVM code."""

    __table_name__ = "bovespa_company"

    # Constant partition key: forces all the rows to live in the same
    # partition (and therefore the same node) of the cluster
    entity_type = columns.Text(partition_key=True, default="company")

    # Company identifier in B3
    ccvm = columns.Text(primary_key=True)

    # Creation date of the entity and date of its last modification
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Soft-delete flag and the reason for the deletion
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    company_name = columns.Text(required=True)

    cnpj = columns.Text()

    company_type = columns.Text()

    # Must be one of SITUATIONS (enforced in validate)
    situation = columns.Text(required=True)

    granted_date = columns.Date()
    canceled_date = columns.Date()

    class Meta:
        get_pk_field = "entity_type"

    def validate(self):
        """Run the base validation and check ``situation`` against SITUATIONS.

        Raises:
            ValidationError: if ``situation`` is not one of SITUATIONS.
        """
        super().validate()

        current = self.situation
        if current in SITUATIONS:
            return

        template = "Invalid situation [{0}]. Valid situations are: {1}."
        raise ValidationError(template.format(current, SITUATIONS))
    def test_mintimeuuid_function(self):
        """
        Checks the CQL text and the bound parameters produced by a where
        clause that compares a column with the MinTimeUUID helper function
        """
        moment = datetime.now()
        clause = WhereClause(
            'time', EqualsOperator(), functions.MinTimeUUID(moment))
        clause.set_context_id(5)

        # The clause renders a placeholder named after the context id
        rendered = str(clause)
        self.assertEqual(rendered, '"time" = MinTimeUUID(%(5)s)')

        # The placeholder is bound to the database form of the datetime
        bound_params = {}
        clause.update_context(bound_params)
        expected_params = {'5': columns.DateTime().to_database(moment)}
        self.assertEqual(bound_params, expected_params)
class AllDatatypes(UserType):
    """User-defined type with one field for each column type listed below."""
    a = columns.Ascii()
    b = columns.BigInt()
    c = columns.Blob()
    d = columns.Boolean()
    e = columns.DateTime()
    f = columns.Decimal()
    g = columns.Double()
    h = columns.Float()
    i = columns.Inet()
    j = columns.Integer()
    k = columns.Text()
    l = columns.TimeUUID()
    m = columns.UUID()
    n = columns.VarInt()
 class AllDatatypesModel(Model):
     """Model keyed by an integer ``id`` with one column per listed type."""
     id = columns.Integer(primary_key=True)
     a = columns.Ascii()
     b = columns.BigInt()
     c = columns.Blob()
     d = columns.Boolean()
     e = columns.DateTime()
     f = columns.Decimal()
     g = columns.Double()
     h = columns.Float()
     i = columns.Inet()
     j = columns.Integer()
     k = columns.Text()
     l = columns.TimeUUID()
     m = columns.UUID()
     n = columns.VarInt()
Exemple #11
0
class TreeEntry(Model):
    """A named entry of a container, stored together with the container data.

    Rows are partitioned by ``container`` and clustered by ``name``. The
    ``container_*`` columns are static, i.e. stored once per container and
    shared by every entry of that container.
    """

    # Partitioned by container, clustered by name, so all files for a directory
    # are in the same bucket and share the single instance of the static
    # container data
    container = columns.Text(partition_key=True)
    name = columns.Text(primary_key=True, partition_key=False)

    # The following set of columns are shared between all entries with the same
    # container name. i.e. it removes the need for a separate container table,
    # removes the need for extra lookups and avoids the container / objects
    # getting out of sync
    #
    # It also facilitates _some_ directory operations, e.g. removal.
    #
    # Renaming is still slow because the container and the name are primary
    # keys, so you have to create a new record and delete the old one...
    # It is suggested to use the batch system to make such an operation (more
    # or less) atomic.
    #
    container_metadata = columns.Map(columns.Text, columns.Text, static=True)
    container_uuid = columns.Text(default=default_cdmi_id, static=True)
    container_create_ts = columns.DateTime(static=True)
    container_modified_ts = columns.DateTime(static=True)
    container_acl = columns.Map(columns.Text,
                                columns.UserDefinedType(Ace),
                                static=True)

    # This is the actual directory entry per-se, i.e. unique per name....
    # As with a conventional filesystem this is simply a reference to the 'real'
    # data where ACLs, system metadata &c are held.
    # per-record, but only for externals (see DataObject)
    metadata = columns.Map(columns.Text, columns.Text)
    create_ts = columns.DateTime(default=datetime.now)
    modified_ts = columns.DateTime()
    acl = columns.Map(columns.Text, columns.UserDefinedType(Ace))
    mimetype = columns.Text()
    # Use the url schema (file:// , cdmi:// &c ) to route the request...
    # Only cdmi:// does anything everything else results in a redirect
    url = columns.Text()
    uuid = columns.Text()

    @staticmethod
    def _indigo_session():
        """Return the driver session, switched to the configured keyspace.

        The keyspace name is read from the 'KEYSPACE' configuration entry and
        falls back to 'indigo'.
        """
        cfg = get_config(None)
        session = connection.get_session()
        session.set_keyspace(cfg.get('KEYSPACE', 'indigo'))
        return session

    def add_default_acl(self):
        """Add read access to all authenticated users"""
        self.create_container_acl_list(["AUTHENTICATED@"], [])

    @classmethod
    def create(cls, **kwargs):
        """Create a new entry from the given column values and return it."""
        return super(TreeEntry, cls).create(**kwargs)

    def create_container_acl(self, acl_cql):
        """Replace the static container ACL with the given CQL string.

        ``acl_cql`` is interpolated verbatim into the statement, so it must
        be a trusted CQL literal (see the ``*_cdmi`` / ``*_list`` wrappers).
        """
        session = self._indigo_session()
        query = SimpleStatement(u"""UPDATE tree_entry SET container_acl={} 
            WHERE container=%s""".format(acl_cql))
        session.execute(query, (self.container, ))

    def create_container_acl_cdmi(self, cdmi_acl):
        """Create static ACL from a cdmi object (list of dict)"""
        cql_string = acl_cdmi_to_cql(cdmi_acl)
        self.create_container_acl(cql_string)

    def create_container_acl_list(self, read_access, write_access):
        """Create static ACL from lists of group uuids"""
        cql_string = acl_list_to_cql(read_access, write_access)
        self.create_container_acl(cql_string)

    def create_entry_acl(self, acl_cql):
        """Replace the ACL of this entry with the given CQL string.

        ``acl_cql`` is interpolated verbatim into the statement, so it must
        be a trusted CQL literal.
        """
        session = self._indigo_session()
        query = SimpleStatement(u"""UPDATE tree_entry SET acl={} 
            WHERE container=%s and name=%s""".format(acl_cql))
        session.execute(query, (
            self.container,
            self.name,
        ))

    def create_entry_acl_list(self, read_access, write_access):
        """Create entry ACL from lists of group uuids"""
        cql_string = acl_list_to_cql(read_access, write_access)
        self.create_entry_acl(cql_string)

    def create_entry_acl_cdmi(self, cdmi_acl):
        """Create entry ACL from a cdmi object (list of dict)"""
        cql_string = acl_cdmi_to_cql(cdmi_acl)
        self.create_entry_acl(cql_string)

    def path(self):
        """Get the full path of the specific entry"""
        return merge(self.container, self.name)

    def update(self, **kwargs):
        """Write each given column value with its own UPDATE statement.

        The keyword names are interpolated into the statement as column
        names, so they must be trusted identifiers. Only the database row is
        updated; the attributes of this instance are left untouched.

        Returns:
            This instance.
        """
        session = self._indigo_session()
        for arg in kwargs:
            # For static fields we can't use the name in the where condition
            if arg in static_fields:
                query = SimpleStatement(u"""UPDATE tree_entry SET {}=%s
                    WHERE container=%s""".format(arg))
                session.execute(query, (kwargs[arg], self.container))
            else:
                query = SimpleStatement(u"""UPDATE tree_entry SET {}=%s
                    WHERE container=%s and name=%s""".format(arg))
                session.execute(query,
                                (kwargs[arg], self.container, self.name))
        return self

    def update_container_acl(self, acl_cql):
        """Add the given CQL string to the static container ACL.

        ``acl_cql`` is interpolated verbatim into the statement, so it must
        be a trusted CQL literal.
        """
        session = self._indigo_session()
        query = SimpleStatement(
            u"""UPDATE tree_entry SET container_acl=container_acl+{} 
            WHERE container=%s""".format(acl_cql))
        session.execute(query, (self.container, ))

    def update_container_acl_cdmi(self, cdmi_acl):
        """Update static ACL from a cdmi object (list of dict)"""
        cql_string = acl_cdmi_to_cql(cdmi_acl)
        self.update_container_acl(cql_string)

    def update_container_acl_list(self, read_access, write_access):
        """Update static ACL from lists of group uuids"""
        cql_string = acl_list_to_cql(read_access, write_access)
        self.update_container_acl(cql_string)

    def update_entry_acl(self, acl_cql):
        """Add the given CQL string to the ACL of this entry.

        ``acl_cql`` is interpolated verbatim into the statement, so it must
        be a trusted CQL literal.
        """
        session = self._indigo_session()
        query = SimpleStatement(u"""UPDATE tree_entry SET acl=acl+{} 
            WHERE container=%s and name=%s""".format(acl_cql))
        session.execute(query, (
            self.container,
            self.name,
        ))

    def update_entry_acl_list(self, read_access, write_access):
        """Update entry ACL from lists of group uuids"""
        cql_string = acl_list_to_cql(read_access, write_access)
        self.update_entry_acl(cql_string)

    def update_entry_acl_cdmi(self, cdmi_acl):
        """Update entry ACL from a cdmi object (list of dict)"""
        cql_string = acl_cdmi_to_cql(cdmi_acl)
        self.update_entry_acl(cql_string)
Exemple #12
0
class BovespaCompanyFile(CustomDjangoCassandraModel):
    """A document filed by a B3 company for a given fiscal date and version."""

    __table_name__ = "bovespa_company_file"

    # Company identifier in B3 (partition key)
    ccvm = columns.Text(partition_key=True)

    # Kind of document
    doc_type = columns.Text(max_length=3, primary_key=True)

    # Fiscal date the file refers to
    fiscal_date = columns.Date(primary_key=True, clustering_order="DESC")

    # Version of the file. A company may file several versions of
    # the documents for the same fiscal period
    version = columns.Text(primary_key=True, clustering_order="DESC")

    # Processing status; must be one of FILE_STATUSES (see validate)
    status = columns.Text(default=FILE_STATUS_NOT_PROCESSED)

    # Creation date of the entity and date of its last modification
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Soft-delete flag and the reason for the deletion
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    # Protocol code associated with the file
    protocol = columns.Text(required=True)

    # When the documents were delivered
    delivery_date = columns.DateTime(required=True)

    # Why the files were delivered
    delivery_type = columns.Text(required=True)

    # Official name of the company
    company_name = columns.Text(required=True)

    # CNPJ of the company
    company_cnpj = columns.Text(required=True)

    # The fiscal date decomposed into its parts (year, quarter, month, ...)

    # Year of the balance sheet, e.g. 2015
    fiscal_date_y = columns.SmallInt()

    # Day of the year of the balance sheet
    fiscal_date_yd = columns.SmallInt()

    # Quarter of the balance sheet, e.g. 1
    fiscal_date_q = columns.SmallInt()

    # Month of the balance sheet, e.g. 1
    fiscal_date_m = columns.SmallInt()

    # Day of the month of the balance sheet, e.g. 1
    fiscal_date_md = columns.SmallInt()

    # Week of the year, e.g. 1
    fiscal_date_w = columns.SmallInt()

    # Day of the week, e.g. 1
    fiscal_date_wd = columns.SmallInt()

    # YEAR-QUARTER combination in the form 2018-Q1,
    # which allows faceting the results per quarter
    fiscal_date_yq = columns.Text()

    # YEAR-MONTH combination in the form 2018-01,
    # which allows faceting the results per month
    fiscal_date_ym = columns.Text()

    # URL of the file in bovespa: the location the file is downloaded
    # from at the source
    source_url = columns.Text(required=True)

    # URL of our own copy of the file. Once the file has been downloaded and
    # persisted into a custom repository there is no need to access the source
    file_url = columns.Text()

    # Internal name of the file
    file_name = columns.Text()

    # Extension of the file name
    file_extension = columns.Text()

    # Each key represents the name of a file in the ENER archive.
    # The value is the original content converted into JSON - when possible -
    # and persisted as Text
    # content = KeyEncodedMap(
    #    key_type=columns.Text, value_type=columns.Text)

    class Meta:
        get_pk_field = "ccvm"

    def validate(self):
        """Run the base validation, then check the two enumerated fields.

        Raises:
            ValidationError: if ``doc_type`` is not one of DOC_TYPES or
                ``status`` is not one of FILE_STATUSES.
        """
        super().validate()

        kind = self.doc_type
        if kind not in DOC_TYPES:
            template = "Invalid doc type [{0}]. Valid types are: {1}."
            raise ValidationError(template.format(kind, DOC_TYPES))

        current_status = self.status
        if current_status not in FILE_STATUSES:
            template = "Invalid file status [{0}]. Valid statuses are: {1}."
            raise ValidationError(
                template.format(current_status, FILE_STATUSES))
# Known situation values.
# NOTE(review): presumably the whitelist for a model's "situation" column
# (a validate() elsewhere checks membership in SITUATIONS) -- confirm.
SITUATIONS = [SITUATION_CANCELLED, SITUATION_GRANTED]


class {{ app_name | capfirst }}Resource(CustomDjangoCassandraModel):
    # Project-template source: the ``{{ ... }}`` placeholders are replaced
    # with the application name when the template is rendered.

    __table_name__ = "{{ app_name | lower }}_resource"

    # Partition key: a random UUID generated by default for every resource
    _id = columns.UUID(partition_key=True, default=uuid.uuid4)

    # The owner of the data, i.e. who owns the persisted resource
    user = columns.Text(primary_key=True)

    # Creation date of the entity and date of its last modification
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Controls if the entity is active or has been deleted
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    crawl_param = columns.Integer(required=True)

    name = columns.Text(required=True)

    situation = columns.Text(required=True)

    short_description = columns.Text()

    long_description = columns.Text()
class ApiAccess(CustomDjangoCassandraModel):
    """ A model to persist all the access made through the API

    One row is stored per request. Rows are partitioned by ``year_month``
    and, inside a partition, clustered by ``time_ms`` (descending) and ``id``.
    """

    __table_name__ = "caravaggio_api_access"

    year_month = columns.Text(partition_key=True)
    """ The combination of year and month for the timestamp associated
    with the request. Ex. 201901.
    We use this field as row keys. Each row will contain the
    access logs made during the month

    """

    # NOTE(review): the name says "ms" while the note below says
    # microseconds -- confirm which unit the writers actually store.
    time_ms = columns.Integer(primary_key=True, clustering_order="DESC")
    """ Microseconds (to sort data within one row).

    """

    # Random UUID (uuid4) generated by default for every access
    id = columns.UUID(primary_key=True, default=uuid.uuid4)
    """ Monotonous UUID(NOT time - based UUID1)

    """

    user = columns.UUID(required=True)
    """ The user that made the request.

    """

    created_at = columns.DateTime(default=timezone.now)
    """ When was created the entity and the last modification date"""

    # Indexed column (index=True)
    remote_address = InetAddress(required=True, index=True)
    """ The IP address of the user doing the request

    """

    server_hostname = columns.Text(required=True)
    """ The name of the host that is processing the request

    """

    request_method = columns.Text(required=True)
    """ The method of the request

    """

    request_path = columns.Text(required=True)
    """ The absolute path of the request

    """

    request_query_params = KeyEncodedMap(key_type=columns.Text, value_type=columns.Text)
    """ We save all the query params informed in the request as a map.

    We use caravaggio KeyEncodedMap that appends the field name to each of
    the keys in order to make them indexable by the Search Indexer.
    """

    request_body = columns.Bytes(required=True)
    """ The body of the request made by the user"""

    # NOTE(review): presumably the HTTP status code of the response -- confirm
    response_status = columns.SmallInt(required=True)

    response_body = columns.Text(required=True)
    """ The JSON the server responded to the client. If the response is not
    a JSON response, the body will be replaced by a <<<Streaming>>> text if
    the request is in steamming, or  <<<Not JSON>>> in other case.

    """

    # NOTE(review): presumably how long the request took; unit not stated
    # here -- confirm against the code that fills it.
    run_time = columns.Integer(required=True)

    latitude = columns.Float()
    longitude = columns.Float()

    coordinates = columns.Text()

    class Meta:
        get_pk_field = "year_month"

    def validate(self):
        # No model-specific checks: only the base validation runs
        super(ApiAccess, self).validate()
Exemple #15
0
class DateTimeQueryTestModel(Model):
    """Model with an (integer user, datetime day) primary key and text data."""

    # First primary-key column
    user = columns.Integer(primary_key=True)
    # Second primary-key column, a datetime
    day = columns.DateTime(primary_key=True)
    data = columns.Text()
class Task(CustomDjangoCassandraModel):
    """
    A crawling task, either requested on demand or run as part of a batch.

    Args:
        task_id: the task id that is the unique partition key.
        user: The user that asked for the task, if it is an ondemand task.
        created_at: the date of the creation of the task.
        updated_at: the date that we last updated the task.
        is_deleted: controls if the data is deleted.
        status: represents the actual status of the task, could be:
            - 0 (Created)
            - 1 (Queued)
            - 2 (In Progress)
            - 3 (Finished)
            - 4 (Faulty)
            - 5 (Unknown)
        kind: the name of the crawler that will execute the task.
        params: the set of params used to execute the crawler command, this
        will be saved as Text.
        params_map: exactly the same content as `params` but saved in a way
        that we can search using solr (KeyEncodedMap).
        options: the set of options that is used to guide the crawler during
        the execution, this will be saved as text.
        options_map: exactly the same content as `options` but saved in a way
        that we can search using solr (KeyEncodedMap).
        times_performed: keeps track of how many times the task was run.
        type: the type of the task, could be OnDemand(1) or Batch(2)
    """

    __table_name__ = "davinci_task"
    _cassandra_consistency_level_read = ConsistencyLevel.ONE
    _cassandra_consistency_level_write = ConsistencyLevel.ALL

    # Partition key: a random UUID generated by default for every task
    task_id = columns.UUID(partition_key=True, default=uuid.uuid4)

    # The user that asked for the task
    user = columns.Text()

    # Creation date (also the clustering key, newest first) and date of the
    # last modification
    created_at = columns.DateTime(default=timezone.now,
                                  primary_key=True,
                                  clustering_order="DESC")
    updated_at = columns.DateTime(default=timezone.now)

    # Controls if the entity is active or has been deleted
    is_deleted = columns.Boolean(default=False)

    # Must be one of ALL_STATUS (see validate)
    status = columns.SmallInt(default=STATUS_CREATED)

    kind = columns.Text(required=True)

    params_map = KeyEncodedMap(key_type=columns.Text, value_type=columns.Text)

    params = columns.Text(required=True)

    options_map = KeyEncodedMap(key_type=columns.Text, value_type=columns.Text)

    options = columns.Text(required=False)

    times_performed = columns.SmallInt(default=0)

    # Must be one of ALL_TASK_TYPES (see validate)
    type = columns.SmallInt(default=ON_DEMAND_TASK)

    more_info = columns.List(value_type=UserDefinedType(TaskMoreInfo))

    differences_from_last_version = columns.Text()

    inserted_fields = columns.List(value_type=columns.Text)

    updated_fields = columns.List(value_type=columns.Text)

    deleted_fields = columns.List(value_type=columns.Text)

    changed_fields = columns.List(value_type=columns.Text)

    logging_task = columns.Boolean(default=False)

    class Meta:
        get_pk_field = "task_id"

    def validate(self):
        """Run the base validation, then check ``type`` and ``status``.

        Raises:
            ValidationError: if ``type`` is not one of ALL_TASK_TYPES or
                ``status`` is not one of ALL_STATUS.
        """
        super().validate()

        task_type = self.type
        if task_type not in ALL_TASK_TYPES:
            template = "Invalid task type [{0}]. Valid types are: {1}."
            raise ValidationError(template.format(task_type, ALL_TASK_TYPES))

        task_status = self.status
        if task_status not in ALL_STATUS:
            template = "Invalid task status [{0}]. Valid status are: {1}."
            raise ValidationError(template.format(task_status, ALL_STATUS))
class Company(CustomDjangoCassandraModel):
    """
    A publicly traded company
    """
    __table_name__ = "company"

    # Unique identifier of the entity (partition key, random UUID by default)
    _id = columns.UUID(partition_key=True, default=uuid.uuid4)

    # The owner of the data, i.e. who owns the persisted company data
    user = columns.Text(primary_key=True)

    # Creation date of the entity and date of its last modification
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Soft-delete flag and the reason for the deletion
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    # Name of the company
    name = columns.Text(required=True)

    # Short description of the company
    short_description = columns.Text()

    # Domain of the company (e.g. preseries.com)
    domain = columns.Text(max_length=50)

    # Date the company was founded
    foundation_date = columns.Date()

    # Date of the latest funding round
    last_round = columns.Date()

    # NOTE(review): presumably free-text notes about the funding rounds --
    # confirm what writers store in this text column
    round_notes = columns.Text()

    # Country of the company
    # ISO 3166-1 alpha 3 code
    country_code = columns.Text(min_length=3, max_length=3)

    # Stock trading symbol
    stock_symbol = columns.Text()

    # Contact email of the company
    contact_email = columns.Text()

    # IDs of the founders of the company
    founders = columns.List(value_type=columns.UUID)

    # Address of the headquarters of the company
    address = UserDefinedType(Address)

    # List of specialties of the company
    specialties = columns.List(value_type=columns.Text)

    # Counters of the latest followers in twitter
    #  (example of list of integers)
    latest_twitter_followers = columns.List(value_type=columns.Integer)

    # A map of key-value pairs.
    # We use caravaggio KeyEncodedMap that appends the field name
    # to each of the keys in order to make them indexable by the
    # Search Indexer.
    websites = KeyEncodedMap(key_type=columns.Text, value_type=columns.Text)

    # Raw JSON with the crawler configurations, each
    # key is a reference to a crawler
    crawler_config = columns.Text()

    # Raw JSON content
    extra_data = columns.Text()

    latitude = columns.Float()
    longitude = columns.Float()

    coordinates = columns.Text()

    class Meta:
        get_pk_field = '_id'

    def validate(self):
        """Run the base validation and reject the reserved name "test".

        Raises:
            ValidationError: if ``name`` equals "test".
        """
        super(Company, self).validate()
        reserved_name = "test"
        name_is_reserved = self.name == reserved_name
        if name_is_reserved:
            raise ValidationError('The company name cannot be test')
Exemple #18
0
class DataObject(Model):
    """ The DataObject represents actual data objects, the tree structure
    merely references it.

    Each partition key gathers together all the data under one partition (the
    CDMI ID ) and the object properties are represented using static columns
    (one instance per partition)
    It has a similar effect to a join to a properties table, except the
    properties are stored with the rest of the partition

    This is an 'efficient' model optimised for Cassandra's quirks.

    N.B. by default Cassandra compresses its data, so we get that for free."""
    # The 'name' of the object
    uuid = columns.Text(default=default_cdmi_id, required=True,
                        partition_key=True)
    #####################
    # These columns are the same (shared) between all entries with same id
    # (they use the static attribute , [ like an inode or a header ])
    #####################
    checksum = columns.Text(static=True)
    size = columns.BigInt(default=0, static=True)
    metadata = columns.Map(columns.Text, columns.Text, static=True)
    mimetype = columns.Text(static=True)
    alt_url = columns.Set(columns.Text, static=True)
    create_ts = columns.DateTime(default=datetime.now, static=True)
    modified_ts = columns.DateTime(default=datetime.now, static=True)
    type = columns.Text(required=False, static=True, default='UNKNOWN')
    acl = columns.Map(columns.Text, columns.UserDefinedType(Ace), static=True)
    # A general aid to integrity ...
    treepath = columns.Text(static=True, required=False)
    #####################
    # And 'clever' bit -- 'here' data, These will be the only per-record-fields
    # in the partition (i.e. object)
    # So the datastructure looks like a header , with an ordered list of blobs
    #####################
    # This is the 'clustering' key...
    sequence_number = columns.Integer(primary_key=True, partition_key=False)
    blob = columns.Blob(required=False)
    compressed = columns.Boolean(default=False)
    #####################

    @staticmethod
    def _indigo_session():
        """Return the driver session, switched to the configured keyspace.

        The keyspace name is read from the 'KEYSPACE' configuration entry and
        falls back to 'indigo'.
        """
        cfg = get_config(None)
        session = connection.get_session()
        session.set_keyspace(cfg.get('KEYSPACE', 'indigo'))
        return session

    @staticmethod
    def _zip_payload(raw_data):
        """Return ``raw_data`` deflated into an in-memory ZIP (member "data")."""
        # zipfile writes bytes, so the buffer must be a byte stream
        from io import BytesIO
        buf = BytesIO()
        # The archive must be closed before the buffer is read, otherwise the
        # central directory is missing from the result
        with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as archive:
            archive.writestr("data", raw_data)
        return buf.getvalue()

    @staticmethod
    def _unzip_payload(blob):
        """Return the "data" member of the in-memory ZIP held in ``blob``."""
        from io import BytesIO
        with zipfile.ZipFile(BytesIO(blob), 'r') as archive:
            return archive.read("data")

    @classmethod
    def append_chunk(cls, uuid, raw_data, sequence_number, compressed=False):
        """Create a new blob for an existing data_object

        The chunk is saved under ``uuid`` at position ``sequence_number``;
        when ``compressed`` is true the data is zipped before being stored.

        Returns:
            The saved DataObject row.
        """
        data = cls._zip_payload(raw_data) if compressed else raw_data
        data_object = cls(uuid=uuid,
                          sequence_number=sequence_number,
                          blob=data,
                          compressed=compressed)
        data_object.save()
        return data_object


    def chunk_content(self):
        """
        Yields the content of the object a chunk at a time, one chunk per
        stored row of this uuid. Compressed chunks are unzipped before being
        yielded.
        """
        entries = DataObject.objects.filter(uuid=self.uuid)
        for entry in entries:
            if entry.compressed:
                yield self._unzip_payload(entry.blob)
            else:
                yield entry.blob


    @classmethod
    def create(cls, raw_data, compressed=False, metadata=None, create_ts=None, acl=None):
        """Create a new data object whose first chunk holds ``raw_data``.

        A new id is generated and the data is stored with sequence number 0,
        zipped first when ``compressed`` is true. ``metadata`` and ``acl`` are
        stored only when given; ``create_ts`` defaults to the current time.

        Returns:
            The created DataObject row.
        """
        new_id = default_cdmi_id()
        now = datetime.now()
        data = cls._zip_payload(raw_data) if compressed else raw_data

        kwargs = {
            "uuid": new_id,
            "sequence_number": 0,
            "blob": data,
            "compressed": compressed,
            "modified_ts": now,
            "create_ts": create_ts if create_ts else now,
        }
        if metadata:
            kwargs['metadata'] = metadata
        if acl:
            kwargs['acl'] = acl
        return super(DataObject, cls).create(**kwargs)


    def create_acl(self, acl_cql):
        """Replace the static acl with the given cql string

        ``acl_cql`` is interpolated verbatim into the statement, so it must
        be a trusted CQL literal.
        """
        session = self._indigo_session()
        query = SimpleStatement(u"""UPDATE data_object SET acl = {}
            WHERE uuid=%s""".format(acl_cql))
        session.execute(query, (self.uuid,))


    def create_acl_cdmi(self, cdmi_acl):
        """Create entry ACL from a cdmi object (list of dict)"""
        cql_string = acl_cdmi_to_cql(cdmi_acl)
        self.create_acl(cql_string)


    def create_acl_list(self, read_access, write_access):
        """Create ACL from two lists of groups id, existing ACL are replaced"""
        cql_string = acl_list_to_cql(read_access, write_access)
        self.create_acl(cql_string)


    @classmethod
    def delete_id(cls, uuid):
        """Delete all blobs for the specified uuid"""
        session = cls._indigo_session()
        query = SimpleStatement("""DELETE FROM data_object WHERE uuid=%s""")
        session.execute(query, (uuid,))


    @classmethod
    def find(cls, uuid):
        """Find an object by uuid

        Returns:
            The first row stored for ``uuid``, or None when there is none.
        """
        # first() already returns None for an empty result, so there is no
        # need to evaluate the whole queryset beforehand
        return cls.objects.filter(uuid=uuid).first()


    def update(self, **kwargs):
        """Write each given column value with its own UPDATE statement.

        The keyword names are interpolated into the statement as column
        names, so they must be trusted identifiers. Only the database row is
        updated; the attributes of this instance are left untouched.

        Returns:
            This instance.
        """
        session = self._indigo_session()
        for arg in kwargs:
            # For static fields we can't use the name in the where condition
            if arg in static_fields:
                query = SimpleStatement("""UPDATE data_object SET {}=%s
                    WHERE uuid=%s""".format(arg))
                session.execute(query, (kwargs[arg], self.uuid))
            else:
                query = SimpleStatement("""UPDATE data_object SET {}=%s
                    WHERE uuid=%s and sequence_number=%s""".format(arg))
                session.execute(query, (kwargs[arg], self.uuid, self.sequence_number))
        return self


    def update_acl(self, acl_cql):
        """Add the given cql string to the static acl

        ``acl_cql`` is interpolated verbatim into the statement, so it must
        be a trusted CQL literal.
        """
        session = self._indigo_session()
        query = SimpleStatement(u"""UPDATE data_object SET acl = acl + {}
            WHERE uuid=%s""".format(acl_cql))
        session.execute(query, (self.uuid,))


    def update_acl_cdmi(self, cdmi_acl):
        """Update entry ACL from a cdmi object (list of dict)"""
        cql_string = acl_cdmi_to_cql(cdmi_acl)
        self.update_acl(cql_string)


    def update_acl_list(self, read_access, write_access):
        """Update ACL from two lists of groups id; the resulting entries are
        added to the existing ACL"""
        cql_string = acl_list_to_cql(read_access, write_access)
        self.update_acl(cql_string)