コード例 #1
0
class BovespaCompanyFile(CustomDjangoCassandraModel):
    """A file delivered by a company listed in Bovespa (B3).

    Rows are partitioned by ``ccvm`` and clustered by ``doc_type``,
    ``fiscal_date`` (descending) and ``version`` (descending).
    """

    __table_name__ = "bovespa_company_file"

    # Identifier of the company in B3 (partition key)
    ccvm = columns.Text(partition_key=True)

    # Kind of document, at most 3 characters; checked against DOC_TYPES
    # in validate()
    doc_type = columns.Text(primary_key=True, max_length=3)

    # Fiscal date the file refers to
    fiscal_date = columns.Date(primary_key=True, clustering_order="DESC")

    # Version of the file. A company may deliver several versions of the
    # files for the same fiscal period
    version = columns.Text(primary_key=True, clustering_order="DESC")

    # Processing status; checked against FILE_STATUSES in validate()
    status = columns.Text(default=FILE_STATUS_NOT_PROCESSED)

    # Creation date of the entity and date of its last modification
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Whether the entity has been deleted, and the reason for it
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    # Protocol code associated with the file
    protocol = columns.Text(required=True)

    # Date the documents were delivered
    delivery_date = columns.DateTime(required=True)

    # Reason the files were delivered
    delivery_type = columns.Text(required=True)

    # Official name of the company
    company_name = columns.Text(required=True)

    # CNPJ of the company
    company_cnpj = columns.Text(required=True)

    # --- Fiscal period decomposed into its parts ---

    # Year of the balance sheet
    # Ex. 2015
    fiscal_date_y = columns.SmallInt()

    # Day of the year of the balance sheet
    # Ex. 90
    fiscal_date_yd = columns.SmallInt()

    # Quarter of the balance sheet
    # Ex. 1
    fiscal_date_q = columns.SmallInt()

    # Month of the balance sheet
    # Ex. 1
    fiscal_date_m = columns.SmallInt()

    # Day of the month of the balance sheet
    # Ex. 1
    fiscal_date_md = columns.SmallInt()

    # Week of the year
    # Ex. 1
    fiscal_date_w = columns.SmallInt()

    # Day of the week
    # Ex. 1
    # NOTE(review): numbering convention (0- or 1-based, first day) is
    # not visible here - confirm against the code that fills this field
    fiscal_date_wd = columns.SmallInt()

    # YEAR-QUARTER combination, formatted like 2018-Q1.
    # Lets us facet results per quarter
    fiscal_date_yq = columns.Text()

    # YEAR-MONTH combination, formatted like 2018-01.
    # Lets us facet results per month
    fiscal_date_ym = columns.Text()

    # Url of the file in bovespa. This is the url used to download the
    # file from the source
    source_url = columns.Text(required=True)

    # Url of the file in a repository of our own. The file has already been
    # downloaded and persisted there, so the source is no longer needed
    file_url = columns.Text()

    # Internal name of the file
    file_name = columns.Text()

    # Extension of the file name
    file_extension = columns.Text()

    # Each key represents the name of a file in the ENER archive.
    # The value is the original content converted into JSON - when possible -
    # and persisted as Text
    # content = KeyEncodedMap(
    #    key_type=columns.Text, value_type=columns.Text)

    class Meta:
        get_pk_field = "ccvm"

    def validate(self):
        """Run the inherited validation, then check doc type and status.

        Raises:
            ValidationError: if ``doc_type`` is not in ``DOC_TYPES`` or
                ``status`` is not in ``FILE_STATUSES``.
        """
        super().validate()

        doc_type = self.doc_type
        if doc_type not in DOC_TYPES:
            message = "Invalid doc type [{0}]. Valid types are: {1}.".format(
                doc_type, DOC_TYPES)
            raise ValidationError(message)

        status = self.status
        if status not in FILE_STATUSES:
            message = (
                "Invalid file status [{0}]. Valid statuses are: {1}.".format(
                    status, FILE_STATUSES))
            raise ValidationError(message)
コード例 #2
0
 class Allv4Datatypes(UserType):
     """User-defined type with one Date, SmallInt, Time and TinyInt field.

     NOTE(review): the name suggests these are the column types added in
     protocol/CQL v4 - confirm. Field order is presumably significant for
     the generated type, so keep the declarations in this order.
     """
     a = columns.Date()      # date field
     b = columns.SmallInt()  # small integer field
     c = columns.Time()      # time-of-day field
     d = columns.TinyInt()   # tiny integer field
コード例 #3
0
 class v4DatatypesModel(Model):
     """Model with an integer primary key plus one Date, SmallInt, Time
     and TinyInt column.

     NOTE(review): the name suggests these are the column types added in
     protocol/CQL v4 - confirm.
     """
     id = columns.Integer(primary_key=True)  # sole primary-key column
     a = columns.Date()      # date column
     b = columns.SmallInt()  # small integer column
     c = columns.Time()      # time-of-day column
     d = columns.TinyInt()   # tiny integer column
コード例 #4
0
class Task(CustomDjangoCassandraModel):
    """
    A crawler task, either requested on demand or run as a batch.

    Args:
        task_id: unique id of the task; it is the partition key.
        user: the user that asked for the task, if it is an on-demand task.
        created_at: creation date of the task (clustering key, descending).
        updated_at: date the task was last updated.
        is_deleted: flags whether the data has been deleted.
        status: the current status of the task, one of:
            - 0 (Created)
            - 1 (Queued)
            - 2 (In Progress)
            - 3 (Finished)
            - 4 (Faulty)
            - 5 (Unknown)
        kind: name of the crawler that will execute the task.
        params: the params used to execute the crawler command, saved
        as Text.
        params_map: exactly the same content as `params`, saved in a form
        that can be searched using solr (KeyEncodedMap).
        options: the options that guide the crawler during the execution,
        saved as Text.
        options_map: exactly the same content as `options`, saved in a form
        that can be searched using solr (KeyEncodedMap).
        times_performed: how many times the task has been run.
        type: the type of the task, either OnDemand(1) or Batch(2).
    """

    __table_name__ = "davinci_task"

    # Reads use consistency level ONE, writes use ALL
    _cassandra_consistency_level_read = ConsistencyLevel.ONE
    _cassandra_consistency_level_write = ConsistencyLevel.ALL

    # Partition key: forces all the values of a task to reside in the same
    # node of the cluster
    task_id = columns.UUID(partition_key=True, default=uuid.uuid4)

    # The user associated with the task (see the class docstring)
    user = columns.Text()

    # Creation date of the entity (clustering key, newest first) and date
    # of its last modification
    created_at = columns.DateTime(
        primary_key=True,
        clustering_order="DESC",
        default=timezone.now,
    )
    updated_at = columns.DateTime(default=timezone.now)

    # Whether the entity is active or has been deleted
    is_deleted = columns.Boolean(default=False)

    # Checked against ALL_STATUS in validate()
    status = columns.SmallInt(default=STATUS_CREATED)

    kind = columns.Text(required=True)

    # Searchable copy of `params`
    params_map = KeyEncodedMap(
        key_type=columns.Text, value_type=columns.Text)

    params = columns.Text(required=True)

    # Searchable copy of `options`
    options_map = KeyEncodedMap(
        key_type=columns.Text, value_type=columns.Text)

    options = columns.Text(required=False)

    times_performed = columns.SmallInt(default=0)

    # Checked against ALL_TASK_TYPES in validate()
    type = columns.SmallInt(default=ON_DEMAND_TASK)

    # List of TaskMoreInfo user-defined-type entries
    more_info = columns.List(value_type=UserDefinedType(TaskMoreInfo))

    # NOTE(review): presumably a textual diff against the previous version
    # of the crawled data, with the affected field names listed below -
    # confirm with the code that fills these columns
    differences_from_last_version = columns.Text()

    inserted_fields = columns.List(value_type=columns.Text)

    updated_fields = columns.List(value_type=columns.Text)

    deleted_fields = columns.List(value_type=columns.Text)

    changed_fields = columns.List(value_type=columns.Text)

    logging_task = columns.Boolean(default=False)

    class Meta:
        get_pk_field = "task_id"

    def validate(self):
        """Run the inherited validation, then check task type and status.

        Raises:
            ValidationError: if ``type`` is not in ``ALL_TASK_TYPES`` or
                ``status`` is not in ``ALL_STATUS``.
        """
        super().validate()

        task_type = self.type
        if task_type not in ALL_TASK_TYPES:
            message = "Invalid task type [{0}]. Valid types are: {1}.".format(
                task_type, ALL_TASK_TYPES)
            raise ValidationError(message)

        task_status = self.status
        if task_status not in ALL_STATUS:
            message = (
                "Invalid task status [{0}]. Valid status are: {1}.".format(
                    task_status, ALL_STATUS))
            raise ValidationError(message)
コード例 #5
0
class ApiAccess(CustomDjangoCassandraModel):
    """ Access-log record for a single request made through the API.

    Rows are partitioned by ``year_month`` and clustered by ``time_ms``
    (descending) and ``id``.
    """

    __table_name__ = "caravaggio_api_access"

    year_month = columns.Text(partition_key=True)
    """ Year and month of the timestamp associated with the request.
    Ex. 201901.
    This field is the row key: each row contains the access logs made
    during that month.

    """

    time_ms = columns.Integer(clustering_order="DESC", primary_key=True)
    """ Time component used to sort the data within one row, newest first.
    NOTE(review): the field name suggests milliseconds while the earlier
    documentation said microseconds - confirm the unit with the writer.

    """

    id = columns.UUID(primary_key=True, default=uuid.uuid4)
    """ Unique id of the entry. Defaults to a ``uuid.uuid4`` value, i.e. it
    is NOT a time-based UUID1.

    """

    user = columns.UUID(required=True)
    """ The user that made the request.

    """

    created_at = columns.DateTime(default=timezone.now)
    """ When the entity was created."""

    remote_address = InetAddress(required=True, index=True)
    """ The IP address of the user doing the request.

    """

    server_hostname = columns.Text(required=True)
    """ The name of the host that processed the request.

    """

    request_method = columns.Text(required=True)
    """ The method of the request.

    """

    request_path = columns.Text(required=True)
    """ The absolute path of the request.

    """

    request_query_params = KeyEncodedMap(
        key_type=columns.Text,
        value_type=columns.Text,
    )
    """ All the query params informed in the request, saved as a map.

    The caravaggio KeyEncodedMap appends the field name to each of the
    keys in order to make them indexable by the Search Indexer.
    """

    request_body = columns.Bytes(required=True)
    """ The body of the request made by the user."""

    response_status = columns.SmallInt(required=True)
    """ NOTE(review): presumably the HTTP status code of the response -
    confirm."""

    response_body = columns.Text(required=True)
    """ The JSON the server responded to the client. If the response is not
    a JSON response, the body is replaced by a <<<Streaming>>> text when
    the response is streamed, or by <<<Not JSON>>> otherwise.

    """

    run_time = columns.Integer(required=True)
    """ NOTE(review): presumably the time spent serving the request; the
    unit is not visible here - confirm."""

    latitude = columns.Float()
    longitude = columns.Float()

    coordinates = columns.Text()
    """ NOTE(review): presumably a textual form of latitude/longitude -
    confirm the expected format."""

    class Meta:
        get_pk_field = "year_month"

    def validate(self):
        """Run the inherited validation; no extra rules are added here."""
        super().validate()