class BovespaAccount(CustomDjangoCassandraModel):
    """A single account entry of a financial statement filed by a company.

    Rows are partitioned by company (``ccvm``) and clustered by ``period``
    and ``version`` (both in descending order), then by ``number`` and
    ``financial_info_type``.
    """

    __table_name__ = "bovespa_account"

    # Identifier of the company in B3 (partition key)
    ccvm = columns.Text(partition_key=True)

    # Date the account value refers to; newest periods come first
    period = columns.Date(primary_key=True, clustering_order="DESC")

    # Version of the account. A company may present several versions of
    # the same files; newest versions come first
    version = columns.Text(primary_key=True, clustering_order="DESC")

    # Account number, e.g. "1.01.01"
    number = columns.Text(primary_key=True, max_length=15)

    # Financial type of the account (instant/individual or consolidated).
    # Checked against FINANCIAL_INFO_TYPES in validate()
    financial_info_type = columns.Text(primary_key=True, max_length=15)

    # Type of financial statement. Checked against BALANCE_TYPES in
    # validate()
    balance_type = columns.Text(max_length=15, required=True)

    # Account name, e.g. "Receita de Venda de Bens e/ou Serviços"
    name = columns.Text(max_length=200, required=True)

    # Company sector
    sector = columns.Integer(default=0, required=True)

    # Amount of the account
    amount = Decimal(required=True, max_digits=20, decimal_places=2)

    # Free-text comments. Used for "DFP_BALANCE_DMPL" accounts to explain
    # the meaning of the account: Shareholder's Equity, Accrued
    # Profit/Loss, etc.
    comments = columns.Text()

    # Creation date of the entity and date of its last modification
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Soft-delete flag and the reason why the entity was deleted
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    class Meta:
        get_pk_field = "ccvm"

    def validate(self):
        """Run the base validation, then check the enumerated fields.

        Raises:
            ValidationError: if ``financial_info_type`` is not in
                FINANCIAL_INFO_TYPES or ``balance_type`` is not in
                BALANCE_TYPES.
        """
        super().validate()

        info_type = self.financial_info_type
        if info_type not in FINANCIAL_INFO_TYPES:
            template = ("Invalid financial type [{0}] for account "
                        "[{1} {2}]. Valid types are: {3}.")
            raise ValidationError(template.format(
                info_type, self.number, self.name, FINANCIAL_INFO_TYPES))

        balance = self.balance_type
        if balance not in BALANCE_TYPES:
            template = "Invalid balance type [{0}]. Valid types are: {1}."
            raise ValidationError(template.format(balance, BALANCE_TYPES))
Ejemplo n.º 2
0
class Checkpoint(CustomDjangoCassandraModel):
    """A checkpoint stored per ``source`` and ``key``.

    The payload is kept in ``data`` as JSON text; use ``get_data`` and
    ``set_data`` to read and write it as Python objects.
    """

    __table_name__ = "davinci_checkpoint"

    # Partition key
    source = columns.Text(partition_key=True)

    # Clustering key inside the source partition
    key = columns.Text(primary_key=True)

    # Creation date of the entity and date of its last modification
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Soft-delete flag and the reason why the entity was deleted
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    # JSON-encoded payload (optional)
    data = columns.Text(required=False)

    class Meta:
        get_pk_field = "source"

    def get_data(self):
        """Return the payload decoded from JSON.

        Returns:
            The decoded object, or ``None`` when no payload has been
            stored (``data`` is optional, so it may be unset).
        """
        if self.data is None:
            # json.loads(None) would raise TypeError
            return None
        return json.loads(self.data)

    def set_data(self, json_data):
        """Serialize ``json_data`` to JSON text and store it in ``data``.

        The object is not saved; the caller is responsible for persisting
        the change.

        Args:
            json_data: the object to serialize.
        """
        # ``default`` is a serializer hook defined outside this class,
        # used by json for objects it cannot encode natively.
        self.data = json.dumps(json_data, sort_keys=True, indent=4, default=default)
Ejemplo n.º 3
0
class AllDatatypes(UserType):
    """User-defined type declaring one field per column type listed below."""
    a = columns.Ascii()
    b = columns.BigInt()
    c = columns.Blob()
    d = columns.Boolean()
    e = columns.DateTime()
    f = columns.Decimal()
    g = columns.Double()
    h = columns.Float()
    i = columns.Inet()
    j = columns.Integer()
    k = columns.Text()
    l = columns.TimeUUID()
    m = columns.UUID()
    n = columns.VarInt()
Ejemplo n.º 4
0
 class AllDatatypesModel(Model):
     """Model with an integer primary key and one column per listed type."""
     # The only primary key column of the model
     id = columns.Integer(primary_key=True)
     a = columns.Ascii()
     b = columns.BigInt()
     c = columns.Blob()
     d = columns.Boolean()
     e = columns.DateTime()
     f = columns.Decimal()
     g = columns.Double()
     h = columns.Float()
     i = columns.Inet()
     j = columns.Integer()
     k = columns.Text()
     l = columns.TimeUUID()
     m = columns.UUID()
     n = columns.VarInt()
Ejemplo n.º 5
0
class BaseEntity(CustomDjangoCassandraModel):
    """
    Abstract base model holding the common fields shared by all the
    managed entities.
    """
    # Abstract: the model is only meant to be subclassed
    __abstract__ = True

    # A unique identifier of the entity, generated with uuid4 by default
    _id = columns.UUID(primary_key=True, default=uuid.uuid4)

    # Creation date of the entity and date of its last modification
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Soft-delete flag and the reason why the entity was deleted
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    class Meta:
        get_pk_field = '_id'
Ejemplo n.º 6
0
class BovespaCompany(CustomDjangoCassandraModel):
    """A company listed in B3, identified by its ``ccvm``."""

    __table_name__ = "bovespa_company"

    # Partition key that defaults to the constant "company", so that all
    # the rows reside in the same node of the cluster
    entity_type = columns.Text(partition_key=True, default="company")

    # Identifier of the company in B3
    ccvm = columns.Text(primary_key=True)

    # Creation date of the entity and date of its last modification
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Soft-delete flag and the reason why the entity was deleted
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    company_name = columns.Text(required=True)

    cnpj = columns.Text()

    company_type = columns.Text()

    # Checked against SITUATIONS in validate()
    situation = columns.Text(required=True)

    granted_date = columns.Date()
    canceled_date = columns.Date()

    class Meta:
        get_pk_field = "entity_type"

    def validate(self):
        """Run the base validation, then check ``situation``.

        Raises:
            ValidationError: if ``situation`` is not in SITUATIONS.
        """
        super().validate()

        current = self.situation
        if current not in SITUATIONS:
            template = "Invalid situation [{0}]. Valid situations are: {1}."
            raise ValidationError(template.format(current, SITUATIONS))
Ejemplo n.º 7
0
class DataObject(Model):
    """ The DataObject represents actual data objects, the hierarchy
    references it.

    Each partition key gathers together all the data under one partition (the
    CDMI ID ) and the object properties are represented using static columns
    (one instance per partition)
    It has a similar effect to a join to a properties table, except the
    properties are stored with the rest of the partition

    This is an 'efficient' model optimised for Cassandra's quirks.


    :param uuid: A CDMI uuid (partition key)
    :type uuid: :class:`columns.Text`
    :param sequence_number: This is the 'clustering' key, a data is split in
      several blobs, with the same id and different sequence number.
    :type sequence_number: :class:`columns.Integer`
    :param checksum: A checksum to verify the integrity of the data
    :type checksum: :class:`columns.Text`
    :param size: Total size of the data
    :type size: :class:`columns.BigInt`
    :param blob: The binary bits to store
    :type blob: :class:`columns.Blob`
    :param compressed: An option to compress the data bits
    :type compressed: :class:`columns.Boolean`
    """

    # The 'name' of the object
    uuid = columns.Text(default=default_cdmi_id,
                        required=True,
                        partition_key=True)
    # This is the 'clustering' key
    sequence_number = columns.Integer(primary_key=True, partition_key=False)
    # These columns are shared between all entries with same id (static attributes)
    checksum = columns.Text(static=True)
    size = columns.BigInt(default=0, static=True)
    blob = columns.Blob(required=False)
    compressed = columns.Boolean(default=False)

    @staticmethod
    def _pack(raw_data, compressed):
        """
        Return the bytes to store in a blob for the given raw data

        :param raw_data: the binary bits
        :type raw_data: str
        :param compressed: if True the data is stored as the single member
          "data" of a deflated zip archive, otherwise it is returned as is
        :type compressed: bool

        :return: The bytes to store
        :rtype: str
        """
        if not compressed:
            return raw_data
        with BytesIO() as buffer:
            # The archive has to be closed before reading the buffer, this
            # is when the zip directory is written
            with zipfile.ZipFile(buffer, "w", zipfile.ZIP_DEFLATED) as archive:
                archive.writestr("data", raw_data)
            return buffer.getvalue()

    @classmethod
    def append_chunk(cls, uuid, sequence_number, raw_data, compressed=False):
        """
        Create a new blob for an existing data_object

        :param uuid: A CDMI uuid
        :type uuid: str
        :param sequence_number: The sequence number, this has to be different
        :type sequence_number: int
        :param raw_data: the binary bits
        :type raw_data: str
        :param compressed: An option to compress the data bits
        :type compressed: bool, optional

        :return: The saved Data Object
        :rtype: :class:`radon.model.DataObject`
        """
        data_object = cls(uuid=uuid,
                          sequence_number=sequence_number,
                          blob=cls._pack(raw_data, compressed),
                          compressed=compressed)
        data_object.save()
        return data_object

    def chunk_content(self):
        """
        Yields the content for a generator, one chunk at a time.
        Compressed chunks are uncompressed before being yielded.

        :return: A chunk of data bits
        :rtype: str
        """
        for entry in DataObject.objects.filter(uuid=self.uuid):
            if not entry.compressed:
                yield entry.blob
                continue
            # Compressed chunks hold a zip archive with one "data" member
            with zipfile.ZipFile(BytesIO(entry.blob), "r") as archive:
                content = archive.read("data")
            yield content

    @classmethod
    def create(cls, raw_data, compressed=False):
        """
        Create a Data Object blob with the content passed in parameter

        The object gets a new CDMI id and sequence number 0. The size
        stored is the length of the stored bits (after compression when
        the option is set).

        :param raw_data: The binary bits to store
        :type raw_data: str
        :param compressed: An option to compress the data bits
        :type compressed: bool, optional

        :return: The new Data Object
        :rtype: :class:`radon.model.DataObject`
        """
        new_id = default_cdmi_id()
        data = cls._pack(raw_data, compressed)
        return super(DataObject, cls).create(uuid=new_id,
                                             sequence_number=0,
                                             blob=data,
                                             compressed=compressed,
                                             size=len(data))

    @classmethod
    def delete_id(cls, uuid):
        """
        Delete all blobs for the specified uuid

        :param uuid: A CDMI uuid
        :type uuid: str
        """
        session = connection.get_session()
        keyspace = radon.cfg.dse_keyspace
        session.set_keyspace(keyspace)
        # The uuid is passed as a bound parameter, not formatted in the text
        query = SimpleStatement("""DELETE FROM data_object WHERE uuid=%s""")
        session.execute(query, (uuid, ))

    @classmethod
    def find(cls, uuid):
        """
        Find an object by uuid

        :param uuid: A CDMI uuid
        :type uuid: str

        :return: The first DataObject of the partition corresponding to the
          UUID, None if there isn't any
        :rtype: :class:`radon.model.DataObject`
        """
        # first() already returns None for an empty result, testing the
        # queryset beforehand would execute the query a second time
        return cls.objects.filter(uuid=uuid).first()

    def get_url(self):
        """
        Get the URL of the Data Object that we use as reference in the
        hierarchy

        :return: An URL that informs that the data is in Cassandra + the UUID
        :rtype: str
        """
        return radon.cfg.protocol_cassandra + self.uuid
Ejemplo n.º 8
0
class Notification(Model):
    """Notification Model

    One row is stored for each operation performed on an object and the
    corresponding message is published on MQTT.
    """
    date = columns.Text(default=default_date, partition_key=True)
    when = columns.TimeUUID(primary_key=True,
                            default=default_time,
                            clustering_order="DESC")
    # The type of operation (Create, Delete, Update, Index, Move...)
    operation = columns.Text(primary_key=True)
    # The type of the object concerned (Collection, Resource, User, Group, ...)
    object_type = columns.Text(primary_key=True)
    # The uuid of the object concerned, the key used to find the corresponding
    # object (path, uuid, ...)
    object_uuid = columns.Text(primary_key=True)

    # The user who initiates the operation
    username = columns.Text()
    # True if the corresponding workflow has been executed correctly (for Move
    # or indexing for instance)
    # True if nothing has to be done
    processed = columns.Boolean()
    # The payload of the message which is sent to MQTT
    payload = columns.Text()

    def __unicode__(self):
        # NOTE(review): Python 2 only hook; it relies on an ``html``
        # attribute which is not defined in this class - confirm it exists.
        return unicode(self.html)

    @classmethod
    def _notify(cls, operation, object_type, object_uuid, username, payload):
        """Store a notification marked as processed and publish it on MQTT

        Shared implementation of the create_*/delete_*/update_* methods.

        :param operation: The type of operation (one of the OP_* constants)
        :param object_type: The type of object (one of the OBJ_* constants)
        :param object_uuid: The key of the object concerned (path or uuid)
        :param username: The user who initiates the operation
        :param payload: The payload of the message sent to MQTT

        :return: The new notification
        """
        new = cls.new(operation=operation,
                      object_type=object_type,
                      object_uuid=object_uuid,
                      username=username,
                      processed=True,
                      payload=payload)
        cls.mqtt_publish(new, operation, object_type, object_uuid, payload)
        return new

    @classmethod
    def create_collection(cls, username, path, payload):
        """Create a new collection and publish the message on MQTT"""
        return cls._notify(OP_CREATE, OBJ_COLLECTION, path, username, payload)

    @classmethod
    def create_group(cls, username, uuid, payload):
        """Create a new group and publish the message on MQTT"""
        return cls._notify(OP_CREATE, OBJ_GROUP, uuid, username, payload)

    @classmethod
    def create_resource(cls, username, path, payload):
        """Create a new resource and publish the message on MQTT"""
        return cls._notify(OP_CREATE, OBJ_RESOURCE, path, username, payload)

    @classmethod
    def create_user(cls, username, uuid, payload):
        """Create a new user and publish the message on MQTT"""
        return cls._notify(OP_CREATE, OBJ_USER, uuid, username, payload)

    @classmethod
    def delete_collection(cls, username, path, payload):
        """Delete a collection and publish the message on MQTT"""
        return cls._notify(OP_DELETE, OBJ_COLLECTION, path, username, payload)

    @classmethod
    def delete_group(cls, username, uuid, payload):
        """Delete a group and publish the message on MQTT"""
        return cls._notify(OP_DELETE, OBJ_GROUP, uuid, username, payload)

    @classmethod
    def delete_resource(cls, username, path, payload):
        """Delete a resource and publish the message on MQTT"""
        return cls._notify(OP_DELETE, OBJ_RESOURCE, path, username, payload)

    @classmethod
    def delete_user(cls, username, uuid, payload):
        """Delete a user and publish the message on MQTT"""
        return cls._notify(OP_DELETE, OBJ_USER, uuid, username, payload)

    def tmpl(self):
        """Return the TEMPLATES entry for this operation and object type"""
        return TEMPLATES[self.operation][self.object_type]

    @classmethod
    def mqtt_publish(cls, notification, operation, object_type, object_uuid, payload):
        """Publish the payload on the topic operation/object_type/object_uuid

        If the publication fails the error is logged and the notification is
        updated with processed=False; the exception is not propagated.
        """
        topic = u'{0}/{1}/{2}'.format(operation, object_type, object_uuid)
        # Clean up the topic by removing superfluous slashes.
        topic = '/'.join(filter(None, topic.split('/')))
        # Remove MQTT wildcards from the topic. Corner-case: If the collection name is made entirely of # and + and a
        # script is set to run on such a collection name. But that's what you get if you use stupid names for things.
        topic = topic.replace('#', '').replace('+', '')
        logging.info(u'Publishing on topic "{0}"'.format(topic))
        try:
            publish.single(topic, payload)
        except Exception:
            # Best effort: don't let a broker problem break the caller, but
            # keep the traceback in the log. KeyboardInterrupt and SystemExit
            # are deliberately not caught.
            logging.exception(u'Problem while publishing on topic "{0}"'.format(topic))
            notification.update(processed=False)

    @classmethod
    def new(cls, **kwargs):
        """Create a notification row from the column values in kwargs"""
        new = super(Notification, cls).create(**kwargs)
        return new

    @classmethod
    def recent(cls, count=20):
        """Return the last activities

        :param count: The maximum number of notifications to return
        :type count: int

        :return: A list of dictionaries, as returned by to_dict
        """
        cfg = get_config(None)
        session = connection.get_session()
        keyspace = cfg.get('KEYSPACE', 'indigo')
        session.set_keyspace(keyspace)
        # I couldn't find how to disable paging in cqlengine in the "model"
        # view (filter(date__in=...).order_by("-when").limit(count)), so I
        # create the cql query directly
        days = ",".join(["'%s'" % el for el in last_x_days()])
        # The limit is formatted in the query text, int() makes sure nothing
        # else than a number can be injected
        query = SimpleStatement(u"""SELECT * from Notification WHERE
            date IN ({})
            ORDER BY when DESC
            limit {}""".format(days, int(count)))
        # Disable paging for this query (we use IN and ORDER BY in the same
        # query)
        query.fetch_size = None
        return [Notification(**row).to_dict()
                for row in session.execute(query)]

    def to_dict(self, user=None):
        """Return a dictionary which describes a notification for the web ui

        The user parameter is not used.
        """
        data = {
            'date': self.date,
            'when': self.when,
            'operation': self.operation,
            'object_type': self.object_type,
            'object_uuid': self.object_uuid,
            'username': self.username,
            'tmpl': self.tmpl(),
            'payload': json.loads(self.payload)
        }
        return data

    @classmethod
    def update_collection(cls, username, path, payload):
        """Update a collection and publish the message on MQTT"""
        return cls._notify(OP_UPDATE, OBJ_COLLECTION, path, username, payload)

    @classmethod
    def update_group(cls, username, uuid, payload):
        """Update a group and publish the message on MQTT"""
        return cls._notify(OP_UPDATE, OBJ_GROUP, uuid, username, payload)

    @classmethod
    def update_resource(cls, username, path, payload):
        """Update a resource and publish the message on MQTT"""
        return cls._notify(OP_UPDATE, OBJ_RESOURCE, path, username, payload)

    @classmethod
    def update_user(cls, username, uuid, payload):
        """Update a user and publish the message on MQTT"""
        return cls._notify(OP_UPDATE, OBJ_USER, uuid, username, payload)
Ejemplo n.º 9
0
class BovespaCompanyFile(CustomDjangoCassandraModel):
    """A file delivered by a company for a given fiscal date.

    Rows are partitioned by company (``ccvm``) and clustered by
    ``doc_type``, then by ``fiscal_date`` and ``version`` (both in
    descending order).
    """

    __table_name__ = "bovespa_company_file"

    # Identifier of the company in B3 (partition key)
    ccvm = columns.Text(partition_key=True)

    # The type of document. Checked against DOC_TYPES in validate()
    doc_type = columns.Text(max_length=3, primary_key=True)

    # The fiscal date the file refers to; newest dates come first
    fiscal_date = columns.Date(primary_key=True, clustering_order="DESC")

    # The file version. A company may present several versions of the
    # files for a specific fiscal period; newest versions come first
    version = columns.Text(primary_key=True, clustering_order="DESC")

    # Processing status. Checked against FILE_STATUSES in validate()
    status = columns.Text(default=FILE_STATUS_NOT_PROCESSED)

    # Creation date of the entity and date of its last modification
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Soft-delete flag and the reason why the entity was deleted
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    # The protocol code associated with the file
    protocol = columns.Text(required=True)

    # When the documents were delivered
    delivery_date = columns.DateTime(required=True)

    # Why the files were delivered
    delivery_type = columns.Text(required=True)

    # The official name of the company
    company_name = columns.Text(required=True)

    # The company CNPJ
    company_cnpj = columns.Text(required=True)

    # The fiscal date decomposed into its parts (year, quarter, month...)

    # The year of the balance sheet
    # Ex. 2015
    fiscal_date_y = columns.SmallInt()

    # The day of the year of the balance sheet
    fiscal_date_yd = columns.SmallInt()

    # The quarter of the balance sheet
    # Ex. 1
    fiscal_date_q = columns.SmallInt()

    # The month of the balance sheet
    # Ex. 1
    fiscal_date_m = columns.SmallInt()

    # The day of the month of the balance sheet
    # Ex. 1
    fiscal_date_md = columns.SmallInt()

    # The week of the year
    # Ex. 1
    fiscal_date_w = columns.SmallInt()

    # The day of the week
    # Ex. 1
    fiscal_date_wd = columns.SmallInt()

    # Combination of YEAR-QUARTER in the form of 2018-Q1
    # It allows us to facet results per quarter
    fiscal_date_yq = columns.Text()

    # Combination of YEAR-MONTH in the form of 2018-01
    # It allows us to facet results per month
    fiscal_date_ym = columns.Text()

    # The url of the file that contains the information in bovespa. This
    # is the url used to download the file from the source
    source_url = columns.Text(required=True)

    # The url of the file in a repository of our own. The file has already
    # been downloaded and persisted there, so there is no need to access
    # the source again
    file_url = columns.Text()

    # The internal name of the file
    file_name = columns.Text()

    # The extension of the filename
    file_extension = columns.Text()

    # Each key represents the name of a file in the ENER archive.
    # The value is the original content converted into JSON - when
    # possible - and persisted as Text
    # content = KeyEncodedMap(
    #    key_type=columns.Text, value_type=columns.Text)

    class Meta:
        get_pk_field = "ccvm"

    def validate(self):
        """Run the base validation, then check the enumerated fields.

        Raises:
            ValidationError: if ``doc_type`` is not in DOC_TYPES or
                ``status`` is not in FILE_STATUSES.
        """
        super().validate()

        kind = self.doc_type
        if kind not in DOC_TYPES:
            template = "Invalid doc type [{0}]. Valid types are: {1}."
            raise ValidationError(template.format(kind, DOC_TYPES))

        current_status = self.status
        if current_status not in FILE_STATUSES:
            template = "Invalid file status [{0}]. Valid statuses are: {1}."
            raise ValidationError(
                template.format(current_status, FILE_STATUSES))
class {{ app_name | capfirst }}Resource(CustomDjangoCassandraModel):
    # Template of a model: the "{{ ... }}" placeholders in the class name
    # and in the table name are filled in from ``app_name`` when the
    # template is rendered.

    __table_name__ = "{{ app_name | lower }}_resource"

    # A unique identifier of the entity (partition key), generated with
    # uuid4 by default
    _id = columns.UUID(partition_key=True, default=uuid.uuid4)

    # The owner of the data, i.e. who owns the persisted data
    user = columns.Text(primary_key=True)

    # Creation date of the entity and date of its last modification
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Soft-delete flag and the reason why the entity was deleted
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    crawl_param = columns.Integer(required=True)

    name = columns.Text(required=True)

    situation = columns.Text(required=True)

    short_description = columns.Text()

    long_description = columns.Text()

    # The date when the company was founded
    foundation_date = columns.Date()
Ejemplo n.º 11
0
class Company(CustomDjangoCassandraModel):
    """
    A publicly traded company
    """
    __table_name__ = "company"

    # A unique identifier of the entity (partition key), generated with
    # uuid4 by default
    _id = columns.UUID(partition_key=True, default=uuid.uuid4)

    # The owner of the data, i.e. who owns the persisted company data
    user = columns.Text(primary_key=True)

    # Creation date of the entity and date of its last modification
    created_at = columns.DateTime(default=datetime.utcnow)
    updated_at = columns.DateTime(default=datetime.utcnow)

    # Soft-delete flag and the reason why the entity was deleted
    is_deleted = columns.Boolean(default=False)
    deleted_reason = columns.Text()

    # The name of the company. The value "test" is rejected by validate()
    name = columns.Text(required=True)

    # A short description of the company
    short_description = columns.Text()

    # The company domain (e.g. preseries.com)
    domain = columns.Text(max_length=50)

    # The date when the company was founded
    foundation_date = columns.Date()

    # The date of the latest funding round
    last_round = columns.Date()

    # NOTE(review): this was documented as "the total number of funding
    # rounds", but the column holds free text - confirm the intent
    round_notes = columns.Text()

    # Country of the company
    # ISO 3166-1 alpha 3 code
    country_code = columns.Text(min_length=3, max_length=3)

    # The stock trading symbol
    stock_symbol = columns.Text()

    # Contact email of the company
    contact_email = columns.Text()

    # The IDs of the founders of the company
    founders = columns.List(value_type=columns.UUID)

    # Address of the headquarters of the company
    address = UserDefinedType(Address)

    # A list of specialties of the company
    specialties = columns.List(value_type=columns.Text)

    # The counters of the latest followers in twitter
    #  (example of a list of integers)
    latest_twitter_followers = columns.List(value_type=columns.Integer)

    # A field that represents a key-value map.
    # We use the caravaggio KeyEncodedMap, which appends the field name
    # to each of the keys in order to make them indexable by the
    # Search Indexer.
    websites = KeyEncodedMap(key_type=columns.Text, value_type=columns.Text)

    # A raw JSON with the crawler configurations; each key is a reference
    # to a crawler
    crawler_config = columns.Text()

    # A raw JSON content
    extra_data = columns.Text()

    latitude = columns.Float()
    longitude = columns.Float()

    coordinates = columns.Text()

    class Meta:
        get_pk_field = '_id'

    def validate(self):
        """Run the base validation, then reject the reserved name.

        Raises:
            ValidationError: if ``name`` is exactly "test".
        """
        super().validate()
        forbidden_name = "test"
        if self.name == forbidden_name:
            raise ValidationError('The company name cannot be test')
Ejemplo n.º 12
0
class Task(CustomDjangoCassandraModel):
    """
    Represents a task, which can be an on demand task or a batch task.

    Args:
        task_id: the task id, which is the unique partition key.
        user: the user that asked for the task, if it is an on demand task.
        created_at: the date of the creation of the task.
        updated_at: the date of the last update of the task.
        is_deleted: controls if the data is deleted.
        status: represents the current status of the task, one of:
            - 0 (Created)
            - 1 (Queued)
            - 2 (In Progress)
            - 3 (Finished)
            - 4 (Faulty)
            - 5 (Unknown)
        kind: the name of the crawler that will execute the task.
        params: the set of params used to execute the crawler command,
        saved as Text.
        params_map: exactly the same content as `params` but saved in a
        way that can be searched using solr (KeyEncodedMap).
        options: the set of options used to guide the crawler during the
        execution, saved as Text.
        options_map: exactly the same content as `options` but saved in a
        way that can be searched using solr (KeyEncodedMap).
        times_performed: keeps track of how many times the task was run.
        type: the type of the task, either OnDemand(1) or Batch(2)
    """

    __table_name__ = "davinci_task"
    # Read from a single replica, write to all of them
    _cassandra_consistency_level_read = ConsistencyLevel.ONE
    _cassandra_consistency_level_write = ConsistencyLevel.ALL

    # Unique identifier of the task (partition key), generated with uuid4
    # by default
    task_id = columns.UUID(partition_key=True, default=uuid.uuid4)

    # The user that asked for the task
    user = columns.Text()

    # Creation date of the task and date of its last modification. The
    # creation date is the clustering key, newest first
    created_at = columns.DateTime(default=timezone.now,
                                  primary_key=True,
                                  clustering_order="DESC")
    updated_at = columns.DateTime(default=timezone.now)

    # Controls if the entity is active or has been deleted
    is_deleted = columns.Boolean(default=False)

    # Checked against ALL_STATUS in validate()
    status = columns.SmallInt(default=STATUS_CREATED)

    kind = columns.Text(required=True)

    params_map = KeyEncodedMap(key_type=columns.Text, value_type=columns.Text)

    params = columns.Text(required=True)

    options_map = KeyEncodedMap(key_type=columns.Text, value_type=columns.Text)

    options = columns.Text(required=False)

    times_performed = columns.SmallInt(default=0)

    # Checked against ALL_TASK_TYPES in validate()
    type = columns.SmallInt(default=ON_DEMAND_TASK)

    more_info = columns.List(value_type=UserDefinedType(TaskMoreInfo))

    differences_from_last_version = columns.Text()

    inserted_fields = columns.List(value_type=columns.Text)

    updated_fields = columns.List(value_type=columns.Text)

    deleted_fields = columns.List(value_type=columns.Text)

    changed_fields = columns.List(value_type=columns.Text)

    logging_task = columns.Boolean(default=False)

    class Meta:
        get_pk_field = "task_id"

    def validate(self):
        """Run the base validation, then check the enumerated fields.

        Raises:
            ValidationError: if ``type`` is not in ALL_TASK_TYPES or
                ``status`` is not in ALL_STATUS.
        """
        super().validate()

        task_type = self.type
        if task_type not in ALL_TASK_TYPES:
            template = ("Invalid task type [{0}]. Valid types are: "
                        "{1}.")
            raise ValidationError(template.format(task_type, ALL_TASK_TYPES))

        task_status = self.status
        if task_status not in ALL_STATUS:
            template = ("Invalid task status [{0}]. Valid status are: "
                        "{1}.")
            raise ValidationError(template.format(task_status, ALL_STATUS))
class TestModel(Model):
    """Small model with a generated UUID key and three plain columns."""

    # uuid4 is used as the default factory, so each row gets a new key
    id = columns.UUID(primary_key=True, default=uuid4)
    count = columns.Integer()
    text = columns.Text(required=False)
    a_bool = columns.Boolean(default=False)
Ejemplo n.º 14
0
class User(Model):
    """User Model

    This is used to store a Radon user. Rows are keyed by the user name.

    :param uuid: A uuid associated to the user
    :type uuid: :class:`columns.Text`
    :param name: The user name, used as the primary key
    :type name: :class:`columns.Text`
    :param email: The user email
    :type email: :class:`columns.Text`
    :param password: The user password, stored hashed
    :type password: :class:`columns.Text`
    :param administrator: A boolean if the user has admin access
    :type administrator: :class:`columns.Boolean`
    :param active: A boolean if the user is active or not
    :type active: :class:`columns.Boolean`
    :param ldap: A boolean if the user password has to be checked on a
      LDAP server
    :type ldap: :class:`columns.Boolean`
    :param groups: A list of group names
    :type groups: :class:`columns.List`
    """

    # Filled in by default_uuid when no value is given
    uuid = columns.Text(default=default_uuid)
    # The user name is the primary key of the table
    name = columns.Text(primary_key=True, required=True)
    email = columns.Text(required=True)
    # Never the plain password: create() and update() encrypt it first
    password = columns.Text(required=True)
    administrator = columns.Boolean(required=True, default=False)
    active = columns.Boolean(required=True, default=True)
    ldap = columns.Boolean(required=True, default=False)
    # index=True: presumably so users can be queried by group — TODO confirm
    groups = columns.List(columns.Text, index=True)

    def add_group(self, groupname, username=None):
        """
        Add the user to a single group.

        Thin wrapper around :meth:`add_groups` for the one-group case.

        :param groupname: The name of the group to join
        :type groupname: str
        :param username: the name of the user who made the action
        :type username: str, optional
        """
        single_group = [groupname]
        self.add_groups(single_group, username)

    def add_groups(self, ls_group, username=None):
        """
        Add the user to several groups at once.

        The current groups and the new ones are merged, de-duplicated and
        saved through :meth:`update`.

        :param ls_group: The names of the groups to join
        :type ls_group: List[str]
        :param username: the name of the user who made the action
        :type username: str, optional
        """
        # Going through a set drops any group that appears more than once
        merged = set(self.get_groups() + ls_group)
        self.update(groups=list(merged), username=username)

    def authenticate(self, password):
        """
        Check a plain password for this user.

        Inactive users are always rejected. LDAP users are checked with
        ``verify_ldap_password``, all others against the stored hash with
        ``verify_password``.

        :param password: the password we want to test (plain)
        :type password: str

        :return: a boolean which indicates if the password is correct
        :rtype: bool
        """
        if not self.active:
            return False
        if self.ldap:
            return verify_ldap_password(self.name, password)
        return verify_password(password, self.password)

    @classmethod
    def create(cls, **kwargs):
        """Create a user

        The call is intercepted so the password is encrypted before it is
        stored, and so a notification is sent once the user exists.

        :param name: the name of the user
        :type name: str
        :param password: The plain password to encrypt
        :type password: str
        :param username: the name of the user who made the action, defaults
          to ``radon.cfg.sys_lib_user``
        :type username: str, optional

        :return: The new created user
        :rtype: :class:`radon.model.User`

        :raises UserConflictError: if a user with this name already exists
        """
        # username identifies who initiated the call; it is not a column, so
        # it must not be forwarded to Cassandra
        if "username" in kwargs:
            username = kwargs.pop("username")
        else:
            username = radon.cfg.sys_lib_user
        kwargs["password"] = encrypt_password(kwargs["password"])

        name = kwargs["name"]
        if cls.objects.filter(name=name).count():
            raise UserConflictError(name)

        user = super(User, cls).create(**kwargs)

        # A creation has no previous state, hence the empty "pre" dictionary
        payload = user.mqtt_payload({}, user.mqtt_get_state())
        Notification.create_user(username, user.name, payload)
        return user

    def delete(self, username=None):
        """
        Delete the user in the database and send a notification.

        :param username: the name of the user who made the action
        :type username: str, optional
        """
        # The state is captured before the row is removed
        pre_state = self.mqtt_get_state()
        super(User, self).delete()
        # username identifies who did the operation, self.name is the user
        # that has just been deleted; a deletion has no "post" state
        Notification.delete_user(
            username, self.name, self.mqtt_payload(pre_state, {})
        )

    @classmethod
    def find(cls, name):
        """
        Look a user up by name.

        :param name: the name of the user
        :type name: str

        :return: The first user matching the name
        :rtype: :class:`radon.model.User`
        """
        matching = cls.objects.filter(name=name)
        return matching.first()

    def get_groups(self):
        """
        Give the names of the groups the user belongs to.

        :return: The content of the ``groups`` column
        :rtype: List[str]
        """
        group_names = self.groups
        return group_names

    def is_active(self):
        """
        Tell whether the user account is active.

        :return: The value of the ``active`` column
        :rtype: bool
        """
        active_flag = self.active
        return active_flag

    def is_authenticated(self):
        """
        Tell whether the user is authenticated.

        :return: Always ``True``
        :rtype: bool
        """
        authenticated = True
        return authenticated

    def mqtt_get_state(self):
        """
        Build the snapshot of the user that goes into notification payloads.

        The snapshot holds the uuid, name, email and active flag of the user,
        plus the names of the groups returned by ``Group.find_all``.

        :return: The user state as a dictionary
        :rtype: dict
        """
        return {
            "uuid": self.uuid,
            "name": self.name,
            "email": self.email,
            "active": self.active,
            "groups": [group.name for group in Group.find_all(self.groups)],
        }

    def mqtt_payload(self, pre_state, post_state):
        """
        Serialize the states before and after a modification as one JSON
        string, under the keys ``"pre"`` and ``"post"``.

        :param pre_state: The dictionary which describes the state of the user
          before a modification
        :type pre_state: dict
        :param post_state: The dictionary which describes the state of the user
          after a modification
        :type post_state: dict

        :return: The payload as a JSON string
        :rtype: str
        """
        # datetime_serializer handles the values json cannot encode itself
        return json.dumps(
            {"pre": pre_state, "post": post_state},
            default=datetime_serializer,
        )

    def rm_group(self, groupname, username=None):
        """
        Remove the user from a group.

        Thin wrapper around :meth:`rm_groups` for the one-group case.

        :param groupname: The group to be removed from
        :type groupname: str
        :param username: the name of the user who made the action
        :type username: str, optional
        """
        # Forward username so the update notification is attributed to the
        # caller, as add_group does
        self.rm_groups([groupname], username)

    def rm_groups(self, ls_group, username=None):
        """
        Remove the user from several groups at once.

        Names in ``ls_group`` the user does not belong to are ignored.

        :param ls_group: The names of the groups to leave
        :type ls_group: List[str]
        :param username: the name of the user who made the action
        :type username: str, optional
        """
        current = set(self.get_groups())
        remaining = current - set(ls_group)
        # remove duplicate
        self.update(groups=list(set(remaining)), username=username)

    def to_dict(self):
        """
        Return a dictionary which describes the user for the web ui

        :return: The dictionary with the information needed for the UI
        :rtype: dict
        """
        plain_fields = ("uuid", "name", "email", "administrator", "active",
                        "ldap")
        description = {}
        for field in plain_fields:
            description[field] = getattr(self, field)
        # Groups are expanded into their own dictionaries, not just names
        description["groups"] = [
            group.to_dict() for group in Group.find_all(self.groups)
        ]
        return description

    def update(self, **kwargs):
        """
        Update a user. The call is intercepted to encrypt the password when
        it is modified and to send a notification with the states before and
        after the change.

        :param username: the name of the user who made the action
        :type username: str, optional
        :param password: The plain password to encrypt
        :type password: str

        :return: The modified user
        :rtype: :class:`radon.model.User`
        """
        pre_state = self.mqtt_get_state()

        # The password must be encrypted before it reaches the database
        if "password" in kwargs:
            kwargs["password"] = encrypt_password(kwargs["password"])

        # username is not a column, it is stripped before the Cassandra call
        username = kwargs.pop("username", None)

        super(User, self).update(**kwargs)

        # The user is read back so the "post" state reflects what is stored
        refreshed = User.find(self.name)
        payload = refreshed.mqtt_payload(pre_state, refreshed.mqtt_get_state())
        Notification.update_user(username, refreshed.name, payload)
        return self
# Example no. 15
# 0
class TreeNode(Model):
    """TreeNode model

    This is used to store the hierarchy in Cassandra, Collections or Data
    Objects.

    container is the partition key and name is the first clustering column;
    together they form the path of the element in the hierarchy. Collections
    ends with a '/' like in the CDMI standard. That way all the children of a
    collection are stored in the same partition.
    version is the last part of the primary key so we can keep several versions
    of the hierarchy.


    :param container: The parent path of the object/collection
    :type container: :class:`columns.Text`
    :param name: The name of the object/collection. Collections ends with '/'
    :type name: :class:`columns.Text`
    :param version: The version of the object/collection
    :type version: :class:`columns.Integer`
    :param uuid: A CDMI uuid
    :type uuid: :class:`columns.Text`
    :param is_object: A boolean to simplify the test
    :type is_object: :class:`columns.Boolean`
    :param object_url: For data object the url to the content of the object.
       NOTE(review): this docstring used to mention a 'cassandra://' prefix
       while the comment on the column mentions 'radon://' for internal
       objects; confirm which one is current (See
       :class:`radon.model.DataObject`)
    :type object_url: :class:`columns.Text()`
    :param sys_meta: A Key/Value pair dictionary for system metadata
    :type sys_meta: :class:`columns.Map(columns.Text, columns.Text)`
    :param user_meta: A Key/Value pair dictionary for user metadata. Values are
       stored in JSON
    :type user_meta: :class:`columns.Map(columns.Text, columns.Text)`
    :param acl:  A Key/Value pair dictionary for ACL, a group name and the
      associated ACE
    :type acl: :class:`columns.Map(columns.Text, columns.UserDefinedType(Ace))`
    """

    # Partitioned by container, clustered by name, so all files for a directory
    # are in the same partition
    container = columns.Text(partition_key=True)
    name = columns.Text(primary_key=True, partition_key=False)
    # Second clustering column, sorted in descending order
    version = columns.Integer(primary_key=True,
                              partition_key=False,
                              default=0,
                              clustering_order="DESC")
    # UUID are not indexed
    uuid = columns.Text(default=default_cdmi_id)
    is_object = columns.Boolean(default=False)

    # URL to a data object if the Tree node is not a container
    # (radon:// for internal objects or anything else for a reference, we do not
    # restrict the syntax of the URL yet, it's up to the client to manage the
    # different URL stored in Cassandra)
    object_url = columns.Text()

    sys_meta = columns.Map(columns.Text, columns.Text)
    user_meta = columns.Map(columns.Text, columns.Text)
    # Maps a group name to its access control entry
    acl = columns.Map(columns.Text, columns.UserDefinedType(Ace))

    def add_default_acl(self):
        """Give read access to ``AUTHENTICATED@`` and write access to nobody.

        This goes through :meth:`create_acl_list`, so the stored ACL is
        replaced, not extended.
        """
        readers = ["AUTHENTICATED@"]
        writers = []
        self.create_acl_list(readers, writers)

    def create_acl(self, acl_cql):
        """
        Overwrite the acl of this node with the given cql string

        :param acl_cql: The acl string to put in Cassandra, can be easily
          generated in :meth:`radon.model.acl.acl_list_to_cql`
        :type acl_cql: str
        """
        cql_session = connection.get_session()
        cql_session.set_keyspace(radon.cfg.dse_keyspace)
        # acl_cql is spliced into the statement text as is; only the primary
        # key values are passed as bound parameters
        statement_text = (
            u"UPDATE tree_node SET acl={} \n"
            u"            WHERE container=%s and name=%s and version=%s"
        ).format(acl_cql)
        primary_key = (self.container, self.name, self.version)
        cql_session.execute(SimpleStatement(statement_text), primary_key)

    def create_acl_list(self, read_access, write_access):
        """
        Replace the ACL with one built from two lists of group names

        :param read_access: A list of group names which have read access
        :type read_access: List[str]
        :param write_access: A list of group names which have write access
        :type write_access: List[str]
        """
        self.create_acl(acl_list_to_cql(read_access, write_access))

    def path(self):
        """
        Build the full path of the element from its container and its name.
        See :meth:`radon.util.merge`

        :return: The merged path
        :rtype: str
        """
        parent, leaf = self.container, self.name
        return merge(parent, leaf)

    def update_acl(self, acl_cql):
        """
        Add the entries of the given cql string to the acl of this node
        (``acl=acl+...``); the entries already stored are kept.

        :param acl_cql: The acl string to put in Cassandra, can be easily
          generated in :meth:`radon.model.acl.acl_list_to_cql`
        :type acl_cql: str
        """
        cql_session = connection.get_session()
        cql_session.set_keyspace(radon.cfg.dse_keyspace)
        # acl_cql is spliced into the statement text as is; only the primary
        # key values are passed as bound parameters
        statement_text = (
            u"UPDATE tree_node SET acl=acl+{} \n"
            u"            WHERE container=%s and name=%s and version=%s"
        ).format(acl_cql)
        primary_key = (self.container, self.name, self.version)
        cql_session.execute(SimpleStatement(statement_text), primary_key)
# Example no. 16
# 0
class DataObject(Model):
    """ The DataObject represents actual data objects, the tree structure
    merely references it.

    Each partition key gathers together all the data under one partition (the
    CDMI ID ) and the object properties are represented using static columns
    (one instance per partition)
    It has a similar effect to a join to a properties table, except the
    properties are stored with the rest of the partition

    This is an 'efficient' model optimised for Cassandra's quirks.

    N.B. by default Cassandra compresses its data, so we get that for free.
    NOTE(review): this note used to name LZW as the algorithm; confirm it
    against the Cassandra version in use."""
    # The 'name' of the object: the partition key, filled in by
    # default_cdmi_id when no value is given
    uuid = columns.Text(default=default_cdmi_id, required=True,
                        partition_key=True)
    #####################
    # These columns are the same (shared) between all entries with same id
    # (they use the static attribute , [ like an inode or a header ])
    #####################
    checksum = columns.Text(static=True)
    size = columns.BigInt(default=0, static=True)
    metadata = columns.Map(columns.Text, columns.Text, static=True)
    mimetype = columns.Text(static=True)
    alt_url = columns.Set(columns.Text, static=True)
    # Both timestamps default to datetime.now
    create_ts = columns.DateTime(default=datetime.now, static=True)
    modified_ts = columns.DateTime(default=datetime.now, static=True)
    type = columns.Text(required=False, static=True, default='UNKNOWN')
    acl = columns.Map(columns.Text, columns.UserDefinedType(Ace), static=True)
    # A general aid to integrity ...
    treepath = columns.Text(static=True, required=False)
    #####################
    # And 'clever' bit -- 'here' data, These will be the only per-record-fields
    # in the partition (i.e. object)
    # So the datastructure looks like a header , with an ordered list of blobs
    #####################
    # This is the 'clustering' key...
    sequence_number = columns.Integer(primary_key=True, partition_key=False)
    blob = columns.Blob(required=False)
    # True when blob holds a zip archive with a single "data" member
    # (see append_chunk and create)
    compressed = columns.Boolean(default=False)
    #####################

    @classmethod
    def append_chunk(cls, uuid, raw_data, sequence_number, compressed=False):
        """Create a new blob for an existing data_object

        :param uuid: id of the data object the chunk belongs to
        :param raw_data: content of the chunk
        :param sequence_number: position of the chunk within the object
        :param compressed: when True the content is stored as a zip archive
          holding a single member named "data"

        :return: the saved chunk
        """
        if compressed:
            # A zip archive is binary, so it needs a bytes buffer: BytesIO
            # works on both Python 2 and 3, a text StringIO only on Python 2.
            # Imported here so the method does not rely on a new file-level
            # import.
            import io

            # The with blocks close the archive and the buffer even when
            # writing fails
            with io.BytesIO() as buf:
                with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as archive:
                    archive.writestr("data", raw_data)
                # The archive must be closed before the bytes are read, that
                # is when the zip directory is written
                data = buf.getvalue()
        else:
            data = raw_data
        data_object = cls(uuid=uuid,
                          sequence_number=sequence_number,
                          blob=data,
                          compressed=compressed)
        data_object.save()
        return data_object


    def chunk_content(self):
        """
        Yield the content of the data object, one stored chunk at a time.

        Chunks flagged as compressed are unzipped first. Only the content is
        yielded; callers that need the size can take ``len()`` of each chunk.
        """
        # A zip archive is binary, so it is read through a bytes buffer:
        # BytesIO works on both Python 2 and 3, a text StringIO only on
        # Python 2. Imported here so the method does not rely on a new
        # file-level import.
        import io

        for entry in DataObject.objects.filter(uuid=self.uuid):
            if not entry.compressed:
                yield entry.blob
                continue
            # Archive and buffer are closed before yielding, so nothing stays
            # open while the generator is suspended
            with io.BytesIO(entry.blob) as buf:
                with zipfile.ZipFile(buf, "r") as archive:
                    content = archive.read("data")
            yield content


    @classmethod
    def create(cls, raw_data, compressed=False, metadata=None, create_ts=None, acl=None):
        """Create a data object with a new id and store raw_data as its first
        chunk (sequence number 0).

        :param raw_data: initial data
        :param compressed: when True the data is stored as a zip archive
          holding a single member named "data"
        :param metadata: optional metadata, left unset when empty
        :param create_ts: optional creation time, defaults to now
        :param acl: optional acl, left unset when empty

        :return: the created data object
        """
        new_id = default_cdmi_id()
        now = datetime.now()
        if compressed:
            # A zip archive is binary, so it needs a bytes buffer: BytesIO
            # works on both Python 2 and 3, a text StringIO only on Python 2.
            # Imported here so the method does not rely on a new file-level
            # import.
            import io

            # The with blocks close the archive and the buffer even when
            # writing fails
            with io.BytesIO() as buf:
                with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as archive:
                    archive.writestr("data", raw_data)
                # The archive must be closed before the bytes are read, that
                # is when the zip directory is written
                data = buf.getvalue()
        else:
            data = raw_data

        kwargs = {
            "uuid": new_id,
            "sequence_number": 0,
            "blob": data,
            "compressed": compressed,
            "modified_ts": now,
            "create_ts": create_ts if create_ts else now,
        }
        # Empty values are left out so the columns stay unset
        if metadata:
            kwargs['metadata'] = metadata
        if acl:
            kwargs['acl'] = acl
        return super(DataObject, cls).create(**kwargs)


    def create_acl(self, acl_cql):
        """Overwrite the static acl of this object with the given cql string.

        acl_cql is inserted in the statement text as is; only the uuid is
        passed as a bound parameter.
        """
        config = get_config(None)
        cql_session = connection.get_session()
        cql_session.set_keyspace(config.get('KEYSPACE', 'indigo'))
        template = u"""UPDATE data_object SET acl = {}
            WHERE uuid=%s"""
        statement = SimpleStatement(template.format(acl_cql))
        cql_session.execute(statement, (self.uuid,))


    def create_acl_cdmi(self, cdmi_acl):
        """Replace the ACL with one built from a cdmi object (list of dict)"""
        self.create_acl(acl_cdmi_to_cql(cdmi_acl))


    def create_acl_list(self, read_access, write_access):
        """Replace the ACL with one built from two lists of groups id"""
        self.create_acl(acl_list_to_cql(read_access, write_access))


    @classmethod
    def delete_id(cls, uuid):
        """Delete the whole partition (every blob) of the specified uuid"""
        config = get_config(None)
        cql_session = connection.get_session()
        cql_session.set_keyspace(config.get('KEYSPACE', 'indigo'))
        statement = SimpleStatement("""DELETE FROM data_object WHERE uuid=%s""")
        cql_session.execute(statement, (uuid,))


    @classmethod
    def find(cls, uuid):
        """Find an object by uuid

        :param uuid: id of the data object

        :return: the first entry stored for this uuid, or None when there is
          none
        """
        # first() already returns None for an empty result. Testing the
        # queryset for emptiness beforehand would evaluate it in full, every
        # blob chunk included, just to pick the first row.
        return cls.objects.filter(uuid=uuid).first()


    def update(self, **kwargs):
        """Update a data object

        One UPDATE statement is executed per keyword argument. Only the
        database is written: the attributes of this instance are left as they
        were.

        :param kwargs: column name / new value pairs. The column name is
          inserted in the statement text as is, so it must come from trusted
          code; the value is passed as a bound parameter.

        :return: this instance
        """
        cfg = get_config(None)
        session = connection.get_session()
        keyspace = cfg.get('KEYSPACE', 'indigo')
        session.set_keyspace(keyspace)
        for arg, value in kwargs.items():
            # For static fields we can't use the name in the where condition
            if arg in static_fields:
                query = SimpleStatement("""UPDATE data_object SET {}=%s
                    WHERE uuid=%s""".format(arg))
                session.execute(query, (value, self.uuid))
            else:
                query = SimpleStatement("""UPDATE data_object SET {}=%s
                    WHERE uuid=%s and sequence_number=%s""".format(arg))
                session.execute(query,
                                (value, self.uuid, self.sequence_number))
        return self


    def update_acl(self, acl_cql):
        """Add the entries of the given cql string to the static acl
        (``acl = acl + ...``); the entries already stored are kept.

        acl_cql is inserted in the statement text as is; only the uuid is
        passed as a bound parameter.
        """
        config = get_config(None)
        cql_session = connection.get_session()
        cql_session.set_keyspace(config.get('KEYSPACE', 'indigo'))
        template = u"""UPDATE data_object SET acl = acl + {}
            WHERE uuid=%s"""
        statement = SimpleStatement(template.format(acl_cql))
        cql_session.execute(statement, (self.uuid,))


    def update_acl_cdmi(self, cdmi_acl):
        """Add to the entry ACL from a cdmi object (list of dict)"""
        self.update_acl(acl_cdmi_to_cql(cdmi_acl))


    def update_acl_list(self, read_access, write_access):
        """Add to the ACL from two lists of groups id. The new entries are
        merged into the stored ACL by :meth:`update_acl`."""
        self.update_acl(acl_list_to_cql(read_access, write_access))