Beispiel #1
0
 def _build_value_tables(self):
     self.value_continuous = Table(
         "value_continuous",
         self.metadata,
         Column("person_id", ForeignKey("person.id")),
         Column("measure_id", ForeignKey("measure.id")),
         Column("value", Float(), nullable=False),
         PrimaryKeyConstraint("person_id", "measure_id"),
     )
     self.value_ordinal = Table(
         "value_ordinal",
         self.metadata,
         Column("person_id", ForeignKey("person.id")),
         Column("measure_id", ForeignKey("measure.id")),
         Column("value", Float(), nullable=False),
         PrimaryKeyConstraint("person_id", "measure_id"),
     )
     self.value_categorical = Table(
         "value_categorical",
         self.metadata,
         Column("person_id", ForeignKey("person.id")),
         Column("measure_id", ForeignKey("measure.id")),
         Column("value", String(127), nullable=False),
         PrimaryKeyConstraint("person_id", "measure_id"),
     )
     self.value_other = Table(
         "value_other",
         self.metadata,
         Column("person_id", ForeignKey("person.id")),
         Column("measure_id", ForeignKey("measure.id")),
         Column("value", String(127), nullable=False),
         PrimaryKeyConstraint("person_id", "measure_id"),
     )
class CovidObservation(Base):
    __tablename__ = "covid_observations"
    dt = Column(Date)
    location_id = Column(String, FKCascade(Location.id))
    variable_id = Column(Integer, FKCascade(CovidVariable.id))
    demographic_id = Column(Integer, FKCascade(CovidDemographic.id))
    provider_id = Column(Integer, FKCascade(CovidProvider.id))
    value = Column(Numeric)
    last_updated = Column(DateTime, nullable=False, default=func.now())
    source_url = Column(String)
    source_name = Column(String)
    deleted = Column(Boolean, nullable=False, default=False)
    delete_batch_id = Column(BigInteger)
    __table_args__ = (
        PrimaryKeyConstraint(
            "dt",
            "location_id",
            "variable_id",
            "demographic_id",
            "provider_id",
            sqlite_on_conflict="REPLACE",
        ),
        {
            "schema": "data"
        },
    )
Beispiel #3
0
class Temp_table(declarative_base()):
    __tablename__ = 'test_table'
    __table_args__ = (PrimaryKeyConstraint('country', 'vehicle_id',
                                           'licence'), )
    country = Column(String(4))
    vehicle_id = Column(String(255))
    licence = Column(String(255))
Beispiel #4
0
class External(DBBASE):
    """This class serialises transcript ids, source of the score, and score itself."""

    __tablename__ = "external"

    query_id = Column(Integer, ForeignKey(Query.query_id), unique=False)
    source_id = Column(Integer,
                       ForeignKey(ExternalSource.source_id),
                       unique=False)
    ext_constraint = PrimaryKeyConstraint("query_id",
                                          "source_id",
                                          name="source_key")
    source = column_property(
        select([ExternalSource.source
                ]).where(ExternalSource.source_id == source_id))
    score = Column(Float)

    query = column_property(
        select([Query.query_name]).where(Query.query_id == query_id))

    __table_args__ = (ext_constraint, )

    def __init__(self, query_id, source_id, score):

        self.query_id = query_id
        self.source_id = source_id
        self.score = score
Beispiel #5
0
class Followers(db.Model):
    __tablename__ = 'followers'
    __table_args__ = (PrimaryKeyConstraint('follower_id', 'followed_id'), )
    follower_id = db.Column('follower_id', db.Integer,
                            db.ForeignKey('users.id'))
    followed_id = db.Column('followed_id', db.Integer,
                            db.ForeignKey('users.id'))
Beispiel #6
0
class Birdstock(db.Model):
    __tablename__ = 'bird_stock'
    # Shed number
    date_created = db.Column(db.DateTime, default=db.func.current_timestamp())
    bshed = db.Column(db.Integer,
                      db.ForeignKey('shed_info.shed'),
                      nullable=False)
    age = db.Column(db.Float, nullable=False)
    btype = db.Column(db.String(128), nullable=False)
    quantity = db.Column(db.Integer, nullable=False)

    # Relastionships
    bsales = db.relationship('Birdsales', backref='bstock', single_parent=True)

    __table_args__ = (
        PrimaryKeyConstraint('bshed', 'age', 'btype'),
        {},
    )

    def __init__(self, shed, age, btype, quantity):
        # shed belongs to the relationship backref varuable in shed_info
        self.shed = shed
        self.age = age
        self.btype = btype
        self.quantity = quantity
Beispiel #7
0
    def _table(self, table_name):
        """Retrieves, reflects, and caches table objects

        Actual implementation of __getitem__
        """
        try:
            return self._table_cache[table_name]
        except KeyError:
            self.reflect_table(table_name)
            table = self.metadata.tables[table_name]
            table_dict = {'__table__': table, '__tablename__': table_name}

            # easy workaround for views which don't have primary key
            # this has to be replaced with method to recognize table primary key in future
            # if it isn't enough

            pk = table.primary_key
            if isinstance(pk, PrimaryKeyConstraint) and len(pk.columns) == 0:
                id_column = table.c.get('id')
                if id_column is not None:
                    table.primary_key = PrimaryKeyConstraint(id_column)
            try:
                table_cls = type(str(table_name), (self.table_base, ),
                                 table_dict)
                self._table_cache[table_name] = table_cls
                return table_cls
            except ArgumentError:
                # This usually happens on join tables with no PKs
                logger.info(
                    'Unable to create table class for table "{}"'.format(
                        table_name))
                return None
Beispiel #8
0
class SearchWords(Base):
    __tablename__ = "session_search_words"
    session_id = Column("session_id", Integer)
    word_id = Column("word_id", Integer)
    word_temp_pk = PrimaryKeyConstraint(session_id, word_id)
    word_srch_word_id_words_id_foreign = ForeignKeyConstraint(
        columns=[session_id], refcolumns=[Session.id])
Beispiel #9
0
class ReportTableDefinition(JusticeCountsBase):
    """The definition for what a table within a report describes."""

    __tablename__ = "report_table_definition"

    id = Column(Integer, autoincrement=True)

    system = Column(Enum(System))
    metric_type = Column(Enum(MetricType))
    measurement_type = Column(Enum(MeasurementType))

    # Any dimensions where the data in the table only accounts for a subset of values for that dimension. For instance,
    # a table for the population metric may only cover data for the prison population, not those on parole or
    # probation. In that case filters would include a filter on population type.
    filtered_dimensions = Column(ARRAY(String(255)))
    # The value for each dimension from the above array.
    filtered_dimension_values = Column(ARRAY(String(255)))
    # The dimensions that the metric is broken down by in the report table. Each cell in a table instance has a unique
    # combination of values for the aggregated dimensions. Dimensions are sorted deterministically within the array.
    aggregated_dimensions = Column(ARRAY(String(255)))

    __table_args__ = tuple(
        [
            PrimaryKeyConstraint(id),
            UniqueConstraint(
                metric_type,
                measurement_type,
                filtered_dimensions,
                filtered_dimension_values,
                aggregated_dimensions,
            ),
        ]
    )
Beispiel #10
0
class Compute(Base):
    __tablename__ = 'compute'
    id = Column(Integer, autoincrement=True)
    containerId = Column(String(100))
    containerName = Column(String(50), primary_key=True)
    remoteName = Column(String(100))
    imageName = Column(String(50))
    deviceId = Column(String(50))
    username = Column(String(50), primary_key=True)
    appPath = Column(String(100))
    status = Column(String(50))
    active = Column(Boolean)
    __total__args__ = (PrimaryKeyConstraint(containerName, username), {})

    def __repr__(self):
        return "<compute('containerId='%s','containerName='%s', 'deviceId=%s' \
        'userName'='%s')>" % (self.containerId, self.containerName,
                              self.deviceId, self.username)

    def get_dict(self):
        ret = {
            'containerId': self.containerId,
            'containerName': self.containerName,
            'imageName': self.imageName,
            'username': self.username,
            'appPath': self.appPath,
            'deviceId': self.deviceId,
            'status': self.status
        }
        print(ret)
        return ret
Beispiel #11
0
class FollowedUser(Base):
    """Tracks which user follows which user.

    E.g. if user 1 follows user 2, user 1 will see changes of user 2 in their
    feed.
    """
    __tablename__ = 'feed_followed_users'

    followed_user_id = Column(
        Integer, ForeignKey(users_schema + '.user.id'),
        nullable=False)
    followed_user = relationship(
        User, primaryjoin=followed_user_id == User.id
    )

    follower_user_id = Column(
        Integer, ForeignKey(users_schema + '.user.id'),
        nullable=False)
    follower_user = relationship(
        User, primaryjoin=follower_user_id == User.id
    )

    __table_args__ = (
        PrimaryKeyConstraint(followed_user_id, follower_user_id),
        CheckConstraint(
            'followed_user_id != follower_user_id',
            name='check_feed_followed_user_self_follow'),
        Base.__table_args__
    )
Beispiel #12
0
class FilterArea(Base):
    """Feed filter on areas for a user.

    E.g. with an entry (user_id=1, area_id=2) the user 1 will only
    see changes in area 2 in their feed (except changes from followed users).
    """
    __tablename__ = 'feed_filter_area'

    area_id = Column(
        Integer, ForeignKey(schema + '.areas.document_id'),
        nullable=False)
    area = relationship(
        Area, primaryjoin=area_id == Area.document_id)

    user_id = Column(
        Integer, ForeignKey(users_schema + '.user.id'),
        nullable=False, index=True)
    user = relationship(
        User, primaryjoin=user_id == User.id
    )

    __table_args__ = (
        PrimaryKeyConstraint(area_id, user_id),
        Base.__table_args__
    )
Beispiel #13
0
class Report(JusticeCountsBase):
    """A document that is published by a source that contains data pertaining to the Justice Counts Framework.
    """
    __tablename__ = 'report'

    id = Column(Integer, autoincrement=True)

    # The source that this report is published by.
    source_id = Column(Integer, nullable=False)
    # This distinguishes between the many types of reports that a single source may produce, e.g. a Daily Status
    # Report or Monthly Fact Sheet, that contain different information and are potentially fetched and parsed using
    # different logic.
    type = Column(String(255), nullable=False)
    # Identifies a specific instance of a given report type. It should be constructed such that it is unique within a
    # given report type and source. The combination of report type and instance is used when ingesting a report to
    # determine whether this is an update to an existing report or a new report. For PDF reports, this may simply be
    # the title of the document after some validation has been performed. For webpages it may need to be dynamically
    # generated.
    instance = Column(String(255), nullable=False)

    # The date the report was published.
    publish_date = Column(Date, nullable=False)
    # The method used to acquire the data (e.g. scraped).
    acquisition_method = Column(Enum(AcquisitionMethod), nullable=False)
    # TODO(#4485): Add a list of projects (e.g. Justice Counts, Spark) for which this data was ingested.

    __table_args__ = tuple([
        PrimaryKeyConstraint(id),
        UniqueConstraint(source_id, type, instance),
        ForeignKeyConstraint([source_id], [Source.id])])

    source = relationship(Source)
Beispiel #14
0
class External(DBBASE):

    """This class serialises transcript ids, source of the score, and score itself."""

    __tablename__ = "external"

    query_id = Column(Integer, ForeignKey(Query.query_id), unique=False)
    source_id = Column(Integer, ForeignKey(ExternalSource.source_id), unique=False)
    ext_constraint = PrimaryKeyConstraint("query_id", "source_id", name="source_key")
    source = column_property(select([ExternalSource.source]).where(
        ExternalSource.source_id == source_id))
    score = Column(String, nullable=False)

    query = column_property(select([Query.query_name]).where(
        Query.query_id == query_id))

    valid_raw = column_property(select([ExternalSource.valid_raw]).where(
        ExternalSource.source_id == source_id))

    rtype = column_property(select([ExternalSource.rtype]).where(
        ExternalSource.source_id == source_id))

    __table_args__ = (ext_constraint, )

    def __init__(self, query_id, source_id, score):

        self.query_id = query_id
        self.source_id = source_id
        if not isinstance(score, numbers.Number):
            raise sqlalchemy.exc.ArgumentError("Invalid score for external values: {}".format(type(score)))
        score = str(score)
        assert score.strip()
        self.score = str(score)
Beispiel #15
0
class SubjectExpert(db.Model):
    __tablename__ = 'SubjectExperts'

    __table_args__ = (PrimaryKeyConstraint('AuthorId', 'FileId'), )

    FileId = db.Column(db.BigInteger, db.ForeignKey('Files.FileId'))
    AuthorId = db.Column(db.BigInteger, db.ForeignKey('Authors.AuthorId'))
    Score = db.Column(db.Integer, nullable=False)
Beispiel #16
0
class ReportTableInstance(JusticeCountsBase):
    """An instance of a table that contains an actual set of data points along a shared set of dimensions.

    It typically maps to a literal table with columns and rows in a report, but in some cases a literal table in the
    report may map to multiple tables as defined here.
    """

    __tablename__ = "report_table_instance"

    id = Column(Integer, autoincrement=True)

    report_id = Column(Integer, nullable=False)
    report_table_definition_id = Column(Integer, nullable=False)

    # The window of time that values in this table cover, represented by a start date (inclusive) and an end date
    # (exclusive). The data could represent an instant measurement, where the start and end are equal, or a window, e.g.
    # ADP over the last month.
    time_window_start = Column(Date, nullable=False)
    time_window_end = Column(Date, nullable=False)

    # This field can be used to store any text that the source provides describing the methodology used to calculate
    # the data. This is stored on instances so that if it changes from report to report, we don't overwrite methodology
    # for prior instances.
    methodology = Column(String)

    __table_args__ = tuple(
        [
            PrimaryKeyConstraint(id),
            # TODO(#4476): We need to include time window as part of the unique
            # constraint in case there is data for the same table definition that
            # represents multiple time windows within a single report. To make this
            # work with updates, I think we will re-ingest all table instances for
            # a particular report table definition in an updated report.
            UniqueConstraint(
                report_id,
                report_table_definition_id,
                time_window_start,
                time_window_end,
            ),
            ForeignKeyConstraint([report_id], [Report.id], ondelete="CASCADE"),
            ForeignKeyConstraint(
                [report_table_definition_id], [ReportTableDefinition.id]
            ),
        ]
    )

    report = relationship(Report, back_populates="report_table_instances")
    report_table_definition = relationship(ReportTableDefinition)
    cells = relationship(
        "Cell",
        back_populates="report_table_instance",
        lazy="selectin",
        cascade="all, delete",
        passive_deletes=True,
    )
Beispiel #17
0
class Infos(Base):

    __tablename__ = 'infos'
    __table_args__ = (
        PrimaryKeyConstraint('uuid'),
    )

    uuid = Column(String(38), nullable=False, default=new_uuid)
    wazo_version = Column(String(64), nullable=False)
    live_reload_enabled = Column(Boolean, nullable=False, server_default='True')
    timezone = Column(String(128))
    configured = Column(Boolean, nullable=False, server_default='False')
Beispiel #18
0
class Source(JusticeCountsBase):
    """A website or organization that publishes reports.

    It is not necessarily specific to a state or jurisdiction, but frequently is.
    """

    __tablename__ = "source"

    id = Column(Integer, autoincrement=True)

    name = Column(String(255), nullable=False)

    __table_args__ = tuple([PrimaryKeyConstraint(id), UniqueConstraint(name)])
Beispiel #19
0
class AWL(Base):
    """Schema for the awl table"""

    __tablename__ = 'awl'

    username = Column("username", String(255))
    email = Column("email", String(200))
    ip = Column("ip", String(40))
    count = Column("count", Integer)
    totscore = Column("totscore", Float)
    signedby = Column("signedby", String(255))

    __table_args__ = (PrimaryKeyConstraint("username", "email", "signedby",
                                           "ip"), )
Beispiel #20
0
 def __table_args__(cls):
     return (
         PrimaryKeyConstraint(
             "dt",
             "location",
             "variable_id",
             "demographic_id",
             sqlite_on_conflict="REPLACE",
         ),
         ForeignKeyConstraint(
             [cls.location, cls.location_type],
             [Location.location, Location.location_type],
         ),
     )
Beispiel #21
0
class DoToday(Base):
    __tablename__ = "dotoday"
    date = Column(Date,
                  primary_key=True,
                  sqlite_on_conflict_unique='IGNORE',
                  sqlite_on_conflict_primary_key='IGNORE')
    id = Column(Integer,
                primary_key=True,
                sqlite_on_conflict_unique='IGNORE',
                sqlite_on_conflict_primary_key='IGNORE')
    dotoday = Column(Boolean, default=False)
    PrimaryKeyConstraint('date', 'id', sqlite_on_conflict='IGNORE')
    UniqueConstraint('date', 'id', sqlite_on_conflict='IGNORE')

    def __repr__(self):
        return f'<User date:{self.date} id:{self.id} dotoday:{self.dotoday}>'
class Mater(declarative_base()):
    __tablename__ = 'mater'
    __table_args__ = (
        PrimaryKeyConstraint('country', 'vehicle_id', 'licence'),
    )
    country = Column(String(4))
    vehicle_id = Column(String(255))
    licence = Column(String(255))
    col04 = Column(String(255))
    col05 = Column(String(255))
    col06 = Column(String(255))
    col07 = Column(String(255))
    col08 = Column(String(255))
    col09 = Column(String(255))
    col10 = Column(String(255))
    col11 = Column(String(255))
    col12 = Column(String(255))
    col13 = Column(String(255))
    col14 = Column(String(255))
    col15 = Column(String(255))
    col16 = Column(String(255))
    col17 = Column(String(255))
    col18 = Column(String(255))
    col19 = Column(String(255))
    col20 = Column(String(255))
    col21 = Column(String(255))
    col22 = Column(String(255))
    col23 = Column(String(255))
    col24 = Column(String(255))
    col25 = Column(String(255))
    col26 = Column(String(255))
    col27 = Column(String(255))
    col28 = Column(String(255))
    col29 = Column(String(255))
    col30 = Column(String(255))
    col31 = Column(String(255))
    col32 = Column(String(255))
    col33 = Column(String(255))
    col34 = Column(String(255))
    col35 = Column(String(255))
    col36 = Column(String(255))
    col37 = Column(String(255))
    col38 = Column(String(255))
    col39 = Column(String(255))
    col40 = Column(String(255))
Beispiel #23
0
class Cell(JusticeCountsBase):
    """A single data point within a table.
    """
    __tablename__ = 'cell'

    id = Column(Integer, autoincrement=True)

    report_table_instance_id = Column(Integer, nullable=False)
    aggregated_dimension_values = Column(ARRAY(String(255)), nullable=False)

    value = Column(Numeric, nullable=False)

    __table_args__ = tuple([
        PrimaryKeyConstraint(id),
        UniqueConstraint(report_table_instance_id, aggregated_dimension_values),
        ForeignKeyConstraint([report_table_instance_id], [ReportTableInstance.id])])

    report_table_instance = relationship(ReportTableInstance)
Beispiel #24
0
class NetworkModuleAnnotation ( Base ):
    __tablename__ = "NetworkModuleAnnotation"
    
    # The module ID
    networkModule_id = Column( String, ForeignKey('NetworkModule.moduleID') )
    # The annotation GOID
    annotationID = Column( String )
    # The annotation name
    annotationTerm = Column( String )

    # Define the Composite PrimaryKey
    __table_args__ = (
        PrimaryKeyConstraint('networkModule_id', 'annotationID'),
    )
    
    #
    ## The constructor
    def __init__(self, annotation_term, annotation_id):
        
        self.annotationName = annotation_term
        self.annotationID = annotation_id
Beispiel #25
0
class Birdsales(db.Model):
    __tablename__ = 'bird_sales'
    date_created = db.Column(db.DateTime,  default=db.func.current_timestamp())
    age = db.Column(db.Float,  nullable=False)
    btype = db.Column(db.String(128),  nullable=False)
    quantity = db.Column(db.Integer,  nullable=False)
    amount = db.Column(db.Float, nullable=False)
    bshed = db.Column(db.Integer, db.ForeignKey(
        'bird_stock.bshed'), nullable=False)
    __table_args__ = (
        PrimaryKeyConstraint('bshed', 'age', 'btype', 'quantity', 'amount'),
        {},
    )

    def __init__(self, age, btype, quantity, amount, bstock):
        # bstock belongs to the relationship backref varuable in bird_stock table
        self.bstock = bstock
        self.age = age
        self.btype = btype
        self.quantity = quantity
        self.amount = amount
Beispiel #26
0
class Reserve(Base):
    __tablename__ = "reserves"
    __table_args__ = (PrimaryKeyConstraint("sid", "bid", "day"), {})

    sid = Column(Integer, ForeignKey("sailors.sid"))
    bid = Column(Integer, ForeignKey("boats.bid"))
    day = Column(DateTime)
    price = Column(Integer, default=0)
    rating = Column(Integer, default=-1)

    sailor = relationship("Sailor")

    def __init__(self, sid, bid, day, price, rating):
        self.sid = sid
        self.bid = bid
        self.day = datetime.datetime.strptime(day, "%Y-%m-%d")
        self.price = price
        self.rating = rating

    def __repr__(self):
        return f"<Reservation(sid={self.sid}, bid={self.bid}, day={self.day}, price={self.price})>"
class PistonCup(declarative_base()):
    __tablename__ = 'pistoncup'
    __table_args__ = (
        PrimaryKeyConstraint('country', 'make', 'model'),
    )
    country = Column(String(4))
    make = Column(String(255))
    model = Column(String(255))
    amount_damage = Column(String(255))
    rnk = Column(Integer)

    def __repr__(self):
        return f"pistoncup(country={self.country}; make={self.make}; model={self.model}; " \
               f"amount_damage={self.amount_damage}; rank={self.rnk})"

    @classmethod
    def wipe_slate(cls):
        return sase.delete(cls)

    @classmethod
    def import_scoreboard(cls, top_x: sase.select) -> sase.insert:
        return sase.insert(cls).from_select(inspect(cls).columns.keys(), sase.select(top_x.columns))
class WeirdYears(declarative_base()):
    __tablename__ = 'weirdyears'
    __table_args__ = (
        PrimaryKeyConstraint('country', 'make', 'model'),
    )
    country = Column(String(4))
    make = Column(String(255))
    model = Column(String(255))
    build_year = Column(String(255))

    def __repr__(self):
        return f"weirdyears(country={self.country}; make={self.make}; model={self.model}; build_year={self.build_year})"

    @classmethod
    def save_weird_years(cls, source_table: Type[declarative_base] = Vehicle) -> sase.Insert:
        """
        This function generates the statement to insert weird build_years with accompanying primary keys to
        this table.
        The `prefix_with("IGNORE")` is to run the query even though some strings cannot be converted to integers.
        The result is still as expected.

        :param source_table: Main table where all unsanitized data is stored, is. vehicles
        :return: insert statement object that can be executed against the database
        """

        stmt = sase.insert(cls).prefix_with("IGNORE").from_select(
            inspect(cls).columns.keys(),
            sase.select(
                [source_table.country, source_table.vehicle_id, source_table.licence, source_table.build_year]
            ).where(
                sase.or_(
                    sase.cast(source_table.build_year, Integer) < constants.MIN_YEAR,
                    sase.cast(source_table.build_year, Integer) > constants.MAX_YEAR)
            )
        )
        return stmt
Beispiel #29
0
class Hit(DBBASE):
    """This class is used to serialise and store in a DB a BLAST hit.
    Stored attributes:

    - id                Indexing key
    - query_id            Foreign ID key for the query table
    - target_id            Foreign ID key for the target table
    - qt_constrating
    """

    __tablename__ = "hit"
    query_id = Column(Integer, ForeignKey(Query.query_id), unique=False)
    target_id = Column(Integer, ForeignKey(Target.target_id), unique=False)
    evalue = Column(Float)
    bits = Column(Float)
    global_identity = Column(Float)
    global_positives = Column(Float)
    query_start = Column(Integer)
    query_end = Column(Integer)
    target_start = Column(Integer)
    target_end = Column(Integer)
    hit_number = Column(Integer)
    query_multiplier = Column(
        Float)  # Probably I should move this to a separate table!
    target_multiplier = Column(Float)
    query_aligned_length = Column(Integer)
    target_aligned_length = Column(Integer)

    # query_id, target_id, evalue, bits, global_identity, global_positives, query_start, query_end, target_start,
    # target_end, hit_number, query_multiplier, target_multiplier, query_aligned_length, target_aligned_length

    # Indices and constraints
    qt_constraint = PrimaryKeyConstraint("query_id",
                                         "target_id",
                                         name="hit_id")
    qt_index = Index("qt_index", "query_id", "target_id", unique=True)
    query_index = Index("hit_query_idx", "query_id", unique=False)
    target_index = Index("hit_target_idx", "target_id", unique=False)
    evalue_index = Index('hit_evalue_idx', 'evalue', unique=False)

    query_object = relationship(Query,
                                uselist=False,
                                lazy="select",
                                innerjoin=True,
                                backref=backref("hits",
                                                cascade="all, delete-orphan"))
    target_object = relationship(Target,
                                 uselist=False,
                                 lazy="select",
                                 innerjoin=True,
                                 backref=backref("hits",
                                                 cascade="all, delete-orphan"))

    @hybrid_property
    def query(self):
        return self.query_object.query_name

    @hybrid_property
    def query_length(self):
        return self.query_object.query_length

    @hybrid_property
    def target(self):
        return self.target_object.target_name

    @hybrid_property
    def target_length(self):
        return self.target_object.target_length

    join_condition = "and_(Hit.query_id==Hsp.query_id, Hit.target_id==Hsp.target_id)"
    hsps = relationship(Hsp,
                        uselist=True,
                        lazy="subquery",
                        backref=backref(
                            "hit_object",
                            uselist=False,
                            overlaps="hits,target_object,query_object"),
                        cascade="all, delete-orphan",
                        single_parent=True,
                        foreign_keys=[query_id, target_id],
                        overlaps="hits,target_object,query_object",
                        primaryjoin=join_condition)

    __table_args__ = (qt_constraint, qt_index, query_index, target_index,
                      evalue_index)

    # All arguments are necessary and it is more convenient to have them here
    # rather than in a struct/dict/whatever
    # pylint: disable=too-many-arguments
    def __init__(self,
                 query_id,
                 target_id,
                 evalue,
                 bits,
                 global_identity,
                 global_positives,
                 query_start,
                 query_end,
                 target_start,
                 target_end,
                 query_aligned_length,
                 target_aligned_length,
                 query_length,
                 alignment,
                 hit_number=1,
                 query_multiplier=1,
                 target_multiplier=1):
        """This function takes as input the id of a target, the id of the query,
        and a hit-object from the XML. The multiplier keyword is used to calculate
        the ratio between the query and the target.

        :param query_id: reference key for the Query table
        :type query_id: int

        :param target_id: reference key for the Target table
        :type target_id: int

        :param alignment: The BLAST Hit object

        :param evalue: Evalue of the hit (recovered from the "description" object)
        :type evalue: float

        :param bits: BitScore of the hit (recovered from the "description" object)
        :type bits: float

        :param hit_number: progressive index that indicates the priority
        of the hit in the database
        :type hit_number: int

        :param query_multiplier: either 1 or 3, it depends on the type of BLAST
        :type query_multiplier: int

        :param target_multiplier: either 1 or 3, it depends on the type of BLAST
        :type target_multiplier: int
        """

        self.query_id, self.query_start, self.query_end = query_id, query_start, query_end
        self.query_aligned_length = query_aligned_length

        self.target_id, self.target_start, self.target_end = target_id, target_start, target_end
        self.target_aligned_length = target_aligned_length

        self.hit_number = hit_number
        self.evalue, self.bits = evalue, bits

        self.global_identity, self.global_positives = global_identity, global_positives
        self.query_multiplier = query_multiplier
        self.target_multiplier = target_multiplier

    # pylint: enable=too-many-arguments

    def __str__(self):
        line = [
            self.query, self.target, self.evalue, self.bits, self.query_start,
            self.query_end, self.target_start, self.target_end,
            self.query_length, self.target_length
        ]

        return "\t".join(str(x) for x in line)

    @classmethod
    def from_hit(cls,
                 query_id,
                 target_id,
                 query_length,
                 alignment,
                 query_multiplier=1,
                 target_multiplier=1,
                 hit_number=1) -> (object, List[Hsp]):
        """"""

        prepared_hit, prepared_hsps = prepare_hit(
            alignment,
            query_id,
            target_id,
            query_multiplier=query_multiplier,
            target_multiplier=target_multiplier,
            query_length=query_length)
        hsps = []
        for counter, hsp in enumerate(prepared_hsps, start=1):
            hsps.append(
                Hsp.from_dict(counter=counter,
                              query_id=query_id,
                              target_id=target_id,
                              hsp_dict=hsp))

        new = Hit(query_id=query_id,
                  target_id=target_id,
                  query_multiplier=query_multiplier,
                  hit_number=hit_number,
                  target_multiplier=target_multiplier,
                  **prepared_hit)

        return new, hsps

    @classmethod
    def as_dict_static(cls, state_obj):
        """Method to return a dict representation of the object.
        Static method to be called from outside the class.

        :param state_obj: a namedtuple or an instance of this class

        :rtype: dict
        """

        keys = [
            "evalue",
            "bits",
            "global_identity",
            "global_positives",
            "query_start",
            "query_end",
            "target_start",
            "target_end",
            "hit_number",
            "query_multiplier",
            "target_multiplier",
            "query_aligned_length",
            "target_aligned_length",
        ]

        state = dict().fromkeys(keys)

        for key in keys:
            state[key] = getattr(state_obj, key)

        return state

    @classmethod
    def as_full_dict_static(cls, hit_tuple, hsp_list, query_tuple,
                            target_tuple):
        """
        :param hit_tuple: Hit namedtuple (from direct query to the DB)
        :type hit_tuple: collections.namedtuple
        :param hsp_list: list of hsp dictionaries from Hsp.as_dict_static
        :type hsp_list: list(collections.namedtuple)

        :param query_tuple: Query namedtuple
        :type query_tuple: collections.namedtuple
        :param target_tuple: Target namedtuple
        :type target_tuple: collections.namedtuple
        :rtype: dict
        """

        state = cls.as_dict_static(hit_tuple)
        hsps = [Hsp.as_dict_static(h) for h in hsp_list]

        state["query"] = query_tuple.query_name
        state["target"] = target_tuple.target_name
        state["query_length"] = query_tuple.query_length
        state["target_length"] = target_tuple.target_length
        state["query_hit_ratio"] = (query_tuple.query_length / hit_tuple.query_multiplier) / \
                                   (target_tuple.target_length / hit_tuple.target_multiplier)
        state["hit_query_ratio"] = (target_tuple.target_length / hit_tuple.target_multiplier) / \
                                   (query_tuple.query_length / hit_tuple.query_multiplier)
        state["query_cov"] = state[
            "query_aligned_length"] / query_tuple.query_length
        assert state["query_cov"] <= 1, (state, )
        state["target_cov"] = state[
            "target_aligned_length"] / target_tuple.target_length
        assert state["target_cov"] <= 1, (state, )

        state["hsps"] = []
        for hsp in hsps:
            hsp["query_hsp_cov"] = (hsp["query_hsp_end"] -
                                    hsp["query_hsp_start"] + 1)
            hsp["query_hsp_cov"] /= (state["query_length"])
            hsp["target_hsp_cov"] = (hsp["target_hsp_end"] -
                                     hsp["target_hsp_start"] + 1)
            hsp["target_hsp_cov"] /= (state["target_length"])
            state["hsps"].append(hsp)

        return state

    def as_dict(self):
        """Method to return a dict representation of the object.
        Necessary for storing.

        :rtype: dict
        """

        state = self.as_dict_static(self)

        # Retrieving the values ONCE
        query_object = self.query_object.as_tuple()
        target_object = self.target_object.as_tuple()

        state["query"] = query_object.query_name
        state["target"] = target_object.target_name
        state["query_length"] = query_object.query_length
        state["target_length"] = target_object.target_length
        state["query_hit_ratio"] = state["query_length"] / state["query_multiplier"] /\
            (state["target_length"] / state["target_multiplier"])

        state["hit_query_ratio"] = state["target_length"] / state["target_multiplier"] /\
            (state["query_length"] / state["query_multiplier"])

        state["hsps"] = []
        for hsp in self.hsps:
            dict_hsp = hsp.as_dict()
            dict_hsp["query_hsp_cov"] = (dict_hsp["query_hsp_end"] -
                                         dict_hsp["query_hsp_start"] + 1)
            dict_hsp["query_hsp_cov"] /= state["query_length"]
            dict_hsp["target_hsp_cov"] = (dict_hsp["target_hsp_end"] -
                                          dict_hsp["target_hsp_start"] + 1)
            dict_hsp["target_hsp_cov"] /= state["target_length"]
            state["hsps"].append(dict_hsp)

        return state

    @hybrid_property
    def query_hit_ratio(self):
        """
        This property returns the quotient (Query Length)/(Target Length)
        """

        ratio = self.query_length / self.query_multiplier
        ratio /= self.target_length / self.target_multiplier

        return ratio

    @hybrid_property
    def hit_query_ratio(self):
        """
        This property returns the quotient (Target Length)/(Query Length)
        """

        ratio = self.target_length / self.target_multiplier
        ratio /= (self.query_length / self.query_multiplier)

        return ratio
Beispiel #30
0
class Hsp(DBBASE):
    r"""
    This class serializes and stores into the DB the various HSPs.
    It is directly connected to the Hit table, through the "hit_id"
    reference key.
    The Hit reference can be accessed through the hit_object attribute;
    back-reference (Hit to Hsps) is given by the "hsps" attribute.

    Keys:

    :return hit_id: Reference key for the Hit table
    :rtype hit_id: int

    :return counter: It indicates the progressive number of the HSP for the hit
    :rtype counter: int

    :return query_hsp_start: Start position on the query
    :rtype query_hsp_start; int

    :return query_hsp_end: End position on the query
    :rtype query_hsp_end: int

    :return target_hsp_start: Start position on the target
    :rtype target_hsp_start: int

    :return target_hsp_end: End position on the target
    :rtype target_hsp_end: int

    :return hsp_evalue: Evalue of the HSP
    :rtype hsp_evalue: float

    :return hsp_bits: Bit-score of the HSP
    :rtype hsp_bits: float

    :return hsp_identity: Identity (in %) of the alignment
    :rtype  hsp_identity: float

    :return hsp_length: Length of the HSP
    :rtype hsp_length: int

    :return match: the match line between query and target, with the following specs:
        - If the position is a match/positive, keep the original value
        - If the position is a gap *for the query*, insert a - (dash)
        - If the position is a gap *for the target*, insert a _ (underscore)
        - If the position is a gap *for both*, insert a \ (backslash)

    An HSP row has the following constraints:
    - Counter,hit_id must be unique (and are primary keys)
    - The combination ("Hit_id","query_hsp_start","query_hsp_end",
    "target_hsp_start", "target_hsp_end") must be unique

    Moreover, the following properties are also present:

    :return query_object: The referenced Query
    :rtype query_object: Query

    :return target_object: The reference Target
    :rtype target_object: Target
    """

    __tablename__ = "hsp"
    counter = Column(Integer)  # Indicates the number of the HSP inside the hit
    query_id = Column(Integer, ForeignKey(Query.query_id), unique=False)
    target_id = Column(Integer, ForeignKey(Target.target_id), unique=False)
    pk_constraint = PrimaryKeyConstraint("counter",
                                         "query_id",
                                         "target_id",
                                         name="hsp_constraint")
    query_index = Index("hsp_query_idx", "query_id", unique=False)
    target_index = Index("hsp_target_idx", "target_id", unique=False)
    hsp_evalue_index = Index('hsp_evalue_idx', 'hsp_evalue', unique=False)
    combined_index = Index("hsp_combined_idx",
                           "query_id",
                           "target_id",
                           unique=False)
    full_index = Index("hsp_full_idx",
                       "counter",
                       "query_id",
                       "target_id",
                       unique=True)
    query_hsp_start = Column(Integer)
    query_hsp_end = Column(Integer)
    query_frame = Column(Integer)
    target_hsp_start = Column(Integer)
    target_hsp_end = Column(Integer)
    target_frame = Column(Integer)
    uni_constraint = UniqueConstraint("query_id", "target_id",
                                      "query_hsp_start", "query_hsp_end",
                                      "target_hsp_start", "target_hsp_end")
    match = Column(String(10000))
    hsp_evalue = Column(Float)
    hsp_bits = Column(Float)
    hsp_identity = Column(Float)
    hsp_positives = Column(Float)
    hsp_length = Column(Integer)

    query_object = relationship(Query, uselist=False)
    target_object = relationship(Target, uselist=False)

    query = column_property(
        select([Query.query_name]).where(Query.query_id == query_id))
    query_length = column_property(
        select([Query.query_length]).where(Query.query_id == query_id))
    target = select([Target.target_name]).where(Target.target_id == target_id)
    target_length = select([Target.target_length
                            ]).where(Target.target_id == target_id)

    __table_args__ = (pk_constraint, query_index, target_index, combined_index,
                      hsp_evalue_index)

    def __init__(self, hsp, counter, query_id, target_id):
        """

        :param hsp: an hsp object from the serialized XML

        :param counter: integer which indicates the HSP position in the HSP
         list for the hit
        :type counter: int

        :param query_id: Foreign key for the Query table
        :type query_id: int

        :param target_id: Foreign key for the Target table
        :type target_id: int
        """

        self.counter = counter
        hsp_dict, _, _ = prepare_hsp(hsp, counter)

        for key in hsp_dict:
            setattr(self, key, hsp_dict[key])

        self.query_id = query_id
        self.target_id = target_id

    def __str__(self):
        """Simple printing function."""
        line = [
            self.query, self.target, self.query_hsp_start, self.query_hsp_end,
            self.target_hsp_start, self.target_hsp_end, self.hsp_evalue
        ]
        return "\t".join([str(x) for x in line])

    # @profile

    @classmethod
    def as_dict_static(cls, state_obj):
        """Method to return a dict representation of the object. Necessary for storing.
        This method returns a dictionary *without any attribute that requires joined data*.
        As a static method, it is useful to be used outside of the class.
        :param state_obj: an instance of the HSP class or a
        collections.namedtuple object from a direct query

        :rtype : dict
        """

        keys = [
            "query_hsp_start", "query_hsp_end", "target_hsp_start",
            "target_hsp_end", "hsp_evalue", "hsp_bits", "match", "query_frame",
            "target_frame"
        ]

        state = dict().fromkeys(keys)
        for key in keys:
            state[key] = getattr(state_obj, key)
        return state

    def as_dict(self):
        """Method to return a dict representation of the object. Necessary for storing.
        This method returns a dictionary *without any attribute that requires joined data*.
        It is meant to be used only by the method as_dict of the Hit class."""

        return self.as_dict_static(self)

    def as_full_dict(self):
        """Method to return a dict representation of the object.
        This method also checks query name and hit name, so it is slower than as_dict and used
        when it is necessary to retrieve data independently from Hit.
        """

        state = self.as_dict()
        state["query"] = self.query
        state["target"] = self.target
        state["query_hsp_cov"] = self.query_hsp_cov
        state["target_hsp_cov"] = self.target_hsp_cov
        state["match"] = self.match

        return state

    @hybrid_property
    def query_hsp_cov(self):
        """This property returns the percentage of the query which is covered by the HSP."""
        val = (self.query_hsp_end - self.query_hsp_start + 1)
        val /= self.query_length
        return val

    @hybrid_property
    def target_hsp_cov(self):
        """This property returns the percentage of the target which is covered by the HSP."""
        val = (self.target_hsp_end - self.target_hsp_start + 1)
        val /= self.target_length
        return val