def _build_value_tables(self): self.value_continuous = Table( "value_continuous", self.metadata, Column("person_id", ForeignKey("person.id")), Column("measure_id", ForeignKey("measure.id")), Column("value", Float(), nullable=False), PrimaryKeyConstraint("person_id", "measure_id"), ) self.value_ordinal = Table( "value_ordinal", self.metadata, Column("person_id", ForeignKey("person.id")), Column("measure_id", ForeignKey("measure.id")), Column("value", Float(), nullable=False), PrimaryKeyConstraint("person_id", "measure_id"), ) self.value_categorical = Table( "value_categorical", self.metadata, Column("person_id", ForeignKey("person.id")), Column("measure_id", ForeignKey("measure.id")), Column("value", String(127), nullable=False), PrimaryKeyConstraint("person_id", "measure_id"), ) self.value_other = Table( "value_other", self.metadata, Column("person_id", ForeignKey("person.id")), Column("measure_id", ForeignKey("measure.id")), Column("value", String(127), nullable=False), PrimaryKeyConstraint("person_id", "measure_id"), )
class CovidObservation(Base): __tablename__ = "covid_observations" dt = Column(Date) location_id = Column(String, FKCascade(Location.id)) variable_id = Column(Integer, FKCascade(CovidVariable.id)) demographic_id = Column(Integer, FKCascade(CovidDemographic.id)) provider_id = Column(Integer, FKCascade(CovidProvider.id)) value = Column(Numeric) last_updated = Column(DateTime, nullable=False, default=func.now()) source_url = Column(String) source_name = Column(String) deleted = Column(Boolean, nullable=False, default=False) delete_batch_id = Column(BigInteger) __table_args__ = ( PrimaryKeyConstraint( "dt", "location_id", "variable_id", "demographic_id", "provider_id", sqlite_on_conflict="REPLACE", ), { "schema": "data" }, )
class Temp_table(declarative_base()): __tablename__ = 'test_table' __table_args__ = (PrimaryKeyConstraint('country', 'vehicle_id', 'licence'), ) country = Column(String(4)) vehicle_id = Column(String(255)) licence = Column(String(255))
class External(DBBASE): """This class serialises transcript ids, source of the score, and score itself.""" __tablename__ = "external" query_id = Column(Integer, ForeignKey(Query.query_id), unique=False) source_id = Column(Integer, ForeignKey(ExternalSource.source_id), unique=False) ext_constraint = PrimaryKeyConstraint("query_id", "source_id", name="source_key") source = column_property( select([ExternalSource.source ]).where(ExternalSource.source_id == source_id)) score = Column(Float) query = column_property( select([Query.query_name]).where(Query.query_id == query_id)) __table_args__ = (ext_constraint, ) def __init__(self, query_id, source_id, score): self.query_id = query_id self.source_id = source_id self.score = score
class Followers(db.Model): __tablename__ = 'followers' __table_args__ = (PrimaryKeyConstraint('follower_id', 'followed_id'), ) follower_id = db.Column('follower_id', db.Integer, db.ForeignKey('users.id')) followed_id = db.Column('followed_id', db.Integer, db.ForeignKey('users.id'))
class Birdstock(db.Model): __tablename__ = 'bird_stock' # Shed number date_created = db.Column(db.DateTime, default=db.func.current_timestamp()) bshed = db.Column(db.Integer, db.ForeignKey('shed_info.shed'), nullable=False) age = db.Column(db.Float, nullable=False) btype = db.Column(db.String(128), nullable=False) quantity = db.Column(db.Integer, nullable=False) # Relastionships bsales = db.relationship('Birdsales', backref='bstock', single_parent=True) __table_args__ = ( PrimaryKeyConstraint('bshed', 'age', 'btype'), {}, ) def __init__(self, shed, age, btype, quantity): # shed belongs to the relationship backref varuable in shed_info self.shed = shed self.age = age self.btype = btype self.quantity = quantity
def _table(self, table_name): """Retrieves, reflects, and caches table objects Actual implementation of __getitem__ """ try: return self._table_cache[table_name] except KeyError: self.reflect_table(table_name) table = self.metadata.tables[table_name] table_dict = {'__table__': table, '__tablename__': table_name} # easy workaround for views which don't have primary key # this has to be replaced with method to recognize table primary key in future # if it isn't enough pk = table.primary_key if isinstance(pk, PrimaryKeyConstraint) and len(pk.columns) == 0: id_column = table.c.get('id') if id_column is not None: table.primary_key = PrimaryKeyConstraint(id_column) try: table_cls = type(str(table_name), (self.table_base, ), table_dict) self._table_cache[table_name] = table_cls return table_cls except ArgumentError: # This usually happens on join tables with no PKs logger.info( 'Unable to create table class for table "{}"'.format( table_name)) return None
class SearchWords(Base): __tablename__ = "session_search_words" session_id = Column("session_id", Integer) word_id = Column("word_id", Integer) word_temp_pk = PrimaryKeyConstraint(session_id, word_id) word_srch_word_id_words_id_foreign = ForeignKeyConstraint( columns=[session_id], refcolumns=[Session.id])
class ReportTableDefinition(JusticeCountsBase): """The definition for what a table within a report describes.""" __tablename__ = "report_table_definition" id = Column(Integer, autoincrement=True) system = Column(Enum(System)) metric_type = Column(Enum(MetricType)) measurement_type = Column(Enum(MeasurementType)) # Any dimensions where the data in the table only accounts for a subset of values for that dimension. For instance, # a table for the population metric may only cover data for the prison population, not those on parole or # probation. In that case filters would include a filter on population type. filtered_dimensions = Column(ARRAY(String(255))) # The value for each dimension from the above array. filtered_dimension_values = Column(ARRAY(String(255))) # The dimensions that the metric is broken down by in the report table. Each cell in a table instance has a unique # combination of values for the aggregated dimensions. Dimensions are sorted deterministically within the array. aggregated_dimensions = Column(ARRAY(String(255))) __table_args__ = tuple( [ PrimaryKeyConstraint(id), UniqueConstraint( metric_type, measurement_type, filtered_dimensions, filtered_dimension_values, aggregated_dimensions, ), ] )
class Compute(Base): __tablename__ = 'compute' id = Column(Integer, autoincrement=True) containerId = Column(String(100)) containerName = Column(String(50), primary_key=True) remoteName = Column(String(100)) imageName = Column(String(50)) deviceId = Column(String(50)) username = Column(String(50), primary_key=True) appPath = Column(String(100)) status = Column(String(50)) active = Column(Boolean) __total__args__ = (PrimaryKeyConstraint(containerName, username), {}) def __repr__(self): return "<compute('containerId='%s','containerName='%s', 'deviceId=%s' \ 'userName'='%s')>" % (self.containerId, self.containerName, self.deviceId, self.username) def get_dict(self): ret = { 'containerId': self.containerId, 'containerName': self.containerName, 'imageName': self.imageName, 'username': self.username, 'appPath': self.appPath, 'deviceId': self.deviceId, 'status': self.status } print(ret) return ret
class FollowedUser(Base): """Tracks which user follows which user. E.g. if user 1 follows user 2, user 1 will see changes of user 2 in their feed. """ __tablename__ = 'feed_followed_users' followed_user_id = Column( Integer, ForeignKey(users_schema + '.user.id'), nullable=False) followed_user = relationship( User, primaryjoin=followed_user_id == User.id ) follower_user_id = Column( Integer, ForeignKey(users_schema + '.user.id'), nullable=False) follower_user = relationship( User, primaryjoin=follower_user_id == User.id ) __table_args__ = ( PrimaryKeyConstraint(followed_user_id, follower_user_id), CheckConstraint( 'followed_user_id != follower_user_id', name='check_feed_followed_user_self_follow'), Base.__table_args__ )
class FilterArea(Base): """Feed filter on areas for a user. E.g. with an entry (user_id=1, area_id=2) the user 1 will only see changes in area 2 in their feed (except changes from followed users). """ __tablename__ = 'feed_filter_area' area_id = Column( Integer, ForeignKey(schema + '.areas.document_id'), nullable=False) area = relationship( Area, primaryjoin=area_id == Area.document_id) user_id = Column( Integer, ForeignKey(users_schema + '.user.id'), nullable=False, index=True) user = relationship( User, primaryjoin=user_id == User.id ) __table_args__ = ( PrimaryKeyConstraint(area_id, user_id), Base.__table_args__ )
class Report(JusticeCountsBase): """A document that is published by a source that contains data pertaining to the Justice Counts Framework. """ __tablename__ = 'report' id = Column(Integer, autoincrement=True) # The source that this report is published by. source_id = Column(Integer, nullable=False) # This distinguishes between the many types of reports that a single source may produce, e.g. a Daily Status # Report or Monthly Fact Sheet, that contain different information and are potentially fetched and parsed using # different logic. type = Column(String(255), nullable=False) # Identifies a specific instance of a given report type. It should be constructed such that it is unique within a # given report type and source. The combination of report type and instance is used when ingesting a report to # determine whether this is an update to an existing report or a new report. For PDF reports, this may simply be # the title of the document after some validation has been performed. For webpages it may need to be dynamically # generated. instance = Column(String(255), nullable=False) # The date the report was published. publish_date = Column(Date, nullable=False) # The method used to acquire the data (e.g. scraped). acquisition_method = Column(Enum(AcquisitionMethod), nullable=False) # TODO(#4485): Add a list of projects (e.g. Justice Counts, Spark) for which this data was ingested. __table_args__ = tuple([ PrimaryKeyConstraint(id), UniqueConstraint(source_id, type, instance), ForeignKeyConstraint([source_id], [Source.id])]) source = relationship(Source)
class External(DBBASE): """This class serialises transcript ids, source of the score, and score itself.""" __tablename__ = "external" query_id = Column(Integer, ForeignKey(Query.query_id), unique=False) source_id = Column(Integer, ForeignKey(ExternalSource.source_id), unique=False) ext_constraint = PrimaryKeyConstraint("query_id", "source_id", name="source_key") source = column_property(select([ExternalSource.source]).where( ExternalSource.source_id == source_id)) score = Column(String, nullable=False) query = column_property(select([Query.query_name]).where( Query.query_id == query_id)) valid_raw = column_property(select([ExternalSource.valid_raw]).where( ExternalSource.source_id == source_id)) rtype = column_property(select([ExternalSource.rtype]).where( ExternalSource.source_id == source_id)) __table_args__ = (ext_constraint, ) def __init__(self, query_id, source_id, score): self.query_id = query_id self.source_id = source_id if not isinstance(score, numbers.Number): raise sqlalchemy.exc.ArgumentError("Invalid score for external values: {}".format(type(score))) score = str(score) assert score.strip() self.score = str(score)
class SubjectExpert(db.Model): __tablename__ = 'SubjectExperts' __table_args__ = (PrimaryKeyConstraint('AuthorId', 'FileId'), ) FileId = db.Column(db.BigInteger, db.ForeignKey('Files.FileId')) AuthorId = db.Column(db.BigInteger, db.ForeignKey('Authors.AuthorId')) Score = db.Column(db.Integer, nullable=False)
class ReportTableInstance(JusticeCountsBase): """An instance of a table that contains an actual set of data points along a shared set of dimensions. It typically maps to a literal table with columns and rows in a report, but in some cases a literal table in the report may map to multiple tables as defined here. """ __tablename__ = "report_table_instance" id = Column(Integer, autoincrement=True) report_id = Column(Integer, nullable=False) report_table_definition_id = Column(Integer, nullable=False) # The window of time that values in this table cover, represented by a start date (inclusive) and an end date # (exclusive). The data could represent an instant measurement, where the start and end are equal, or a window, e.g. # ADP over the last month. time_window_start = Column(Date, nullable=False) time_window_end = Column(Date, nullable=False) # This field can be used to store any text that the source provides describing the methodology used to calculate # the data. This is stored on instances so that if it changes from report to report, we don't overwrite methodology # for prior instances. methodology = Column(String) __table_args__ = tuple( [ PrimaryKeyConstraint(id), # TODO(#4476): We need to include time window as part of the unique # constraint in case there is data for the same table definition that # represents multiple time windows within a single report. To make this # work with updates, I think we will re-ingest all table instances for # a particular report table definition in an updated report. UniqueConstraint( report_id, report_table_definition_id, time_window_start, time_window_end, ), ForeignKeyConstraint([report_id], [Report.id], ondelete="CASCADE"), ForeignKeyConstraint( [report_table_definition_id], [ReportTableDefinition.id] ), ] ) report = relationship(Report, back_populates="report_table_instances") report_table_definition = relationship(ReportTableDefinition) cells = relationship( "Cell", back_populates="report_table_instance", lazy="selectin", cascade="all, delete", passive_deletes=True, )
class Infos(Base): __tablename__ = 'infos' __table_args__ = ( PrimaryKeyConstraint('uuid'), ) uuid = Column(String(38), nullable=False, default=new_uuid) wazo_version = Column(String(64), nullable=False) live_reload_enabled = Column(Boolean, nullable=False, server_default='True') timezone = Column(String(128)) configured = Column(Boolean, nullable=False, server_default='False')
class Source(JusticeCountsBase): """A website or organization that publishes reports. It is not necessarily specific to a state or jurisdiction, but frequently is. """ __tablename__ = "source" id = Column(Integer, autoincrement=True) name = Column(String(255), nullable=False) __table_args__ = tuple([PrimaryKeyConstraint(id), UniqueConstraint(name)])
class AWL(Base): """Schema for the awl table""" __tablename__ = 'awl' username = Column("username", String(255)) email = Column("email", String(200)) ip = Column("ip", String(40)) count = Column("count", Integer) totscore = Column("totscore", Float) signedby = Column("signedby", String(255)) __table_args__ = (PrimaryKeyConstraint("username", "email", "signedby", "ip"), )
def __table_args__(cls): return ( PrimaryKeyConstraint( "dt", "location", "variable_id", "demographic_id", sqlite_on_conflict="REPLACE", ), ForeignKeyConstraint( [cls.location, cls.location_type], [Location.location, Location.location_type], ), )
class DoToday(Base): __tablename__ = "dotoday" date = Column(Date, primary_key=True, sqlite_on_conflict_unique='IGNORE', sqlite_on_conflict_primary_key='IGNORE') id = Column(Integer, primary_key=True, sqlite_on_conflict_unique='IGNORE', sqlite_on_conflict_primary_key='IGNORE') dotoday = Column(Boolean, default=False) PrimaryKeyConstraint('date', 'id', sqlite_on_conflict='IGNORE') UniqueConstraint('date', 'id', sqlite_on_conflict='IGNORE') def __repr__(self): return f'<User date:{self.date} id:{self.id} dotoday:{self.dotoday}>'
class Mater(declarative_base()): __tablename__ = 'mater' __table_args__ = ( PrimaryKeyConstraint('country', 'vehicle_id', 'licence'), ) country = Column(String(4)) vehicle_id = Column(String(255)) licence = Column(String(255)) col04 = Column(String(255)) col05 = Column(String(255)) col06 = Column(String(255)) col07 = Column(String(255)) col08 = Column(String(255)) col09 = Column(String(255)) col10 = Column(String(255)) col11 = Column(String(255)) col12 = Column(String(255)) col13 = Column(String(255)) col14 = Column(String(255)) col15 = Column(String(255)) col16 = Column(String(255)) col17 = Column(String(255)) col18 = Column(String(255)) col19 = Column(String(255)) col20 = Column(String(255)) col21 = Column(String(255)) col22 = Column(String(255)) col23 = Column(String(255)) col24 = Column(String(255)) col25 = Column(String(255)) col26 = Column(String(255)) col27 = Column(String(255)) col28 = Column(String(255)) col29 = Column(String(255)) col30 = Column(String(255)) col31 = Column(String(255)) col32 = Column(String(255)) col33 = Column(String(255)) col34 = Column(String(255)) col35 = Column(String(255)) col36 = Column(String(255)) col37 = Column(String(255)) col38 = Column(String(255)) col39 = Column(String(255)) col40 = Column(String(255))
class Cell(JusticeCountsBase): """A single data point within a table. """ __tablename__ = 'cell' id = Column(Integer, autoincrement=True) report_table_instance_id = Column(Integer, nullable=False) aggregated_dimension_values = Column(ARRAY(String(255)), nullable=False) value = Column(Numeric, nullable=False) __table_args__ = tuple([ PrimaryKeyConstraint(id), UniqueConstraint(report_table_instance_id, aggregated_dimension_values), ForeignKeyConstraint([report_table_instance_id], [ReportTableInstance.id])]) report_table_instance = relationship(ReportTableInstance)
class NetworkModuleAnnotation ( Base ): __tablename__ = "NetworkModuleAnnotation" # The module ID networkModule_id = Column( String, ForeignKey('NetworkModule.moduleID') ) # The annotation GOID annotationID = Column( String ) # The annotation name annotationTerm = Column( String ) # Define the Composite PrimaryKey __table_args__ = ( PrimaryKeyConstraint('networkModule_id', 'annotationID'), ) # ## The constructor def __init__(self, annotation_term, annotation_id): self.annotationName = annotation_term self.annotationID = annotation_id
class Birdsales(db.Model): __tablename__ = 'bird_sales' date_created = db.Column(db.DateTime, default=db.func.current_timestamp()) age = db.Column(db.Float, nullable=False) btype = db.Column(db.String(128), nullable=False) quantity = db.Column(db.Integer, nullable=False) amount = db.Column(db.Float, nullable=False) bshed = db.Column(db.Integer, db.ForeignKey( 'bird_stock.bshed'), nullable=False) __table_args__ = ( PrimaryKeyConstraint('bshed', 'age', 'btype', 'quantity', 'amount'), {}, ) def __init__(self, age, btype, quantity, amount, bstock): # bstock belongs to the relationship backref varuable in bird_stock table self.bstock = bstock self.age = age self.btype = btype self.quantity = quantity self.amount = amount
class Reserve(Base): __tablename__ = "reserves" __table_args__ = (PrimaryKeyConstraint("sid", "bid", "day"), {}) sid = Column(Integer, ForeignKey("sailors.sid")) bid = Column(Integer, ForeignKey("boats.bid")) day = Column(DateTime) price = Column(Integer, default=0) rating = Column(Integer, default=-1) sailor = relationship("Sailor") def __init__(self, sid, bid, day, price, rating): self.sid = sid self.bid = bid self.day = datetime.datetime.strptime(day, "%Y-%m-%d") self.price = price self.rating = rating def __repr__(self): return f"<Reservation(sid={self.sid}, bid={self.bid}, day={self.day}, price={self.price})>"
class PistonCup(declarative_base()): __tablename__ = 'pistoncup' __table_args__ = ( PrimaryKeyConstraint('country', 'make', 'model'), ) country = Column(String(4)) make = Column(String(255)) model = Column(String(255)) amount_damage = Column(String(255)) rnk = Column(Integer) def __repr__(self): return f"pistoncup(country={self.country}; make={self.make}; model={self.model}; " \ f"amount_damage={self.amount_damage}; rank={self.rnk})" @classmethod def wipe_slate(cls): return sase.delete(cls) @classmethod def import_scoreboard(cls, top_x: sase.select) -> sase.insert: return sase.insert(cls).from_select(inspect(cls).columns.keys(), sase.select(top_x.columns))
class WeirdYears(declarative_base()): __tablename__ = 'weirdyears' __table_args__ = ( PrimaryKeyConstraint('country', 'make', 'model'), ) country = Column(String(4)) make = Column(String(255)) model = Column(String(255)) build_year = Column(String(255)) def __repr__(self): return f"weirdyears(country={self.country}; make={self.make}; model={self.model}; build_year={self.build_year})" @classmethod def save_weird_years(cls, source_table: Type[declarative_base] = Vehicle) -> sase.Insert: """ This function generates the statement to insert weird build_years with accompanying primary keys to this table. The `prefix_with("IGNORE")` is to run the query even though some strings cannot be converted to integers. The result is still as expected. :param source_table: Main table where all unsanitized data is stored, is. vehicles :return: insert statement object that can be executed against the database """ stmt = sase.insert(cls).prefix_with("IGNORE").from_select( inspect(cls).columns.keys(), sase.select( [source_table.country, source_table.vehicle_id, source_table.licence, source_table.build_year] ).where( sase.or_( sase.cast(source_table.build_year, Integer) < constants.MIN_YEAR, sase.cast(source_table.build_year, Integer) > constants.MAX_YEAR) ) ) return stmt
class Hit(DBBASE): """This class is used to serialise and store in a DB a BLAST hit. Stored attributes: - id Indexing key - query_id Foreign ID key for the query table - target_id Foreign ID key for the target table - qt_constrating """ __tablename__ = "hit" query_id = Column(Integer, ForeignKey(Query.query_id), unique=False) target_id = Column(Integer, ForeignKey(Target.target_id), unique=False) evalue = Column(Float) bits = Column(Float) global_identity = Column(Float) global_positives = Column(Float) query_start = Column(Integer) query_end = Column(Integer) target_start = Column(Integer) target_end = Column(Integer) hit_number = Column(Integer) query_multiplier = Column( Float) # Probably I should move this to a separate table! target_multiplier = Column(Float) query_aligned_length = Column(Integer) target_aligned_length = Column(Integer) # query_id, target_id, evalue, bits, global_identity, global_positives, query_start, query_end, target_start, # target_end, hit_number, query_multiplier, target_multiplier, query_aligned_length, target_aligned_length # Indices and constraints qt_constraint = PrimaryKeyConstraint("query_id", "target_id", name="hit_id") qt_index = Index("qt_index", "query_id", "target_id", unique=True) query_index = Index("hit_query_idx", "query_id", unique=False) target_index = Index("hit_target_idx", "target_id", unique=False) evalue_index = Index('hit_evalue_idx', 'evalue', unique=False) query_object = relationship(Query, uselist=False, lazy="select", innerjoin=True, backref=backref("hits", cascade="all, delete-orphan")) target_object = relationship(Target, uselist=False, lazy="select", innerjoin=True, backref=backref("hits", cascade="all, delete-orphan")) @hybrid_property def query(self): return self.query_object.query_name @hybrid_property def query_length(self): return self.query_object.query_length @hybrid_property def target(self): return self.target_object.target_name @hybrid_property def target_length(self): return self.target_object.target_length join_condition = "and_(Hit.query_id==Hsp.query_id, Hit.target_id==Hsp.target_id)" hsps = relationship(Hsp, uselist=True, lazy="subquery", backref=backref( "hit_object", uselist=False, overlaps="hits,target_object,query_object"), cascade="all, delete-orphan", single_parent=True, foreign_keys=[query_id, target_id], overlaps="hits,target_object,query_object", primaryjoin=join_condition) __table_args__ = (qt_constraint, qt_index, query_index, target_index, evalue_index) # All arguments are necessary and it is more convenient to have them here # rather than in a struct/dict/whatever # pylint: disable=too-many-arguments def __init__(self, query_id, target_id, evalue, bits, global_identity, global_positives, query_start, query_end, target_start, target_end, query_aligned_length, target_aligned_length, query_length, alignment, hit_number=1, query_multiplier=1, target_multiplier=1): """This function takes as input the id of a target, the id of the query, and a hit-object from the XML. The multiplier keyword is used to calculate the ratio between the query and the target. :param query_id: reference key for the Query table :type query_id: int :param target_id: reference key for the Target table :type target_id: int :param alignment: The BLAST Hit object :param evalue: Evalue of the hit (recovered from the "description" object) :type evalue: float :param bits: BitScore of the hit (recovered from the "description" object) :type bits: float :param hit_number: progressive index that indicates the priority of the hit in the database :type hit_number: int :param query_multiplier: either 1 or 3, it depends on the type of BLAST :type query_multiplier: int :param target_multiplier: either 1 or 3, it depends on the type of BLAST :type target_multiplier: int """ self.query_id, self.query_start, self.query_end = query_id, query_start, query_end self.query_aligned_length = query_aligned_length self.target_id, self.target_start, self.target_end = target_id, target_start, target_end self.target_aligned_length = target_aligned_length self.hit_number = hit_number self.evalue, self.bits = evalue, bits self.global_identity, self.global_positives = global_identity, global_positives self.query_multiplier = query_multiplier self.target_multiplier = target_multiplier # pylint: enable=too-many-arguments def __str__(self): line = [ self.query, self.target, self.evalue, self.bits, self.query_start, self.query_end, self.target_start, self.target_end, self.query_length, self.target_length ] return "\t".join(str(x) for x in line) @classmethod def from_hit(cls, query_id, target_id, query_length, alignment, query_multiplier=1, target_multiplier=1, hit_number=1) -> (object, List[Hsp]): """""" prepared_hit, prepared_hsps = prepare_hit( alignment, query_id, target_id, query_multiplier=query_multiplier, target_multiplier=target_multiplier, query_length=query_length) hsps = [] for counter, hsp in enumerate(prepared_hsps, start=1): hsps.append( Hsp.from_dict(counter=counter, query_id=query_id, target_id=target_id, hsp_dict=hsp)) new = Hit(query_id=query_id, target_id=target_id, query_multiplier=query_multiplier, hit_number=hit_number, target_multiplier=target_multiplier, **prepared_hit) return new, hsps @classmethod def as_dict_static(cls, state_obj): """Method to return a dict representation of the object. Static method to be called from outside the class. :param state_obj: a namedtuple or an instance of this class :rtype: dict """ keys = [ "evalue", "bits", "global_identity", "global_positives", "query_start", "query_end", "target_start", "target_end", "hit_number", "query_multiplier", "target_multiplier", "query_aligned_length", "target_aligned_length", ] state = dict().fromkeys(keys) for key in keys: state[key] = getattr(state_obj, key) return state @classmethod def as_full_dict_static(cls, hit_tuple, hsp_list, query_tuple, target_tuple): """ :param hit_tuple: Hit namedtuple (from direct query to the DB) :type hit_tuple: collections.namedtuple :param hsp_list: list of hsp dictionaries from Hsp.as_dict_static :type hsp_list: list(collections.namedtuple) :param query_tuple: Query namedtuple :type query_tuple: collections.namedtuple :param target_tuple: Target namedtuple :type target_tuple: collections.namedtuple :rtype: dict """ state = cls.as_dict_static(hit_tuple) hsps = [Hsp.as_dict_static(h) for h in hsp_list] state["query"] = query_tuple.query_name state["target"] = target_tuple.target_name state["query_length"] = query_tuple.query_length state["target_length"] = target_tuple.target_length state["query_hit_ratio"] = (query_tuple.query_length / hit_tuple.query_multiplier) / \ (target_tuple.target_length / hit_tuple.target_multiplier) state["hit_query_ratio"] = (target_tuple.target_length / hit_tuple.target_multiplier) / \ (query_tuple.query_length / hit_tuple.query_multiplier) state["query_cov"] = state[ "query_aligned_length"] / query_tuple.query_length assert state["query_cov"] <= 1, (state, ) state["target_cov"] = state[ "target_aligned_length"] / target_tuple.target_length assert state["target_cov"] <= 1, (state, ) state["hsps"] = [] for hsp in hsps: hsp["query_hsp_cov"] = (hsp["query_hsp_end"] - hsp["query_hsp_start"] + 1) hsp["query_hsp_cov"] /= (state["query_length"]) hsp["target_hsp_cov"] = (hsp["target_hsp_end"] - hsp["target_hsp_start"] + 1) hsp["target_hsp_cov"] /= (state["target_length"]) state["hsps"].append(hsp) return state def as_dict(self): """Method to return a dict representation of the object. Necessary for storing. :rtype: dict """ state = self.as_dict_static(self) # Retrieving the values ONCE query_object = self.query_object.as_tuple() target_object = self.target_object.as_tuple() state["query"] = query_object.query_name state["target"] = target_object.target_name state["query_length"] = query_object.query_length state["target_length"] = target_object.target_length state["query_hit_ratio"] = state["query_length"] / state["query_multiplier"] /\ (state["target_length"] / state["target_multiplier"]) state["hit_query_ratio"] = state["target_length"] / state["target_multiplier"] /\ (state["query_length"] / state["query_multiplier"]) state["hsps"] = [] for hsp in self.hsps: dict_hsp = hsp.as_dict() dict_hsp["query_hsp_cov"] = (dict_hsp["query_hsp_end"] - dict_hsp["query_hsp_start"] + 1) dict_hsp["query_hsp_cov"] /= state["query_length"] dict_hsp["target_hsp_cov"] = (dict_hsp["target_hsp_end"] - dict_hsp["target_hsp_start"] + 1) dict_hsp["target_hsp_cov"] /= state["target_length"] state["hsps"].append(dict_hsp) return state @hybrid_property def query_hit_ratio(self): """ This property returns the quotient (Query Length)/(Target Length) """ ratio = self.query_length / self.query_multiplier ratio /= self.target_length / self.target_multiplier return ratio @hybrid_property def hit_query_ratio(self): """ This property returns the quotient (Target Length)/(Query Length) """ ratio = self.target_length / self.target_multiplier ratio /= (self.query_length / self.query_multiplier) return ratio
class Hsp(DBBASE): r""" This class serializes and stores into the DB the various HSPs. It is directly connected to the Hit table, through the "hit_id" reference key. The Hit reference can be accessed through the hit_object attribute; back-reference (Hit to Hsps) is given by the "hsps" attribute. Keys: :return hit_id: Reference key for the Hit table :rtype hit_id: int :return counter: It indicates the progressive number of the HSP for the hit :rtype counter: int :return query_hsp_start: Start position on the query :rtype query_hsp_start; int :return query_hsp_end: End position on the query :rtype query_hsp_end: int :return target_hsp_start: Start position on the target :rtype target_hsp_start: int :return target_hsp_end: End position on the target :rtype target_hsp_end: int :return hsp_evalue: Evalue of the HSP :rtype hsp_evalue: float :return hsp_bits: Bit-score of the HSP :rtype hsp_bits: float :return hsp_identity: Identity (in %) of the alignment :rtype hsp_identity: float :return hsp_length: Length of the HSP :rtype hsp_length: int :return match: the match line between query and target, with the following specs: - If the position is a match/positive, keep the original value - If the position is a gap *for the query*, insert a - (dash) - If the position is a gap *for the target*, insert a _ (underscore) - If the position is a gap *for both*, insert a \ (backslash) An HSP row has the following constraints: - Counter,hit_id must be unique (and are primary keys) - The combination ("Hit_id","query_hsp_start","query_hsp_end", "target_hsp_start", "target_hsp_end") must be unique Moreover, the following properties are also present: :return query_object: The referenced Query :rtype query_object: Query :return target_object: The reference Target :rtype target_object: Target """ __tablename__ = "hsp" counter = Column(Integer) # Indicates the number of the HSP inside the hit query_id = Column(Integer, ForeignKey(Query.query_id), unique=False) target_id = Column(Integer, ForeignKey(Target.target_id), unique=False) pk_constraint = PrimaryKeyConstraint("counter", "query_id", "target_id", name="hsp_constraint") query_index = Index("hsp_query_idx", "query_id", unique=False) target_index = Index("hsp_target_idx", "target_id", unique=False) hsp_evalue_index = Index('hsp_evalue_idx', 'hsp_evalue', unique=False) combined_index = Index("hsp_combined_idx", "query_id", "target_id", unique=False) full_index = Index("hsp_full_idx", "counter", "query_id", "target_id", unique=True) query_hsp_start = Column(Integer) query_hsp_end = Column(Integer) query_frame = Column(Integer) target_hsp_start = Column(Integer) target_hsp_end = Column(Integer) target_frame = Column(Integer) uni_constraint = UniqueConstraint("query_id", "target_id", "query_hsp_start", "query_hsp_end", "target_hsp_start", "target_hsp_end") match = Column(String(10000)) hsp_evalue = Column(Float) hsp_bits = Column(Float) hsp_identity = Column(Float) hsp_positives = Column(Float) hsp_length = Column(Integer) query_object = relationship(Query, uselist=False) target_object = relationship(Target, uselist=False) query = column_property( select([Query.query_name]).where(Query.query_id == query_id)) query_length = column_property( select([Query.query_length]).where(Query.query_id == query_id)) target = select([Target.target_name]).where(Target.target_id == target_id) target_length = select([Target.target_length ]).where(Target.target_id == target_id) __table_args__ = (pk_constraint, query_index, target_index, combined_index, hsp_evalue_index) def __init__(self, hsp, counter, query_id, target_id): """ :param hsp: an hsp object from the serialized XML :param counter: integer which indicates the HSP position in the HSP list for the hit :type counter: int :param query_id: Foreign key for the Query table :type query_id: int :param target_id: Foreign key for the Target table :type target_id: int """ self.counter = counter hsp_dict, _, _ = prepare_hsp(hsp, counter) for key in hsp_dict: setattr(self, key, hsp_dict[key]) self.query_id = query_id self.target_id = target_id def __str__(self): """Simple printing function.""" line = [ self.query, self.target, self.query_hsp_start, self.query_hsp_end, self.target_hsp_start, self.target_hsp_end, self.hsp_evalue ] return "\t".join([str(x) for x in line]) # @profile @classmethod def as_dict_static(cls, state_obj): """Method to return a dict representation of the object. Necessary for storing. This method returns a dictionary *without any attribute that requires joined data*. As a static method, it is useful to be used outside of the class. :param state_obj: an instance of the HSP class or a collections.namedtuple object from a direct query :rtype : dict """ keys = [ "query_hsp_start", "query_hsp_end", "target_hsp_start", "target_hsp_end", "hsp_evalue", "hsp_bits", "match", "query_frame", "target_frame" ] state = dict().fromkeys(keys) for key in keys: state[key] = getattr(state_obj, key) return state def as_dict(self): """Method to return a dict representation of the object. Necessary for storing. This method returns a dictionary *without any attribute that requires joined data*. It is meant to be used only by the method as_dict of the Hit class.""" return self.as_dict_static(self) def as_full_dict(self): """Method to return a dict representation of the object. This method also checks query name and hit name, so it is slower than as_dict and used when it is necessary to retrieve data independently from Hit. """ state = self.as_dict() state["query"] = self.query state["target"] = self.target state["query_hsp_cov"] = self.query_hsp_cov state["target_hsp_cov"] = self.target_hsp_cov state["match"] = self.match return state @hybrid_property def query_hsp_cov(self): """This property returns the percentage of the query which is covered by the HSP.""" val = (self.query_hsp_end - self.query_hsp_start + 1) val /= self.query_length return val @hybrid_property def target_hsp_cov(self): """This property returns the percentage of the target which is covered by the HSP.""" val = (self.target_hsp_end - self.target_hsp_start + 1) val /= self.target_length return val