Esempio n. 1
0
 def test_intron_annotation(self):
     """sequences annotated with Introns should return correct seq

     For each gene, the intron annotation named '<transcript id>-<rank>'
     is sliced out of the annotated gene sequence and its first and last
     10 bases are compared with the expected values.
     """
     cases = [
         ('IL2', 'ENST00000226730', 1, 'gtaagtatat', 'actttcttag'),
         ('IL13', 'ENST00000304506', 3, 'gtaaggcatc', 'tgtcctgcag'),
     ]
     for symbol, stable_id, rank, exp_seq5, exp_seq3 in cases:
         gene = asserted_one(self.human.getGenesMatching(Symbol=symbol))
         annotated = gene.getAnnotatedSeq(feature_types='gene')
         intron_name = '%s-%d' % (stable_id, rank)
         matches = annotated.getAnnotationsMatching('intron', intron_name)
         intron = asserted_one(matches)
         covering = annotated.getRegionCoveringAll(intron)
         intron_seq = str(covering.getSlice())
         # expected ends are written in lower case, so upper-case them first
         self.assertEqual(intron_seq[:10], exp_seq5.upper())
         self.assertEqual(intron_seq[-10:], exp_seq3.upper())
Esempio n. 2
0
 def test_intron_number(self):
     """number of introns should be correct

     A transcript expected to have no introns must report None for its
     Introns attribute; otherwise the length of Introns is compared.
     """
     cases = [
         ('ENSG00000227268', 'ENST00000445946', 0),
         ('ENSG00000132199', 'ENST00000319815', 8),
         ('ENSG00000132199', 'ENST00000383578', 15),
     ]
     for gene_id, transcript_id, exp_number in cases:
         gene = asserted_one(self.human.getGenesMatching(StableId=gene_id))
         matching = [t for t in gene.Transcripts
                     if t.StableId == transcript_id]
         transcript = asserted_one(matching)
         if exp_number != 0:
             self.assertEqual(len(transcript.Introns), exp_number)
             continue
         # no introns expected: the attribute itself should be None
         self.assertEqual(transcript.Introns, None)
Esempio n. 3
0
 def test_intron_annotation(self):
     """sequences annotated with Introns should return correct seq

     Checks the leading and trailing 10 bases of the intron annotation
     labelled '<transcript id>-<rank>' on each annotated gene sequence.
     """
     expected = (
         ('IL2', 'ENST00000226730', 1, 'gtaagtatat', 'actttcttag'),
         ('IL13', 'ENST00000304506', 3, 'gtaaggcatc', 'tgtcctgcag'),
     )
     for symbol, stable_id, rank, exp_seq5, exp_seq3 in expected:
         genes = self.human.getGenesMatching(Symbol=symbol)
         seq = asserted_one(genes).getAnnotatedSeq(feature_types='gene')
         label = '%s-%d' % (stable_id, rank)
         intron = asserted_one(seq.getAnnotationsMatching('intron', label))
         intron_seq = str(seq.getRegionCoveringAll(intron).getSlice())
         head, tail = intron_seq[:10], intron_seq[-10:]
         # the expected values are lower case; compare in upper case
         self.assertEqual(head, exp_seq5.upper())
         self.assertEqual(tail, exp_seq3.upper())
Esempio n. 4
0
 def test_intron_number(self):
     """number of introns should be correct

     Transcripts listed with 0 expected introns must have Introns equal to
     None; for the others the number of Introns is checked.
     """
     expected = (
         ('ENSG00000227268', 'ENST00000445946', 0),
         ('ENSG00000132199', 'ENST00000583771', 5),
         ('ENSG00000132199', 'ENST00000340116', 14),
     )
     for gene_id, transcript_id, exp_number in expected:
         genes = self.human.getGenesMatching(StableId=gene_id)
         gene = asserted_one(genes)
         transcript = asserted_one(
             [tr for tr in gene.Transcripts if tr.StableId == transcript_id])
         introns = transcript.Introns
         if exp_number == 0:
             self.assertEqual(introns, None)
         else:
             self.assertEqual(len(introns), exp_number)
Esempio n. 5
0
 def _get_exon_record(self):
     """stores the exon table row for self.exon_id in
     self._table_rows['exon']"""
     # this will be called by _Region parent class to make the location
     table = self.db.getTable('exon')
     matches_exon = table.c.exon_id == self.exon_id
     rows = sql.select([table], matches_exon).execute().fetchall()
     # asserted_one is expected to hand back the single matching row
     self._table_rows['exon'] = asserted_one(rows)
Esempio n. 6
0
 def _get_exon_stable_id_record(self):
     """stores the stable_id column of the exon_stable_id row for
     self.exon_id in self._table_rows['exon_stable_id']"""
     table = self.db.getTable('exon_stable_id')
     # only the stable_id column is selected
     stable_id_query = sql.select([table.c.stable_id],
                                  table.c.exon_id == self.exon_id)
     rows = stable_id_query.execute().fetchall()
     self._table_rows['exon_stable_id'] = asserted_one(rows)
Esempio n. 7
0
 def _get_flanking_seq_data(self):
     """caches the flanking sequences of this variant as 'FlankingSeq'

     The flanking_sequence record is stored in self._table_rows. A flank
     whose stored value is missing (None or the string 'NULL') is taken
     from the genome instead. The cached value is the tuple
     (last 300 of the up flank, first 300 of the down flank).
     """
     # maps to flanking_sequence through variation_feature_id
     # if this fails, we grab from genomic sequence
     variation_id = self._table_rows['variation_feature']['variation_id']
     table = self.flanking_sequence_table
     query = sql.select([table], table.c.variation_id == variation_id)
     record = asserted_one(query.execute())
     self._table_rows['flanking_sequence'] = record

     up_seq = record['up_seq']
     down_seq = record['down_seq']
     # the following two checks are because -- wait for it -- someone has
     # entered the string 'NULL' instead of NULL in the MySQL tables!!!
     if up_seq == 'NULL':
         up_seq = None
     if down_seq == 'NULL':
         down_seq = None

     flanks = {}
     for name, stored in dict(up=up_seq, down=down_seq).items():
         if stored is not None:
             flanks[name] = DNA.makeSequence(stored)
             continue
         # nothing stored: fetch the neighbouring region from the genome;
         # the up/down offsets swap on the minus strand
         is_down = name == 'down'
         if self.Location.Strand == -1:
             offsets = (-301, -1) if is_down else (1, 301)
         else:
             offsets = (1, 301) if is_down else (-301, -1)
         window = self.Location.resized(*offsets)
         flanks[name] = self.genome.getRegion(region=window).Seq

     self._cached['FlankingSeq'] = (flanks['up'][-300:], flanks['down'][:300])
Esempio n. 8
0
 def _get_simple_features(self, db, klass, target_coord, query_coord,
                          where_feature):
     """returns feature_type records for the query_coord from the
     simple_feature table. The returned coord is referenced to
     target_coord. At present, only CpG islands being queried.

     This is a generator: one klass instance is yielded per record, with
     its Location converted to target_coord's CoordType when the two
     coordinates have different CoordName values.
     """
     table = db.getTable('simple_feature')
     feature_types = ['CpGisland']
     # the analysis ids are passed to the query as strings
     type_ids = []
     for feature_type in feature_types:
         type_ids.append(str(self._feature_type_ids.get(feature_type)))
     # fix the following
     of_wanted_type = table.c.analysis_id.in_(type_ids)
     on_seq_region = table.c.seq_region_id == query_coord.seq_region_id
     query = sql.select([table], sql.and_(of_wanted_type, on_seq_region))
     query = location_query(table, query_coord.EnsemblStart,
                            query_coord.EnsemblEnd, query=query,
                            where=where_feature)
     for record in query.execute():
         coord = Coordinate(self, CoordName=query_coord.CoordName,
                            Start=record['seq_region_start'],
                            End=record['seq_region_end'],
                            seq_region_id=record['seq_region_id'],
                            Strand=record['seq_region_strand'],
                            ensembl_coord=True)
         if query_coord.CoordName != target_coord.CoordName:
             conversions = get_coord_conversion(coord,
                                                target_coord.CoordType,
                                                self.CoreDb)
             # item 1 of the single conversion is used as the new location
             coord = asserted_one(conversions)[1]

         # coord = coord.makeRelativeTo(query_coord) #TODO: fix here if query_coord and target_coord have different coordName
         # coord = coord.makeRelativeTo(target_coord, False)
         yield klass(self, db, Location=coord, Score=record['score'])
Esempio n. 9
0
 def _get_repeat_features(self, db, klass, target_coord, query_coord, where_feature):
     """returns Repeat region instances

     This is a generator: one klass instance is yielded per repeat_feature
     record on query_coord's seq_region. Its Location is converted to
     target_coord's CoordType when the two CoordName values differ.
     """
     # we build repeats using coordinates from repeat_feature table
     # the repeat_consensus_id is required to get the repeat name, class
     # and type
     table = db.getTable("repeat_feature")
     on_seq_region = table.c.seq_region_id == query_coord.seq_region_id
     query = sql.select([table], on_seq_region)
     query = location_query(
         table,
         query_coord.EnsemblStart,
         query_coord.EnsemblEnd,
         query=query,
         where=where_feature,
     )
     for record in query.execute():
         location = Coordinate(
             self,
             CoordName=query_coord.CoordName,
             Start=record["seq_region_start"],
             End=record["seq_region_end"],
             seq_region_id=record["seq_region_id"],
             Strand=record["seq_region_strand"],
             ensembl_coord=True,
         )
         if query_coord.CoordName != target_coord.CoordName:
             conversions = get_coord_conversion(location, target_coord.CoordType, self.CoreDb)
             # item 1 of the single conversion is used as the new location
             location = asserted_one(conversions)[1]
         # coord = coord.makeRelativeTo(query_coord) #TODO: fix here if query_coord and target_coord have different coordName
         # coord = coord.makeRelativeTo(target_coord, False)
         yield klass(self, db, Location=location, Score=record["score"], data=record)
Esempio n. 10
0
 def _get_repeat_features(self, db, klass, target_coord, query_coord,
                          where_feature):
     """returns Repeat region instances

     Generator yielding klass(self, db, ...) for each repeat_feature record
     selected for query_coord. The record is passed on as `data`.
     """
     # we build repeats using coordinates from repeat_feature table
     # the repeat_consensus_id is required to get the repeat name, class
     # and type
     repeats = db.getTable('repeat_feature')
     same_region = repeats.c.seq_region_id == query_coord.seq_region_id
     query = location_query(repeats, query_coord.EnsemblStart,
                            query_coord.EnsemblEnd,
                            query=sql.select([repeats], same_region),
                            where=where_feature)
     for record in query.execute():
         coord_kwargs = dict(CoordName=query_coord.CoordName,
                             Start=record['seq_region_start'],
                             End=record['seq_region_end'],
                             seq_region_id=record['seq_region_id'],
                             Strand=record['seq_region_strand'],
                             ensembl_coord=True)
         coord = Coordinate(self, **coord_kwargs)
         if query_coord.CoordName != target_coord.CoordName:
             # switch to the target's coordinate type; element 1 of the
             # single conversion is kept
             converted = get_coord_conversion(coord, target_coord.CoordType,
                                              self.CoreDb)
             coord = asserted_one(converted)[1]
         # coord = coord.makeRelativeTo(query_coord) #TODO: fix here if query_coord and target_coord have different coordName
         # coord = coord.makeRelativeTo(target_coord, False)
         yield klass(self, db, Location=coord, Score=record['score'],
                     data=record)
Esempio n. 11
0
 def _get_gene_features(self, db, klass, target_coord, query_coord,
                        where_feature):
     """returns all genes

     This is a generator: one klass instance is yielded per gene record
     found on query_coord's seq_region within the queried location. When
     query_coord and target_coord have different CoordName values, the
     gene location is converted to target_coord's CoordType before the
     instance is built.
     """
     # the xref table is only looked up for core databases
     xref_table = db.getTable('xref') if db.Type == 'core' else None
     gene_table = db.getTable('gene')

     # after release 65, the gene_id_table is removed. The following is to
     # maintain support for earlier releases.
     if self.GeneralRelease >= 65:
         gene_id_table = None
     else:
         gene_id_table = db.getTable('gene_stable_id')

     # note gene records are at chromosome, not contig, level
     condition = gene_table.c.seq_region_id == query_coord.seq_region_id
     query = self._build_gene_query(db, condition, gene_table,
                                    gene_id_table, xref_table)
     query = location_query(gene_table, query_coord.EnsemblStart,
                            query_coord.EnsemblEnd, query=query,
                            where=where_feature)

     for record in query.execute():
         location = Coordinate(self, CoordName=query_coord.CoordName,
                               Start=record['seq_region_start'],
                               End=record['seq_region_end'],
                               Strand=record['seq_region_strand'],
                               seq_region_id=record['seq_region_id'],
                               ensembl_coord=True)
         if query_coord.CoordName != target_coord.CoordName:
             # express the gene location in the target coordinate type;
             # element 1 of the single conversion is the converted location
             location = asserted_one(
                 get_coord_conversion(location, target_coord.CoordType,
                                      self.CoreDb))[1]

         yield klass(self, db, Location=location, data=record)
Esempio n. 12
0
 def _get_seq_region_record(self, seq_region_id):
     """returns the seq_region table row whose seq_region_id equals the
     provided value"""
     # should this be on a parent class? or a generic function in assembly?
     table = self.db.getTable('seq_region')
     matching = sql.select([table], table.c.seq_region_id == seq_region_id)
     return asserted_one(matching.execute())
Esempio n. 13
0
 def _get_transcript_stable_id_record(self):
     """stores the row holding this transcript's StableId in
     self._table_rows, unless a row for that table is already present"""
     table_name = self._attr_ensembl_table_map['StableId']
     if table_name not in self._table_rows:
         transcript_id = self.transcript_id
         table = self.db.getTable(table_name)
         by_transcript = table.c.transcript_id == transcript_id
         rows = sql.select([table], by_transcript).execute()
         self._table_rows[table_name] = asserted_one(rows)
Esempio n. 14
0
 def _get_cpg_island_analysis_id(self):
     """stores the analysis_description row whose display_label contains
     'CpG' and populates the cached 'CpGisland' attribute from its
     analysis_id column"""
     table = self.genome.CoreDb.getTable("analysis_description")
     query = sql.select([table.c.analysis_id], table.c.display_label.like("%CpG%"))
     self._table_rows["analysis_description"] = asserted_one(query.execute())

     def quoted_limited(value):
         # converter handed to _populate_cache_from_record for the column
         return DisplayString(value, with_quotes=True, num_words=2)

     attr_column_map = [("CpGisland", "analysis_id", quoted_limited)]
     self._populate_cache_from_record(attr_column_map, "analysis_description")
 def _get_cigar_record(self):
     """returns the genomic_align record for the cached genomic_align_id
     and caches its cigar_line under self._cached['cigar_line']"""
     compara_db = self.parent.compara.ComparaDb
     table = compara_db.getTable('genomic_align')
     align_id = self._cached['genomic_align_id']
     # only the cigar_line column is selected
     query = sql.select([table.c.cigar_line],
                        table.c.genomic_align_id == align_id)
     record = asserted_one(query.execute())
     self._cached['cigar_line'] = record['cigar_line']
     return record
Esempio n. 16
0
 def _get_cigar_record(self):
     """fetches the cigar_line of the genomic_align row matching
     self._cached['genomic_align_id'], caches it as 'cigar_line' and
     returns the fetched record"""
     genomic_align = self.parent.compara.ComparaDb.getTable('genomic_align')
     wanted = genomic_align.c.genomic_align_id == \
         self._cached['genomic_align_id']
     rows = sql.select([genomic_align.c.cigar_line], wanted).execute()
     record = asserted_one(rows)
     self._cached['cigar_line'] = record['cigar_line']
     return record
Esempio n. 17
0
 def _get_seq_region_id(self, CoordName):
     """returns the seq_region_id for the provided CoordName

     The seq_region row must be named CoordName and its coord_system_id
     must be one of the non-string keys obtained by iterating the
     CoordSystem built for self.CoreDb.
     """
     seq_region_table = self.CoreDb.getTable('seq_region')
     coord_systems = CoordSystem(core_db=self.CoreDb)
     # type(u'') is unicode on Python 2 and str on Python 3, so text keys
     # are recognised without naming `unicode`, which Python 3 lacks
     text_types = (str, type(u''))
     coord_system_ids = [k for k in coord_systems
                         if not isinstance(k, text_types)]
     query = sql.select(
         [seq_region_table.c.seq_region_id],
         sql.and_(seq_region_table.c.name == CoordName,
                  seq_region_table.c.coord_system_id.in_(coord_system_ids)))
     record = asserted_one(query.execute().fetchall())
     return record['seq_region_id']
Esempio n. 18
0
 def _get_seq_region_id(self, CoordName):
     """returns the seq_region_id for the provided CoordName"""
     table = self.CoreDb.getTable('seq_region')
     coord_systems = CoordSystem(core_db=self.CoreDb)
     # keep only the non-string keys of the coord system collection
     coord_system_ids = []
     for key in coord_systems:
         if isinstance(key, str):
             continue
         coord_system_ids.append(key)
     has_name = table.c.name == CoordName
     in_coord_system = table.c.coord_system_id.in_(coord_system_ids)
     query = sql.select([table.c.seq_region_id],
                        sql.and_(has_name, in_coord_system))
     row = asserted_one(query.execute().fetchall())
     return row['seq_region_id']
Esempio n. 19
0
 def _get_translation_record(self):
     """stores the translation table row for this transcript in
     self._table_rows['translation']; when asserted_one raises NoItemError
     the 'TranslatedExons' attribute is set to null values instead"""
     transcript_id = self.transcript_id
     table = self.db.getTable('translation')
     query = sql.select([table], table.c.transcript_id == transcript_id)
     try:
         record = asserted_one(query.execute())
     except NoItemError:
         self._set_null_values(['TranslatedExons'], 'translation')
     else:
         self._table_rows['translation'] = record
Esempio n. 20
0
    def test_intron(self):
        """should get correct Intron sequence, regardless of strand

        Each expected intron is (rank, start, end, first 10 bases,
        last 10 bases), listed in the order the transcript reports them.
        """
        # IL2 is on - strand, IL13 is on + strand, both have three introns
        IL2_exp_introns = [
            (1, 122456203, 122456293, 'gtaagtatat', 'actttcttag'),
            (2, 122453853, 122456143, 'gtaagtacaa', 'attattctag'),
            (3, 122451862, 122453709, 'gtaaggcatt', 'tcttttatag'),
        ]
        IL13_exp_introns = [
            (1, 132658360, 132659417, 'gtgagtgtcg', 'gctcccacag'),
            (2, 132659471, 132659723, 'gtaaggacct', 'ctccccacag'),
            (3, 132659828, 132660174, 'gtaaggcatc', 'tgtcctgcag'),
        ]
        cases = [
            ('IL2', 'ENST00000226730', IL2_exp_introns),
            ('IL13', 'ENST00000304506', IL13_exp_introns),
        ]

        for symbol, stable_id, exp_introns in cases:
            gene = asserted_one(self.human.getGenesMatching(Symbol=symbol))
            strand = gene.Location.Strand
            candidates = [t for t in gene.Transcripts
                          if t.StableId == stable_id]
            transcript = asserted_one(candidates)
            introns = transcript.Introns
            self.assertEqual(len(introns), len(exp_introns))
            for idx, intron in enumerate(introns):
                loc = intron.Location
                start, end = loc.Start, loc.End
                seq = str(intron.Seq)
                (exp_rank, exp_start, exp_end,
                 exp_seq5, exp_seq3) = exp_introns[idx]
                self.assertEqual(loc.Strand, strand)
                # test the order using rank
                self.assertEqual(intron.Rank, exp_rank)
                # test position
                self.assertEqual(start, exp_start)
                self.assertEqual(end, exp_end)
                # test sequence
                self.assertEqual(seq[:10], exp_seq5.upper())
                self.assertEqual(seq[-10:], exp_seq3.upper())
Esempio n. 21
0
 def _get_repeat_consensus_record(self):
     """stores the repeat_consensus row referenced by the repeat_feature
     row and populates the cached Symbol, RepeatClass, RepeatType and
     Consensus attributes from it"""
     table = self.db.getTable('repeat_consensus')
     consensus_id = self._table_rows['repeat_feature']['repeat_consensus_id']
     query = sql.select([table],
                        table.c.repeat_consensus_id == consensus_id)
     rows = query.execute().fetchall()
     self._table_rows['repeat_consensus'] = asserted_one(rows)

     def limit_length(value):
         # converter used for the Consensus column
         return DisplayString(value, repr_length=10)

     # (attribute name, record column, converter)
     attr_column_map = [
         ('Symbol', 'repeat_name', _quoted),
         ('RepeatClass', 'repeat_class', _quoted),
         ('RepeatType', 'repeat_type', _quoted),
         ('Consensus', 'repeat_consensus', limit_length),
     ]
     self._populate_cache_from_record(attr_column_map, 'repeat_consensus')
Esempio n. 22
0
 def _get_cpg_island_analysis_id(self):
     """looks up the analysis_id of the analysis_description row whose
     display_label is like '%CpG%', stores that row in self._table_rows
     and populates the cached 'CpGisland' attribute from it"""
     core_db = self.genome.CoreDb
     descriptions = core_db.getTable('analysis_description')
     label_matches = descriptions.c.display_label.like('%CpG%')
     rows = sql.select([descriptions.c.analysis_id], label_matches).execute()
     self._table_rows['analysis_description'] = asserted_one(rows)

     def quoted_limited(x):
         # converter passed along with the analysis_id column
         return DisplayString(x, with_quotes=True, num_words=2)

     self._populate_cache_from_record(
         [('CpGisland', 'analysis_id', quoted_limited)],
         'analysis_description')
Esempio n. 23
0
 def _get_seq_region_record(self, CoordName):
     """stores the seq_region row named CoordName in
     self._table_rows['seq_region']

     Only rows whose coord_system_id is one of the non-string keys of the
     genome's CoordSystem are considered.
     """
     # override the _Region class method, since, we take the provided Start
     # etc .. attributes
     # CoordName comes from seq_region_table.c.name
     # matched, by coord_system_id, to default coord system
     table = self.genome.db.getTable('seq_region')
     coord_systems = CoordSystem(core_db=self.genome.CoreDb)
     coord_system_ids = [key for key in coord_systems
                         if not isinstance(key, str)]
     named = table.c.name == CoordName
     in_coord_system = table.c.coord_system_id.in_(coord_system_ids)
     query = sql.select([table], sql.and_(named, in_coord_system))
     rows = query.execute().fetchall()
     self._table_rows['seq_region'] = asserted_one(rows)
Esempio n. 24
0
    def test_intron(self):
        """should get correct Intron sequence, regardless of strand

        Expected introns are tuples of (rank, start, end, first 10 bases,
        last 10 bases) in the order the transcript is expected to report
        them.
        """
        # IL2 is on - strand, IL13 is on + strand, both have three introns
        IL2_exp_introns = [
            (1, 123377358, 123377448, 'gtaagtatat', 'actttcttag'),
            (2, 123375008, 123377298, 'gtaagtacaa', 'attattctag'),
            (3, 123373017, 123374864, 'gtaaggcatt', 'tcttttatag'),
        ]
        IL13_exp_introns = [
            (1, 131994052, 131995109, 'gtgagtgtcg', 'gctcccacag'),
            (2, 131995163, 131995415, 'gtaaggacct', 'ctccccacag'),
            (3, 131995520, 131995866, 'gtaaggcatc', 'tgtcctgcag'),
        ]
        expected_by_gene = (
            ('IL2', 'ENST00000226730', IL2_exp_introns),
            ('IL13', 'ENST00000304506', IL13_exp_introns),
        )

        for symbol, stable_id, exp_introns in expected_by_gene:
            gene = asserted_one(self.human.getGenesMatching(Symbol=symbol))
            strand = gene.Location.Strand
            transcript = asserted_one(
                [tr for tr in gene.Transcripts if tr.StableId == stable_id])
            introns = transcript.Introns
            self.assertEqual(len(introns), len(exp_introns))
            # lengths were just asserted equal, so pairing is one to one
            for intron, expected in zip(introns, exp_introns):
                loc = intron.Location
                start, end = loc.Start, loc.End
                seq = str(intron.Seq)
                exp_rank, exp_start, exp_end, exp_seq5, exp_seq3 = expected
                self.assertEqual(loc.Strand, strand)
                # test the order using rank
                self.assertEqual(intron.Rank, exp_rank)
                # test position
                self.assertEqual(start, exp_start)
                self.assertEqual(end, exp_end)
                # test sequence
                self.assertEqual(seq[:10], exp_seq5.upper())
                self.assertEqual(seq[-10:], exp_seq3.upper())
Esempio n. 25
0
 def __init__(self, genome, db, StableId=None, Symbol=None, Location=None, data=None):
     """constructed by a genome instance

     When data is not supplied, the gene record is queried by StableId,
     or by Symbol if StableId is None. That record is then registered as
     the table row behind each attribute and the matching populate method
     is called.
     """
     super(Gene, self).__init__(genome, db, Location=Location)
     if data is None:
         if StableId is None:
             args = dict(Symbol=Symbol)
         else:
             args = dict(StableId=StableId)
         assert args
         gene_query = self.genome._get_gene_query(db, **args)
         data = asserted_one(list(gene_query.execute()))

     populators = [
         ('StableId', self._get_gene_stable_id_record),
         ('BioType', self._get_gene_record),
         ('Description', self._get_gene_record),
         ('Symbol', self._get_xref_record),
         ('Location', self._get_gene_record),
     ]
     for name, func in populators:
         no_label = 'display_label' not in data.keys()
         if name == 'Symbol' and no_label:  # For EST
             continue
         table_name = self._attr_ensembl_table_map[name]
         self._table_rows[table_name] = data
         func()  # this populates the attributes
Esempio n. 26
0
def assembly_exception_coordinate(loc):
    """returns a coordinate conversion for one with an assembly exception

    The assembly_exception record overlapping loc is fetched together
    with the name of its exc_seq_region, and the converted location
    produced by _get_equivalent_coords is returned.
    """
    core_db = loc.genome.CoreDb
    exception_table = core_db.getTable('assembly_exception')
    seq_region_table = core_db.getTable('seq_region')

    for_this_region = exception_table.c.seq_region_id == loc.seq_region_id
    # ties the exception's exc_seq_region_id to its seq_region row
    joined = exception_table.c.exc_seq_region_id == \
        seq_region_table.c.seq_region_id
    query = sql.select([exception_table, seq_region_table.c.name],
                       sql.and_(for_this_region, joined))
    query = location_query(exception_table, loc.Start, loc.End, query=query)
    record = asserted_one(query.execute().fetchall())
    # only the second item of the returned pair is needed
    _, conv_loc = _get_equivalent_coords(loc, record, "seq_region",
                                         "exc_seq_region", loc.CoordType)
    return conv_loc
Esempio n. 27
0
def assembly_exception_coordinate(loc):
    """returns a coordinate conversion for one with an assembly exception"""
    genome = loc.genome
    assemb_except = genome.CoreDb.getTable('assembly_exception')
    seq_region = genome.CoreDb.getTable('seq_region')

    # select the exception for loc's seq_region plus the name of the
    # seq_region it points to
    conditions = sql.and_(
        assemb_except.c.seq_region_id == loc.seq_region_id,
        assemb_except.c.exc_seq_region_id == seq_region.c.seq_region_id)
    selection = sql.select([assemb_except, seq_region.c.name], conditions)
    selection = location_query(assemb_except, loc.Start, loc.End,
                               query=selection)
    rows = selection.execute().fetchall()
    equivalent = _get_equivalent_coords(loc, asserted_one(rows),
                                        "seq_region", "exc_seq_region",
                                        loc.CoordType)
    # the result unpacks into two items; the second is the converted location
    unused, conv_loc = equivalent
    return conv_loc
Esempio n. 28
0
 def _make_location(self):
     """builds a Coordinate from the table row backing 'Location' and
     caches it as self._cached['Location']; does nothing if that row is
     None"""
     row = self._table_rows[self._attr_ensembl_table_map['Location']]
     if row is None:
         return

     # the location column names share the prefix
     # self._location_column_prefix
     prefix = self._location_column_prefix
     seq_region_id = row['%sid' % prefix]
     start = row['%sstart' % prefix]
     end = row['%send' % prefix]
     strand = row['%sstrand' % prefix]

     # the coordinate name is the name of the referenced seq_region
     seq_region_table = self.db.getTable('seq_region')
     name_query = sql.select(
         [seq_region_table.c.name],
         seq_region_table.c.seq_region_id == seq_region_id)
     coord_name = asserted_one(name_query.execute().fetchall())['name']

     self._cached['Location'] = Coordinate(genome=self.genome,
                                           CoordName=coord_name,
                                           Start=start, End=end,
                                           Strand=strand,
                                           seq_region_id=seq_region_id,
                                           ensembl_coord=True)
Esempio n. 29
0
def _get_coord_type_and_seq_region_id(coord_name, core_db):
    """returns the seq_region row named coord_name and the name of its
    coordinate system

    If asserted_one raises ValueError for the matching rows, the first row
    whose coord_system_id is in the species' CoordSystem is used. When no
    row qualifies, ValueError is raised.
    """
    seq_region_table = core_db.getTable('seq_region')
    query = sql.select([seq_region_table]).where(
        seq_region_table.c.name == coord_name)
    rows = query.execute().fetchall()
    species_coord_sys = CoordSystem(species=core_db.db_name.Species,
                                    core_db=core_db)
    try:
        selected_row = asserted_one(rows)
    except ValueError:
        selected_row = None
        for candidate in rows:
            # skip rows that are not a default_version
            if candidate['coord_system_id'] in species_coord_sys:
                selected_row = candidate
                break
        if selected_row is None:
            raise ValueError("Ambigous coordinate name: %s" % coord_name)
    coord_system = species_coord_sys[selected_row['coord_system_id']]
    return selected_row, coord_system.name
Esempio n. 30
0
def _get_coord_type_and_seq_region_id(coord_name, core_db):
    """returns (seq_region row, coordinate system name) for coord_name

    coord_name is converted with str() before being matched against the
    seq_region name column. If asserted_one raises ValueError for the
    matching rows, the first row whose coord_system_id is in the species'
    CoordSystem is chosen; ValueError is raised when there is none.
    """
    table = core_db.getTable('seq_region')
    name_matches = table.c.name == str(coord_name)
    rows = sql.select([table]).where(name_matches).execute().fetchall()
    species_coord_sys = CoordSystem(species=core_db.db_name.Species,
                                    core_db=core_db)
    try:
        selected_row = asserted_one(rows)
    except ValueError:
        # rows that are not a default_version are passed over
        defaults = (row for row in rows
                    if row['coord_system_id'] in species_coord_sys)
        selected_row = next(defaults, None)
        if selected_row is None:
            raise ValueError("Ambigous coordinate name: %s" % coord_name)
    coord_type = species_coord_sys[selected_row['coord_system_id']].name
    return selected_row, coord_type
Esempio n. 31
0
    def _get_simple_features(self, db, klass, target_coord, query_coord,
                             where_feature):
        """returns feature_type records for the query_coord from the
        simple_feature table. The returned coord is referenced to
        target_coord. At present, only CpG islands being queried.

        Generator yielding one klass instance per selected record.
        """
        table = db.getTable('simple_feature')
        feature_types = ['CpGisland']
        feature_type_ids = []
        for feature_type in feature_types:
            feature_type_ids.append(self._feature_type_ids.get(feature_type))
        # fix the following
        conditions = sql.and_(
            table.c.analysis_id.in_(feature_type_ids),
            table.c.seq_region_id == query_coord.seq_region_id)
        query = location_query(table, query_coord.EnsemblStart,
                               query_coord.EnsemblEnd,
                               query=sql.select([table], conditions),
                               where=where_feature)
        for record in query.execute():
            location = Coordinate(self, CoordName=query_coord.CoordName,
                                  Start=record['seq_region_start'],
                                  End=record['seq_region_end'],
                                  seq_region_id=record['seq_region_id'],
                                  Strand=record['seq_region_strand'],
                                  ensembl_coord=True)
            if query_coord.CoordName != target_coord.CoordName:
                conversions = get_coord_conversion(
                    location, target_coord.CoordType, self.CoreDb)
                # item 1 of the single conversion is the new location
                location = asserted_one(conversions)[1]

            # coord = coord.makeRelativeTo(query_coord) #TODO: fix here if query_coord and target_coord have different coordName
            # coord = coord.makeRelativeTo(target_coord, False)
            yield klass(self, db, Location=location, Score=record['score'])
Esempio n. 32
0
def _get_sequence_from_direct_assembly(coord=None, DEBUG=False):
    """returns the sequence for coord, assembled from the dna table

    coord is converted to the sequence-level coordinate system; each
    resulting piece is sliced out of the dna table, reverse complemented
    when its target strand is -1, and the pieces are combined by
    _assemble_seq. Note that coord.Strand is set to 1 on the object
    passed in. Raises NoItemError when there is no conversion for coord.
    """
    # TODO clean up use of a coord
    genome = coord.genome
    # no matter what strand user provide, we get the + sequence first
    coord.Strand = 1
    species = genome.Species
    core_db = genome.CoreDb
    coord_type = CoordSystem(species=species, core_db=core_db, seq_level=True)

    if DEBUG:
        print('Created Coordinate:', coord, coord.EnsemblStart,
              coord.EnsemblEnd)
        print(coord.CoordType, coord_type)

    assemblies = get_coord_conversion(coord, coord_type, core_db)
    if not assemblies:
        raise NoItemError('no assembly for %s' % coord)

    dna = core_db.getTable('dna')
    seqs = []
    positions = []
    for q_loc, t_loc in assemblies:
        assert q_loc.Strand == 1
        # get MySQL to do the string slicing via substr function
        piece = substr(dna.c.sequence, t_loc.EnsemblStart, len(t_loc))
        query = sql.select([piece.label('sequence')],
                           dna.c.seq_region_id == t_loc.seq_region_id)
        record = asserted_one(query.execute().fetchall())
        seq = DNA.makeSequence(record['sequence'])
        if t_loc.Strand == -1:
            seq = seq.rc()
        seqs.append(str(seq))
        positions.append((q_loc.Start, q_loc.End))
    return _assemble_seq(seqs, coord.Start, coord.End, positions)
Esempio n. 33
0
 def _get_dnafrag_id_for_coord(self, coord):
     """returns the dnafrag_id for the coordinate

     The dnafrag row must belong to the genome_db entry named after the
     coordinate's species and be named coord.CoordName.

     Raises RuntimeError if asserted_one raises NoItemError for the
     query result.
     """
     dnafrag_table = self.ComparaDb.getTable('dnafrag')
     genome_db_table = self.ComparaDb.getTable('genome_db')

     # column renamed between versions
     prefix = coord.genome.Species.lower()
     if int(self.Release) > 58:
         prefix = _Species.getEnsemblDbPrefix(prefix)

     query = sql.select(
         [dnafrag_table.c.dnafrag_id, dnafrag_table.c.coord_system_name],
         sql.and_(
             dnafrag_table.c.genome_db_id == genome_db_table.c.genome_db_id,
             genome_db_table.c.name == prefix,
             dnafrag_table.c.name == coord.CoordName))
     try:
         record = asserted_one(query.execute().fetchall())
     except NoItemError:
         # call form of raise: valid on both Python 2 and Python 3
         raise RuntimeError('No DNA fragment identified')
     return record['dnafrag_id']
Esempio n. 34
0
    def _get_dnafrag_id_for_coord(self, coord):
        """returns the dnafrag_id for the coordinate

        The dnafrag row is matched on its name (coord.CoordName) and on
        the genome_db entry named after the coordinate's species.

        Raises RuntimeError if asserted_one raises NoItemError for the
        query result.
        """
        dnafrag_table = self.ComparaDb.getTable('dnafrag')
        genome_db_table = self.ComparaDb.getTable('genome_db')

        # column renamed between versions
        prefix = coord.genome.Species.lower()
        if int(self.Release) > 58:
            prefix = _Species.getEnsemblDbPrefix(prefix)

        same_genome = \
            dnafrag_table.c.genome_db_id == genome_db_table.c.genome_db_id
        query = sql.select(
            [dnafrag_table.c.dnafrag_id, dnafrag_table.c.coord_system_name],
            sql.and_(same_genome,
                     genome_db_table.c.name == prefix,
                     dnafrag_table.c.name == coord.CoordName))
        try:
            record = asserted_one(query.execute().fetchall())
        except NoItemError:
            # call form of raise: valid on both Python 2 and Python 3
            raise RuntimeError('No DNA fragment identified')
        return record['dnafrag_id']
Esempio n. 35
0
def _get_sequence_from_direct_assembly(coord=None, DEBUG=False):
    """returns the sequence for coord, assembled from the dna table

    coord is converted to the sequence-level coordinate system; each
    resulting piece is sliced out of the dna table, reverse complemented
    when its target strand is -1, and the pieces are combined by
    _assemble_seq. Note that coord.Strand is set to 1 on the object
    passed in.

    Raises NoItemError when there is no conversion for coord.
    """
    # TODO clean up use of a coord
    genome = coord.genome
    # no matter what strand user provide, we get the + sequence first
    coord.Strand = 1
    species = genome.Species
    coord_type = CoordSystem(species=species, core_db=genome.CoreDb,
                             seq_level=True)

    if DEBUG:
        # single pre-formatted argument so the output is the same whether
        # print is a statement (Python 2) or a function (Python 3)
        print('Created Coordinate: %s %s %s' % (coord, coord.EnsemblStart,
                                                coord.EnsemblEnd))
        print('%s %s' % (coord.CoordType, coord_type))

    assemblies = get_coord_conversion(coord, coord_type, genome.CoreDb)

    if not assemblies:
        raise NoItemError('no assembly for %s' % coord)

    dna = genome.CoreDb.getTable('dna')
    seqs, positions = [], []
    for q_loc, t_loc in assemblies:
        assert q_loc.Strand == 1
        length = len(t_loc)
        # get MySQL to do the string slicing via substr function
        query = sql.select([substr(dna.c.sequence,
                                   t_loc.EnsemblStart,
                                   length).label('sequence')],
                           dna.c.seq_region_id == t_loc.seq_region_id)
        record = asserted_one(query.execute().fetchall())
        seq = DNA.makeSequence(record['sequence'])
        if t_loc.Strand == -1:
            seq = seq.rc()
        seqs.append(str(seq))
        positions.append((q_loc.Start, q_loc.End))
    return _assemble_seq(seqs, coord.Start, coord.End, positions)