def test_call(self):
    """Check the records CoordSystem returns for 'chromosome' and for key 1."""
    # Looked up by name, supplying the core database explicitly.
    by_name = CoordSystem('chromosome', core_db=human.CoreDb, species='human')
    # Looked up by the key 1, with only the species given.
    by_key = CoordSystem(1, species='human')

    self.assertEqual(by_name.coord_system_id, 4)
    self.assertEqual(by_key.name, 'contig')
    self.assertEqual(by_key.attr, 'default_version, sequence_level')
def _set_species_feature_levels(self, species, core_db, feature_types, var_db, otherfeature_db):
    """Register the databases needed for ``feature_types`` and load their levels.

    For each database that the requested feature types call for, its key
    ('core_db', 'var_db' or 'otherfeature_db') is appended once to
    ``self._species_feature_dbs[species]``, its meta-coord records are fetched
    with ``self._get_meta_coord_records`` and handed to
    ``self._add_species_feature_levels`` together with a short label
    ('core', 'var' or 'otherfeature') and the core coordinate systems.

    ``var_db`` and ``otherfeature_db`` are asserted to be not None when
    'variation' / 'est' respectively are requested.
    """
    if species not in self._species_feature_levels:
        self._species_feature_levels[species] = {}
        self._species_feature_dbs[species] = []

    coord_system = CoordSystem(core_db=core_db)

    def load_levels(db_key, db, label, must_exist):
        # Record the database key first (once only), then validate and load,
        # mirroring the order of operations callers have always seen.
        known_dbs = self._species_feature_dbs[species]
        if db_key not in known_dbs:
            known_dbs.append(db_key)
        if must_exist:
            assert db is not None
        meta_records = self._get_meta_coord_records(db)
        self._add_species_feature_levels(species, meta_records, label, coord_system)

    # cpg, repeat and gene features all come from the core database
    if set(feature_types) & {'cpg', 'repeat', 'gene'}:
        load_levels('core_db', core_db, 'core', False)

    if 'variation' in feature_types:
        load_levels('var_db', var_db, 'var', True)

    if 'est' in feature_types:
        load_levels('otherfeature_db', otherfeature_db, 'otherfeature', True)
def _get_seq_region_id(self, CoordName):
    """Return the seq_region_id of the seq_region row named ``CoordName``.

    The lookup is restricted to rows whose coord_system_id is one of the
    non-string keys of the core database's CoordSystem collection, and
    exactly one matching row is required (enforced by ``asserted_one``).
    """
    seq_region = self.CoreDb.getTable('seq_region')
    systems = CoordSystem(core_db=self.CoreDb)

    # The collection is keyed by strings and by non-string values; only the
    # latter are used as coord_system_id values here.
    system_ids = []
    for key in systems:
        if isinstance(key, str):
            continue
        system_ids.append(key)

    name_matches = seq_region.c.name == CoordName
    system_matches = seq_region.c.coord_system_id.in_(system_ids)
    query = sql.select(
        [seq_region.c.seq_region_id],
        sql.and_(name_matches, system_matches))

    row = asserted_one(query.execute().fetchall())
    return row['seq_region_id']
def get_lower_coord_conversion(coord, species, core_db):
    """Convert ``coord`` to the first intermediate coordinate system that works.

    Coordinate systems whose rank lies strictly between the rank of
    ``coord.CoordType`` and the rank of the sequence-level system are tried
    in increasing rank order via ``get_coord_conversion``.

    Returns the first truthy conversion result. If none is truthy, returns
    the result of the last attempt, or None when no attempt was made.
    """
    systems = CoordSystem(species=species, core_db=core_db)
    seq_level_type = CoordSystem(species=species, core_db=core_db, seq_level=True)

    first_rank = systems[coord.CoordType].rank + 1
    seq_level_rank = systems[seq_level_type].rank

    assemblies = None
    for rank in range(first_rank, seq_level_rank):
        # name of the first coordinate system found with this rank, if any
        target_type = next(
            (systems[key].name
             for key in systems.keys()
             if systems[key].rank == rank),
            None,
        )
        if target_type is None:
            continue

        assemblies = get_coord_conversion(coord, target_type, core_db)
        if assemblies:
            break

    return assemblies
def get_lower_coord_conversion(coord, species, core_db):
    """Look for a conversion of ``coord`` into a lower-ranked coordinate system.

    Ranks strictly between that of ``coord.CoordType`` and that of the
    sequence-level coordinate system are visited in ascending order; for each
    rank that has a named coordinate system, ``get_coord_conversion`` is
    called and the search stops at the first truthy result.

    Returns that result, otherwise whatever the final attempt produced
    (None if no conversion was attempted).
    """
    lookup = CoordSystem(species=species, core_db=core_db)
    seq_level_coord_type = CoordSystem(species=species, core_db=core_db, seq_level=True)

    def name_for_rank(wanted_rank):
        # first entry, in key order, whose rank matches
        for key in lookup.keys():
            entry = lookup[key]
            if entry.rank == wanted_rank:
                return entry.name
        return None

    query_rank = lookup[coord.CoordType].rank
    seq_level_rank = lookup[seq_level_coord_type].rank

    assemblies = None
    for rank in range(query_rank + 1, seq_level_rank):
        coord_type = name_for_rank(rank)
        if coord_type is not None:
            assemblies = get_coord_conversion(coord, coord_type, core_db)
            if assemblies:
                break
    return assemblies
def _get_sequence_from_direct_assembly(coord=None, DEBUG=False):
    """Build the sequence for ``coord`` from its sequence-level assembly pieces.

    ``coord`` is converted to the genome's sequence-level coordinate system
    with ``get_coord_conversion``; each resulting target location is sliced
    out of the ``dna`` table, reverse-complemented when its strand is -1, and
    the pieces are combined by ``_assemble_seq``.

    Side effect: ``coord.Strand`` is set to 1.

    Raises NoItemError when the conversion yields nothing.
    """
    # TODO clean up use of a coord
    genome = coord.genome
    # the + strand sequence is always fetched first, whatever strand was given
    coord.Strand = 1
    species = genome.Species
    core_db = genome.CoreDb
    seq_level_type = CoordSystem(species=species, core_db=core_db, seq_level=True)

    if DEBUG:
        print('Created Coordinate:', coord, coord.EnsemblStart, coord.EnsemblEnd)
        print(coord.CoordType, seq_level_type)

    assemblies = get_coord_conversion(coord, seq_level_type, core_db)
    if not assemblies:
        raise NoItemError('no assembly for %s' % coord)

    dna_table = core_db.getTable('dna')
    fragments = []
    spans = []
    for query_loc, target_loc in assemblies:
        assert query_loc.Strand == 1
        fragment_length = len(target_loc)
        # get MySQL to do the string slicing via substr function
        sliced = substr(
            dna_table.c.sequence, target_loc.EnsemblStart, fragment_length
        ).label('sequence')
        stmt = sql.select(
            [sliced], dna_table.c.seq_region_id == target_loc.seq_region_id)
        row = asserted_one(stmt.execute().fetchall())

        fragment = DNA.makeSequence(row['sequence'])
        if target_loc.Strand == -1:
            fragment = fragment.rc()

        fragments.append(str(fragment))
        spans.append((query_loc.Start, query_loc.End))

    return _assemble_seq(fragments, coord.Start, coord.End, spans)