def __init__(self, genome, CoordName, Start, End, Strand=1,
             CoordType=None, seq_region_id=None, ensembl_coord=False):
    """Record a location on a named sequence region of a genome.

    Arguments:
        - genome: object supplying the Species and CoreDb attributes
        - CoordName: name of the sequence region
        - Start, End: bounds of the location. If Start > End they are
          swapped, which is only permitted on the minus strand.
        - Strand: any value understood by convert_strand
        - CoordType, seq_region_id: queried from genome.CoreDb when
          CoordType is not given, or when none of seq_region_id, Start
          and End is given. In that case a Start of None (or 0) becomes 0
          and an End of None (or 0) becomes the sequence region length.
        - ensembl_coord: if True, Start is decremented by 1; otherwise
          End is incremented by 1 when Start == End

    Raises AssertionError if Start > End on a strand other than minus.
    """
    if not CoordType or not (seq_region_id or Start or End):
        seq_region_data, CoordType = \
            _get_coord_type_and_seq_region_id(CoordName, genome.CoreDb)
        seq_region_id = seq_region_data['seq_region_id']
        Start = Start or 0
        End = End or seq_region_data['length']
    # TODO allow creation with just seq_region_id
    self.Species = genome.Species
    self.CoordType = DisplayString(CoordType, repr_length=4,
                                   with_quotes=False)
    self.CoordName = DisplayString(CoordName, repr_length=4,
                                   with_quotes=False)
    # if Start == End, we +1 to End, unless these are ensembl_coord's
    # NOTE(review): presumably the decrement maps Ensembl's 1-based start
    # onto a 0-based start; a defaulted Start of 0 becomes -1 here -- confirm
    if ensembl_coord:
        Start -= 1
    elif Start == End:
        End += 1

    # Normalise the strand before validating it, so that equivalent
    # spellings of the minus strand (e.g. '-') pass the check below rather
    # than only the literal -1.
    strand = convert_strand(Strand)
    if Start > End:
        assert strand == -1, \
            "strand incorrect for start[%s] > end[%s]" % (Start, End)
        Start, End = End, Start

    self.Start = Start
    self.End = End
    self.Strand = strand
    self.seq_region_id = seq_region_id
    self.genome = genome
def getRegion(self, region=None, CoordName=None, Start=None, End=None,
              Strand=None, ensembl_coord=False):
    """Return one GenericRegion for the given location.

    Arguments:
        - region: a genomic region (an object with a Location attribute)
          or a Coordinate instance. When None, a Coordinate is built from
          CoordName, Start, End and Strand.
        - ensembl_coord: if True, follows indexing system of Ensembl where
          indexing starts at 1
    """
    location = region
    if location is None:
        # no location supplied, so build one from the individual arguments
        region_id = self._get_seq_region_id(CoordName)
        location = Coordinate(self, CoordName=CoordName, Start=Start,
                              End=End, Strand=convert_strand(Strand),
                              seq_region_id=region_id,
                              ensembl_coord=ensembl_coord)
    elif hasattr(location, 'Location'):
        # a region object was given; use the location it carries
        location = location.Location

    region_kwargs = dict(CoordName=CoordName, Start=Start, End=End,
                         Strand=Strand, Location=location,
                         ensembl_coord=ensembl_coord)
    return GenericRegion(self, self.CoreDb, **region_kwargs)
def getRegion(self, region=None, CoordName=None, Start=None, End=None,
              Strand=None, ensembl_coord=False):
    """Build a single generic region for the specified coordinates.

    Arguments:
        - region: a genomic region or a Coordinate instance; if omitted,
          a Coordinate is created from CoordName, Start, End and Strand
        - ensembl_coord: if True, follows indexing system of Ensembl where
          indexing starts at 1
    """
    if region is not None:
        # unwrap objects that carry their coordinate as a Location
        if hasattr(region, "Location"):
            region = region.Location
    else:
        found_id = self._get_seq_region_id(CoordName)
        normalised_strand = convert_strand(Strand)
        region = Coordinate(
            self,
            CoordName=CoordName,
            Start=Start,
            End=End,
            Strand=normalised_strand,
            seq_region_id=found_id,
            ensembl_coord=ensembl_coord,
        )

    # the original (unconverted) Strand is what gets forwarded here
    generic = GenericRegion(
        self,
        self.CoreDb,
        CoordName=CoordName,
        Start=Start,
        End=End,
        Strand=Strand,
        Location=region,
        ensembl_coord=ensembl_coord,
    )
    return generic
def getFeatures(self, region=None, feature_types=None, where_feature=None,
                CoordName=None, Start=None, End=None, Strand=None,
                ensembl_coord=False):
    """Yield Region instances for the specified location.

    Arguments:
        - region: a Coordinate, or an object with a Location attribute.
          When None, a Coordinate is built from CoordName, Start, End and
          Strand.
        - feature_types: a feature type name, or a series of names,
          matched case-insensitively. Required in practice despite the
          None default. Valid names: 'cpg', 'repeat', 'gene', 'est',
          'variation'.
        - where_feature: forwarded to the coordinate conversion and to the
          per-type feature query functions
        - ensembl_coord: forwarded to Coordinate when region is None

    Raises RuntimeError if any requested feature type is not recognised.
    This is a generator, so nothing runs (including the validation) until
    the first item is requested.
    """
    if isinstance(feature_types, str):
        feature_types = [feature_types]
    feature_types = [ft.lower() for ft in feature_types]
    feature_coord_levels = self._get_feature_coord_levels(feature_types)

    if region is None:
        seq_region_id = self._get_seq_region_id(CoordName)
        region = Coordinate(self, CoordName=CoordName, Start=Start,
                            End=End, Strand=convert_strand(Strand),
                            seq_region_id=seq_region_id,
                            ensembl_coord=ensembl_coord)
    elif hasattr(region, 'Location'):
        region = region.Location

    coord = region
    # the coordinate system at which locations are to be referenced, and
    # the processing function
    target_coords_funcs = dict(
        cpg=(self._get_simple_features, CpGisland),
        repeat=(self._get_repeat_features, Repeat),
        gene=(self._get_gene_features, Gene),
        est=(self._get_gene_features, Est),
        variation=(self._get_variation_features, Variation))

    known_types = set(target_coords_funcs)
    # set difference, not symmetric difference: report only the requested
    # names that are not recognised
    unknown_types = set(feature_types) - known_types
    if unknown_types:
        raise RuntimeError(
            'Unknown feature[%s], valid feature_types are: %s'
            % (unknown_types, known_types))

    for feature_type in feature_types:
        target_func, target_class = target_coords_funcs[feature_type]
        # ESTs are read from the other-features database, the rest from core
        db = self.CoreDb
        if feature_type == 'est':
            db = self.OtherFeaturesDb

        feature_coords = feature_coord_levels[feature_type].levels
        for feature_coord in feature_coords:
            chrom_other_coords = get_coord_conversion(
                coord, feature_coord, db, where=where_feature)
            for chrom_coord, other_coord in chrom_other_coords:
                for feature in target_func(db, target_class, chrom_coord,
                                           other_coord, where_feature):
                    yield feature
def test_strand_conversion(self):
    """should consistently convert strand info"""
    # (input, expected) pairs: None maps to the plus strand; ints, floats
    # and the '+'/'-' symbols all normalise to 1 or -1
    expectations = [
        (None, 1),
        (-1, -1),
        (1, 1),
        ('-', -1),
        ('+', 1),
        (-1.0, -1),
        (1.0, 1),
    ]
    for given, expected in expectations:
        # assertEqual rather than the deprecated assertEquals alias
        self.assertEqual(convert_strand(given), expected)
def getFeatures(self, region=None, feature_types=None, where_feature=None,
                CoordName=None, Start=None, End=None, Strand=None,
                ensembl_coord=False):
    """Yield Region instances for the specified location.

    Arguments:
        - region: a Coordinate, or an object with a Location attribute.
          When None, a Coordinate is built from CoordName, Start, End and
          Strand.
        - feature_types: a feature type name, or a series of names,
          matched case-insensitively. Required in practice despite the
          None default. Valid names: 'cpg', 'repeat', 'gene', 'est',
          'variation'.
        - where_feature: forwarded to the coordinate conversion and to the
          per-type feature query functions
        - ensembl_coord: forwarded to Coordinate when region is None

    Raises RuntimeError if any requested feature type is not recognised.
    This is a generator, so nothing runs (including the validation) until
    the first item is requested.
    """
    if isinstance(feature_types, str):
        feature_types = [feature_types]
    feature_types = [ft.lower() for ft in feature_types]
    feature_coord_levels = self._get_feature_coord_levels(feature_types)

    if region is None:
        seq_region_id = self._get_seq_region_id(CoordName)
        region = Coordinate(self, CoordName=CoordName, Start=Start,
                            End=End, Strand=convert_strand(Strand),
                            seq_region_id=seq_region_id,
                            ensembl_coord=ensembl_coord)
    elif hasattr(region, 'Location'):
        region = region.Location

    coord = region
    # the coordinate system at which locations are to be referenced, and
    # the processing function
    target_coords_funcs = dict(
        cpg=(self._get_simple_features, CpGisland),
        repeat=(self._get_repeat_features, Repeat),
        gene=(self._get_gene_features, Gene),
        est=(self._get_gene_features, Est),
        variation=(self._get_variation_features, Variation))

    known_types = set(target_coords_funcs)
    # set difference, not symmetric difference: report only the requested
    # names that are not recognised
    unknown_types = set(feature_types) - known_types
    if unknown_types:
        raise RuntimeError(
            'Unknown feature[%s], valid feature_types are: %s'
            % (unknown_types, known_types))

    for feature_type in feature_types:
        target_func, target_class = target_coords_funcs[feature_type]
        # ESTs are read from the other-features database, the rest from core
        db = self.CoreDb
        if feature_type == 'est':
            db = self.OtherFeaturesDb

        feature_coords = feature_coord_levels[feature_type].levels
        for feature_coord in feature_coords:
            chrom_other_coords = get_coord_conversion(
                coord, feature_coord, db, where=where_feature)
            for chrom_coord, other_coord in chrom_other_coords:
                for feature in target_func(db, target_class, chrom_coord,
                                           other_coord, where_feature):
                    yield feature