def __init__(self, chromosome, strand, abundance, mappings, positions):
    """Create read object.

    Invoke with a constructor rather than directly.

    Keyword Arguments:
    chromosome -- reference sequence name, often a chromosome
    strand     -- read strand relative to the reference ['+'|'-'|'.'];
                  any value other than '+' or '-' is stored as '.'
    abundance  -- identical reads represented by the same alignment
    mappings   -- alignment positions for this read; the string
                  "Unknown" is stored as 1
    positions  -- array of 1-based chromosome positions the read covers

    For '-' strand reads position_array holds the positions in reverse
    order.  The reversal is done on a copy, so the sequence passed in by
    the caller is left unmodified.

    abundance and mappings are only assigned when confirm_integer()
    returns a true value for them.
    """
    self.chromosome = chromosome

    # Normalize the strand: only explicit '+' and '-' are kept.
    if strand != "+" and strand != "-":
        self.strand = "."
    else:
        self.strand = strand

    if self.strand == "-":
        # Slice instead of positions.reverse(): reverse() mutates the
        # caller's list in place and is unavailable on tuples and ranges.
        positions = positions[::-1]
    self.position_array = positions

    if confirm_integer(abundance, "Abundance", minimum=1):
        self.abundance = int(abundance)

    if mappings == "Unknown":
        # Number of alignments not reported; treat as a single mapping.
        self.mappings = 1
    elif confirm_integer(mappings, "Alignments", minimum=1):
        self.mappings = int(mappings)
def __init__(self, interval=1, padding_upstream=0, padding_downstream=0):
    """Return metagene instance defined by interval and padding sizes.

    Keyword arguments:
    interval           -- length of interval (default 1)
    padding_upstream   -- length of upstream padding (default 0)
    padding_downstream -- length of downstream padding (default 0)

    Each value is checked with confirm_integer() before being stored as
    an int.  The total length is the sum of the upstream padding, the
    interval and the downstream padding.
    """
    if confirm_integer(interval, "Interval", minimum=1):
        self.feature_interval = int(interval)

    # Both sides start out unset and are filled in once validated.
    self.padding = {'Upstream': None, 'Downstream': None}
    requested = (
        ('Upstream', padding_upstream, "Upstream padding"),
        ('Downstream', padding_downstream, "Downstream padding"),
    )
    for side, size, label in requested:
        if confirm_integer(size, label, minimum=0):
            self.padding[side] = int(size)

    upstream = self.padding['Upstream']
    downstream = self.padding['Downstream']
    self.length = upstream + self.feature_interval + downstream
def __init__(self, count_method, metagene_object, name, chromosome, start,
             end, strand, gap_counting=False, ignore_strand=False):
    """Define a feature-specific metagene window and its count arrays.

    Not normally called directly; use Feature.create(file_format,
    count_method, metagene_object, feature_line,
    chromosome_conversion_table) to call indirectly.

    Keyword arguments:
    count_method    -- 'all' uses the whole feature (interval is
                       end - start + 1); any other value uses an interval
                       of 1, with 'start' and 'end' selecting which
                       feature boundary the window is anchored on
    metagene_object -- supplies padding['Upstream'],
                       padding['Downstream'] and feature_interval
    name            -- feature name, stored as given
    chromosome      -- key into Feature.chromosome_conversion; the
                       converted designation is what gets stored
    start, end      -- genomic (1-based) positions, checked with
                       confirm_integer() against
                       Read.chromosome_sizes[chromosome]
    strand          -- '+' or '-'; any other value is stored as '.'
    gap_counting    -- if true, keep separate 'ungapped' and 'gapped'
                       counts instead of a single 'allreads' count
    ignore_strand   -- if true, keep only 'unstranded' counts even for a
                       stranded feature

    Sets name, chromosome, strand, metagene_length, counts_array and
    position_array, in addition to whatever Metagene.__init__ sets.
    """
    # convert to BAM-like chromosome designation
    chromosome = Feature.chromosome_conversion[chromosome]
    chromosome_size = Read.chromosome_sizes[chromosome]
    if (confirm_integer(start, "Start", minimum=1, maximum=chromosome_size)
            and confirm_integer(end, "End", minimum=1,
                                maximum=chromosome_size)):
        start = int(start)
        end = int(end)

    # Define feature-specific metagene where feature_interval represents
    # the length of the feature NOT the length of the final metagene interval
    if count_method == 'all':
        interval = end - start + 1  # length of feature
    else:
        interval = 1  # length of the start (or end) of feature
    Metagene.__init__(self, interval,
                      metagene_object.padding['Upstream'],
                      metagene_object.padding['Downstream'])

    self.name = name
    self.chromosome = chromosome
    self.strand = strand
    self.metagene_length = metagene_object.feature_interval

    # define counts_array dictionary
    # key: orientation:gap_counts string
    #      where orientation = {'unstranded', 'sense', 'antisense'}
    #            gap_counts  = {'ungapped', 'gapped', 'allreads'}
    #      'ungapped' + 'gapped' = 'allreads'
    #      'sense' + 'antisense' = 'unstranded'
    #
    # values: arrays of self.length initialized to 0
    if self.strand != "+" and self.strand != "-":
        self.strand = "."
        orientation = ['unstranded']
    elif ignore_strand:
        orientation = ['unstranded']
    else:
        orientation = ['sense', 'antisense']

    if gap_counting:
        gap_counts = ['ungapped', 'gapped']
    else:
        gap_counts = ['allreads']

    # Each key gets its own independent zero-filled list.
    self.counts_array = {
        "{}:{}".format(o, g): [0] * self.length
        for o in orientation
        for g in gap_counts
    }

    # define position_array
    # values : chromosomal 1-based nucleotide positions in 5' to 3'
    #          orientation WITH RESPECT TO THE FEATURE
    # Example :
    #     + strand: [10,11,12,13,14,15]
    #     - strand: [15,14,13,12,11,10]
    # so position_array[0] is always the start of the feature (with upstream padding)
    #    position_array[-1] is always the end of the feature (with downstream padding)
    if self.strand == "-":
        # chromosome start = feature end
        # chromosome end = feature start
        if count_method == 'start':
            start = end
        elif count_method == 'end':
            end = start
        region_start = start - self.padding['Downstream']  # start is really end
        region_end = end + self.padding['Upstream']  # end is really start
        # Build the descending list directly: calling .reverse() on the
        # result of range() only works where range() returns a list
        # (Python 2) and raises AttributeError on Python 3.
        positions = list(range(region_end, region_start - 1, -1))
    else:
        if count_method == 'start':
            end = start  # set both start and end to the start value
        elif count_method == 'end':
            start = end  # set both start and end to the end value
        region_start = start - self.padding['Upstream']
        region_end = end + self.padding['Downstream']
        # list() keeps position_array a real list on Python 3 as well
        positions = list(range(region_start, region_end + 1))  # inclusive list

    self.position_array = positions
def __init__(
    self,
    count_method,
    metagene_object,
    name,
    chromosome,
    start,
    end,
    strand,
    gap_counting=False,
    ignore_strand=False,
):
    """Define a feature-specific metagene window and its count arrays.

    Not normally called directly; use Feature.create(file_format,
    count_method, metagene_object, feature_line,
    chromosome_conversion_table) to call indirectly.

    Keyword arguments:
    count_method    -- "all" uses the whole feature (interval is
                       end - start + 1); any other value uses an interval
                       of 1, with "start" and "end" selecting which
                       feature boundary the window is anchored on
    metagene_object -- supplies padding["Upstream"],
                       padding["Downstream"] and feature_interval
    name            -- feature name, stored as given
    chromosome      -- key into Feature.chromosome_conversion; the
                       converted designation is what gets stored
    start, end      -- genomic (1-based) positions, checked with
                       confirm_integer() against
                       Read.chromosome_sizes[chromosome]
    strand          -- "+" or "-"; any other value is stored as "."
    gap_counting    -- if true, keep separate "ungapped" and "gapped"
                       counts instead of a single "allreads" count
    ignore_strand   -- if true, keep only "unstranded" counts even for a
                       stranded feature

    Sets name, chromosome, strand, metagene_length, counts_array and
    position_array, in addition to whatever Metagene.__init__ sets.
    """
    # convert to BAM-like chromosome designation
    chromosome = Feature.chromosome_conversion[chromosome]
    size_limit = Read.chromosome_sizes[chromosome]
    if confirm_integer(start, "Start", minimum=1, maximum=size_limit) and confirm_integer(
        end, "End", minimum=1, maximum=size_limit
    ):
        start = int(start)
        end = int(end)

    # Define feature-specific metagene where feature_interval represents
    # the length of the feature NOT the length of the final metagene interval
    if count_method == "all":
        interval = end - start + 1  # length of feature
    else:
        interval = 1  # length of the start (or end) of feature
    Metagene.__init__(self, interval, metagene_object.padding["Upstream"], metagene_object.padding["Downstream"])

    self.name = name
    self.chromosome = chromosome
    self.strand = strand
    self.metagene_length = metagene_object.feature_interval

    # define counts_array dictionary
    # key: orientation:gap_counts string
    #      where orientation = {'unstranded', 'sense', 'antisense'}
    #            gap_counts  = {'ungapped', 'gapped', 'allreads'}
    #      'ungapped' + 'gapped' = 'allreads'
    #      'sense' + 'antisense' = 'unstranded'
    #
    # values: arrays of self.length initialized to 0
    if self.strand != "+" and self.strand != "-":
        self.strand = "."
        orientation = ["unstranded"]
    elif ignore_strand:
        orientation = ["unstranded"]
    else:
        orientation = ["sense", "antisense"]

    if gap_counting:
        gap_counts = ["ungapped", "gapped"]
    else:
        gap_counts = ["allreads"]

    # One independent zero-filled list per orientation/gap combination.
    self.counts_array = {}
    for o in orientation:
        for g in gap_counts:
            self.counts_array["{}:{}".format(o, g)] = [0] * self.length

    # define position_array
    # values : chromosomal 1-based nucleotide positions in 5' to 3'
    #          orientation WITH RESPECT TO THE FEATURE
    # Example :
    #     + strand: [10,11,12,13,14,15]
    #     - strand: [15,14,13,12,11,10]
    # so position_array[0] is always the start of the feature (with upstream padding)
    #    position_array[-1] is always the end of the feature (with downstream padding)
    if self.strand == "-":
        # chromosome start = feature end
        # chromosome end = feature start
        if count_method == "start":
            start = end
        elif count_method == "end":
            end = start
        region_start = start - self.padding["Downstream"]  # start is really end
        region_end = end + self.padding["Upstream"]  # end is really start
        # range() is only a list on Python 2; on Python 3 it has no
        # reverse() method, so generate the descending positions directly.
        positions = list(range(region_end, region_start - 1, -1))
    else:
        if count_method == "start":
            end = start  # set both start and end to the start value
        elif count_method == "end":
            start = end  # set both start and end to the end value
        region_start = start - self.padding["Upstream"]
        region_end = end + self.padding["Downstream"]
        # Materialize so position_array is a list on Python 3 too.
        positions = list(range(region_start, region_end + 1))  # inclusive list

    self.position_array = positions