Ejemplo n.º 1
0
    def __init__(self, chromosome, strand, abundance, mappings, positions):
        """Create read object. Invoke with a constructor rather than directly.

        Keyword Arguments:
        chromosome -- reference sequence name, often a chromosome
        strand -- read strand relative to the reference ['+'|'-'|'.'];
                  any value other than '+' or '-' is stored as '.'
        abundance -- identical reads represented by the same alignment
        mappings -- alignment positions for this read; the string "Unknown"
                    is stored as 1
        positions -- array of 1-based chromosome positions the read covers;
                     for a '-' strand read a reversed copy is stored and the
                     caller's sequence is left untouched
        """
        self.chromosome = chromosome
        self.strand = strand if strand in ("+", "-") else "."

        if self.strand == "-":
            # Store a reversed copy rather than calling positions.reverse(),
            # which would silently reorder the caller's own list.
            positions = list(reversed(positions))
        self.position_array = positions

        # NOTE(review): the attributes below are only assigned when
        # confirm_integer returns a true value; presumably it raises or exits
        # on invalid input -- confirm against its definition.
        if confirm_integer(abundance, "Abundance", minimum=1):
            self.abundance = int(abundance)

        if mappings == "Unknown":
            self.mappings = 1
        elif confirm_integer(mappings, "Alignments", minimum=1):
            self.mappings = int(mappings)
Ejemplo n.º 2
0
    def __init__(self, chromosome, strand, abundance, mappings, positions):
        """Create read object. Invoke with a constructor rather than directly.

        Keyword Arguments:
        chromosome -- reference sequence name, often a chromosome
        strand -- read strand relative to the reference ['+'|'-'|'.'];
                  any value other than '+' or '-' is stored as '.'
        abundance -- identical reads represented by the same alignment
        mappings -- alignment positions for this read; the string "Unknown"
                    is stored as 1
        positions -- array of 1-based chromosome positions the read covers;
                     for a '-' strand read a reversed copy is stored and the
                     caller's sequence is left untouched
        """
        self.chromosome = chromosome
        self.strand = strand if strand in ("+", "-") else "."

        if self.strand == "-":
            # Store a reversed copy rather than calling positions.reverse(),
            # which would silently reorder the caller's own list.
            positions = list(reversed(positions))
        self.position_array = positions

        # NOTE(review): the attributes below are only assigned when
        # confirm_integer returns a true value; presumably it raises or exits
        # on invalid input -- confirm against its definition.
        if confirm_integer(abundance, "Abundance", minimum=1):
            self.abundance = int(abundance)

        if mappings == "Unknown":
            self.mappings = 1
        elif confirm_integer(mappings, "Alignments", minimum=1):
            self.mappings = int(mappings)
Ejemplo n.º 3
0
 def __init__(self, interval=1, padding_upstream=0, padding_downstream=0):
     """Set up a metagene from its interval length and flanking padding sizes.

     Keyword arguments:
     interval -- length of interval (default 1)
     padding_upstream -- length of upstream padding (default 0)
     padding_downstream -- length of downstream padding (default 0)

     Each value is checked with confirm_integer (minimum 1 for the interval,
     minimum 0 for either padding) and stored as an int when accepted.
     The total length is upstream padding + interval + downstream padding.
     """
     if confirm_integer(interval, "Interval", minimum=1):
         self.feature_interval = int(interval)

     # Both sides start out unset and are filled in once validated,
     # upstream first and downstream second.
     self.padding = dict.fromkeys(('Upstream', 'Downstream'))
     requested = (
         ('Upstream', padding_upstream, "Upstream padding"),
         ('Downstream', padding_downstream, "Downstream padding"),
     )
     for side, size, label in requested:
         if confirm_integer(size, label, minimum=0):
             self.padding[side] = int(size)

     flanks = self.padding
     self.length = flanks['Upstream'] + self.feature_interval + flanks['Downstream']
Ejemplo n.º 4
0
 def __init__(self, interval=1, padding_upstream=0, padding_downstream=0):
     """Set up a metagene from its interval length and flanking padding sizes.

     Keyword arguments:
     interval -- length of interval (default 1)
     padding_upstream -- length of upstream padding (default 0)
     padding_downstream -- length of downstream padding (default 0)

     Each value is checked with confirm_integer (minimum 1 for the interval,
     minimum 0 for either padding) and stored as an int when accepted.
     The total length is upstream padding + interval + downstream padding.
     """
     if confirm_integer(interval, "Interval", minimum=1):
         self.feature_interval = int(interval)

     # Both sides start out unset and are filled in once validated,
     # upstream first and downstream second.
     self.padding = dict.fromkeys(('Upstream', 'Downstream'))
     requested = (
         ('Upstream', padding_upstream, "Upstream padding"),
         ('Downstream', padding_downstream, "Downstream padding"),
     )
     for side, size, label in requested:
         if confirm_integer(size, label, minimum=0):
             self.padding[side] = int(size)

     flanks = self.padding
     self.length = flanks['Upstream'] + self.feature_interval + flanks['Downstream']
Ejemplo n.º 5
0
    def __init__(self,
                 count_method,
                 metagene_object,
                 name,
                 chromosome,
                 start,
                 end,
                 strand,
                 gap_counting=False,
                 ignore_strand=False):
        """Not normally called directly; use Feature.create(file_format, count_method,
        metagene_object, feature_line, chromosome_conversion_table) to call indirectly.

        Define a new feature with an interval (represents feature length),
        up and downstream padding (defined by metagene_object), and genomic
        (1-based) start and end positions.

        Once defined here, the start and end represent the true start and end of
        the feature.  Therefore, if a - strand (Crick strand) feature the start
        will be larger than the end.

        Keyword arguments:
        count_method -- 'all' counts over the whole feature; 'start' or 'end'
                        collapse the feature to that single position
        metagene_object -- supplies the up/downstream padding and the final
                           metagene interval length (feature_interval)
        name -- feature name, stored as given
        chromosome -- key into Feature.chromosome_conversion
        start, end -- 1-based chromosome coordinates, checked against the
                      chromosome size in Read.chromosome_sizes
        strand -- '+' or '-'; any other value is stored as '.'
        gap_counting -- track 'ungapped' and 'gapped' counts separately
                        instead of a single 'allreads' array (default False)
        ignore_strand -- use a single 'unstranded' orientation even for a
                         stranded feature (default False)
        """
        # convert to BAM-like chromosome designation
        chromosome = Feature.chromosome_conversion[chromosome]
        chromosome_size = Read.chromosome_sizes[chromosome]
        # NOTE(review): start/end stay unconverted if confirm_integer returns
        # a false value; presumably it raises or exits instead -- confirm.
        if (confirm_integer(start, "Start", minimum=1,
                            maximum=chromosome_size) and
                confirm_integer(end, "End", minimum=1,
                                maximum=chromosome_size)):
            start = int(start)
            end = int(end)

        # Define feature-specific metagene where feature_interval represents
        # the length of the feature NOT the length of the final metagene interval
        if count_method == 'all':
            interval = (end - start + 1)  # length of feature
        else:
            interval = 1  # length of the start (or end) of feature

        Metagene.__init__(self, interval, metagene_object.padding['Upstream'],
                          metagene_object.padding['Downstream'])
        self.name = name
        self.chromosome = chromosome
        self.strand = strand
        self.metagene_length = metagene_object.feature_interval

        # define counts_array dictionary
        # key: orientation:gap_counts string
        #      where orientation = {'unstranded', 'sense', 'antisense'}
        #            gap_counts  = {'ungapped', 'gapped', 'allreads'}
        #      'ungapped' + 'gapped' = 'allreads'
        #      'sense' + 'antisense' = 'unstranded'
        #
        # values: arrays of self.length initialized to 0
        if self.strand != "+" and self.strand != "-":
            self.strand = "."
            orientation = ['unstranded']
        elif ignore_strand:
            orientation = ['unstranded']
        else:
            orientation = ['sense', 'antisense']
        if gap_counting:
            gap_counts = ['ungapped', 'gapped']
        else:
            gap_counts = ['allreads']

        self.counts_array = {}
        for o in orientation:
            for g in gap_counts:
                # each key gets its own independent list of zeros
                self.counts_array["{}:{}".format(o, g)] = [0] * self.length

        # define position_array
        # values  : chromosomal 1-based nucleotide positions in 5' to 3'
        #           orientation WITH RESPECT TO THE FEATURE
        # Example :
        #       + strand:   [10,11,12,13,14,15]
        #       - strand:   [15,14,13,12,11,10]
        # so position_array[0] is always the start of the feature (with upstream padding)
        #    position_array[-1] is always the end of the feature (with downstream padding)
        if self.strand == "-":
            # chromosome start = feature end
            # chromosome end   = feature start
            if count_method == 'start':
                start = end
            elif count_method == 'end':
                end = start
            region_start = start - self.padding['Downstream']  # start is really end
            region_end = end + self.padding['Upstream']  # end is really start
        else:
            if count_method == 'start':
                end = start  # set both start and end to the start value
            elif count_method == 'end':
                start = end  # set both start and end to the end value
            region_start = start - self.padding['Upstream']
            region_end = end + self.padding['Downstream']

        # Materialise a real list: on Python 3 range() returns a lazy range
        # object that has no reverse() method, so reversing it would raise.
        positions = list(range(region_start, region_end + 1))  # inclusive list
        if self.strand == "-":
            positions.reverse()

        self.position_array = positions
Ejemplo n.º 6
0
    def __init__(
        self,
        count_method,
        metagene_object,
        name,
        chromosome,
        start,
        end,
        strand,
        gap_counting=False,
        ignore_strand=False,
    ):
        """Not normally called directly; use Feature.create(file_format, count_method,
        metagene_object, feature_line, chromosome_conversion_table) to call indirectly.

        Define a new feature with an interval (represents feature length),
        up and downstream padding (defined by metagene_object), and genomic
        (1-based) start and end positions.

        Once defined here, the start and end represent the true start and end of
        the feature.  Therefore, if a - strand (Crick strand) feature the start
        will be larger than the end.

        Keyword arguments:
        count_method -- "all" counts over the whole feature; "start" or "end"
                        collapse the feature to that single position
        metagene_object -- supplies the up/downstream padding and the final
                           metagene interval length (feature_interval)
        name -- feature name, stored as given
        chromosome -- key into Feature.chromosome_conversion
        start, end -- 1-based chromosome coordinates, checked against the
                      chromosome size in Read.chromosome_sizes
        strand -- "+" or "-"; any other value is stored as "."
        gap_counting -- track "ungapped" and "gapped" counts separately
                        instead of a single "allreads" array (default False)
        ignore_strand -- use a single "unstranded" orientation even for a
                         stranded feature (default False)
        """
        # convert to BAM-like chromosome designation
        chromosome = Feature.chromosome_conversion[chromosome]
        chromosome_size = Read.chromosome_sizes[chromosome]
        # NOTE(review): start/end stay unconverted if confirm_integer returns
        # a false value; presumably it raises or exits instead -- confirm.
        if confirm_integer(start, "Start", minimum=1, maximum=chromosome_size) and confirm_integer(
            end, "End", minimum=1, maximum=chromosome_size
        ):
            start = int(start)
            end = int(end)

        # Define feature-specific metagene where feature_interval represents
        # the length of the feature NOT the length of the final metagene interval
        if count_method == "all":
            interval = end - start + 1  # length of feature
        else:
            interval = 1  # length of the start (or end) of feature

        Metagene.__init__(self, interval, metagene_object.padding["Upstream"], metagene_object.padding["Downstream"])
        self.name = name
        self.chromosome = chromosome
        self.strand = strand
        self.metagene_length = metagene_object.feature_interval

        # define counts_array dictionary
        # key: orientation:gap_counts string
        #      where orientation = {'unstranded', 'sense', 'antisense'}
        #            gap_counts  = {'ungapped', 'gapped', 'allreads'}
        #      'ungapped' + 'gapped' = 'allreads'
        #      'sense' + 'antisense' = 'unstranded'
        #
        # values: arrays of self.length initialized to 0
        if self.strand != "+" and self.strand != "-":
            self.strand = "."
            orientation = ["unstranded"]
        elif ignore_strand:
            orientation = ["unstranded"]
        else:
            orientation = ["sense", "antisense"]
        if gap_counting:
            gap_counts = ["ungapped", "gapped"]
        else:
            gap_counts = ["allreads"]

        self.counts_array = {}
        for o in orientation:
            for g in gap_counts:
                # each key gets its own independent list of zeros
                self.counts_array["{}:{}".format(o, g)] = [0] * self.length

        # define position_array
        # values  : chromosomal 1-based nucleotide positions in 5' to 3'
        #           orientation WITH RESPECT TO THE FEATURE
        # Example :
        #       + strand:   [10,11,12,13,14,15]
        #       - strand:   [15,14,13,12,11,10]
        # so position_array[0] is always the start of the feature (with upstream padding)
        #    position_array[-1] is always the end of the feature (with downstream padding)
        if self.strand == "-":
            # chromosome start = feature end
            # chromosome end   = feature start
            if count_method == "start":
                start = end
            elif count_method == "end":
                end = start
            region_start = start - self.padding["Downstream"]  # start is really end
            region_end = end + self.padding["Upstream"]  # end is really start
        else:
            if count_method == "start":
                end = start  # set both start and end to the start value
            elif count_method == "end":
                start = end  # set both start and end to the end value
            region_start = start - self.padding["Upstream"]
            region_end = end + self.padding["Downstream"]

        # Materialise a real list: on Python 3 range() returns a lazy range
        # object that has no reverse() method, so reversing it would raise.
        positions = list(range(region_start, region_end + 1))  # inclusive list
        if self.strand == "-":
            positions.reverse()

        self.position_array = positions