Exemple #1
0
def find_nearest_features(peaks, features, distance=None, tss_only=False,
                          only_differentially_expressed=False):
    """
    Pair each peak with the features nearest to it

    For every peak the features on the same chromosome are
    selected, optionally narrowed to those flagged as
    differentially expressed, and handed to one of the
    distance-sorting helpers. When a cut-off distance is
    supplied only the leading features that lie within that
    distance are kept.

    Arguments:
      peaks (PeakList): list of peaks
      features (FeatureList): list of features
      distance (int): optional cut-off distance to apply
        (default is not to apply a cut-off)
      tss_only (bool): only consider distances from the
        feature TSS (default is to consider distances from
        both the TSS and TES)
      only_differentially_expressed (bool): only consider
        features that are flagged as differentially expressed
        (i.e. those selected by 'filterByFlag(1)')

    Yields:
      tuple: Peak object and a FeatureSet object with the
        nearest features, for each input peak; if no features
        were found then the set holds a single 'None' entry

    """
    for peak in peaks:
        # Candidates must share the peak's chromosome
        candidates = features.filterByChr(peak.chrom)
        if only_differentially_expressed:
            candidates = candidates.filterByFlag(1)
        # Pick the sorting helper matching the distance mode;
        # its return value is not used
        order_by_distance = (sort_features_by_tss_distances
                             if tss_only
                             else sort_features_by_edge_distances)
        order_by_distance(peak, candidates)
        if distance is not None:
            # Keep leading candidates up to the first one that
            # exceeds the cut-off (presumably the helper above has
            # put the nearest features first - TODO confirm)
            within_cutoff = FeatureSet()
            for candidate in candidates:
                gap = (distances.distance_tss(peak, candidate)
                       if tss_only
                       else distances.distance_closest_edge(peak,
                                                            candidate))
                if gap > distance:
                    break
                within_cutoff.addFeature(candidate)
            candidates = within_cutoff
        # Guarantee at least one (null) result per peak
        if not candidates:
            candidates.addFeature(None)
        yield peak, candidates
Exemple #2
0
def find_nearest_features(peaks,features,distance=None,tss_only=False,
                          only_differentially_expressed=False):
    """
    Find the nearest features for each peak in turn

    The features sharing a chromosome with the peak are
    selected (and optionally reduced to the ones flagged as
    differentially expressed), then passed to one of the
    distance-sorting helpers. If a cut-off distance was given
    then features are only kept up to the first one that lies
    beyond the cut-off.

    Arguments:
      peaks (PeakList): list of peaks
      features (FeatureList): list of features
      distance (int): optional cut-off distance to apply
        (default is not to apply a cut-off)
      tss_only (bool): only consider distances from the
        feature TSS (default is to consider distances from
        both the TSS and TES)
      only_differentially_expressed (bool): only consider
        features that are flagged as differentially expressed
        (i.e. those selected by 'filterByFlag(1)')

    Yields:
      tuple: Peak object and a FeatureSet object with the
        nearest features, for each input peak; a set that
        would otherwise be empty is given one 'None' entry

    """
    for peak in peaks:
        # Restrict to the peak's chromosome
        selection = features.filterByChr(peak.chrom)
        if only_differentially_expressed:
            selection = selection.filterByFlag(1)
        # Choose the sorting helper for the requested distance
        # mode; nothing is taken from its return value
        sorter = (sort_features_by_tss_distances if tss_only
                  else sort_features_by_edge_distances)
        sorter(peak,selection)
        if distance is not None:
            # Collect features until one is further away than the
            # cut-off (presumably the nearest features come first
            # after the sort above - TODO confirm)
            nearby = FeatureSet()
            for feature in selection:
                if tss_only:
                    separation = distances.distance_tss(peak,feature)
                else:
                    separation = distances.distance_closest_edge(peak,
                                                                 feature)
                if separation > distance:
                    break
                nearby.addFeature(feature)
            selection = nearby
        # Always hand back at least one (null) result
        if not selection:
            selection.addFeature(None)
        yield peak,selection
    def _value_for(self,attr):
        """
        Look up the value of a single report field

        The field is resolved against the current peak/feature
        pair, which the calling method should have stored in
        the '_context_peak' and '_context_feature' properties
        before invoking this method.

        Fields fall into two groups: those read directly from
        an attribute of the peak or feature, and those which
        are computed (distances, overlaps and direction).

        Arguments:
          attr (string): name of the field to look up

        Returns:
          Value of the field for the current peak/feature
          pair

        Raises:
          AttributeError: if valid ``attr`` cannot be derived
          KeyError: if ``attr`` is not a recognised field name
          NotImplementedError: if ``attr`` is
            'features_inbetween'
          Exception: if ``attr`` is 'overlap_promoter' but no
            promoter region has been defined

        """
        peak = self._context_peak
        feature = self._context_feature
        is_features = self._is_features
        # Fields read straight from the peak or the feature, as
        # (accepted names, source object, attribute to read)
        direct_fields = (
            (('peak.id',),peak,'id'),
            (('chr','peak.chr'),peak,'chrom'),
            (('peak.start','start'),peak,'start'),
            (('peak.end','end'),peak,'end'),
            (('id','feature.id'),feature,'id'),
            (('feature.chr',),feature,'chrom'),
            (('feature.start',),feature,'start'),
            (('feature.end',),feature,'end'),
            (('TSS',),feature,'tss'),
            (('TES',),feature,'tes'),
            (('strand','feature.strand'),feature,'strand'),
            (('differentially_expressed',),feature,'flag'),
        )
        for names,source,name in direct_fields:
            if attr in names:
                return getattr(source,name)
        # Distances between the peak and the feature
        if attr == 'dist_closest':
            return distances.distance_closest_edge(peak,feature)
        if attr == 'dist_TSS':
            return distances.distance_tss(peak,feature)
        if attr == 'dist_TES':
            return distances.distance_tes(peak,feature)
        # Overlap of the peak with the TSS-TES region; the
        # 'in_the_feature' variant reports YES/NO rather than 1/0
        if attr in ('overlap_feature','in_the_feature'):
            overlaps = distances.regions_overlap(
                (peak.start,peak.end),
                (feature.tss,feature.tes))
            if attr == 'in_the_feature':
                return 'YES' if overlaps else 'NO'
            return 1 if overlaps else 0
        # Overlap of the peak with the promoter region
        if attr == 'overlap_promoter':
            promoter_region = self._promoter_region
            if promoter_region is None:
                raise Exception("'overlap_promoter' requested but no "
                                "promoter region has been defined")
            promoter = feature.getPromoterRegion(*promoter_region)
            in_promoter = distances.regions_overlap(
                (peak.start,peak.end),promoter)
            return 1 if in_promoter else 0
        # Direction: the argument order depends on whether
        # features or peaks are being reported
        if attr == 'direction':
            if is_features:
                pair = (feature,peak)
            else:
                pair = (peak,feature)
            direction = distances.direction(*pair)
            if direction == distances.UPSTREAM:
                return 'U'
            if direction == distances.DOWNSTREAM:
                return 'D'
            return '.'
        if attr == 'features_inbetween':
            raise NotImplementedError("'features_inbetween' not implemented")
        # Nothing matched
        raise KeyError("Unrecognised report field: '%s'" % attr)
Exemple #4
0
    def _value_for(self,attr):
        """
        Look up the value of a single report field

        The field is resolved against the current peak/feature
        pair, which the calling method should have stored in
        the '_context_peak' and '_context_feature' properties
        before invoking this method.

        Fields fall into three groups: those read directly
        from an attribute of the peak or feature, those which
        are computed (distances, overlaps and direction), and
        finally any other name, which is looked up in the
        '_extra_data' mapping.

        Arguments:
          attr (string): name of the field to look up

        Returns:
          Value of the field for the current peak/feature
          pair

        Raises:
          AttributeError: if valid ``attr`` cannot be derived
          KeyError: if ``attr`` is not a recognised field name
            and is not found in the extra data either
          NotImplementedError: if ``attr`` is
            'features_inbetween'
          Exception: if ``attr`` is 'overlap_promoter' but no
            promoter region has been defined

        """
        peak = self._context_peak
        feature = self._context_feature
        extra_data = self._extra_data
        is_features = self._is_features
        # Fields read straight from the peak or the feature, as
        # (accepted names, source object, attribute to read)
        direct_fields = (
            (('peak.id',),peak,'id'),
            (('chr','peak.chr'),peak,'chrom'),
            (('peak.start','start'),peak,'start'),
            (('peak.end','end'),peak,'end'),
            (('peak.file',),peak,'source_file'),
            (('id','feature.id'),feature,'id'),
            (('feature.chr',),feature,'chrom'),
            (('feature.start',),feature,'start'),
            (('feature.end',),feature,'end'),
            (('feature.file',),feature,'source_file'),
            (('TSS',),feature,'tss'),
            (('TES',),feature,'tes'),
            (('strand','feature.strand'),feature,'strand'),
            (('differentially_expressed',),feature,'flag'),
        )
        for names,source,name in direct_fields:
            if attr in names:
                return getattr(source,name)
        # Distances between the peak and the feature
        if attr == 'dist_closest':
            return distances.distance_closest_edge(peak,feature)
        if attr == 'dist_TSS':
            return distances.distance_tss(peak,feature)
        if attr == 'dist_TES':
            return distances.distance_tes(peak,feature)
        # Overlap of the peak with the TSS-TES region; the
        # 'in_the_feature' variant reports YES/NO rather than 1/0
        if attr in ('overlap_feature','in_the_feature'):
            overlaps = distances.regions_overlap(
                (peak.start,peak.end),
                (feature.tss,feature.tes))
            if attr == 'in_the_feature':
                return 'YES' if overlaps else 'NO'
            return 1 if overlaps else 0
        # Overlap of the peak with the promoter region
        if attr == 'overlap_promoter':
            promoter_region = self._promoter_region
            if promoter_region is None:
                raise Exception("'overlap_promoter' requested but no "
                                "promoter region has been defined")
            promoter = feature.getPromoterRegion(*promoter_region)
            in_promoter = distances.regions_overlap(
                (peak.start,peak.end),promoter)
            return 1 if in_promoter else 0
        # Direction: the argument order depends on whether
        # features or peaks are being reported
        if attr == 'direction':
            if is_features:
                pair = (feature,peak)
            else:
                pair = (peak,feature)
            direction = distances.direction(*pair)
            if direction == distances.UPSTREAM:
                return 'U'
            if direction == distances.DOWNSTREAM:
                return 'D'
            return '.'
        if attr == 'features_inbetween':
            raise NotImplementedError("'features_inbetween' not implemented")
        # Not a built-in field: fall back to the extra data items
        try:
            return extra_data[attr]
        except KeyError:
            raise KeyError("Unrecognised report field: '%s'" % attr)