def filter(self, input_filepath, output_filepath):
    """Load loci from input_filepath, keep only loci that pass
    filter_locus(), and write the survivors to output_filepath.

    Populates self.filtered_loci as a side effect.
    """
    self.load_loci(input_filepath)
    self.filtered_loci = {}
    for name, locus in iteritems(self.loci):
        kept = self.filter_locus(locus)
        # filter_locus returns False for loci that should be dropped.
        if kept is False:
            continue
        self.filtered_loci[name] = kept
    self.write_output(output_filepath)
# Example #2 (score: 0)
 def filter(self, input_filepath, output_filepath):
     """Run the full filter pipeline: load loci, filter each one, and
     write the surviving loci to the output file.

     Loci for which filter_locus() returns False are discarded; all
     others are stored (possibly modified) in self.filtered_loci.
     """
     self.load_loci(input_filepath)
     filtered = ((name, self.filter_locus(locus))
                 for name, locus in iteritems(self.loci))
     self.filtered_loci = {
         name: locus for name, locus in filtered if locus is not False
     }
     self.write_output(output_filepath)
        def generate_value_list(self, data):
            """Flatten a {k: count} mapping into a sorted list in which
            each k-repeat value appears count times.

            Only k-repeats with positive support (count > 0) contribute.
            """
            return sorted(
                k
                for k, count in iteritems(data)
                if count > 0
                for _ in range(count)
            )
 def __normalized_subset(self, subset, data):
     """Normalize the counts in data to fractions of the subset's total
     support, as reported by get_support().

     When the subset has zero support every value maps to 0.0 (avoids
     division by zero).
     """
     total = self.get_support(subset)
     # The zero-support check is loop-invariant, so decide once up front.
     if total == 0:
         return {k: 0.0 for k in data}
     return {k: (1.0 * count) / total for k, count in iteritems(data)}
# Example #5 (score: 0)
        def generate_value_list(self, data):
            """Expand a {k: count} mapping into a sorted list of k-repeat
            values, repeating each k once per unit of support.

            k-repeats with no support (count <= 0) are skipped.
            """
            values = []
            for k, count in iteritems(data):
                if count <= 0:
                    continue
                values.extend([k] * count)
            values.sort()
            return values
 def __normalized_subset(self, subset, data):
     """Return data with each count rescaled to a fraction of the
     subset's total support.

     Falls back to 0.0 for every entry when get_support() reports zero,
     so no division by zero can occur.
     """
     total = self.get_support(subset)
     normalized = {}
     for k, count in iteritems(data):
         normalized[k] = 0.0 if total == 0 else (1.0 * count) / total
     return normalized
 def write_output(self, output_filepath):
     """Write the filtered loci to a tab-separated file.

     Emits a header row (Locus, Repeats, Normal, Tumor) followed by the
     rows produced by each locus object's generate_output(), with loci
     written in sorted-name order.

     Args:
         output_filepath: path of the file to create (overwritten if it
             already exists).

     Returns:
         True on completion.
     """
     header = ['Locus', 'Repeats', 'Normal', 'Tumor']
     # Use a context manager so the handle is closed even if a locus
     # raises while generating output (the previous open()/close() pair
     # leaked the file descriptor on error).
     with open(output_filepath, 'w') as fileout:
         fileout.write('\t'.join(header) + '\n')
         for l, locus in sorted(iteritems(self.filtered_loci)):
             output = locus.generate_output()
             for line in output:
                 fileout.write('\t'.join(line) + '\n')
     return True
        def subset_outlier_filter(self, data, sds):
            """Drop k-repeat entries lying more than sds standard
            deviations from the mean of the supported values.

            The window edges are rounded outward (floor/ceil) to give
            the filter some leniency. An empty value list yields an
            empty result.
            """
            values = self.generate_value_list(data)
            if not values:
                return {}
            mean = numpy.mean(values)
            std = numpy.std(values)
            # Round outward so borderline k values are still accepted.
            min_k = int(math.floor(mean - (sds * std)))
            max_k = int(math.ceil(mean + (sds * std)))
            return {
                k: count
                for k, count in iteritems(data)
                if min_k <= k <= max_k
            }
# Example #9 (score: 0)
        def subset_outlier_filter(self, data, sds):
            """Filter data down to k-repeats whose k falls within sds
            standard deviations of the mean supported value.

            Returns an empty dict when there are no supported values.
            The acceptance window is widened by flooring the lower bound
            and ceiling the upper bound.
            """
            values = self.generate_value_list(data)
            if not len(values):
                return {}
            center = numpy.mean(values)
            spread = numpy.std(values)
            # Widen the window via floor/ceil for a more lenient filter.
            lower = int(math.floor(center - (sds * spread)))
            upper = int(math.ceil(center + (sds * spread)))
            accepted = {}
            for k, count in iteritems(data):
                if lower <= k <= upper:
                    accepted[k] = count
            return accepted
    def k_values(self, subset=False):
        """Return the sorted k values that have support.

        With subset=False, a k is included if it has positive support in
        either the normal or the tumor data. A subset name starting with
        'N' or 'T' (case-insensitive) restricts the check to the normal
        or tumor data respectively.
        """
        found = set()
        if subset is False:
            for k in self.__k:
                in_normal = k in self.__normal and self.__normal[k] > 0.0
                in_tumor = k in self.__tumor and self.__tumor[k] > 0.0
                if in_normal or in_tumor:
                    found.add(k)
        else:
            initial = subset.upper()[0]
            if initial == 'N':
                # Normal data set
                subset = self.__normal
            elif initial == 'T':
                # Tumor data set
                subset = self.__tumor

            found = {k for k, v in iteritems(subset) if v > 0.0}

        return sorted(found)
    def k_values(self, subset=False):
        """List the supported k values in sorted order.

        subset=False checks both data sets and keeps any k with positive
        support in normal or tumor; otherwise the first letter of the
        subset name ('N'/'T', any case) selects which data set to scan.
        """
        supported = set()
        if subset is not False:
            prefix = subset.upper()[0]
            if prefix == 'N':
                # Normal data set
                subset = self.__normal
            elif prefix == 'T':
                # Tumor data set
                subset = self.__tumor

            for k, v in iteritems(subset):
                if v > 0.0:
                    supported.add(k)
        else:
            for k in self.__k:
                if (k in self.__normal and self.__normal[k] > 0.0) or \
                        (k in self.__tumor and self.__tumor[k] > 0.0):
                    supported.add(k)

        return sorted(supported)
 def expand_kmer_counts(d):
     """Flatten a {k: count} mapping into a list in which each k
     appears count times."""
     return [k for k, v in iteritems(d) for _ in range(v)]
    output_filepath = os.path.abspath(args.output)
    status_filepath = output_filepath + '.status'

    loci = load_loci(input_filepath)

    fileout = open(output_filepath, 'w')
    line = '\t'.join([
        'Locus', 'Normal_Reads', 'Tumor_Reads', 'Difference', 'Distance',
        'Dissimilarity'
    ])
    fileout.write(line + '\n')

    # Iterate through all the results to generate the output. As part of the
    # loop, count the weighted values for each metric.
    values = {'difference': [], 'distance': [], 'dissimilarity': []}
    for l, locus in sorted(iteritems(loci)):
        # Calculate post-normalization metrics
        locus.normalize()
        difference = Difference.get(locus)
        distance = EuclideanDistance.get(locus)
        dissimilarity = CosineDissimilarity.get(locus)

        # Generate output line.
        line = '\t'.join([
            str(x) for x in [
                locus.locus(),
                locus.get_support('N'),
                locus.get_support('T'),
                round(difference, 4),
                round(distance, 4),
                round(dissimilarity, 4)
 def expand_kmer_counts(d):
     """Expand a {kmer: count} mapping into a flat list, repeating each
     kmer once per unit of its count."""
     expanded = []
     for kmer, count in iteritems(d):
         expanded += [kmer] * count
     return expanded