コード例 #1
0
 def unique_combos(self, background_strains, foreground_strains):
     """ Find label combinations at interval pairs that every foreground strain shares and no background strain has.

     The elementary intervals are computed once over the background and foreground strains together,
     so both pairwise matrices are indexed on the same breakpoints.

     NOTE(review): an index of 0 makes ``breakpoints[idx - 1]`` wrap around to the last breakpoint;
     confirm that the pairwise matrices never flag index 0 or that this is intended.

     :param background_strains: list of strain names
     :param foreground_strains: list of strain names
     :return: list of rows, each ``[proximal start, proximal end, distal start, distal end, color]``
     """
     all_strains = background_strains + foreground_strains
     breakpoints = self.make_elementary_intervals(
         [self.sample_dict[name][0] for name in all_strains])
     bg_matrix = self.build_pairwise_matrix(background_strains, breakpoints)
     fg_matrix = self.build_pairwise_matrix(foreground_strains, breakpoints)

     # keep cells whose foreground value equals the number of foreground strains
     # (presumably a per-cell strain count -- verify against build_pairwise_matrix)
     # and whose background value is falsy
     in_every_foreground = fg_matrix == len(foreground_strains)
     foreground_only = np.logical_and(in_every_foreground, np.logical_not(bg_matrix))

     rows = []
     for ordinal in xrange(subspecies.NUM_SUBSPECIES ** 2):
         hits = np.where(foreground_only[ordinal])
         color = subspecies.to_color(ordinal, ordinal=True)
         # hits[0] / hits[1] are the proximal / distal indices of each flagged cell
         rows.extend(
             [breakpoints[prox - 1], breakpoints[prox],
              breakpoints[dist - 1], breakpoints[dist],
              color]
             for prox, dist in zip(hits[0], hits[1]))
     return rows
コード例 #2
0
 def pairwise_frequencies(self, strain_names):
     """ Collect, for each label combination, the coordinates of every locus pair carrying that combination.

     For each strain, every pair of intervals (i, j) with j >= i whose sources are both known
     contributes one entry to the bucket selected by the ordinal of the combined sources.

     NOTE(review): for index 0, ``intervals[idx - 1]`` wraps around to the last element of
     ``intervals``; confirm whether that is the intended start coordinate of the first interval.

     :param strain_names: list of strain names to analyze (must be a subset of the output from preprocess())
     :return: tuple ``(buckets, colors)``; ``buckets[combo]`` holds four parallel lists
         (proximal start, proximal end, distal start, distal end) and ``colors[combo]`` is the
         color of that combination ordinal
     """
     combo_count = subspecies.NUM_SUBSPECIES ** 2
     buckets = [[[], [], [], []] for _ in xrange(combo_count)]

     for name in strain_names:
         intervals, sources = self.sample_dict[name]
         interval_count = len(intervals)
         for prox in xrange(interval_count):
             prox_source = sources[prox]
             if not subspecies.is_known(prox_source):
                 continue
             # only the upper triangle (dist >= prox) is meaningful
             for dist in xrange(prox, interval_count):
                 dist_source = sources[dist]
                 if not subspecies.is_known(dist_source):
                     continue
                 ordinal = subspecies.to_ordinal(subspecies.combine(prox_source, dist_source))
                 prox_starts, prox_ends, dist_starts, dist_ends = buckets[ordinal]
                 prox_starts.append(intervals[prox - 1])
                 prox_ends.append(intervals[prox])
                 dist_starts.append(intervals[dist - 1])
                 dist_ends.append(intervals[dist])

     colors = [subspecies.to_color(ordinal, True) for ordinal in xrange(combo_count)]
     return buckets, colors
コード例 #3
0
 def not_in_background(self, background_strains, foreground_strains):
     """ Find label combinations at interval pairs present in at least one foreground strain but absent from the background.

     Each foreground strain is compared against the background separately: the elementary
     intervals are rebuilt from the background strains plus that single strain, so the
     coordinates reported for different strains may come from different breakpoint sets.

     NOTE(review): an index of 0 makes ``breakpoints[idx - 1]`` wrap around to the last breakpoint;
     confirm that the pairwise matrices never flag index 0 or that this is intended.

     :param background_strains: list of strain names
     :param foreground_strains: list of strain names
     :return: tuple ``(buckets, colors)``; ``buckets[combo]`` holds five parallel lists
         (proximal start, proximal end, distal start, distal end, strain name) and
         ``colors[combo]`` is the color of that combination ordinal
     """
     combo_count = subspecies.NUM_SUBSPECIES ** 2
     buckets = [[[], [], [], [], []] for _ in xrange(combo_count)]

     for strain in foreground_strains:
         interval_lists = [self.sample_dict[name][0] for name in background_strains + [strain]]
         breakpoints = self.make_elementary_intervals(interval_lists)
         bg_matrix = self.build_pairwise_matrix(background_strains, breakpoints)
         missing_from_background = np.logical_not(bg_matrix)
         fg_matrix = self.build_pairwise_matrix([strain], breakpoints)
         # truthy in this strain's matrix and falsy in the background matrix
         strain_only = np.logical_and(fg_matrix, missing_from_background)

         for ordinal in xrange(combo_count):
             hits = np.where(strain_only[ordinal])
             prox_starts, prox_ends, dist_starts, dist_ends, owners = buckets[ordinal]
             for prox, dist in zip(hits[0], hits[1]):
                 prox_starts.append(breakpoints[prox - 1])
                 prox_ends.append(breakpoints[prox])
                 dist_starts.append(breakpoints[dist - 1])
                 dist_ends.append(breakpoints[dist])
                 owners.append(strain)

     colors = [subspecies.to_color(ordinal, ordinal=True) for ordinal in xrange(combo_count)]
     return buckets, colors