def unique_combos(self, background_strains, foreground_strains):
    """
    Find label combinations at interval pairs that are absent from every
    background strain but shared by all foreground strains.

    :param background_strains: list of strain names
    :param foreground_strains: list of strain names
    :return: list with one row per qualifying (combination, interval pair);
        each row is [proximal start, proximal end, distal start, distal end,
        color], where color is subspecies.to_color(combo, ordinal=True)
    """
    # Both matrices are built on one shared set of elementary intervals so
    # that they can be compared cell by cell.
    all_strains = background_strains + foreground_strains
    interval_lists = [self.sample_dict[name][0] for name in all_strains]
    elem_intervals = self.make_elementary_intervals(interval_lists)

    background = self.build_pairwise_matrix(background_strains, elem_intervals)
    foreground = self.build_pairwise_matrix(foreground_strains, elem_intervals)

    # Keep cells whose foreground value equals the number of foreground
    # strains and whose background value is falsy.
    in_all_foreground = foreground == len(foreground_strains)
    absent_from_background = np.logical_not(background)
    uniquities = np.logical_and(in_all_foreground, absent_from_background)

    output = []
    for combo in xrange(subspecies.NUM_SUBSPECIES ** 2):
        hits = np.where(uniquities[combo])
        combo_color = subspecies.to_color(combo, ordinal=True)
        # NOTE(review): an index of 0 makes elem_intervals[index - 1] read the
        # last element; confirm the helpers never produce a hit at index 0.
        output.extend(
            [
                # proximal interval start, end
                elem_intervals[i - 1], elem_intervals[i],
                # distal interval start, end
                elem_intervals[j - 1], elem_intervals[j],
                combo_color,
            ]
            for i, j in zip(hits[0], hits[1])
        )
    return output
def pairwise_frequencies(self, strain_names):
    """
    For every strain, record each pair of loci (i, j) with i <= j whose
    labels are both known, grouped by the combined label of the pair.

    :param strain_names: list of strain names to analyze (must be a subset
        of the output from preprocess())
    :return: tuple (output, colors). output has one entry per combination
        ordinal; each entry is four parallel lists holding intervals[i-1],
        intervals[i], intervals[j-1] and intervals[j] (presumably the start
        and end of the two intervals -- confirm), with one element appended
        per strain and qualifying pair. colors[k] is
        subspecies.to_color(k, True).
    """
    num_combos = subspecies.NUM_SUBSPECIES ** 2
    output = [[[], [], [], []] for _ in xrange(num_combos)]

    for strain_name in strain_names:
        intervals, sources = self.sample_dict[strain_name]
        num_loci = len(intervals)
        for i in xrange(num_loci):
            first_label = sources[i]
            if not subspecies.is_known(first_label):
                continue
            # only upper triangle is meaningful, so j starts at i
            for j in xrange(i, num_loci):
                second_label = sources[j]
                if not subspecies.is_known(second_label):
                    continue
                combined = subspecies.combine(first_label, second_label)
                first_lo, first_hi, second_lo, second_hi = \
                    output[subspecies.to_ordinal(combined)]
                # NOTE(review): for index 0, intervals[index - 1] is
                # intervals[-1], i.e. the LAST entry; confirm this is the
                # intended value for the first locus.
                first_lo.append(intervals[i - 1])
                first_hi.append(intervals[i])
                second_lo.append(intervals[j - 1])
                second_hi.append(intervals[j])

    colors = [subspecies.to_color(ordinal, True) for ordinal in xrange(num_combos)]
    return output, colors
def not_in_background(self, background_strains, foreground_strains):
    """
    Find label combinations at interval pairs that are present in at least
    one foreground strain but absent from every background strain.

    Each foreground strain is compared against the background on its own,
    using elementary intervals built from the background plus that strain.

    :param background_strains: list of strain names
    :param foreground_strains: list of strain names
    :return: tuple (output, colors). output has one entry per combination
        ordinal; each entry is five parallel lists: [proximal starts,
        proximal ends, distal starts, distal ends, strain names]. colors[k]
        is subspecies.to_color(k, ordinal=True).
    """
    num_combos = subspecies.NUM_SUBSPECIES ** 2
    output = [[[], [], [], [], []] for _ in xrange(num_combos)]

    for strain in foreground_strains:
        interval_lists = [self.sample_dict[name][0]
                          for name in background_strains + [strain]]
        elem_intervals = self.make_elementary_intervals(interval_lists)

        background = self.build_pairwise_matrix(background_strains, elem_intervals)
        background_absent = np.logical_not(background)
        foreground = self.build_pairwise_matrix([strain], elem_intervals)
        # Cells that are truthy for this strain and falsy for the background.
        uniquities = np.logical_and(foreground, background_absent)

        for combo in xrange(num_combos):
            hits = np.where(uniquities[combo])
            prox_starts, prox_ends, dist_starts, dist_ends, strains = output[combo]
            # NOTE(review): an index of 0 makes elem_intervals[index - 1] read
            # the last element; confirm the helpers never produce index 0.
            for i, j in zip(hits[0], hits[1]):
                prox_starts.append(elem_intervals[i - 1])
                prox_ends.append(elem_intervals[i])
                dist_starts.append(elem_intervals[j - 1])
                dist_ends.append(elem_intervals[j])
                strains.append(strain)

    colors = [subspecies.to_color(combo, ordinal=True) for combo in xrange(num_combos)]
    return output, colors