def sources_at_point_pair(self, chrom1, pos1, chrom2, pos2, strain_names):
     """ Groups strains by the subspecific origin they carry at each of 2 loci in the genome and reports,
     for each locus, the overlap of the intervals that contain it across all the strains.
     The two loci are ordered by genome index before the lookup, so "first" below means the locus with the
     smaller genome index, which is not necessarily (chrom1, pos1).
     :param chrom1: chromosome of one locus
     :param pos1: position of one locus
     :param chrom2: chromosome of another locus
     :param pos2: position of another locus
     :param strain_names: list of strain names to analyze
     :return: dict with 'Key' (string form of every subspecies label), 'Samples' (grid of strain-name lists
         indexed by the label ordinal at the first locus, then at the second) and 'Intervals' (for each
         locus, chrom_and_pos() of the overlap described above)
     """
     # ascending order lets a single forward scan of each strain's boundaries find both loci
     loci = sorted([self.genome_index(chrom1, pos1), self.genome_index(chrom2, pos2)])
     genome_end = np.sum(self.sizes)
     lower = [0, 0]
     upper = [genome_end, genome_end]
     labels = list(subspecies.iter_subspecies(True))
     samples = [[[] for _ in labels] for _ in labels]
     key = [subspecies.to_string(label) for label in labels]
     for strain_name in strain_names:
         record = self.sample_dict[strain_name]
         ends = record[0]
         origins = record[1]
         cursor = 0
         hits = []
         for slot, target in enumerate(loci):
             # advance to the first boundary at or beyond this locus
             while ends[cursor] < target:
                 cursor += 1
             # the previous boundary (if any) is where the containing interval begins
             if cursor > 0:
                 lower[slot] = max(lower[slot], ends[cursor - 1])
             upper[slot] = min(upper[slot], ends[cursor])
             hits.append(cursor)
         first = subspecies.to_ordinal(origins[hits[0]])
         second = subspecies.to_ordinal(origins[hits[1]])
         samples[first][second].append(strain_name)
     return {
         'Key': key,
         'Samples': samples,
         'Intervals': [
             self.chrom_and_pos(lower[0], upper[0]),
             self.chrom_and_pos(lower[1], upper[1]),
         ],
     }
 def build_pairwise_matrix(self, strain_names, elem_intervals):
     """ Builds a 3d matrix of counts. The first index is the ordinal of the combined label of two of a
     strain's intervals; the remaining 2d matrix is indexed by pairs of elementary intervals and counts the
     strains contributing that combo there. Only pairs with the second interval at or after the first
     (upper triangle) are filled in.
     :param strain_names: list of strain names to analyze
     :param elem_intervals: boundaries of the elementary intervals; each strain's boundaries are located in
         it with np.searchsorted, so it is assumed to be sorted ascending
     :return: np.int16 array of shape ((NUM_SUBSPECIES + 1) ** 2, len(elem_intervals), len(elem_intervals))
     """
     num_elem = len(elem_intervals)
     num_combos = (subspecies.NUM_SUBSPECIES + 1) ** 2
     source_counts = np.zeros([num_combos, num_elem, num_elem], dtype=np.int16)
     for strain_name in strain_names:
         intervals, sources = self.sample_dict[strain_name]
         # index of the elementary interval on which each of this strain's intervals ends
         last_elem = np.searchsorted(elem_intervals, intervals)
         # each strain interval starts one past the end of the previous one; the first starts at 0
         first_elem = np.insert(last_elem, 0, -1)[:-1] + 1
         spans = [slice(lo, hi + 1) for lo, hi in zip(first_elem, last_elem)]
         for row, row_span in enumerate(spans):
             for col in range(row, len(spans)):  # only upper triangle
                 combo = subspecies.to_ordinal(subspecies.combine(sources[row], sources[col]))
                 source_counts[combo, row_span, spans[col]] += 1
     return source_counts
 def pairwise_frequencies(self, strain_names):
     """ For every label pair, collect the bounds of every pair of intervals (taken from the same strain)
     which carry those labels. Intervals whose label is not known are skipped.
     :param strain_names: list of strain names to analyze (must be a subset of the output from preprocess())
     :return: tuple (output, colors). output has one entry per combined-label ordinal, each entry being four
         parallel lists: start of the first interval, end of the first interval, start of the second
         interval, end of the second interval. colors is subspecies.to_color(ordinal, True) for each of
         those ordinals.
     """
     num_combos = subspecies.NUM_SUBSPECIES ** 2
     output = [[[], [], [], []] for _ in range(num_combos)]
     for strain_name in strain_names:
         intervals, sources = self.sample_dict[strain_name]
         # classify each interval once instead of re-testing it for every pair it takes part in
         known = [k for k in range(len(intervals)) if subspecies.is_known(sources[k])]
         for rank, i in enumerate(known):
             # Each interval starts at the previous boundary. The first interval has no previous
             # boundary: intervals[-1] would wrap around to the last one, so its start is 0, the same
             # lower bound sources_at_point_pair uses.
             row_start = intervals[i - 1] if i > 0 else 0
             # only upper triangle is meaningful
             for j in known[rank:]:
                 col_start = intervals[j - 1] if j > 0 else 0
                 combo = subspecies.to_ordinal(subspecies.combine(sources[i], sources[j]))
                 combo_output = output[combo]
                 combo_output[0].append(row_start)
                 combo_output[1].append(intervals[i])
                 combo_output[2].append(col_start)
                 combo_output[3].append(intervals[j])
     colors = [subspecies.to_color(ordinal, True) for ordinal in range(num_combos)]
     return output, colors