def interlocus_dependence(self, strain_names):
     """ Performs a chi square test to find interval pairs whose origins are interdependent

     For every pair of elementary intervals (i, j) with i < j, the observed count of each
     (proximal subspecies, distal subspecies) combination is compared with the count expected
     from the per-interval subspecies counts alone.
     :param strain_names: list of strain names to analyze
     :return: list with one row per interval pair (upper triangle only, row-major order), each row being
         [chi_sq, p_value, intervals[i - 1] (0 for the first interval), intervals[i],
          intervals[j], intervals[j - 1]]
     """
     combo_count_dict, intervals = self.pairwise_frequencies(strain_names)
     num_intervals = len(intervals)
     num_species = subspecies.NUM_SUBSPECIES
     num_combos = num_species ** 2
     # stack the per-combination count matrices into one array whose last axis is the combo index;
     # zero-initialized so no slice is left uninitialized if fewer combos than expected are iterated
     combo_counts = np.zeros([num_intervals, num_intervals, num_combos], dtype=np.uint16)
     # per-interval count of each subspecies, read off the diagonal of the pairwise matrices
     species_counts = np.zeros([num_intervals, num_species])
     for i, prox_species in enumerate(subspecies.iter_subspecies()):
         for j, dist_species in enumerate(subspecies.iter_subspecies()):
             counts = combo_count_dict[subspecies.combine(prox_species, dist_species)]
             species_counts[:, i] += np.diag(counts)
             combo_counts[:, :, i * num_species + j] = counts
     # compute expected combo frequencies from source frequencies
     combo_expectations = np.zeros([num_intervals, num_intervals, num_combos])
     for i in xrange(num_species):
         for j in xrange(num_species):
             combo_expectations[:, :, i * num_species + j] = \
                 np.outer(species_counts[:, i], species_counts[:, j])
     # normalize expectations using the actual total frequency
     sums = np.sum(combo_counts, axis=2)
     # 0/0 occurs for interval pairs with no assigned origin; silence that warning only, and let the
     # context manager restore the previous numpy error settings even if the division raises
     with np.errstate(invalid='ignore'):
         combo_expectations = np.true_divide(combo_expectations, sums[:, :, np.newaxis])
     combo_expectations = np.nan_to_num(combo_expectations)
     # do chi-square test
     output = []
     for i in xrange(num_intervals):
         # the first interval has no preceding boundary; use 0 rather than letting
         # intervals[-1] wrap around to the last boundary
         prox_start = intervals[i - 1] if i > 0 else 0
         # only upper triangle is meaningful
         for j in xrange(i + 1, num_intervals):
             # categories with zero expectation cannot enter the chi-square statistic
             nonzero_expectations = np.where(combo_expectations[i, j])
             chi_sq, p_value = stats.chisquare(
                 combo_counts[i, j][nonzero_expectations], combo_expectations[i, j][nonzero_expectations])
             output.append([
                     chi_sq,
                     p_value,
                     # proximal interval
                     prox_start,
                     intervals[i],
                     # distal interval
                     # NOTE(review): emitted as (j, j - 1), the reverse of the proximal pair; order kept
                     # because callers may rely on element positions -- confirm whether this is intended
                     intervals[j],
                     intervals[j - 1]
                     ])
     return output
 def sources_at_point_pair(self, chrom1, pos1, chrom2, pos2, strain_names):
     """ Reports the range of the 2D interval and the strains with each subspecific combo at 2 loci in the genome

     The two loci are ordered by genome index before processing, so the first entry of every result
     refers to the locus with the smaller genome index, which is not necessarily (chrom1, pos1).
     :param chrom1: chromosome of one locus
     :param pos1: position of one locus
     :param chrom2: chromosome of another locus
     :param pos2: position of another locus
     :param strain_names: list of strain names to analyze
     :return: dict with
         'Key': subspecies.to_string label of every subspecies from subspecies.iter_subspecies(True),
         'Samples': nested lists of strain names, indexed by the subspecies ordinal at the first locus
             and then by the subspecies ordinal at the second locus,
         'Intervals': self.chrom_and_pos(...) of the tightest bounds around each locus over all strains
     """
     loci = sorted([self.genome_index(chrom1, pos1), self.genome_index(chrom2, pos2)])
     genome_end = np.sum(self.sizes)
     # bounds start at the whole genome and are narrowed by every strain's containing interval
     lower_bounds = [0, 0]
     upper_bounds = [genome_end, genome_end]
     buckets = [[[] for _ in subspecies.iter_subspecies(True)] for _ in subspecies.iter_subspecies(True)]
     labels = [subspecies.to_string(s) for s in subspecies.iter_subspecies(True)]
     for strain_name in strain_names:
         strain_entry = self.sample_dict[strain_name]
         boundaries = strain_entry[0]
         origins = strain_entry[1]
         # the scan position carries over from the first locus to the second, which is valid
         # because the loci are sorted
         cursor = 0
         containing = []
         for which, target in enumerate(loci):
             # advance to the first boundary that is not below the locus
             while boundaries[cursor] < target:
                 cursor += 1
             if cursor > 0:
                 lower_bounds[which] = max(lower_bounds[which], boundaries[cursor - 1])
             upper_bounds[which] = min(upper_bounds[which], boundaries[cursor])
             containing.append(cursor)
         first_ordinal = subspecies.to_ordinal(origins[containing[0]])
         second_ordinal = subspecies.to_ordinal(origins[containing[1]])
         buckets[first_ordinal][second_ordinal].append(strain_name)
     return {
         'Key': labels,
         'Samples': buckets,
         'Intervals': [
             self.chrom_and_pos(lower_bounds[0], upper_bounds[0]),
             self.chrom_and_pos(lower_bounds[1], upper_bounds[1]),
         ],
     }