Python to_string Beispiele, subspeciesCython.to_string Python Beispiele

Beispiel #1

0

Datei anzeigen

Datei: twolocus.py Projekt: sgreenstein/pairwise-subspecific-origins

def main():
    """ Scratch driver: run some tests with a dummy file, overriding chromosome lengths locally for sake of testing.

    As written, only the first stage executes: a TwoLocus is built from a fixed ``in_path``, the number of
    available strains is printed, and ``exit()`` ends the process. The later stages are unreachable until the
    ``exit()`` calls are removed or moved:

    1. a contingency table over two hard-coded strain lists, each restricted to the strains for which
       ``tl.is_available`` is true;
    2. checks against ``test2.csv`` and ``test.csv`` using TwoLocus objects built with explicit ``chrom_sizes``,
       ending with a per-combo printout of the values returned by ``calculate_genomic_area``.

    The function never returns normally; its last statement is ``sys.exit(1)``.
    """
    # Hard-coded, machine-specific input directory.
    tl = TwoLocus(in_path='/csbiodata/public/www.csbio.unc.edu/htdocs/sgreens/pairwise_origins/')
    # tl = TwoLocus()
    # tl.preprocess(glob.glob('OR_ss_origins/*.hap'))
    # Python 2 print statement: reports how many strains tl lists as available.
    print len(tl.list_available_strains())
    # Stops the run here; everything below is unreachable while this call is present.
    exit()
    # print len(tl.list_available_strains())
    # tl.preprocess(['cc_origins.csv'])
    # tl.preprocess(['ccv_origins.csv'])
    # First strain list. Most candidate names are commented out inside the literal; the names that remain are
    # kept only if tl reports them as available.
    classical = [s for s in
                 ["129P1/ReJ",  # "129P3/J", "129S1SvlmJ", "129S6", "129T2/SvEmsJ", "129X1/SvJ", "A/J", "A/WySnJ",
                  "AEJ/GnLeJ", "AEJ/GnRk", "AKR/J", "ALR/LtJ", "ALS/LtJ", "BALB/cByJ", "BALB/cJ", "BDP/J", "BPH/2J",
                  # "BPL/1J", "BPN/3J", "BTBR T<+>tf/J", "BUB/BnJ", "BXSB/MpJ", "C3H/HeJ", "C3HeB/FeJ", "C57BL/10J",
                  # "C57BL/10ScNJ", "C57BL/10SAAAJ", "C57BL/6CR", "C57BL/6J", "C57BL/6NCI", "C57BL/6Tc", "C57BLKS/J",
                  # "C57BR/cdJ", "C57L/J", "C58/J", "CBA/CaJ", "CBA/J", "CE/J", "CHMU/LeJ", "DBA/1J", "DBA/1LacJ",
                  # "DBA/2DeJ", "DBA/2HaSmnJ", "DBA/2J", "DDK/Pas", "DDY/JclSidSeyFrkJ", "DLS/LeJ", "EL/SuzSeyFrkJ",
                  # "FVB/NJ", "HPG/BmJ", "I/LnJ", "IBWSP2", "IBWSR2", "ICOLD2", "IHOT1", "IHOT2", "ILS", "ISS", "JE/LeJ",
                  # "KK/HlJ", "LG/J", "LP/J", "LT/SvEiJ", "MRL/MpJ", "NOD/ShiLtJ", "NON/ShiLtJ", "NONcNZO10/LtJ",
                  # "NONcNZO5/LtJ", "NOR/LtJ", "NU/J", "NZB/BlNJ", "NZL/LtJ", "NZM2410/J", "NZO/HlLtJ", "NZW/LacJ", "P/J",
                  # "PL/J", "PN/nBSwUmabJ", "RF/J", "RHJ/LeJ", "RIIIS/J", "RSV/LeJ", "SB/LeJ", "SEA/GnJ", "SEC/1GnLeJ",
                  # "SEC/1ReJ", "SH1/LeJ", "SI/Col Tyrp1 Dnahc11/J", "SJL/Bm", "SJL/J", "SM/J", "SSL/LeJ", "ST/bJ",
                  "STX/Le", ]  # "SWR/J", "TALLYHO/JngJ", "TKDU/DnJ", "TSJ/LeJ", "YBR/EiJ", "ZRDCT Rax<+>ChUmdJ"]
                 if tl.is_available(s)]
    # Second strain list, built the same way (commented-out names are excluded, the rest filtered by availability).
    wild_derived = [s for s in
                    ['22MO',
                     # 'BIK/g', 'BULS', 'BUSNA', 'BZO', 'CALB/RkJ', 'CASA/RkJ', 'CAST/EiJ', 'CIM', 'CKN', 'CKS',
                     'CZECHI/EiJ', 'CZECHII/EiJ', 'DCA', 'DCP', 'DDO', 'DEB', 'DGA', 'DIK', 'DJO', 'DKN', 'DMZ', 'DOT',
                     # 'IS/CamRkJ', 'JF1/Ms', 'LEWES/EiJ', 'MBK', 'MBS', 'MCZ', 'MDG', 'MDGI', 'MDH', 'MGA', 'MH',
                     # 'MOLD/RkJ', 'MOLF/EiJ', 'MOLG/DnJ', 'MOR/RkJ', 'MPB', 'MSM/Ms', 'PERA/EiJ', 'PERC/EiJ', 'POHN/Deh',
                     # 'PWD/PhJ', 'PWK/PhJ', 'RBA/DnJ', 'RBB/DnJ', 'RBF/DnJ', 'SF/CamEiJ', 'SKIVE/EiJ', 'SOD1/EiJ',
                     # 'STLT', 'STRA', 'STRB', 'STUF', 'STUP', 'STUS', 'TIRANO/EiJ', 'WLA', 'WMP', 'WSB/EiJ',
                     'ZALENDE/EiJ'] if tl.is_available(s)]
    # NOTE(review): the third argument is presumably the CSV output path -- confirm against contingency_table.
    tl.contingency_table(classical, wild_derived, '/csbiohome01/sgreens/Projects/intervals/contingency.csv')
    # Second hard stop; the test-file checks below do not run while it is present.
    exit()
    # Fresh object with two explicit chromosome sizes (given as floats here, as ints further down).
    x = TwoLocus(chrom_sizes=[20e6, 20e6])
    x.preprocess(["test2.csv"])
    # The return values of the next two calls are discarded.
    x.unique_combos(['A', 'B', 'D'], ['C', 'E'])
    x.sources_at_point_pair('1', 1, '1', 10000000, ['A'])
    # x.interlocus_dependence([chr(c) for c in xrange(ord('A'), ord('J')+1)])
    # exit()

    x = TwoLocus(chrom_sizes=[20 * 10 ** 6, 20 * 10 ** 6])
    x.preprocess(["test.csv"])
    rez = x.pairwise_frequencies(["A"])

    # rez[0] and rez[1] are passed as the `counts` and `intervals` arguments respectively.
    areas = x.calculate_genomic_area(rez[0], rez[1])
    total = 0.0

    # Print each combo's name, numeric id and area value, accumulating the sum of the area values.
    for combo in subspecies.iter_combos():
        print "\t{:15s}({:4d}):{:1.5f}".format(subspecies.to_string(combo), combo,
                                               areas[str(subspecies.to_string(combo))])
        total += areas[str(subspecies.to_string(combo))]
    print "\t{:21s}:{:1.5f}".format("Total", total)

    # NOTE(review): exits with status 1, which conventionally signals failure -- confirm this is intended.
    sys.exit(1)

Beispiel #2

0

Datei anzeigen

Datei: twolocus.py Projekt: sgreenstein/pairwise-subspecific-origins

    def calculate_genomic_area(self, counts, intervals):
        """
        Compute the fraction of the total genomic 'area' occupied by each combination of subspecies.

        The elementary intervals define a square grid whose cell (i, j) has area width_i * width_j. For every
        combo, the areas of the cells with a positive count are summed and divided by the square of the summed
        self.sizes. Both the intervals and self.sizes are scaled by 1e-6 first (presumably bp -> Mb; confirm).

        :param counts: sequence of incidence matrices, one per subspecies combo. The position of each matrix in the
            sequence is used as the combo id passed to subspecies.to_string. NOTE(review): this was previously
            documented as a dictionary, but enumerating a dict would yield its keys rather than the matrices --
            confirm against the caller.
        :param intervals: the 'elementary intervals' over which the counts were computed; any sequence of numbers
            is accepted, and a leading 0 is prepended here
        :return: OrderedDict mapping str(subspecies.to_string(combo, True)) to that combo's area fraction, in the
            iteration order of counts
        """
        # Interval boundaries with the origin prepended. list() makes the concatenation safe for array input,
        # where `[0] + intervals` would broadcast an addition instead of prepending.
        # NOTE(review): the float32 cast happens before the division, so boundaries above 2**24 are rounded
        # before widths are taken; kept as-is so results stay unchanged.
        bounds = np.array([0] + list(intervals), dtype=np.float32) / 1.0e6
        widths = np.diff(bounds)
        # compute area of each cell in the interval grid: the outer product of the widths with themselves,
        # which is symmetric by construction
        areas = np.outer(widths, widths)

        areas_masked = OrderedDict()
        denom = np.sum(np.array(self.sizes) / 1.0e6) ** 2
        for combo, vals in enumerate(counts):
            # only cells where the combo was observed at least once contribute their area
            areas_masked[str(subspecies.to_string(combo, True))] = np.sum((vals > 0) * areas) / denom
        return areas_masked

Beispiel #3

0

Datei anzeigen

Datei: twolocus.py Projekt: sgreenstein/pairwise-subspecific-origins

 def sources_at_point_pair(self, chrom1, pos1, chrom2, pos2, strain_names):
     """ Returns the range of the 2D interval and the strains grouped by subspecific combo at 2 loci in the genome.

     The two loci are ordered by genome index before lookup, so the first axis of the result refers to the locus
     with the smaller genome index. The reported range for each locus is the intersection, over all strains
     examined, of the interval found for that locus in each strain.

     :param chrom1: chromosome of one locus
     :param pos1: position of one locus
     :param chrom2: chromosome of another locus
     :param pos2: position of another locus
     :param strain_names: list of strain names to analyze
     :return: dict with 'Key' (subspecies.to_string of each subspecies), 'Samples' (2D grid of strain-name lists,
         indexed by the ordinal of the source at the first and second locus) and 'Intervals' (the result of
         self.chrom_and_pos on the lower and upper bound for each locus)
     """
     loci = sorted([self.genome_index(chrom1, pos1), self.genome_index(chrom2, pos2)])
     # start from the widest possible range and narrow it strain by strain
     lower = [0, 0]
     genome_end = np.sum(self.sizes)
     upper = [genome_end, genome_end]

     # one (initially empty) list of strain names per ordered pair of subspecies
     grid = []
     for _ in subspecies.iter_subspecies(True):
         grid.append([[] for _ in subspecies.iter_subspecies(True)])
     labels = [subspecies.to_string(s) for s in subspecies.iter_subspecies(True)]

     for strain_name in strain_names:
         record = self.sample_dict[strain_name]
         boundaries = record[0]
         origins = record[1]
         # the cursor is not reset between the two loci: they are sorted, so the scan simply continues
         cursor = 0
         found = []
         for slot, target in enumerate(loci):
             # advance to the first boundary that is not below the locus
             while boundaries[cursor] < target:
                 cursor += 1
             if cursor > 0:
                 lower[slot] = max(lower[slot], boundaries[cursor - 1])
             upper[slot] = min(upper[slot], boundaries[cursor])
             found.append(cursor)
         row = subspecies.to_ordinal(origins[found[0]])
         col = subspecies.to_ordinal(origins[found[1]])
         grid[row][col].append(strain_name)

     return {
         'Key': labels,
         'Samples': grid,
         'Intervals': [
             self.chrom_and_pos(lower[0], upper[0]),
             self.chrom_and_pos(lower[1], upper[1]),
         ],
     }