Ejemplo n.º 1
0
def test_counts(contrib_data):
    """
    Checks that allele counts are within 95 percent confidence intervals.

    Data are stored in arrays with format [ind, num_alleles] = count.

    Only the case where `contrib_data` holds exactly one region is checked;
    with any other number of regions the test returns without asserting.
    The pedigree file and the number of iterations are read from the
    module-level `args`.
    """
    ## Find the probands - they should always contribute their own alleles
    P = ped.Pedigree(args.pedfile)

    ## TODO: Test individuals with multiple offspring +p2 id:112
    ## Case when no regions are specified - only all probands
    ## Materialise the keys: a dict view cannot be indexed on Python 3
    regions = list(contrib_data.keys())
    if len(regions) != 1:
        return

    all_contribs = contrib_data[regions[0]]
    for prob in P.probands:
        ## Probands should always contribute 1 allele
        assert all_contribs[prob, 1] == args.iterations

    ## Test specific individuals
    ## A list (not a dict view) so np.mean works on both Python 2 and 3
    counts = list(Counter(all_contribs[11]).values())
    ## sqrt(n * 0.25) is the standard deviation of Binomial(n, p=0.5)
    binomial_sd = np.sqrt(args.iterations * 0.25)
    mean = np.mean(counts)
    conf95 = (mean - 2 * binomial_sd, mean + 2 * binomial_sd)

    for count in counts:
        assert conf95[0] < count < conf95[1]
Ejemplo n.º 2
0
def sim_data(request):
    """
    Generate simulation data to be used in other tests.

    `request.param` is unpacked into the pedigree file, the known genotypes
    and the `sim_homs` / `kinship` settings; all but the genotypes are stored
    on the module-level `args`. Yields a dict with the per-run ancestors
    ('ancs'), likelihoods ('liks') and trees ('trees'), plus `args` itself.
    """
    ## Set parameters which vary between runs
    args.pedfile, known_genotypes, args.sim_homs, args.kinship = request.param

    ## Pedigree-derived inputs for the ancestor finder
    pedigree = ped.Pedigree(args.pedfile)
    cones = pedigree.allowedinds(known_genotypes.keys())[2]
    parents = pedigree.parent_dict.copy()

    ## Zero signal for individual 0 and for every key of the parent dict
    zero_signals = {0: 0}
    zero_signals.update(dict.fromkeys(parents, 0))

    ancestors = pedigree.ancestors_dict(known_genotypes.keys())
    finder = climb.AncFinder(cones, parents, ancestors, args)

    ## Run simulations; every run receives its own copies of the genotypes
    ## and of the signals
    runs = [finder.coalesce(known_genotypes.copy(), zero_signals.copy())
            for _ in range(args.iterations)]

    ## Split the (ancestor, likelihood, tree) triples into parallel lists
    ancs = [anc for anc, _, _ in runs]
    liks = [lik for _, lik, _ in runs]
    trees = [tree for _, _, tree in runs]

    yield {'ancs': ancs, 'liks': liks, 'args': args, 'trees': trees}
Ejemplo n.º 3
0
def simulate(args, initial_genotypes, outfile):
    """
    Performs the specified number of climbing simulations, returning
    ancestors, importance sampling likelihoods, and inheritance histories.

    Parameters
    ----------
    args
        Options object. `args.pedfile` and `args.iterations` are read here,
        and the whole object is handed to `climb.AncFinder`.
    initial_genotypes
        Mapping whose keys are passed to `P.allowedinds` and
        `P.ancestors_dict`. Every simulation works on a fresh `.copy()`.
    outfile
        Forwarded unchanged to `incremental_write`.

    Returns
    -------
    pandas.DataFrame
        One row per simulation with columns 'Anc', 'Log2Lik' and 'Tree';
        the index is named 'Sim'. Empty when `args.iterations` is 0.
    """
    ## Load the pedigree and parent-offspring relationships
    vprint("Loading pedigree...")
    P = ped.Pedigree(os.path.expanduser(args.pedfile))

    parent_dict = P.parent_dict
    ##TODO Pass the whole dictionary to improve importance sampling +t1
    ind_cones = P.allowedinds(initial_genotypes.keys())[2]

    signals = dict()
    signals[0] = 0

    print('create signal dict')
    for ind in parent_dict:
        signals[ind] = 0

    print('create ancestors dict')
    ancs_dict = P.ancestors_dict(initial_genotypes.keys())

    print('initialisation')
    A = climb.AncFinder(ind_cones, parent_dict, ancs_dict, args)

    ## Perform climbing simulations
    vprint("Performing simulations...")
    blocksize = 100
    simdat_dict = {'Anc': [], 'Log2Lik': [], 'Tree': []}
    ## Number of simulations completed; stays 0 if the loop never runs
    num_done = 0
    for i in range(args.iterations):
        if i % 100 == 0:
            vprint(i, "/", args.iterations)

        ## Make a copy of the initial genotypes so they don't get modified
        genotypes = initial_genotypes.copy()
        sig = signals.copy()

        ## Simulate coalescence of all affected alleles
        anc, lik, tree = A.coalesce(genotypes, sig)
        simdat_dict['Anc'].append(anc)
        simdat_dict['Log2Lik'].append(lik)
        simdat_dict['Tree'].append(tree)
        num_done = i + 1

        ## Write results to output in blocks
        ## NOTE(review): this fires at i == 0, 100, 200, ... so the first
        ## block holds a single simulation; what exactly is written depends
        ## on incremental_write - confirm the intended block boundaries.
        if i % blocksize == 0:
            incremental_write(simdat_dict['Anc'], simdat_dict['Log2Lik'],
                              simdat_dict['Tree'], blocksize, outfile)

    ## Write leftover results. With zero iterations there is no last index
    ## to derive the remainder from (the loop variable would be unbound),
    ## and nothing to write.
    if num_done > 0:
        num_remaining = (num_done - 1) % blocksize
        incremental_write(simdat_dict['Anc'], simdat_dict['Log2Lik'],
                          simdat_dict['Tree'], num_remaining, outfile)

    simdat_i = pd.DataFrame(simdat_dict)
    simdat_i.index.name = 'Sim'

    return simdat_i
Ejemplo n.º 4
0
def main(args):
    """
    Combine stored tree likelihoods with haplotype-length likelihoods.

    For every tree in `args.climb_lik_file`, evaluates the pdf of an Erlang
    distribution with shape 2 at `args.haplotype_length`, using scale
    1 / (len(tree) - get_num_hidden_transissions(P, tree)). The values are
    normalised to sum to one, converted to log2 and written to
    `args.control_lik_file` as the array 'haplotype_liks'. Their sum with
    the file's existing 'tot_liks' array is written as 'tot_hap_liks'.

    Parameters
    ----------
    args
        Options object providing `pedfile`, `climb_lik_file`,
        `control_lik_file` and `haplotype_length`.
    """
    pedfile = os.path.expanduser(args.pedfile)
    climb_lik_file = os.path.expanduser(args.climb_lik_file)
    control_lik_file = os.path.expanduser(args.control_lik_file)
    hap_length = args.haplotype_length

    print("Loading pedigree...")
    P = ped.Pedigree(pedfile)

    with tables.open_file(climb_lik_file, 'r') as f:
        shape = f.root.liks.shape
        haplotype_liks = np.zeros(shape)

        for i, tree in enumerate(f.root.trees):

            num_hidden_transmissions = get_num_hidden_transissions(P, tree)
            # NOTE(review): divides by zero if every entry of the tree is
            # counted as hidden - confirm that cannot happen.
            scale = 1. / (len(tree) - num_hidden_transmissions)
            haplotype_liks[i] = scipy.stats.erlang(2,
                                                   scale=scale).pdf(hap_length)

            if i % 10000 == 0:
                # Single pre-formatted argument: prints the same text under
                # both the Python 2 print statement and the print function.
                print("Calculating haplotype likelihoods for tree number "
                      "%s of %s" % (i, len(haplotype_liks)))

    norm_hap_liks = np.log2(haplotype_liks / np.sum(haplotype_liks))

    print("Writing combined likelihoods to file...")
    with tables.open_file(control_lik_file, 'a') as hapfile:
        tot_hap_liks = hapfile.create_carray(
            hapfile.root,
            'tot_hap_liks',
            tables.FloatAtom(),
            shape=shape,
            title='Total tree likelihoods, including haplotype length')
        haps = hapfile.create_carray(
            hapfile.root,
            'haplotype_liks',
            tables.FloatAtom(),
            shape=shape,
            title='Likelihood of observed haplotype length:' +
                  str(hap_length) + 'Morgans')

        haps[:] = norm_hap_liks
        # Both arrays hold log2 values, so adding them multiplies the
        # underlying likelihoods.
        tot_liks = hapfile.root.tot_liks[:]
        tot_hap_liks[:] = norm_hap_liks + tot_liks
Ejemplo n.º 5
0
import sys, os
import numpy as np
import time

import ped
import pysignal


# Pedigree to load. Alternative paths (disabled):
# pedfile = os.path.expanduser('~/project/anc_finder/data/BALasc_probands1930.txt')
# pedfile = os.path.expanduser('~/project/anc_finder/data/pedEx.txt')
# pedfile = os.path.expanduser(
#         '~/project/anc_finder/scripts/test/test_data/pedEx3.txt')
pedfile = os.path.expanduser(
        '~/project/anc_finder/scripts/test/test_data/pedEx2.txt')
P = ped.Pedigree(pedfile)

ped_arr = pysignal.sort_ped(P)
ninds = len(P.inds)
# Single pre-formatted argument: prints the same text under both the
# Python 2 print statement and the Python 3 print function.
print("%s probands" % len(P.probands))

# Use at most the first 10 probands, mapped through P.ind_dict, as samples
sample_idx = [P.ind_dict[x] for x in list(P.probands)[:10]]
samples = np.array(sample_idx, dtype=np.int32)
# The one-element tuple stops % from unpacking the array as format arguments
print("Samples: %s" % (samples,))

cP = pysignal.cPed()
cP.load_ped(ped_arr, len(samples))
cP.load_samples(samples)
# Disabled debugging calls:
# cP.print_samples()
# for s in samples:
#     cP.update_ancestor_weights(s, 1)
# cP.print_nodes()
cP.init_sample_weights()
Ejemplo n.º 6
0
def _read_stripped_lines(path):
    """Return the non-blank lines of the text file `path`, stripped."""
    # 'U' requests universal newlines on Python 2; Python 3 applies them by
    # default and rejects the 'U' flag from 3.11 onwards.
    mode = 'rU' if sys.version_info[0] < 3 else 'r'
    with open(path, mode) as handle:
        stripped = [line.strip() for line in handle]
    return [line for line in stripped if line]


def _read_int_column(path):
    """Return the integers stored one per line in `path`."""
    return [int(token) for token in _read_stripped_lines(path)]


def main():
    """
    Command-line entry point: find pedigree symmetries for sets of probands.

    The pedigree is loaded from --pedfile. Probands come from exactly one of
    --probandfile (one integer per line), --probandlist (comma-separated
    integers) or --probandpaths-file (one proband-file path per line).

    With --outfile the symmetries are written to that file. With
    --probandpaths-file they are written to 'symmetry.txt' in the directory
    of each proband file. Otherwise they are printed. Combining --outfile
    with --probandpaths-file is rejected as a usage error (exit status 2).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-o", "--outfile",
                        help="Output file. Both symmetrical inds and "
                        "monogamous couples will be saved in the same file")
    # NOTE(review): --split takes a value and is never read in this
    # function; it looks like it was meant to be a store_true flag.
    parser.add_argument("-s", "--split",
                        help="Display monogamous couples separately from "
                        "symmetrical individuals. Any file written will "
                        "still combine the two")

    requiredNamed = parser.add_argument_group('required named arguments')
    requiredNamed.add_argument("-f",
                               "--pedfile",
                               help="File containing the pedigree to analyze",
                               required=True)

    requireOne = parser.add_argument_group('require one of')
    group = requireOne.add_mutually_exclusive_group(required=True)
    group.add_argument("-p",
                       "--probandfile",
                       help="File containing column of probands")
    group.add_argument("-l",
                       "--probandlist",
                       help="List of probands separated by commas (no spaces)")
    group.add_argument("-P", "--probandpaths-file",
                       help="File containing paths of multiple proband files."
                       " Writes symmetries of each panel to the same "
                       "directory, in a file named 'symmetry.txt'")

    args = parser.parse_args()

    # Batch mode derives one output path per proband file, so a single
    # --outfile cannot be honoured. parser.error prints the usage text and
    # exits with status 2.
    if args.probandpaths_file is not None and args.outfile is not None:
        parser.error("Incompatible command-line options: Cannot specify "
                     "outfile for batch symmetry output.")

    pedfile = os.path.expanduser(args.pedfile)

    if args.probandfile is not None:
        probands = _read_int_column(args.probandfile)
    if args.probandlist is not None:
        probands = [int(item) for item in args.probandlist.split(',')]
    if args.probandpaths_file is not None:
        probandpaths = _read_stripped_lines(args.probandpaths_file)
        allprobands = [_read_int_column(path) for path in probandpaths]
        # os.path.join keeps a bare filename's output in the current
        # directory; plain concatenation would yield '/symmetry.txt'.
        outpaths = [os.path.join(os.path.dirname(path), 'symmetry.txt')
                    for path in probandpaths]

    # Each print takes one pre-formatted argument so the text is the same
    # under the Python 2 print statement and the Python 3 print function.
    print("Loading pedigree from file: %s" % pedfile)
    P = ped.Pedigree(pedfile)
    print("Done.")
    print("Finding symmetries...")
    PS = PedSymmetry(P)

    if args.outfile is not None:
        print("Probands: %s" % (probands,))
        print("Outputting symmetries to %s" % args.outfile)
        PS.write_symmetrical_inds(probands, args.outfile)
    elif args.probandpaths_file is not None:
        for probands, path in zip(allprobands, outpaths):
            print("Finding symmetries for probands: %s" % (probands,))
            PS.write_symmetrical_inds(probands, path)
            print("Symmetries written to: %s" % path)
    else:
        print("Probands: %s" % (probands,))
        print(PS.get_symm_inds(probands))