def shared_to_directory(population, labeled_nodes, genome_generator,
                        recombinators, directory, min_segment_length = 0,
                        clobber = True, iterations = 1000,
                        generations_back_shared = 7):
    """Repeatedly simulate genomes and record shared segment lengths.

    For each of `iterations` independent simulations, regenerate the
    population's genomes and write the shared lengths between related
    (unlabeled, labeled) pairs to one output file per labeled node in
    `directory` (files are named after the node's id).

    min_segment_length -- segments shorter than this are ignored.
    clobber -- truncate existing output files instead of appending.
    generations_back_shared -- how far back to look for shared ancestry.
    """
    labeled_nodes = set(labeled_nodes)
    # Candidate unlabeled nodes come from the last three generations only.
    unlabeled_nodes = chain.from_iterable(generation.members
                                          for generation
                                          in population.generations[-3:])
    unlabeled_nodes = set(unlabeled_nodes) - labeled_nodes
    print("Finding related pairs.")
    pairs = related_pairs(unlabeled_nodes, labeled_nodes, population,
                          generations_back_shared)
    print("{} related pairs.".format(len(pairs)))
    print("Opening file descriptors.")
    mode = "w" if clobber else "a"
    # One output file per labeled node, named by node id.
    fds = {node: open(join(directory, str(node._id)), mode)
           for node in labeled_nodes}
    try:
        print("Calculating shared lengths.")
        for i in range(iterations):
            print("iteration {}".format(i))
            print("Cleaning genomes.")
            population.clean_genomes()
            print("Generating genomes")
            generate_genomes(population, genome_generator, recombinators, 3)
            print("Calculating shared length")
            _calculate_shared_to_fds(pairs, fds, min_segment_length)
    finally:
        # Close the files even if an iteration raises; the original
        # version leaked every descriptor on error.
        for fd in fds.values():
            fd.close()
# Ejemplo n.º 2  (scraping artifact: stray snippet separator and a bare "0";
# commented out so the module stays syntactically valid)
def simulate_founder_stats(population, genome_generator, recombinators,
                           iterations, output_file):
    """Collect per-node founder statistics over repeated simulations.

    For each iteration, regenerate the population's genomes and, for
    every node with a genome, record (total length, segment count) per
    founder found on both the maternal and paternal chromosome copies.
    Results are written to `output_file` via `dump` keyed by node id.
    """
    node_stats = defaultdict(lambda: defaultdict(list))
    for iteration in range(iterations):
        genome_generator.reset()
        print("iteration {}".format(iteration))
        print("Cleaning genomes.")
        population.clean_genomes()
        print("Generating genomes")
        generate_genomes(population, genome_generator, recombinators, 3,
                         true_genealogy=False)
        print("Calculating founder stats")
        for member in population.members:
            # Skip members that received no genome this iteration.
            if member.genome is None:
                continue
            founder_lengths = defaultdict(list)
            # Accumulate founder segments from both diploid copies.
            diploid_founders(member.genome.mother, founder_lengths)
            diploid_founders(member.genome.father, founder_lengths)
            stats_for_node = node_stats[str(member._id)]
            for founder, lengths in founder_lengths.items():
                stats_for_node[str(founder)].append(
                    (sum(lengths), len(lengths)))

    with open(output_file, "w") as output_json:
        dump(node_stats, output_json)
def shared_to_directory(population,
                        labeled_nodes,
                        genome_generator,
                        recombinators,
                        directory,
                        min_segment_length=0,
                        clobber=True,
                        iterations=1000,
                        generations_back_shared=7,
                        non_paternity=0.0):
    """Repeatedly simulate genomes and record shared segment lengths.

    Variant that accepts a `non_paternity` rate (the actual parentage
    perturbation is currently disabled — see the note in the loop) and,
    unlike the sibling variants, does NOT remove labeled nodes from the
    unlabeled candidate set.

    min_segment_length -- segments shorter than this are ignored.
    clobber -- truncate existing output files instead of appending.
    generations_back_shared -- how far back to look for shared ancestry.
    """
    labeled_nodes = set(labeled_nodes)
    unlabeled_nodes = chain.from_iterable(
        generation.members for generation in population.generations[-3:])
    # NOTE(review): labeled nodes are deliberately left in this set
    # (subtraction is commented out), so labeled-labeled pairs may occur.
    unlabeled_nodes = set(unlabeled_nodes)  # - labeled_nodes
    print("Finding related pairs.")
    pairs = related_pairs(unlabeled_nodes, labeled_nodes, population,
                          generations_back_shared)
    print("{} related pairs.".format(len(pairs)))
    print("Opening file descriptors.")
    mode = "w" if clobber else "a"
    # One output file per labeled node, named by node id.
    fds = {
        node: open(join(directory, str(node._id)), mode)
        for node in labeled_nodes
    }
    suppressor = ParentSuppressor(non_paternity, 0.0)
    print("Calculating shared lengths.")
    try:
        for i in range(iterations):
            print("iteration {}".format(i))
            print("Cleaning genomes.")
            population.clean_genomes()
            # Parentage perturbation is disabled. To re-enable, call
            # suppressor.suppress(population) here and
            # suppressor.unsuppress() after the shared-length pass.
            print("Generating genomes")
            generate_genomes(population,
                             genome_generator,
                             recombinators,
                             3,
                             true_genealogy=False)
            print("Calculating shared length")
            _calculate_shared_to_fds(pairs, fds, min_segment_length)
    finally:
        # Close the files even if an iteration raises; the original
        # version leaked every descriptor on error.
        for fd in fds.values():
            fd.close()
def shared_to_directory(population,
                        labeled_nodes,
                        genome_generator,
                        recombinators,
                        directory,
                        min_segment_length=0,
                        clobber=True,
                        iterations=1000,
                        generations_back_shared=7):
    """Repeatedly simulate genomes and record shared segment lengths.

    For each of `iterations` independent simulations, regenerate the
    population's genomes and write the shared lengths between related
    (unlabeled, labeled) pairs to one output file per labeled node in
    `directory` (files are named after the node's id).

    min_segment_length -- segments shorter than this are ignored.
    clobber -- truncate existing output files instead of appending.
    generations_back_shared -- how far back to look for shared ancestry.
    """
    labeled_nodes = set(labeled_nodes)
    # Candidate unlabeled nodes come from the last three generations only.
    unlabeled_nodes = chain.from_iterable(
        generation.members for generation in population.generations[-3:])
    unlabeled_nodes = set(unlabeled_nodes) - labeled_nodes
    print("Finding related pairs.")
    pairs = related_pairs(unlabeled_nodes, labeled_nodes, population,
                          generations_back_shared)
    print("{} related pairs.".format(len(pairs)))
    print("Opening file descriptors.")
    mode = "w" if clobber else "a"
    # One output file per labeled node, named by node id.
    fds = {
        node: open(join(directory, str(node._id)), mode)
        for node in labeled_nodes
    }
    try:
        print("Calculating shared lengths.")
        for i in range(iterations):
            print("iteration {}".format(i))
            print("Cleaning genomes.")
            population.clean_genomes()
            print("Generating genomes")
            generate_genomes(population, genome_generator, recombinators, 3)
            print("Calculating shared length")
            _calculate_shared_to_fds(pairs, fds, min_segment_length)
    finally:
        # Close the files even if an iteration raises; the original
        # version leaked every descriptor on error.
        for fd in fds.values():
            fd.close()
# Build a hierarchical island population from a tree file, grow it for the
# requested number of generations, optionally simulate genomes, and
# optionally pickle the resulting population.
tree = tree_from_file(args.tree_file)
leaves = tree.leaves
# Attach each founder to a randomly chosen leaf of the island tree.
for person in founders:
    tree.add_individual(choice(leaves), person)
population = HierarchicalIslandPopulation(tree)

# The founders count as the first generation, hence num_generations - 1.
for _ in range(args.num_generations - 1):
    population.new_generation()

if not args.no_genomes:
    # tr = tracker.SummaryTracker()
    recombinators = recombinators_from_directory(args.recombination_dir)
    # Chromosome sizes are taken from the male recombinator's base counts.
    chrom_sizes = recombinators[Sex.Male]._num_bases
    genome_generator = RecombGenomeGenerator(chrom_sizes)
    generate_genomes(population, genome_generator, recombinators, 3)
    # tr.print_diff()
    # summary.print_(summary.summarize(muppy.get_objects()))

# genomes = [m.genome for m in population.members]
# print("genome sizes: " + str(asizeof.asizeof(genomes) // 1024))
# print("population size: " + str(asizeof.asizeof(population) // 1024))

if args.output_file:
    with open(args.output_file, "wb") as pickle_file:
        # Trees cause deep recursion in the pickle module, so we need
        # to raise the recursion limit. This is the stack depth for
        # python functions, you may need to increase the native stack
        # depth using ulimit -s
        # https://docs.python.org/3.4/library/pickle.html#what-can-be-pickled-and-unpickled
        # NOTE(review): no sys.setrecursionlimit call is visible here —
        # presumably it happens elsewhere; confirm before pickling large trees.
        dump(population, pickle_file, protocol = HIGHEST_PROTOCOL)