def shared_to_directory(population, labeled_nodes, genome_generator,
                        recombinators, directory, min_segment_length=0,
                        clobber=True, iterations=1000,
                        generations_back_shared=7):
    """Repeatedly simulate genomes and write shared-segment lengths to files.

    For each labeled node a file named after ``node._id`` is opened in
    ``directory``; every iteration regenerates the population's genomes and
    appends the shared lengths for each related (unlabeled, labeled) pair.

    Args:
        population: population object providing ``generations``,
            ``clean_genomes`` and members for genome generation.
        labeled_nodes: iterable of nodes with known identity; one output
            file is created per labeled node.
        genome_generator, recombinators: passed through to
            ``generate_genomes``.
        directory: output directory for the per-node files.
        min_segment_length: minimum IBD segment length forwarded to
            ``_calculate_shared_to_fds``.
        clobber: if True truncate existing output files, else append.
        iterations: number of simulate-and-measure rounds.
        generations_back_shared: how far back ``related_pairs`` searches
            for common ancestry.
    """
    labeled_nodes = set(labeled_nodes)
    # Unlabeled candidates are drawn from the three most recent generations.
    unlabeled_nodes = chain.from_iterable(
        generation.members for generation in population.generations[-3:])
    unlabeled_nodes = set(unlabeled_nodes) - labeled_nodes
    print("Finding related pairs.")
    pairs = related_pairs(unlabeled_nodes, labeled_nodes, population,
                          generations_back_shared)
    print("{} related pairs.".format(len(pairs)))
    print("Opening file descriptors.")
    mode = "w" if clobber else "a"
    fds = {node: open(join(directory, str(node._id)), mode)
           for node in labeled_nodes}
    # try/finally fixes a resource leak: previously the files were only
    # closed when every iteration completed without raising.
    try:
        print("Calculating shared lengths.")
        for i in range(iterations):
            print("iteration {}".format(i))
            print("Cleaning genomes.")
            population.clean_genomes()
            print("Generating genomes")
            generate_genomes(population, genome_generator, recombinators, 3)
            print("Calculating shared length")
            _calculate_shared_to_fds(pairs, fds, min_segment_length)
    finally:
        for fd in fds.values():
            fd.close()
def simulate_founder_stats(population, genome_generator, recombinators,
                           iterations, output_file):
    """Collect per-node founder-segment statistics over repeated simulations.

    Each iteration resets the generator, regenerates genomes, and for every
    member with a genome records, per founder, a ``(total_length, count)``
    tuple of the segments inherited from that founder (maternal and paternal
    copies combined).  The accumulated mapping
    ``{node_id: {founder_id: [(total, count), ...]}}`` is dumped as JSON to
    ``output_file``.
    """
    node_stats = defaultdict(lambda: defaultdict(list))
    for iteration in range(iterations):
        genome_generator.reset()
        print("iteration {}".format(iteration))
        print("Cleaning genomes.")
        population.clean_genomes()
        print("Generating genomes")
        generate_genomes(population, genome_generator, recombinators, 3,
                         true_genealogy=False)
        print("Calculating founder stats")
        for member in population.members:
            # Skip members for which no genome was generated.
            if member.genome is None:
                continue
            segment_lengths = defaultdict(list)
            # Accumulate founder segments from both haplotypes.
            diploid_founders(member.genome.mother, segment_lengths)
            diploid_founders(member.genome.father, segment_lengths)
            stats_for_node = node_stats[str(member._id)]
            for founder, lengths in segment_lengths.items():
                stats_for_node[str(founder)].append(
                    (sum(lengths), len(lengths)))
    with open(output_file, "w") as output_json:
        dump(node_stats, output_json)
def shared_to_directory(population, labeled_nodes, genome_generator,
                        recombinators, directory, min_segment_length=0,
                        clobber=True, iterations=1000,
                        generations_back_shared=7, non_paternity=0.0):
    """Repeatedly simulate genomes and write shared-segment lengths to files.

    Variant of the sibling implementation that (a) does NOT exclude labeled
    nodes from the unlabeled candidate set (the subtraction is commented
    out) and (b) accepts a ``non_paternity`` rate.

    NOTE(review): ``non_paternity`` is currently inert — a
    ``ParentSuppressor`` is constructed but its ``suppress``/``unsuppress``
    calls are commented out below.  Confirm whether the perturbation should
    be re-enabled before relying on this parameter.
    """
    labeled_nodes = set(labeled_nodes)
    unlabeled_nodes = chain.from_iterable(
        generation.members for generation in population.generations[-3:])
    # Deliberately keeps labeled nodes in the candidate set (subtraction
    # disabled) — unlike the other shared_to_directory variant.
    unlabeled_nodes = set(unlabeled_nodes)  # - labeled_nodes
    print("Finding related pairs.")
    pairs = related_pairs(unlabeled_nodes, labeled_nodes, population,
                          generations_back_shared)
    print("{} related pairs.".format(len(pairs)))
    print("Opening file descriptors.")
    mode = "w" if clobber else "a"
    fds = {node: open(join(directory, str(node._id)), mode)
           for node in labeled_nodes}
    suppressor = ParentSuppressor(non_paternity, 0.0)
    # try/finally fixes a resource leak: previously the files were only
    # closed when every iteration completed without raising.
    try:
        print("Calculating shared lengths.")
        for i in range(iterations):
            print("iteration {}".format(i))
            print("Cleaning genomes.")
            population.clean_genomes()
            # Parentage perturbation currently disabled — see NOTE above.
            # print("Perturbing parentage")
            # suppressor.suppress(population)
            print("Generating genomes")
            generate_genomes(population, genome_generator, recombinators, 3,
                             true_genealogy=False)
            print("Calculating shared length")
            _calculate_shared_to_fds(pairs, fds, min_segment_length)
            # print("Fixing perturbation")
            # suppressor.unsuppress()
    finally:
        for fd in fds.values():
            fd.close()
def shared_to_directory(population, labeled_nodes, genome_generator,
                        recombinators, directory, min_segment_length=0,
                        clobber=True, iterations=1000,
                        generations_back_shared=7):
    """Repeatedly simulate genomes and write shared-segment lengths to files.

    Opens one output file per labeled node (named after ``node._id``) in
    ``directory``, then for ``iterations`` rounds regenerates the
    population's genomes and appends the shared lengths for each related
    (unlabeled, labeled) pair via ``_calculate_shared_to_fds``.

    Args:
        population: population providing ``generations`` and
            ``clean_genomes``.
        labeled_nodes: iterable of identity-known nodes.
        genome_generator, recombinators: forwarded to ``generate_genomes``.
        directory: destination directory for per-node files.
        min_segment_length: minimum segment length to record.
        clobber: truncate (True) or append to (False) existing files.
        iterations: number of simulation rounds.
        generations_back_shared: ancestry search depth for
            ``related_pairs``.
    """
    labeled_nodes = set(labeled_nodes)
    # Unlabeled candidates come from the three most recent generations.
    unlabeled_nodes = chain.from_iterable(
        generation.members for generation in population.generations[-3:])
    unlabeled_nodes = set(unlabeled_nodes) - labeled_nodes
    print("Finding related pairs.")
    pairs = related_pairs(unlabeled_nodes, labeled_nodes, population,
                          generations_back_shared)
    print("{} related pairs.".format(len(pairs)))
    print("Opening file descriptors.")
    mode = "w" if clobber else "a"
    fds = {node: open(join(directory, str(node._id)), mode)
           for node in labeled_nodes}
    # try/finally fixes a resource leak: previously the files were only
    # closed when every iteration completed without raising.
    try:
        print("Calculating shared lengths.")
        for i in range(iterations):
            print("iteration {}".format(i))
            print("Cleaning genomes.")
            population.clean_genomes()
            print("Generating genomes")
            generate_genomes(population, genome_generator, recombinators, 3)
            print("Calculating shared length")
            _calculate_shared_to_fds(pairs, fds, min_segment_length)
    finally:
        for fd in fds.values():
            fd.close()
# Build the island-model population from a tree file, simulate generations,
# optionally generate genomes, and optionally pickle the result.
tree = tree_from_file(args.tree_file)
leaves = tree.leaves
# Place each founder at a randomly chosen leaf of the island tree.
for person in founders:
    tree.add_individual(choice(leaves), person)
population = HierarchicalIslandPopulation(tree)
# The founders count as generation 0, hence num_generations - 1 new ones.
for _ in range(args.num_generations - 1):
    population.new_generation()
if not args.no_genomes:
    # tr = tracker.SummaryTracker()
    recombinators = recombinators_from_directory(args.recombination_dir)
    # presumably male/female recombinators share chromosome sizes, so the
    # male map's sizes are used for the generator — TODO confirm.
    chrom_sizes = recombinators[Sex.Male]._num_bases
    genome_generator = RecombGenomeGenerator(chrom_sizes)
    generate_genomes(population, genome_generator, recombinators, 3)
    # Disabled memory-profiling scaffolding (pympler tracker/muppy/asizeof):
    # tr.print_diff()
    # summary.print_(summary.summarize(muppy.get_objects()))
    # genomes = [m.genome for m in population.members]
    # print("genome sizes: " + str(asizeof.asizeof(genomes) // 1024))
    # print("population size: " + str(asizeof.asizeof(population) // 1024))
if args.output_file:
    with open(args.output_file, "wb") as pickle_file:
        # Trees cause deep recursion in the pickle module, so we need
        # to raise the recursion limit. This is the stack depth for
        # python functions, you may need to increase the native stack
        # depth using ulimit -s
        # https://docs.python.org/3.4/library/pickle.html#what-can-be-pickled-and-unpickled
        dump(population, pickle_file, protocol = HIGHEST_PROTOCOL)