예제 #1
0
def run_pipeline(args):
    """Merge gene-level results for every selected species.

    Species come from ``merge.select_species(args, dtype='genes')``. A short
    summary (id, genome count, sample count) is printed for each of them
    first. Then, per species, a sub-directory of ``args['outdir']`` is
    created if missing, the cluster map is read, the gene matrices are built
    and written, the sample info is written, and a readme is produced.

    Args:
        args: Mapping of pipeline options. This function itself reads the
            keys ``'outdir'``, ``'db'``, ``'cluster_pid'`` and
            ``'min_copy'``; the whole mapping is also passed on to
            ``merge.select_species`` and ``write_readme``.
    """
    print("Identifying species and samples")
    selected = merge.select_species(args, dtype='genes')
    for sp in selected:
        print("  %s" % sp.id)
        print("    count genomes: %s" % sp.info['count_genomes'])
        print("    count samples: %s" % len(sp.samples))

    print("\nMerging genes")
    for sp in selected:
        print("  %s" % sp.id)

        # Each species writes into its own sub-directory of the output dir;
        # the path is stored on the species object for the helpers below.
        sp.dir = os.path.join(args['outdir'], sp.id)
        if not os.path.isdir(sp.dir):
            os.mkdir(sp.dir)
        read_cluster_map(sp, args['db'], args['cluster_pid'])

        print("    building pangenome matrices")
        build_gene_matrices(sp, min_copy=args['min_copy'])
        write_gene_matrices(sp)

        print("    writing summary statistics")
        sp.write_sample_info(dtype='genes', outdir=args['outdir'])

        write_readme(args, sp)

        print("    done!")
예제 #2
0
def run_pipeline(args):
    """Merge SNP results, starting one worker process per selected species.

    The selected species and the argument mapping are published as the
    module-level globals ``species_list`` and ``global_args``; each worker
    is started with ``psw_safe`` as its target and receives only a species
    index plus the shared semaphore.
    NOTE(review): presumably the workers read those globals and release the
    semaphore when they finish -- confirm against ``psw_safe``.

    Args:
        args: Mapping of pipeline options. Reads ``'threads'`` (converted
            with ``int``) and, when present, ``'db'``; in that case an
            ``IGGdb`` built from ``<db>/metadata/species_info.tsv`` is
            stored back into the mapping under ``'iggdb'``.
    """
    global species_list, global_args

    print("Identifying species and samples")
    if 'db' in args:
        args['iggdb'] = IGGdb(f"{args['db']}/metadata/species_info.tsv")
    species_list = merge.select_species(args, dtype='snps')
    for sp in species_list:
        info = sp.genome_info
        print("  %s" % sp.id)
        # Genome metadata fields are optional; report only what is present.
        if 'genome_name' in info:
            print("    genome name: %s" % info['genome_name'])
        if 'length' in info:
            print("    genome length: %s" % info['length'])
        if 'contigs' in info:
            contig_count = max(1, int(info['contigs']))
            print("    count contigs: %s" % contig_count)
        print("    count samples: %s" % len(sp.samples))

    print("\nMerging snps")

    global_args = args
    # The semaphore is acquired here before every start, so at most
    # int(args['threads']) acquisitions can be outstanding at once.
    slots = multiprocessing.Semaphore(int(args['threads']))
    workers = []
    for index in range(len(species_list)):
        slots.acquire()
        worker = multiprocessing.Process(target=psw_safe,
                                         args=[index, slots])
        workers.append(worker)
        worker.start()
    for worker in workers:
        worker.join()
예제 #3
0
파일: merge_snps.py 프로젝트: palc/MIDAS
def run_pipeline(args):
    """Merge SNP results for every selected species via ``utility.parallel``.

    One work item (a dict with the keys ``'args'`` and ``'species'``) is
    built per species returned by ``merge.select_species(args, type='snps')``
    and the whole batch is handed to ``utility.parallel`` together with
    ``merge_snps`` and ``args['threads']``.

    Args:
        args: Mapping of pipeline options. Reads ``'threads'``; the mapping
            itself is embedded in every work item.
    """
    print("Identifying species")
    species_list = merge.select_species(args, type='snps')

    print("Merging snps")
    # The loop variable must not reuse the name of the collection it walks:
    # doing so silently rebinds that name to the last element afterwards.
    batches = [{'args': args, 'species': sp} for sp in species_list]
    utility.parallel(merge_snps, batches, args['threads'])
예제 #4
0
def run_pipeline(args):
	"""Merge SNP results for every selected species, one species at a time.

	For each species returned by ``merge.select_species(args, type='snps')``
	the per-sample statistics are merged, the SNP matrix is built and
	filtered, and the species' ``temp`` directory under ``args['outdir']``
	is removed.

	Args:
		args: Mapping of pipeline options. This function itself reads
			``'outdir'``; the mapping is also passed to every helper.
	"""
	print("Identifying species")
	species = merge.select_species(args, type='snps')
	for sp in species:

		# print() with a single pre-formatted string behaves the same on
		# Python 2 and Python 3; the bare print statement is a SyntaxError
		# on Python 3.
		print("Merging: %s (id:%s) for %s samples" % (sp.consensus_name, sp.id, len(sp.samples)))

		print("  merging per-sample statistics")
		merge.write_summary_stats(sp.id, sp.samples, args, 'snps')

		print("  merging per-site statistics")
		build_snp_matrix(sp.id, sp.samples, args)

		print("  extracting and annotating specified sites")
		filter_snp_matrix(sp.id, sp.samples, args)

		print("  removing temporary files")
		shutil.rmtree('%s/%s/temp' % (args['outdir'], sp.id))
예제 #5
0
def run_pipeline(args):
	"""Merge gene-level results for every selected species.

	For each species returned by ``merge.select_species(args, type='genes')``
	a sub-directory of ``args['outdir']`` is created if missing, the gene
	matrices are built and written, the gene info file is written, and the
	per-sample summary statistics are merged.

	Args:
		args: Mapping of pipeline options. This function itself reads
			``'outdir'``; the mapping is also passed to every helper.
	"""
	print("Identifying species")
	species = merge.select_species(args, type='genes')

	for sp in species:

		# print() with a single pre-formatted string behaves the same on
		# Python 2 and Python 3; the bare print statement is a SyntaxError
		# on Python 3.
		print("Merging: %s (id:%s) for %s samples" % (sp.consensus_name, sp.id, len(sp.samples)))
		outdir = os.path.join(args['outdir'], sp.id)
		if not os.path.isdir(outdir):
			os.mkdir(outdir)

		print("  building pangenome matrices")
		build_gene_matrices(sp.id, sp.samples, args)
		write_gene_matrices(sp.id, sp.samples, args)

		print("  writing gene info file")
		write_gene_info(sp.id, args)

		print("  writing summary statistics")
		merge.write_summary_stats(sp.id, sp.samples, args, 'genes')

		print("")
예제 #6
0
def run_pipeline(args):
    """Merge gene-level results for every selected species.

    For each species returned by ``merge.select_species(args, type='genes')``
    a sub-directory of ``args['outdir']`` is created if missing and stored
    on the species as ``dir``, the cluster map is read, the gene matrices
    are built and written, the per-sample summary statistics are merged,
    and a readme is produced.

    Args:
        args: Mapping of pipeline options. This function itself reads
            ``'outdir'``, ``'db'``, ``'cluster_pid'`` and ``'min_copy'``;
            the mapping is also passed to ``merge.select_species``,
            ``merge.write_summary_stats`` and ``write_readme``.
    """
    print("Identifying species")
    selected = merge.select_species(args, type='genes')

    for entry in selected:
        sample_count = len(entry.samples)
        print("Merging: %s for %s samples" % (entry.id, sample_count))

        # Per-species output directory, remembered on the species object.
        entry.dir = os.path.join(args['outdir'], entry.id)
        if not os.path.isdir(entry.dir):
            os.mkdir(entry.dir)
        read_cluster_map(entry, args['db'], args['cluster_pid'])

        print("  building pangenome matrices")
        build_gene_matrices(entry, min_copy=args['min_copy'])
        write_gene_matrices(entry)

        print("  writing summary statistics")
        merge.write_summary_stats(entry.id, entry.samples, args, 'genes')

        write_readme(args, entry)

        print("")
예제 #7
0
파일: snps.py 프로젝트: zhaoc1/MIDAS
def run_pipeline(args):
    """Merge SNP results for every selected species, one species at a time.

    A short summary is printed for each species returned by
    ``merge.select_species(args, dtype='snps')``. Then, per species, a
    temporary directory ``<outdir>/<species.id>/temp`` is created, the
    samples are split into batches, the count matrices and sharded tables
    are built, the shards are merged, the readme and sample info are
    written, and the temporary directory is removed.

    The temporary directory is only removed when every step succeeds; an
    exception in any step leaves it on disk.

    Args:
        args: Mapping of pipeline options. This function itself reads
            ``'outdir'`` and ``'threads'``; the mapping is also passed to
            the helpers it calls.
    """
    print("Identifying species and samples")
    species_list = merge.select_species(args, dtype='snps')
    for species in species_list:
        print("  %s" % species.id)
        # Genome metadata fields are optional; report only what is present.
        if 'genome_name' in species.genome_info:
            print("    genome name: %s" % species.genome_info['genome_name'])
        if 'length' in species.genome_info:
            print("    genome length: %s" % species.genome_info['length'])
        if 'contigs' in species.genome_info:
            print("    count contigs: %s" %
                  max(1, int(species.genome_info['contigs'])))
        print("    count samples: %s" % len(species.samples))

    print("\nMerging snps")
    for species in species_list:

        print("  %s" % species.id)
        species.tempdir = '%s/%s/temp' % (args['outdir'], species.id)
        # The temp dir is nested inside the per-species directory, which may
        # not exist yet; makedirs creates the missing parent as well, where
        # os.mkdir would fail.
        if not os.path.isdir(species.tempdir):
            os.makedirs(species.tempdir)
        species.sample_lists = utility.batch_samples(species.samples,
                                                     threads=args['threads'])
        species.num_splits = len(species.sample_lists)

        print("    merging count data")
        parallel_build_temp_count_matrixes(species, args)

        print("    calling SNPs")
        parallel_build_sharded_tables(species, args)

        print("    writing output files")
        merge_sharded_tables(species, args)

        print("    finishing")
        write_snps_readme(args, species)
        species.write_sample_info(dtype='snps', outdir=args['outdir'])
        shutil.rmtree(species.tempdir)
예제 #8
0
파일: merge_genes.py 프로젝트: palc/MIDAS
def run_pipeline(args):
    """Merge gene-level results for every selected species.

    For each species returned by ``merge.select_species(args, type='genes')``
    a sub-directory of ``args['outdir']`` is created if missing, the gene
    matrices are built and written, the gene info file is written, and the
    per-sample summary statistics are merged.

    Args:
        args: Mapping of pipeline options. This function itself reads
            ``'outdir'``; the mapping is also passed to every helper.
    """
    print("Identifying species")
    selected = merge.select_species(args, type='genes')

    for entry in selected:
        banner = "Merging: %s (id:%s) for %s samples" % (
            entry.consensus_name, entry.id, len(entry.samples))
        print(banner)

        # Make sure the per-species output directory exists.
        species_outdir = os.path.join(args['outdir'], entry.id)
        if not os.path.isdir(species_outdir):
            os.mkdir(species_outdir)

        print("  building pangenome matrices")
        build_gene_matrices(entry.id, entry.samples, args)
        write_gene_matrices(entry.id, entry.samples, args)

        print("  writing gene info file")
        write_gene_info(entry.id, args)

        print("  writing summary statistics")
        merge.write_summary_stats(entry.id, entry.samples, args, 'genes')

        print("")