def run_pipeline(args):
    """Merge per-sample pangenome (gene) results for every selected species.

    For each species chosen by ``merge.select_species`` this creates an
    output directory, loads the gene cluster map, builds and writes the
    pangenome matrices, and records per-sample summary info plus a README.
    """
    print("Identifying species and samples")
    selected = merge.select_species(args, dtype='genes')

    # First pass: report what was selected.
    for sp in selected:
        print(" %s" % sp.id)
        print(" count genomes: %s" % sp.info['count_genomes'])
        print(" count samples: %s" % len(sp.samples))

    print("\nMerging genes")
    # Second pass: do the actual merging, one species at a time.
    for sp in selected:
        print(" %s" % sp.id)
        sp.dir = os.path.join(args['outdir'], sp.id)
        if not os.path.isdir(sp.dir):
            os.mkdir(sp.dir)
        read_cluster_map(sp, args['db'], args['cluster_pid'])
        print(" building pangenome matrices")
        build_gene_matrices(sp, min_copy=args['min_copy'])
        write_gene_matrices(sp)
        print(" writing summary statistics")
        sp.write_sample_info(dtype='genes', outdir=args['outdir'])
        write_readme(args, sp)
        print(" done!")
def run_pipeline(args):
    """Merge per-sample SNP results for each selected species, running one
    worker process per species with concurrency bounded by args['threads'].

    Side effects: sets module-level globals ``species_list`` and
    ``global_args`` so that spawned child processes (via ``psw_safe``)
    can access them.
    """
    print("Identifying species and samples")
    # Build an IGGdb handle from the db path when one was supplied.
    if 'db' in args:
        args['iggdb'] = IGGdb(f"{args['db']}/metadata/species_info.tsv")
    # Published as a global so worker processes can index into it.
    global species_list
    species_list = merge.select_species(args, dtype='snps')
    for species in species_list:
        print(" %s" % species.id)
        # genome_info keys are optional; print only what is present.
        if 'genome_name' in species.genome_info:
            print(" genome name: %s" % species.genome_info['genome_name'])
        if 'length' in species.genome_info:
            print(" genome length: %s" % species.genome_info['length'])
        if 'contigs' in species.genome_info:
            # Clamp to at least 1 contig; raw value may be 0 or empty-ish.
            print(" count contigs: %s" % max(1, int(species.genome_info['contigs'])))
        print(" count samples: %s" % len(species.samples))
    print("\nMerging snps")
    # Shared with child processes via module global.
    global global_args
    global_args = args
    # Semaphore gates how many species are merged concurrently; a slot is
    # acquired here before each worker starts.  NOTE(review): psw_safe
    # presumably releases ``sem`` when the worker finishes — confirm,
    # otherwise the loop deadlocks after 'threads' species.
    sem = multiprocessing.Semaphore(int(args['threads']))
    procs = []
    for index in range(0, len(species_list)):
        sem.acquire()
        procs.append(
            multiprocessing.Process(target=psw_safe, args=[index, sem]))
        procs[-1].start()
    # Wait for all workers before returning.
    for p in procs:
        p.join()
def run_pipeline(args):
    """Merge SNPs for every selected species, fanned out across threads."""
    print("Identifying species")
    # Avoid reusing one name for both the list and the loop variable.
    selected = merge.select_species(args, type='snps')
    print("Merging snps")
    batches = [{'args': args, 'species': sp} for sp in selected]
    utility.parallel(merge_snps, batches, args['threads'])
def run_pipeline(args):
    """Merge per-sample SNP results for every selected species.

    For each species: merges per-sample summary stats, builds and filters
    the per-site SNP matrix, then removes the species' temp directory.
    """
    print("Identifying species")
    species = merge.select_species(args, type='snps')
    for sp in species:
        # BUG FIX: this was a Python 2 `print "..."` statement, a
        # SyntaxError under Python 3 (the rest of the file uses print()).
        print("Merging: %s (id:%s) for %s samples" % (sp.consensus_name, sp.id, len(sp.samples)))
        print(" merging per-sample statistics")
        merge.write_summary_stats(sp.id, sp.samples, args, 'snps')
        print(" merging per-site statistics")
        build_snp_matrix(sp.id, sp.samples, args)
        print(" extracting and annotating specified sites")
        filter_snp_matrix(sp.id, sp.samples, args)
        print(" removing temporary files")
        # Temp dir layout: <outdir>/<species_id>/temp
        shutil.rmtree('%s/%s/temp' % (args['outdir'], sp.id))
def run_pipeline(args):
    """Merge per-sample gene (pangenome) results for every selected species.

    For each species: creates its output directory, builds and writes the
    pangenome matrices, writes the gene info file and summary statistics.
    """
    print("Identifying species")
    species = merge.select_species(args, type='genes')
    for sp in species:
        # BUG FIX: this was a Python 2 `print "..."` statement, a
        # SyntaxError under Python 3 (the rest of the file uses print()).
        print("Merging: %s (id:%s) for %s samples" % (sp.consensus_name, sp.id, len(sp.samples)))
        outdir = os.path.join(args['outdir'], sp.id)
        if not os.path.isdir(outdir):
            os.mkdir(outdir)
        print(" building pangenome matrices")
        build_gene_matrices(sp.id, sp.samples, args)
        write_gene_matrices(sp.id, sp.samples, args)
        print(" writing gene info file")
        write_gene_info(sp.id, args)
        print(" writing summary statistics")
        merge.write_summary_stats(sp.id, sp.samples, args, 'genes')
        print("")
def run_pipeline(args):
    """Merge per-sample gene results across samples for each species.

    Each species gets an output directory, its cluster map loaded at the
    requested identity, pangenome matrices built/written, and summary
    statistics plus a README emitted.
    """
    print("Identifying species")
    for species in merge.select_species(args, type='genes'):
        print("Merging: %s for %s samples" % (species.id, len(species.samples)))
        species.dir = os.path.join(args['outdir'], species.id)
        if not os.path.isdir(species.dir):
            os.mkdir(species.dir)
        read_cluster_map(species, args['db'], args['cluster_pid'])
        print(" building pangenome matrices")
        build_gene_matrices(species, min_copy=args['min_copy'])
        write_gene_matrices(species)
        print(" writing summary statistics")
        merge.write_summary_stats(species.id, species.samples, args, 'genes')
        write_readme(args, species)
        print("")
def run_pipeline(args):
    """Merge SNPs across samples for each selected species.

    Per species: batches its samples across threads, merges count data
    into temp matrices, calls SNPs into sharded tables, merges the shards
    into the final outputs, writes metadata, then deletes the temp dir.
    """
    print("Identifying species and samples")
    selected = merge.select_species(args, dtype='snps')

    # Report the selection before doing any work.
    for sp in selected:
        print(" %s" % sp.id)
        info = sp.genome_info
        if 'genome_name' in info:
            print(" genome name: %s" % info['genome_name'])
        if 'length' in info:
            print(" genome length: %s" % info['length'])
        if 'contigs' in info:
            print(" count contigs: %s" % max(1, int(info['contigs'])))
        print(" count samples: %s" % len(sp.samples))

    print("\nMerging snps")
    for sp in selected:
        print(" %s" % sp.id)
        sp.tempdir = '%s/%s/temp' % (args['outdir'], sp.id)
        if not os.path.isdir(sp.tempdir):
            os.mkdir(sp.tempdir)
        # Split samples into per-thread batches for the parallel stages.
        sp.sample_lists = utility.batch_samples(sp.samples, threads=args['threads'])
        sp.num_splits = len(sp.sample_lists)
        print(" merging count data")
        parallel_build_temp_count_matrixes(sp, args)
        print(" calling SNPs")
        parallel_build_sharded_tables(sp, args)
        print(" writing output files")
        merge_sharded_tables(sp, args)
        print(" finishing")
        write_snps_readme(args, sp)
        sp.write_sample_info(dtype='snps', outdir=args['outdir'])
        shutil.rmtree(sp.tempdir)
def run_pipeline(args):
    """Merge per-sample gene results into pangenome matrices per species."""
    print("Identifying species")
    for sp in merge.select_species(args, type='genes'):
        header = "Merging: %s (id:%s) for %s samples" % (
            sp.consensus_name, sp.id, len(sp.samples))
        print(header)
        # Ensure the species' output directory exists.
        target = os.path.join(args['outdir'], sp.id)
        if not os.path.isdir(target):
            os.mkdir(target)
        print(" building pangenome matrices")
        build_gene_matrices(sp.id, sp.samples, args)
        write_gene_matrices(sp.id, sp.samples, args)
        print(" writing gene info file")
        write_gene_info(sp.id, args)
        print(" writing summary statistics")
        merge.write_summary_stats(sp.id, sp.samples, args, 'genes')
        print("")