def launch(args):
    """Run the whole pangenome workflow from the parsed command-line options.

    Steps, in order: load the genomes (``readAnnotations`` when ``args.anno``
    is given, ``annotatePangenome`` on ``args.fasta`` otherwise), obtain the
    gene families (``readClustering`` when ``args.clusters`` is given,
    ``clustering`` otherwise), then ``computeNeighborsGraph``, ``partition``,
    save with ``writePangenome``, draw the figures and write the flat files.

    :param args: parsed command-line namespace. Attributes read here:
        ``anno``, ``fasta``, ``clusters``, ``basename``, ``output``,
        ``force``, ``tmpdir``, ``cpu``, ``nb_of_partitions``, ``rarefaction``.
    :raises Exception: if neither ``args.anno`` nor ``args.fasta`` is given,
        or if the annotations carry no gene sequences
        (``pangenome.status["geneSequences"] == "No"``) while neither
        clusters nor fasta files were provided.
    """
    # With no input at all, neither branch below loads anything and the rest
    # of the workflow would run on an empty pangenome: fail early and clearly.
    if not args.anno and args.fasta is None:
        raise Exception(
            "You must provide at least one of --anno or --fasta to run the workflow."
        )

    pangenome = Pangenome()
    filename = mkFilename(args.basename, args.output, args.force)

    if args.anno:
        # Annotations are provided, we read from them.
        clusters_provided = args.clusters is not None
        # Third argument is the getSeq flag: it is switched off when the
        # gene families are supplied by the user.
        readAnnotations(pangenome, args.anno, not clusters_provided)
        writePangenome(pangenome, filename, args.force)

        if clusters_provided:
            readClustering(pangenome, args.clusters)
        else:
            if pangenome.status["geneSequences"] == "No":
                if args.fasta is None:
                    raise Exception(
                        "The gff/gbff provided did not have any sequence informations, "
                        "you did not provide clusters and you did not provide fasta file. "
                        "Thus, we do not have the information we need to continue the analysis."
                    )
                getGeneSequencesFromFastas(pangenome, args.fasta)
            # We should have the sequences here.
            clustering(pangenome, args.tmpdir, args.cpu)
    else:
        # The guard above guarantees args.fasta is not None on this path.
        annotatePangenome(pangenome, args.fasta, args.tmpdir, args.cpu)
        writePangenome(pangenome, filename, args.force)
        clustering(pangenome, args.tmpdir, args.cpu)

    computeNeighborsGraph(pangenome)
    partition(pangenome, tmpdir=args.tmpdir, cpu=args.cpu, K=args.nb_of_partitions)
    writePangenome(pangenome, filename, args.force)

    if args.rarefaction:
        makeRarefactionCurve(pangenome, args.output, args.tmpdir, cpu=args.cpu)

    nb_organisms = len(pangenome.organisms)
    if nb_organisms < 5000:
        # nocloud is turned on from 500 organisms upwards.
        drawTilePlot(pangenome, args.output, nocloud=nb_organisms >= 500)
    drawUCurve(pangenome, args.output)

    writeFlatFiles(pangenome, args.output, args.cpu, csv=True, genePA=True, gexf=True,
                   light_gexf=True, projection=True, json=True, stats=True, partitions=True)
    printInfo(filename, content=True)
def launch(args):
    """Run the whole pangenome workflow and log how long each step took.

    Steps, in order: ``check_option_workflow``, genome loading
    (``readAnnotations`` on ``args.anno`` or ``annotatePangenome`` on
    ``args.fasta``), gene families (``readClustering`` on ``args.clusters``
    or ``clustering``), ``computeNeighborsGraph``, ``partition``,
    ``predictRGP``, ``predictHotspots``, ``predictModules``. The pangenome is
    saved with ``writePangenome`` after each group of steps. Unless
    ``args.only_pangenome`` is set, spot figures, the optional rarefaction
    curve, the tile plot, the U-curve and the flat files are produced too.

    :param args: parsed command-line namespace. Attributes read here:
        ``anno``, ``fasta``, ``clusters``, ``basename``, ``output``,
        ``force``, ``tmpdir``, ``cpu``, ``identity``, ``coverage``, ``mode``,
        ``no_defrag``, ``contig_filter``, ``nb_of_partitions``,
        ``only_pangenome``, ``rarefaction``, ``disable_prog_bar``.
    :raises Exception: if the annotations carry no gene sequences
        (``pangenome.status["geneSequences"] == "No"``) while neither
        clusters nor fasta files were provided.
    """
    check_option_workflow(args)
    pangenome = Pangenome()
    filename = mkFilename(args.basename, args.output, args.force)
    writing_time, anno_time, clust_time, mod_time, desc_time = (None, None, None, None, None)
    no_bar = args.disable_prog_bar

    if args.anno:
        # Annotations are provided, we read from them.
        start_anno = time.time()
        readAnnotations(pangenome, args.anno, cpu=args.cpu, disable_bar=no_bar)
        anno_time = time.time() - start_anno

        start_writing = time.time()
        writePangenome(pangenome, filename, args.force, disable_bar=no_bar)
        writing_time = time.time() - start_writing

        if args.clusters is None and pangenome.status["geneSequences"] == "No":
            if args.fasta is None:
                raise Exception(
                    "The gff/gbff provided did not have any sequence informations, "
                    "you did not provide clusters and you did not provide fasta file. "
                    "Thus, we do not have the information we need to continue the analysis."
                )
            getGeneSequencesFromFastas(pangenome, args.fasta)

        start_clust = time.time()
        if args.clusters is not None:
            readClustering(pangenome, args.clusters, disable_bar=no_bar)
        else:
            # We should have the sequences here.
            clustering(pangenome, args.tmpdir, args.cpu, identity=args.identity,
                       coverage=args.coverage, mode=args.mode, defrag=not args.no_defrag,
                       disable_bar=no_bar)
        clust_time = time.time() - start_clust
    elif args.fasta is not None:
        start_anno = time.time()
        annotatePangenome(pangenome, args.fasta, args.tmpdir, args.cpu,
                          contig_filter=args.contig_filter, disable_bar=no_bar)
        anno_time = time.time() - start_anno

        start_writing = time.time()
        writePangenome(pangenome, filename, args.force, disable_bar=no_bar)
        writing_time = time.time() - start_writing

        start_clust = time.time()
        clustering(pangenome, args.tmpdir, args.cpu, identity=args.identity,
                   coverage=args.coverage, mode=args.mode, defrag=not args.no_defrag,
                   disable_bar=no_bar)
        clust_time = time.time() - start_clust

    # This save used to be the only one left out of writing_time, which made
    # the reported HDF5 writing time too small.
    start_writing = time.time()
    writePangenome(pangenome, filename, args.force, disable_bar=no_bar)
    writing_time += time.time() - start_writing

    start_graph = time.time()
    computeNeighborsGraph(pangenome, disable_bar=no_bar)
    graph_time = time.time() - start_graph

    start_part = time.time()
    partition(pangenome, tmpdir=args.tmpdir, cpu=args.cpu, K=args.nb_of_partitions,
              disable_bar=no_bar)
    part_time = time.time() - start_part

    start_writing = time.time()
    writePangenome(pangenome, filename, args.force, disable_bar=no_bar)
    writing_time += time.time() - start_writing

    start_regions = time.time()
    predictRGP(pangenome, disable_bar=no_bar)
    regions_time = time.time() - start_regions

    start_spots = time.time()
    predictHotspots(pangenome, args.output, disable_bar=no_bar)
    spot_time = time.time() - start_spots

    start_mods = time.time()
    predictModules(pangenome=pangenome, cpu=args.cpu, tmpdir=args.tmpdir, disable_bar=no_bar)
    mod_time = time.time() - start_mods

    start_writing = time.time()
    writePangenome(pangenome, filename, args.force, disable_bar=no_bar)
    writing_time += time.time() - start_writing

    if not args.only_pangenome:
        # Drawing the spots is accounted for in the spot timing.
        start_spot_drawing = time.time()
        spot_figures_dir = args.output + '/spot_figures'
        mkOutdir(spot_figures_dir, force=True)
        drawSpots(pangenome=pangenome, output=spot_figures_dir, spot_list='all',
                  disable_bar=no_bar)
        spot_time += time.time() - start_spot_drawing

        if args.rarefaction:
            makeRarefactionCurve(pangenome, args.output, args.tmpdir, cpu=args.cpu,
                                 disable_bar=no_bar)

        nb_organisms = len(pangenome.organisms)
        if 1 < nb_organisms < 5000:
            # nocloud is turned on from 500 organisms upwards.
            drawTilePlot(pangenome, args.output, nocloud=nb_organisms >= 500)
        drawUCurve(pangenome, args.output)

        start_desc = time.time()
        writeFlatFiles(pangenome, args.output, args.cpu, csv=True, genePA=True, gexf=True,
                       light_gexf=True, projection=True, json=True, stats=True,
                       partitions=True, regions=True, spots=True, borders=True,
                       spot_modules=True, modules=True)
        desc_time = time.time() - start_desc

    logger = logging.getLogger()
    logger.info(f"Annotation took : {round(anno_time, 2)} seconds")
    logger.info(f"Clustering took : {round(clust_time, 2)} seconds")
    logger.info(f"Building the graph took : {round(graph_time, 2)} seconds")
    logger.info(f"Partitioning the pangenome took : {round(part_time, 2)} seconds")
    logger.info(f"Predicting RGP took : {round(regions_time, 2)} seconds")
    logger.info(f"Gathering RGP into spots took : {round(spot_time, 2)} seconds")
    logger.info(f"Predicting modules took : {round(mod_time, 2)} seconds")
    logger.info(f"Writing the pangenome data in HDF5 took : {round(writing_time, 2)} seconds")
    if not args.only_pangenome:
        # desc_time is only measured when the descriptive files were written.
        logger.info(
            f"Writing descriptive files for the pangenome took : {round(desc_time, 2)} seconds"
        )
    printInfo(filename, content=True)
def launch(args):
    """Chain every step of the pangenome workflow for the given options.

    After ``check_option_workflow``, genomes come either from
    ``readAnnotations`` (``args.anno``) or from ``annotatePangenome``
    (``args.fasta``); gene families come either from ``readClustering``
    (``args.clusters``) or from ``clustering``. The neighbors graph and the
    partitions are then computed, the pangenome is saved, and the figures
    and flat files are written to ``args.output``.

    :param args: parsed command-line namespace. Attributes read here:
        ``anno``, ``fasta``, ``clusters``, ``basename``, ``output``,
        ``force``, ``tmpdir``, ``cpu``, ``identity``, ``coverage``, ``mode``,
        ``no_defrag``, ``contig_filter``, ``nb_of_partitions``,
        ``rarefaction``, ``disable_prog_bar``.
    :raises Exception: if the annotations carry no gene sequences
        (``pangenome.status["geneSequences"] == "No"``) while neither
        clusters nor fasta files were provided.
    """
    check_option_workflow(args)
    pangenome = Pangenome()
    filename = mkFilename(args.basename, args.output, args.force)
    hide_bar = args.disable_prog_bar

    def run_clustering(target):
        # Single place holding the clustering options shared by both branches.
        clustering(target, tmpdir=args.tmpdir, cpu=args.cpu, identity=args.identity,
                   coverage=args.coverage, mode=args.mode, defrag=not args.no_defrag,
                   disable_bar=hide_bar)

    def save(target):
        writePangenome(target, filename, args.force, disable_bar=hide_bar)

    if args.anno:
        # Annotation files were supplied: start from them.
        readAnnotations(pangenome, args.anno, cpu=args.cpu, disable_bar=hide_bar)
        save(pangenome)

        if args.clusters is not None:
            readClustering(pangenome, args.clusters, disable_bar=hide_bar)
        else:
            if pangenome.status["geneSequences"] == "No":
                if args.fasta is None:
                    raise Exception(
                        "The gff/gbff provided did not have any sequence informations, "
                        "you did not provide clusters and you did not provide fasta file. "
                        "Thus, we do not have the information we need to continue the analysis."
                    )
                getGeneSequencesFromFastas(pangenome, args.fasta)
            # Sequences should be available at this point.
            run_clustering(pangenome)
    elif args.fasta is not None:
        pangenome = Pangenome()
        annotatePangenome(pangenome, args.fasta, args.tmpdir, args.cpu,
                          contig_filter=args.contig_filter, disable_bar=hide_bar)
        save(pangenome)
        run_clustering(pangenome)

    computeNeighborsGraph(pangenome, disable_bar=hide_bar)
    partition(pangenome, tmpdir=args.tmpdir, cpu=args.cpu, K=args.nb_of_partitions,
              disable_bar=hide_bar)
    save(pangenome)

    if args.rarefaction:
        makeRarefactionCurve(pangenome, args.output, args.tmpdir, cpu=args.cpu,
                             disable_bar=hide_bar)

    genome_count = len(pangenome.organisms)
    if 1 < genome_count < 5000:
        # nocloud is enabled as soon as there are 500 organisms or more.
        drawTilePlot(pangenome, args.output, nocloud=not genome_count < 500)
    drawUCurve(pangenome, args.output)

    flat_outputs = {
        "csv": True,
        "genePA": True,
        "gexf": True,
        "light_gexf": True,
        "projection": True,
        "json": True,
        "stats": True,
        "partitions": True,
    }
    writeFlatFiles(pangenome, args.output, args.cpu, **flat_outputs)
    printInfo(filename, content=True)
def launch(args):
    """Run the whole pangenome workflow and log how long each step took.

    Steps, in order: genome loading (``readAnnotations`` on ``args.anno`` or
    ``annotatePangenome`` on ``args.fasta``), gene families
    (``readClustering`` on ``args.clusters`` or ``clustering``),
    ``computeNeighborsGraph``, ``partition``, ``predictRGP`` and
    ``predictHotspots``. The pangenome is saved with ``writePangenome``
    after each group of steps. The optional rarefaction curve, the tile
    plot, the U-curve and the flat files are then written to
    ``args.output``.

    :param args: parsed command-line namespace. Attributes read here:
        ``anno``, ``fasta``, ``clusters``, ``basename``, ``output``,
        ``force``, ``tmpdir``, ``cpu``, ``no_defrag``, ``nb_of_partitions``,
        ``interest``, ``rarefaction``, ``show_prog_bars``.
    :raises Exception: if neither ``args.anno`` nor ``args.fasta`` is given,
        or if the annotations carry no gene sequences
        (``pangenome.status["geneSequences"] == "No"``) while neither
        clusters nor fasta files were provided.
    """
    # Without any input no branch below runs, the timing variables stay
    # unbound and the run could only end in an UnboundLocalError much later.
    if not args.anno and args.fasta is None:
        raise Exception(
            "You must provide at least one of --anno or --fasta to run the workflow."
        )

    pangenome = Pangenome()
    filename = mkFilename(args.basename, args.output, args.force)
    show_bar = args.show_prog_bars

    if args.anno:
        # Annotations are provided, we read from them.
        # Gene sequences are not requested when the clusters are supplied.
        getSeq = args.clusters is None

        start_anno = time.time()
        readAnnotations(pangenome, args.anno, cpu=args.cpu, getSeq=getSeq, show_bar=show_bar)
        annotime = time.time() - start_anno

        start_writing = time.time()
        writePangenome(pangenome, filename, args.force, show_bar=show_bar)
        writing_time = time.time() - start_writing

        if args.clusters is None and pangenome.status["geneSequences"] == "No":
            if args.fasta is None:
                raise Exception(
                    "The gff/gbff provided did not have any sequence informations, "
                    "you did not provide clusters and you did not provide fasta file. "
                    "Thus, we do not have the information we need to continue the analysis."
                )
            getGeneSequencesFromFastas(pangenome, args.fasta)

        start_clust = time.time()
        if args.clusters is not None:
            readClustering(pangenome, args.clusters, show_bar=show_bar)
        else:
            # We should have the sequences here.
            clustering(pangenome, args.tmpdir, args.cpu, defrag=not args.no_defrag,
                       show_bar=show_bar)
        clust_time = time.time() - start_clust
    else:
        # The guard above guarantees args.fasta is not None on this path.
        start_anno = time.time()
        annotatePangenome(pangenome, args.fasta, args.tmpdir, args.cpu, show_bar=show_bar)
        annotime = time.time() - start_anno

        start_writing = time.time()
        writePangenome(pangenome, filename, args.force, show_bar=show_bar)
        writing_time = time.time() - start_writing

        start_clust = time.time()
        clustering(pangenome, args.tmpdir, args.cpu, defrag=not args.no_defrag,
                   show_bar=show_bar)
        clust_time = time.time() - start_clust

    # This save used to be the only one left out of writing_time, which made
    # the reported HDF5 writing time too small.
    start_writing = time.time()
    writePangenome(pangenome, filename, args.force, show_bar=show_bar)
    writing_time += time.time() - start_writing

    start_graph = time.time()
    computeNeighborsGraph(pangenome, show_bar=show_bar)
    graph_time = time.time() - start_graph

    start_part = time.time()
    partition(pangenome, tmpdir=args.tmpdir, cpu=args.cpu, K=args.nb_of_partitions,
              show_bar=show_bar)
    part_time = time.time() - start_part

    start_writing = time.time()
    writePangenome(pangenome, filename, args.force, show_bar=show_bar)
    writing_time += time.time() - start_writing

    start_regions = time.time()
    predictRGP(pangenome, show_bar=show_bar)
    regions_time = time.time() - start_regions

    start_spots = time.time()
    predictHotspots(pangenome, args.output, interest=args.interest, show_bar=show_bar)
    spot_time = time.time() - start_spots

    start_writing = time.time()
    writePangenome(pangenome, filename, args.force, show_bar=show_bar)
    writing_time += time.time() - start_writing

    if args.rarefaction:
        makeRarefactionCurve(pangenome, args.output, args.tmpdir, cpu=args.cpu,
                             show_bar=show_bar)

    nb_organisms = len(pangenome.organisms)
    if 1 < nb_organisms < 5000:
        # nocloud is turned on from 500 organisms upwards.
        drawTilePlot(pangenome, args.output, nocloud=nb_organisms >= 500)
    drawUCurve(pangenome, args.output)

    start_desc = time.time()
    writeFlatFiles(pangenome, args.output, args.cpu, csv=True, genePA=True, gexf=True,
                   light_gexf=True, projection=True, json=True, stats=True,
                   partitions=True, regions=True, spots=True)
    desc_time = time.time() - start_desc

    logger = logging.getLogger()
    logger.info(f"Annotation took : {round(annotime,2)} seconds")
    logger.info(f"Clustering took : {round(clust_time,2)} seconds")
    logger.info(f"Building the graph took : {round(graph_time,2)} seconds")
    # Spelling of "Partitioning" fixed in this message.
    logger.info(f"Partitioning the pangenome took : {round(part_time,2)} seconds")
    logger.info(f"Predicting RGP took : {round(regions_time,2)} seconds")
    logger.info(f"Gathering RGP into spots took : {round(spot_time,2)} seconds")
    logger.info(f"Writing the pangenome data in HDF5 took : {round(writing_time,2)} seconds")
    logger.info(f"Writing descriptive files for the pangenome took : {round(desc_time,2)} seconds")
    printInfo(filename, content=True)