default='data/', help='optional, directory to write TreeTime output files') parser.add_argument('--ft2bin', default='fasttree2', help='optional, path to fasttree2 binary executable') parser.add_argument('--ttbin', default='treetime', help='optional, path to treetime binary executable') return parser.parse_args() if __name__ == '__main__': args = parse_args() cb = Callback() cb.callback("Retrieving genomes") fasta = retrieve_genomes(args.db, ref_file=args.ref, misstol=args.misstol) cb.callback("Reconstructing tree with {}".format(args.ft2bin)) nwk = fasttree(fasta, binpath=args.ft2bin) cb.callback("Reconstructing time-scaled tree with {}").format(args.ttbin) nexus_file = treetime(nwk, fasta, outdir=args.outdir, binpath=args.ttbin, clock=args.clock) cb.callback("") parse_nexus(nexus_file, fasta, date_tol=args.datetol)
parser.add_argument('--ttbin', default='treetime', help='optional, path to treetime binary executable') parser.add_argument('--lineages', type=str, default=os.path.join(covizu.__path__[0], "data/pango-designation/lineages.csv"), help="optional, path to CSV file containing Pango lineage designations.") parser.add_argument('--outfile', default='data/timetree.nwk', help='output, path to write Newick tree string') return parser.parse_args() if __name__ == '__main__': args = parse_args() cb = Callback() cb.callback("Retrieving genomes") with open(args.json) as handle: by_lineage = json.load(handle) cb.callback("Parsing Pango lineage designations") handle = open(args.lineages) header = next(handle) if header != 'taxon,lineage\n': cb.callback("Error: {} does not contain expected header row 'taxon,lineage'".format(args.lineages)) sys.exit() lineages = {} for line in handle: taxon, lineage = line.strip().split(',') lineages.update({taxon: lineage}) cb.callback("Identifying lineage representative genomes")
aligned = gisaid_utils.extract_features(batcher, ref_file=args.ref, binpath=args.mmbin, nthread=args.mmthreads, minlen=args.minlen) filtered = gisaid_utils.filter_problematic(aligned, vcf_file=args.vcf, cutoff=args.poisson_cutoff, callback=callback) return gisaid_utils.sort_by_lineage(filtered, callback=callback) if __name__ == "__main__": args = parse_args() cb = Callback() # check that user has loaded openmpi module try: subprocess.check_call(['mpirun', '-np', '2', 'ls'], stdout=subprocess.DEVNULL) except FileNotFoundError: cb.callback("mpirun not loaded - run `module load openmpi/gnu`", level='ERROR') sys.exit() by_lineage = process_local(args, cb.callback) with open(args.bylineage, 'w') as handle: # export to file to process large lineages with MPI json.dump(by_lineage, handle) # reconstruct time-scaled tree timetree, residuals = build_timetree(by_lineage, args, cb.callback) timestamp = datetime.now().isoformat().split('.')[0] nwk_file = os.path.join(args.outdir, 'timetree.{}.nwk'.format(timestamp)) with open(nwk_file, 'w') as handle: Phylo.write(timetree, file=handle, format='newick') # generate beadplots and serialize to file
aligned = gisaid_utils.extract_features(batcher, ref_file=args.ref, binpath=args.mmbin, nthread=args.mmthreads, minlen=args.minlen) filtered = gisaid_utils.filter_problematic(aligned, vcf_file=args.vcf, cutoff=args.poisson_cutoff, callback=callback) return gisaid_utils.sort_by_lineage(filtered, callback=callback) if __name__ == "__main__": args = parse_args() cb = Callback() # check that user has loaded openmpi module try: subprocess.check_call(['mpirun', '-np', '2', 'ls'], stdout=subprocess.DEVNULL) except FileNotFoundError: cb.callback("mpirun not loaded - run `module load openmpi/gnu`", level='ERROR') sys.exit() # check that the user has included submodules if (not os.path.exists(os.path.join(covizu.__path__[0], "data/pango-designation/lineages.csv")) or not os.path.exists(os.path.join(covizu.__path__[0], "data/ProblematicSites_SARS-CoV2/problematic_sites_sarsCov2.vcf"))): try: subprocess.check_call("git submodule init; git submodule update", shell=True) except: cb.callback("Error adding the required submodules") sys.exit() # update submodules try: subprocess.check_call("git submodule foreach git pull origin master", shell=True) except:
if my_rank == 0: trees = [phy for batch in result for phy in batch] # flatten nested lists Phylo.write(trees, file=outfile, format='newick') elif args.mode == 'flat': # load list of lineages from text file minor_lineages = [] with open(args.lineage) as handle: for line in handle: minor_lineages.append(line.strip()) for li, lineage in enumerate(minor_lineages): if li % nprocs != my_rank: continue cb.callback("starting {}".format(lineage)) union, labels, indexed = unpack_recoded(recoded, lineage, callback=cb.callback) lineage_name = lineage.replace('/', '_') # issue #297 outfile = os.path.join(args.outdir, '{}.nwk'.format(lineage_name)) if len(indexed) == 1: # lineage only has one variant, no meaningful tree with open(outfile, 'w') as handle: handle.write('({}:0);\n'.format(labels['0'][0])) else: trees = [ bootstrap(union, indexed, args.binpath,
args.url = os.environ["GISAID_URL"] if args.user is None and "GISAID_USER" in os.environ: args.user = os.environ["GISAID_USER"] # otherwise download_feed() will prompt for username if args.password is None and "GISAID_PSWD" in os.environ: args.password = os.environ["GISAID_PSWD"] # otherwise download_feed() will prompt for password return args if __name__ == '__main__': args = parse_args() cb = Callback() cb.callback("Processing GISAID feed data") # download xz file if not specified by user if args.infile is None: args.infile = download_feed(args.url, args.user, args.password) loader = load_gisaid(args.infile, minlen=args.minlen, mindate=args.mindate, debug=args.debug) batcher = batch_fasta(loader, size=args.batchsize) aligned = extract_features(batcher, ref_file=args.ref, binpath=args.binpath, nthread=args.mmthreads, minlen=args.minlen)
# mpi4py is a hard requirement: print a pointer to the package and stop when
# it cannot be imported
try:
    from mpi4py import MPI
except ModuleNotFoundError:
    print("Script requires mpi4py - https://pypi.org/project/mpi4py/")
    sys.exit()

# rank of this process and total number of processes in the world communicator
comm = MPI.COMM_WORLD
my_rank, nprocs = comm.Get_rank(), comm.Get_size()

# command-line execution
args = parse_args()
cb = Callback(t0=args.timestamp, my_rank=my_rank, nprocs=nprocs)

# import lineage data from file
cb.callback('loading JSON')
with open(args.json) as handle:
    by_lineage = json.load(handle)

# stop when the requested lineage has no entry in the JSON
records = by_lineage.get(args.lineage)
if records is None:
    missing_msg = "ERROR: JSON did not contain lineage {}".format(args.lineage)
    cb.callback(missing_msg)
    sys.exit()

# generate distance matrices from bootstrap samples [[ MPI ]]
union, labels, indexed = recode_features(
    records,
    callback=cb.callback,
    limit=args.max_variants,
)

# export map of sequence labels to tip indices
vcf_file=args.vcf, cutoff=args.poisson_cutoff, callback=callback) return gisaid_utils.sort_by_lineage(filtered, callback=callback) if __name__ == "__main__": args = parse_args() cb = Callback() # check that user has loaded openmpi module try: subprocess.check_call(['mpirun', '-np', '2', 'ls'], stdout=subprocess.DEVNULL) except FileNotFoundError: cb.callback("mpirun not loaded - run `module load openmpi/gnu`", level='ERROR') sys.exit() # check that the user has included submodules if (not os.path.exists( os.path.join(covizu.__path__[0], "data/pango-designation/lineages.csv") ) or not os.path.exists( os.path.join( covizu.__path__[0], "data/ProblematicSites_SARS-CoV2/problematic_sites_sarsCov2.vcf" ))): try: subprocess.check_call("git submodule init; git submodule update", shell=True) except:
"with `--threads 1`.") parser.add_argument( "--cutoff", type=float, default=0.5, help="Bootstrap cutoff for consensus tree (default 0.5). " "Only used if --cons is specified.") return parser.parse_args() if __name__ == "__main__": # command-line execution args = parse_args() cb = Callback() cb.callback('loading lineage classifications from database') lineages = db_utils.dump_lineages(args.db) cb.callback('loading JSON') features = import_json(args.json, vcf_file=args.vcf, callback=cb.callback) by_lineage = split_by_lineage(features, lineages) for lineage, lfeatures in by_lineage.items(): cb.callback('start {}, {} entries'.format(lineage, len(lfeatures))) # calculate symmetric difference matrix and run NJ on bootstrap samples filtered = seq_utils.filter_outliers(lfeatures) trees, labels = build_trees(filtered, nboot=args.nboot, threads=args.threads, callback=cb.callback)
vcf_file=args.vcf, cutoff=args.poisson_cutoff, callback=callback) return gisaid_utils.sort_by_lineage(filtered, callback=callback) if __name__ == "__main__": args = parse_args() cb = Callback() # check that user has loaded openmpi module try: subprocess.check_call(['mpirun', '-np', '2', 'ls'], stdout=subprocess.DEVNULL) except FileNotFoundError: cb.callback("mpirun not loaded - run `module load openmpi/gnu`", level='ERROR') sys.exit() # download xz file if not specified by user if args.infile is None: cb.callback("No input specified, downloading data from GISAID feed...") args.infile = gisaid_utils.download_feed(args.url, args.user, args.password) by_lineage = process_feed(args, cb.callback) with open(args.bylineage, 'w') as handle: # export to file to process large lineages with MPI json.dump(by_lineage, handle) timetree, residuals = build_timetree(by_lineage, args, cb.callback)
"Only used if --cons is specified.") parser.add_argument("outfile", type=argparse.FileType('w'), default='data/clusters.json', help="output, dest for JSON beadplot file") return parser.parse_args() if __name__ == "__main__": args = parse_args() cb = Callback() # Generate time-scaled tree of Pangolin lineages cb.callback("Retrieving lineage genomes") fasta = treetime.retrieve_genomes(args.db, nthread=args.mmthreads, ref_file=args.ref, misstol=args.misstol, callback=cb.callback) cb.callback("Reconstructing tree with {}".format(args.ft2bin)) nwk = treetime.fasttree(fasta, binpath=args.ft2bin) cb.callback("Reconstructing time-scaled tree with {}".format(args.ttbin)) nexus_file = treetime.treetime(nwk, fasta, outdir=args.outdir, binpath=args.ttbin, clock=args.clock,