parser.add_argument( '--outdir', default='data/', help='optional, directory to write TreeTime output files') parser.add_argument('--ft2bin', default='fasttree2', help='optional, path to fasttree2 binary executable') parser.add_argument('--ttbin', default='treetime', help='optional, path to treetime binary executable') return parser.parse_args() if __name__ == '__main__': args = parse_args() cb = Callback() cb.callback("Retrieving genomes") fasta = retrieve_genomes(args.db, ref_file=args.ref, misstol=args.misstol) cb.callback("Reconstructing tree with {}".format(args.ft2bin)) nwk = fasttree(fasta, binpath=args.ft2bin) cb.callback("Reconstructing time-scaled tree with {}").format(args.ttbin) nexus_file = treetime(nwk, fasta, outdir=args.outdir, binpath=args.ttbin, clock=args.clock) cb.callback("")
parser.add_argument('--ft2bin', default='fasttree2', help='optional, path to fasttree2 binary executable') parser.add_argument('--ttbin', default='treetime', help='optional, path to treetime binary executable') parser.add_argument('--lineages', type=str, default=os.path.join(covizu.__path__[0], "data/pango-designation/lineages.csv"), help="optional, path to CSV file containing Pango lineage designations.") parser.add_argument('--outfile', default='data/timetree.nwk', help='output, path to write Newick tree string') return parser.parse_args() if __name__ == '__main__': args = parse_args() cb = Callback() cb.callback("Retrieving genomes") with open(args.json) as handle: by_lineage = json.load(handle) cb.callback("Parsing Pango lineage designations") handle = open(args.lineages) header = next(handle) if header != 'taxon,lineage\n': cb.callback("Error: {} does not contain expected header row 'taxon,lineage'".format(args.lineages)) sys.exit() lineages = {} for line in handle: taxon, lineage = line.strip().split(',') lineages.update({taxon: lineage})
with open(args.ref) as handle: reflen = len(seq_utils.convert_fasta(handle)[0][1]) loader = stream_local(args.infile, args.lineages, minlen=args.minlen, mindate=args.mindate, callback=callback) batcher = gisaid_utils.batch_fasta(loader, size=args.batchsize) aligned = gisaid_utils.extract_features(batcher, ref_file=args.ref, binpath=args.mmbin, nthread=args.mmthreads, minlen=args.minlen) filtered = gisaid_utils.filter_problematic(aligned, vcf_file=args.vcf, cutoff=args.poisson_cutoff, callback=callback) return gisaid_utils.sort_by_lineage(filtered, callback=callback) if __name__ == "__main__": args = parse_args() cb = Callback() # check that user has loaded openmpi module try: subprocess.check_call(['mpirun', '-np', '2', 'ls'], stdout=subprocess.DEVNULL) except FileNotFoundError: cb.callback("mpirun not loaded - run `module load openmpi/gnu`", level='ERROR') sys.exit() by_lineage = process_local(args, cb.callback) with open(args.bylineage, 'w') as handle: # export to file to process large lineages with MPI json.dump(by_lineage, handle) # reconstruct time-scaled tree timetree, residuals = build_timetree(by_lineage, args, cb.callback)
with open(args.ref) as handle: reflen = len(seq_utils.convert_fasta(handle)[0][1]) loader = stream_local(args.infile, args.pangolineages, minlen=args.minlen, mindate=args.mindate, callback=callback) batcher = gisaid_utils.batch_fasta(loader, size=args.batchsize) aligned = gisaid_utils.extract_features(batcher, ref_file=args.ref, binpath=args.mmbin, nthread=args.mmthreads, minlen=args.minlen) filtered = gisaid_utils.filter_problematic(aligned, vcf_file=args.vcf, cutoff=args.poisson_cutoff, callback=callback) return gisaid_utils.sort_by_lineage(filtered, callback=callback) if __name__ == "__main__": args = parse_args() cb = Callback() # check that user has loaded openmpi module try: subprocess.check_call(['mpirun', '-np', '2', 'ls'], stdout=subprocess.DEVNULL) except FileNotFoundError: cb.callback("mpirun not loaded - run `module load openmpi/gnu`", level='ERROR') sys.exit() # check that the user has included submodules if (not os.path.exists(os.path.join(covizu.__path__[0], "data/pango-designation/lineages.csv")) or not os.path.exists(os.path.join(covizu.__path__[0], "data/ProblematicSites_SARS-CoV2/problematic_sites_sarsCov2.vcf"))): try: subprocess.check_call("git submodule init; git submodule update", shell=True) except: cb.callback("Error adding the required submodules")
Called by batch.py via subprocess to handle lineages with excessive numbers of genomes, to process via MPI """ try: from mpi4py import MPI except ModuleNotFoundError: print("Script requires mpi4py - https://pypi.org/project/mpi4py/") sys.exit() comm = MPI.COMM_WORLD my_rank = comm.Get_rank() nprocs = comm.Get_size() # command-line execution args = parse_args() cb = Callback(t0=args.timestamp, my_rank=my_rank, nprocs=nprocs) # import lineage data from file with open(args.json) as handle: recoded = json.load(handle) if args.mode == 'deep': union, labels, indexed = unpack_recoded(recoded, args.lineage, callback=cb.callback) # export map of sequence labels to tip indices lineage_name = args.lineage.replace('/', '_') # issue #297 outfile = os.path.join(args.outdir, '{}.nwk'.format(lineage_name)) if len(indexed) == 1:
if args.url is None and "GISAID_URL" in os.environ: args.url = os.environ["GISAID_URL"] if args.user is None and "GISAID_USER" in os.environ: args.user = os.environ["GISAID_USER"] # otherwise download_feed() will prompt for username if args.password is None and "GISAID_PSWD" in os.environ: args.password = os.environ["GISAID_PSWD"] # otherwise download_feed() will prompt for password return args if __name__ == '__main__': args = parse_args() cb = Callback() cb.callback("Processing GISAID feed data") # download xz file if not specified by user if args.infile is None: args.infile = download_feed(args.url, args.user, args.password) loader = load_gisaid(args.infile, minlen=args.minlen, mindate=args.mindate, debug=args.debug) batcher = batch_fasta(loader, size=args.batchsize) aligned = extract_features(batcher, ref_file=args.ref, binpath=args.binpath,
Called by batch.py via subprocess to handle lineages with excessive numbers of genomes, to process via MPI """ try: from mpi4py import MPI except ModuleNotFoundError: print("Script requires mpi4py - https://pypi.org/project/mpi4py/") sys.exit() comm = MPI.COMM_WORLD my_rank = comm.Get_rank() nprocs = comm.Get_size() # command-line execution args = parse_args() cb = Callback(t0=args.timestamp, my_rank=my_rank, nprocs=nprocs) # import lineage data from file cb.callback('loading JSON') with open(args.json) as handle: by_lineage = json.load(handle) records = by_lineage.get(args.lineage, None) if records is None: cb.callback("ERROR: JSON did not contain lineage {}".format( args.lineage)) sys.exit() # generate distance matrices from bootstrap samples [[ MPI ]] union, labels, indexed = recode_features(records, callback=cb.callback,
batcher = gisaid_utils.batch_fasta(loader, size=args.batchsize) aligned = gisaid_utils.extract_features(batcher, ref_file=args.ref, binpath=args.mmbin, nthread=args.mmthreads, minlen=args.minlen) filtered = gisaid_utils.filter_problematic(aligned, vcf_file=args.vcf, cutoff=args.poisson_cutoff, callback=callback) return gisaid_utils.sort_by_lineage(filtered, callback=callback) if __name__ == "__main__": args = parse_args() cb = Callback() # check that user has loaded openmpi module try: subprocess.check_call(['mpirun', '-np', '2', 'ls'], stdout=subprocess.DEVNULL) except FileNotFoundError: cb.callback("mpirun not loaded - run `module load openmpi/gnu`", level='ERROR') sys.exit() # check that the user has included submodules if (not os.path.exists( os.path.join(covizu.__path__[0], "data/pango-designation/lineages.csv") ) or not os.path.exists(
help="Write data to disk for lineages above this " "threshold; otherwise work in RAM. Override " "with `--threads 1`.") parser.add_argument( "--cutoff", type=float, default=0.5, help="Bootstrap cutoff for consensus tree (default 0.5). " "Only used if --cons is specified.") return parser.parse_args() if __name__ == "__main__": # command-line execution args = parse_args() cb = Callback() cb.callback('loading lineage classifications from database') lineages = db_utils.dump_lineages(args.db) cb.callback('loading JSON') features = import_json(args.json, vcf_file=args.vcf, callback=cb.callback) by_lineage = split_by_lineage(features, lineages) for lineage, lfeatures in by_lineage.items(): cb.callback('start {}, {} entries'.format(lineage, len(lfeatures))) # calculate symmetric difference matrix and run NJ on bootstrap samples filtered = seq_utils.filter_outliers(lfeatures) trees, labels = build_trees(filtered, nboot=args.nboot,
batcher = gisaid_utils.batch_fasta(loader, size=args.batchsize) aligned = gisaid_utils.extract_features(batcher, ref_file=args.ref, binpath=args.mmbin, nthread=args.mmthreads, minlen=args.minlen) filtered = gisaid_utils.filter_problematic(aligned, vcf_file=args.vcf, cutoff=args.poisson_cutoff, callback=callback) return gisaid_utils.sort_by_lineage(filtered, callback=callback) if __name__ == "__main__": args = parse_args() cb = Callback() # check that user has loaded openmpi module try: subprocess.check_call(['mpirun', '-np', '2', 'ls'], stdout=subprocess.DEVNULL) except FileNotFoundError: cb.callback("mpirun not loaded - run `module load openmpi/gnu`", level='ERROR') sys.exit() # download xz file if not specified by user if args.infile is None: cb.callback("No input specified, downloading data from GISAID feed...") args.infile = gisaid_utils.download_feed(args.url, args.user, args.password)
type=float, default=0.5, help="Bootstrap cutoff for consensus tree (default 0.5). " "Only used if --cons is specified.") parser.add_argument("outfile", type=argparse.FileType('w'), default='data/clusters.json', help="output, dest for JSON beadplot file") return parser.parse_args() if __name__ == "__main__": args = parse_args() cb = Callback() # Generate time-scaled tree of Pangolin lineages cb.callback("Retrieving lineage genomes") fasta = treetime.retrieve_genomes(args.db, nthread=args.mmthreads, ref_file=args.ref, misstol=args.misstol, callback=cb.callback) cb.callback("Reconstructing tree with {}".format(args.ft2bin)) nwk = treetime.fasttree(fasta, binpath=args.ft2bin) cb.callback("Reconstructing time-scaled tree with {}".format(args.ttbin)) nexus_file = treetime.treetime(nwk, fasta,