def checkArgs(args):
    """Check the parsed command-line arguments for errors.

    ``args.o`` is passed through ``checkDir`` (second argument ``True``) and
    replaced by the value it returns. ``args.i`` may name a directory, in
    which case it is also passed through ``checkDir``, or an existing file,
    in which case it is left untouched.

    Args:
        args: Parsed argument namespace providing ``i`` and ``o``.

    Returns:
        The same ``args`` object with ``o`` (and possibly ``i``) updated.

    Raises:
        SystemExit: With status 1 when ``args.i`` is neither a directory
            nor a file.
    """
    # Local import keeps this block self-contained; only the error path needs it.
    import sys

    args.o = checkDir(args.o, True)
    if os.path.isdir(args.i):
        args.i = checkDir(args.i)
    elif not os.path.isfile(args.i):
        print("\n\t[Error] Please specify valid xls/xlsx file or directory. Exiting.")
        # quit() is a site-module helper intended for interactive sessions and
        # exits with status 0; sys.exit(1) is always available and reports failure.
        sys.exit(1)
    return args
def __init__(self, indir, outfile):
    """Initialise parser state for an input directory and an output file.

    Args:
        indir: Path to the input directory; stored as returned by
            ``unixpath.checkDir``.
        outfile: Path to the output file. Its extension (via
            ``unixpath.getExt``) is stored and is available to the
            delimiter and totals-file helpers called afterwards.
    """
    self.columns = None
    self.records = {}
    self.species = {}
    self.indir = unixpath.checkDir(indir)
    self.outfile = outfile
    self.ext = unixpath.getExt(self.outfile)
    # These helpers run after outfile/ext are set, so they may read them.
    self.delim = self.__setDelim__()
    self.totalsfile = self.__setTotalsFile__()
    # Adjacent literals are joined at compile time. The previous backslash
    # continuation sat inside the string, so any whitespace at the start of
    # the continuation line became part of the CSV header.
    self.header = (
        "Accession,CommonName,Species,Pathology#,DateOfDeath,MannerOfDeath,"
        "CauseOfDeath,Tumor,Location,Type,CancerDiagnosis,Malignant,MorphologicalDiagnosis\n"
    )
    # Make sure input directory exists
    # NOTE(review): self.indir already went through checkDir above; this second
    # call looks redundant if checkDir is idempotent - confirm, then remove.
    unixpath.checkDir(self.indir)
def __init__(self, args):
    """Collect connection settings and load the configuration.

    Reads ``args.o`` (output directory, passed through ``unixpath.checkDir``
    with ``True``) and ``args.u`` (user name), prompts for the MySQL
    password, then calls ``self.__setConfig__()``.
    """
    self.commands, self.config = [], "config.txt"
    self.outdir = unixpath.checkDir(args.o, True)
    # The prompt is shown only after the output directory has been checked.
    password_prompt = "\n\tEnter MySQL password: "
    self.password = getpass(prompt=password_prompt)
    self.user = args.u
    self.__setConfig__()
def main():
    """Parse the command line and report unsuccessful mutect2parallel samples.

    Takes one positional argument, ``outdir``, passes it through ``checkDir``
    and hands the result to ``identifyFails``.
    """
    # The text must be passed as description=: ArgumentParser's first
    # positional parameter is prog, which would put this sentence in the
    # usage line as the program name.
    parser = ArgumentParser(
        description=(
            "This script will check the output logs from "
            "mutect2parallel to identify any samples which were not successful."
        )
    )
    parser.add_argument("outdir", help="Path to output directory of mutect2 parallel.")
    args = parser.parse_args()
    outdir = checkDir(args.outdir)
    identifyFails(outdir)
def __init__(self, args):
    """Check the inputs, then read both sources and write the output.

    Reads ``args.d`` (input file, checked with ``unixpath.checkFile``),
    ``args.p`` (input directory, stored as returned by
    ``unixpath.checkDir``) and ``args.o`` (output file). The whole
    read/read/write pipeline runs from the constructor.
    """
    # Both paths are checked before any state is stored.
    unixpath.checkFile(args.d)
    self.indir = unixpath.checkDir(args.p)
    self.infile, self.outfile = args.d, args.o
    self.species = {}
    # Pipeline steps, in order.
    self.__readDeathBooks__()
    self.__readPopulationFiles__()
    self.__write__()
def __setInfiles__(self, infiles):
    """Store the input file(s) in ``self.infiles``.

    When ``infiles`` is an existing file, ``self.infiles`` is replaced by a
    one-element list. Otherwise ``infiles`` is passed through
    ``unixpath.checkDir`` and every ``*.csv``, ``*.tsv`` and ``*.txt`` match
    under the returned path is appended to the existing ``self.infiles``.
    """
    print("\n\tLocating input file(s)...")
    if os.path.isfile(infiles):
        self.infiles = [infiles]
        return
    # Patterns are appended directly to the path returned by checkDir.
    indir = unixpath.checkDir(infiles)
    for pattern in ("*.csv", "*.tsv", "*.txt"):
        matches = glob(indir + pattern)
        self.infiles.extend(matches)
def main():
    """Merge Smithsonian National Zoo data and write records and totals.

    Command-line options:
        -i  Path to input directory (required).
        -t  Path to species total directory (required).
        -o  Path to output file (required).
    """
    start = datetime.now()
    # Passed as description=: ArgumentParser's first positional parameter is
    # prog, so the sentence previously replaced the program name in usage output.
    parser = ArgumentParser(
        description=(
            "This script will merge Smithsonian National Zoo data, "
            "remove duplicates, and calculate species totals."
        )
    )
    parser.add_argument("-i", required=True, help="Path to input directory")
    parser.add_argument("-t", required=True, help="Path to species total directory.")
    parser.add_argument(
        "-o",
        required=True,
        help="Path to output file (species total file will be written to same directory).",
    )
    args = parser.parse_args()
    p = SmithsonianParser(args.i, args.o)
    p.setSpecies(unixpath.checkDir(args.t))
    p.parseInputDirectory()
    p.getSpeciesTotals()
    p.writeRecords()
    print(("\tFinished. Run time: {}\n").format(datetime.now() - start))
def __init__(self, args):
    """Load the three input tables and draw the bar plot.

    Args:
        args: Parsed arguments. Reads ``o`` (output directory, passed
            through ``unixpath.checkDir`` with ``True``) and the table
            inputs ``p``, ``d`` and ``s``.
    """
    # Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*"
    # and later removed the old names, so use whichever name this
    # installation lists.
    style = "seaborn-deep"
    if style not in pyplot.style.available:
        style = "seaborn-v0_8-deep"
    pyplot.style.use(style)
    self.approved = "Approved"
    # One entry per input table, in the order p, d, s.
    self.columns = [
        ["Infant", "Castrated"],
        ["Masspresent", "Necropsy", "Metastasis"],
        ["Approved", "Zoo"],
    ]
    self.fields = ["Infant", "Castrated", "Masspresent", "Necropsy", "Metastasis", "Zoo"]
    self.id = "ID"
    self.label = [self.approved, "All"]
    self.legend = "upper left"
    self.outdir = unixpath.checkDir(args.o, True)
    self.records = {}
    print()
    # Each input table is paired with its entry in self.columns.
    for table, columns in zip([args.p, args.d, args.s], self.columns):
        self.__setTable__(table, columns)
    self.__barPlot__()
def __init__(self, args):
    """Read the input table and choose the x/y column names.

    The delimiter and column names depend on the input path ``args.i``:
    a path containing ``"anage"`` is read as tab-delimited with the
    "Birth weight (g)" / "Weaning weight (g)" columns; a path containing
    ``"Amniote"`` uses ``birth_or_hatching_weight_g`` / ``weaning_weight_g``;
    anything else takes the column names from ``args.x`` and ``args.y``.
    ``args.o`` is passed through ``unixpath.checkDir`` with ``True``.
    """
    print("\n\tReading input file...")
    infile = args.i
    delim = ","
    if "anage" in infile:
        # Only this branch switches to a tab delimiter.
        delim = "\t"
        self.x, self.y = "Birth weight (g)", "Weaning weight (g)"
    elif "Amniote" in infile:
        self.x, self.y = "birth_or_hatching_weight_g", "weaning_weight_g"
    else:
        self.x, self.y = args.x, args.y
    self.df = read_csv(infile, delimiter=delim, header=0, index_col=0)
    self.outdir = unixpath.checkDir(args.o, True)
    # Three groups of field names.
    maturity_fields = ["female_maturity", "male_maturity", "Gestation", "Weaning", "Infancy"]
    life_history_fields = [
        "litter_size",
        "litters_year",
        "interbirth_interval",
        "max_longevity",
        "metabolic_rate",
        "adult_weight",
    ]
    weight_fields = ["birth_weight", "weaning_weight", "adult_weight", "growth_rate"]
    self.fields = [maturity_fields, life_history_fields, weight_fields]
def main():
    """Filter mutect2 output files in parallel.

    Command-line options:
        -t         Number of threads (default 1, capped at ``cpu_count()``).
        -c         Path to the config file.
        -o         Output directory, if different from the one in the config.
        --cleanup  Remove intermediary files.
        --force    Re-run filtering instead of resuming.
    """
    starttime = datetime.now()
    # Passed as description=: ArgumentParser's first positional parameter is
    # prog, so the sentence previously replaced the program name in usage output.
    parser = ArgumentParser(description="This script will filter mutect2 output files.")
    parser.add_argument("-t", type=int, default=1, help="Number of threads.")
    parser.add_argument(
        "-c",
        help=(
            "Path to config file containing reference genome, java jars "
            "(if using), and mutect options (required; input files are read "
            "from sub-directories in output_directory "
            "and output will be written to same sub-directory)."
        ),
    )
    parser.add_argument(
        "-o",
        help="Output directory (if different from directory in config file).",
    )
    parser.add_argument(
        "--cleanup",
        action="store_true",
        default=False,
        help="Remove intermediary files (default is to keep them).",
    )
    parser.add_argument(
        "--force",
        action="store_true",
        default=False,
        help="Force script to re-run filtering (resumes from last complete step by default).",
    )
    args = parser.parse_args()
    checkBin()
    # Never request more worker processes than there are CPUs.
    args.t = min(args.t, cpu_count())
    # Load config file and discard batch template
    conf, _ = getConf(args.c)
    conf["cleanup"] = args.cleanup
    conf["force"] = args.force
    # An explicit -o overrides the output path from the config file.
    if args.o:
        args.o = checkDir(args.o, True)
    else:
        args.o = conf["outpath"]
    done, flog, blog, ulog = getComplete(args.o, args.force)
    variants = getOutdir(conf, args.o, done, flog, blog, ulog)
    remaining = len(variants)
    pool = Pool(processes=args.t)
    print(("\tComparing samples from {} sets with {} threads...\n").format(remaining, args.t))
    for x in pool.imap_unordered(filterPair, variants):
        remaining -= 1
        # Comparison kept exactly as written: filterPair's return type is not
        # visible here, and "not x[0]" would also treat None as a failure.
        if x[0] == False:  # noqa: E712
            print(("\t[Warning] Some files from {} failed comparison.").format(x[1]), flush=True)
        else:
            print(
                ("\tAll comparisons for {} run successfully. {} samples remaining.").format(
                    x[1], remaining
                ),
                flush=True,
            )
    pool.close()
    pool.join()
    print(("\n\tFinished. Runtime: {}\n").format(datetime.now() - starttime))
def __setTotalsFile__(self):
    """Return the species-totals file path and make sure its directory exists.

    The file is named ``smithsonianSpeciesTotals.<ext>`` and sits in the
    directory part of ``self.outfile``; that directory is passed through
    ``unixpath.checkDir`` with ``True``.
    """
    outdir = unixpath.checkDir(os.path.dirname(self.outfile), True)
    # self.ext is appended straight after the dot.
    filename = "smithsonianSpeciesTotals." + self.ext
    return os.path.join(outdir, filename)