def simplepartition(path, npart):
    """Split the metadata pickle at `path` into `npart` roughly equal chunks,
    each saved under its own partition filename."""
    if npart < 2:
        exit()

    tc_class = TollCheck(path=path)
    metadata = tc_class.pickleIt
    all_names = list(metadata.keys())

    # fractional window keeps chunks balanced; round() picks the boundaries
    window = len(all_names) / npart if len(all_names) >= npart else 1
    start = 0
    written = []

    for part in range(npart):
        outhiddenfile = METADATAFILE.format(part)
        chunk_names = all_names[round(start):round(start + window)]
        chunk = {name: metadata[name] for name in chunk_names}
        tc_class.__save_obj__(chunk, outhiddenfile)
        written += chunk_names
        # stop early once every key has been written out
        if len(written) == len(all_names):
            break
        start += window
def main(path, pattern, counter_pattern, isdir):
    """Create the initial metadata object from filename patterns under `path`.

    Refuses to run when partitioned metadata files are already present.
    """
    leftovers = []
    for prefix in METADATAFILE:
        leftovers.extend(glob.glob(os.path.join(path, prefix + "*")))
    if leftovers:
        sys.stderr.write(HIDDERR % path)
        exit()

    chosen = []
    for pat in pattern:
        chosen.extend(catch_names(path, pat, isdir))

    # drop anything matching the counter patterns, if given
    if counter_pattern:
        excluded = []
        for cpat in counter_pattern:
            excluded.extend(catch_names(path, cpat, isdir))
        chosen = set(chosen) - set(excluded)

    metadata = {name: {} for name in sorted(chosen)}
    tc = TollCheck(path=path)
    tc.__save_obj__(obj=metadata, name=os.path.join(path, METADATAFILE[0]))
def main():
    """Entry point for step6b: build flank alignment files, then record
    the step in the metadata."""
    args = getOpts()
    checker = TollCheck(path=args.path, step="step6b", req_merge=True)
    aligner = preAln(tc_class=checker,
                     threads=args.threads,
                     flank=True,
                     otophysi=False,
                     pattern=args.fasta)
    aligner.create_files()
    checker.massiveaddition()
def main():
    """Entry point for step6macse: refine alignments with MACSE, then
    record the step in the metadata."""
    args = getOpts()
    checker = TollCheck(path=args.path, step="step6macse", branch=args.branch)
    # divide the total memory budget (MB) evenly across threads
    per_thread_mb = int((args.memory * 1024) / args.threads)
    runner = macse(tc_class=checker,
                   suffix=args.suffix,
                   homovalue=args.min_homo,
                   otophysi=False,
                   memory=per_thread_mb,
                   threads=args.threads,
                   keep=args.keepdb)
    runner.run()
    checker.massiveaddition()
def main(path):
    """Merge all partitioned metadata files under `path` back into one
    pickle, deleting each partition file as it is consumed."""
    matches = glob.glob(os.path.join(path, METADATAFILE + "*"))
    if not matches:
        exit()
    checker = TollCheck(path=path)
    merged = {}
    for fname in matches:
        merged.update(checker.__load_info__(fname))
        os.remove(fname)
    checker.__save_obj__(merged)
def main():
    """Entry point for step2b: run samtools over the checked files."""
    args = getOpts()
    checker = TollCheck(path=args.path, step="step2b", branch=args.branch)
    samtools(tc_class=checker, threads=args.ncpu).run()
def at(path, step, branch, isdir, step_choices):
    """Mark `step` as completed at `path`, once per branch when a list of
    branches is given, otherwise once with the branch left as-is."""
    if not isdir:
        checkstep(step, step_choices)
    # a None branch collapses to a single call with branch=None
    branches = branch if branch is not None else [branch]
    for b in branches:
        fishfiles = TollCheck(path=path, step=step, branch=b)
        fishfiles.massiveaddition(isdir=isdir)
def main():
    """Entry point for step5b: flank-based filtering of sequences."""
    args = getOpts()
    checker = TollCheck(path=args.path, step="step5b", branch=args.branch)
    filtering = Flankfiltering(tc_class=checker,
                               identity=args.identity,
                               threads=args.threads,
                               fasta=args.fasta,
                               memory=args.memory * 1024,  # GB -> MB
                               keep=args.keepdb)
    filtering.run()
def main():
    """Entry point for step3: Velvet assembly of the selected fastq files."""
    args = getOpts()
    checker = TollCheck(path=args.path,
                        pattern=args.fastq,
                        step="step3",
                        branch=args.branch)
    Velvet(tc_class=checker, assem=args.assem, threads=args.threads).run()
def create_files(self):
    """Collect candidate files from every core-name directory in parallel,
    extract the sequences for each exon, write per-exon files, and record
    the exons that yielded sequences in the alignment-directory metadata.
    """
    self.create_dir()
    exonlist = self.get_exonlists()
    taken_exons = []
    # taking care of load time
    # on threads
    with Pool(processes=self.threads) as p:
        # gather non-empty results from every core-name directory
        all_files = []
        for path in self.check_corenames:
            tmp = p.map(self.get_allfiles, glob.glob(ospj(path, "*")))
            all_files += filter(None, tmp)
        # print(all_files)
        for iexon in exonlist:
            # NOTE(review): get_exonseq workers appear to read
            # self.glob_exon; with a multiprocessing Pool an attribute
            # set after the pool was created may not be visible in the
            # worker processes on every start method — confirm whether
            # this Pool is process- or thread-based.
            self.glob_exon = iexon
            # print(self.glob_exon)
            # with Pool(processes = self.threads) as p:
            tmp_seqs = p.map(self.get_exonseq, all_files)
            tmp_filte = list(filter(None, tmp_seqs))
            if tmp_filte:
                # print(self.glob_exon)
                # print(tmp_filte)
                # with Pool(processes = self.threads) as p:
                # [*...] forces the lazy map result; write_seqs is run
                # for its side effect of writing the sequence files
                [*p.map(self.write_seqs, tmp_filte)]
                taken_exons += [self.glob_exon]
    # create metadata for exons
    if taken_exons:
        # keys are padded with four trailing spaces — presumably to match
        # the metadata key format used elsewhere; TODO confirm
        obj = {i + " " * 4: {} for i in taken_exons}
        tc = TollCheck(path=self.aln_dir)
        tc.__save_obj__(obj=obj)
def main():
    """Entry point for step4: targeted assembly with aTRAM."""
    args = getOpts()
    checker = TollCheck(path=args.path, step="step4", branch=args.branch)
    assembler = aTRAM(tc_class=checker,
                      threads=args.threads,
                      fastq=args.fastq,
                      velvet=args.velvetout,
                      iterations=args.iterations,
                      assambler=args.assambler,
                      memory=args.memory,
                      keep=args.keepdb,
                      runat=args.run_at)
    assembler.run()
def main():
    """Entry point for step1: read trimming with Trimmomatic."""
    args = getOpts()
    checker = TollCheck(path=args.path, step="step1", branch=args.branch)
    trimmer = Trimmomatic(tc_class=checker,
                          adapters=args.adapters,
                          threads=args.ncpu,
                          illuminaclip=args.illuminaclip,
                          leading=args.leading,
                          trailing=args.trailing,
                          sliding=args.sliding,
                          minlen=args.minlen,
                          keep=args.keepdb)
    trimmer.run()
def main():
    """Entry point for step5: run CD-HIT-EST then Exonerate for the
    taxonomic group selected on the command line."""
    parser = getOpts()
    args = parser.parse_args()
    choices = {
        "Percomorph": "step5percomorph",
        "Elopomorph": "step5elopomorph",
        "Osteoglossomorph": "step5osteoglossomorph",
        "Otophysi": "step5otophysi"
    }
    # case-insensitive match of the requested group against the choices
    wanted = args.group[0].lower()
    step = ""
    for group, group_step in choices.items():
        if wanted == group.lower():
            step = group_step
    if not step:
        sys.stderr.write("\nPlease introduce a proper group\n\n")
        parser.print_help()
        exit()
    checker = TollCheck(path=args.path, step=step, branch=args.branch)
    cdhitest = Cdhit(tc_class=checker,
                     identity=args.identity,
                     threads=args.threads,
                     fasta=args.fasta,
                     memory=args.memory * 1024)
    # Exonerate reuses CD-HIT's names with a slightly relaxed identity
    exonerate = Exonerate(tc_class=checker,
                          threads=args.threads,
                          memory=args.memory * 1024,
                          checked_names=cdhitest.check_corenames,
                          identity=cdhitest.identity - 0.01,
                          keep=args.keepdb)
    cdhitest.run()
    exonerate.run(input=cdhitest.processed)
def tometadata(path, partition_list=None, step_choices=None):
    """Print the metadata as an aligned table on stdout.

    Without partition files a two-column (Directory | Steps) table is
    printed from the main pickle; with partition files a three-column
    table (Branch | Directory | Steps) is printed, one branch per file.
    """
    fishfiles = TollCheck(path=path)

    if partition_list is None:
        partitions = glob.glob(os.path.join(path, METADATAFILE + "*"))
    else:
        partitions = []
        for suffix in partition_list:
            partitions += glob.glob(os.path.join(path, METADATAFILE + suffix))

    if not partitions:
        dirmax, stepmax, rows = printdict(fishfiles.pickleIt, step_choices)
        fmt = "%-{}s | %-{}s\n".format(dirmax, stepmax)
        sys.stdout.write("\n")
        sys.stdout.write(fmt % ("Directory", "Steps"))
        sys.stdout.write("%s-+-%s\n" % ('-' * dirmax, '-' * stepmax))
        for row in rows:
            sys.stdout.write(fmt % row)
        sys.stdout.write("\n")
    else:
        dir_widths = []
        step_widths = []
        by_branch = {}
        for fname in partitions:
            # branch key is whatever follows "_part" in the filename
            branch_key = re.sub(".+_part(.+)", "\\1", fname)
            dirmax, stepmax, rows = printdict(fishfiles.__load_info__(fname),
                                              step_choices)
            dir_widths.append(dirmax)
            step_widths.append(stepmax)
            by_branch[branch_key] = rows
        # widest entry across all partitions sets each column width
        dir_w = max(dir_widths)
        step_w = max(step_widths)
        fmt = "%-6s | %-{}s | %-{}s\n".format(dir_w, step_w)
        sys.stdout.write("\n")
        sys.stdout.write(fmt % ("Branch", "Directory", "Steps"))
        sys.stdout.write("%s-+-%s-+-%s\n"
                         % ('-' * 6, '-' * dir_w, '-' * step_w))
        for branch_key in sorted(by_branch):
            for row in by_branch[branch_key]:
                sys.stdout.write(fmt % ((branch_key, ) + row))
        sys.stdout.write("\n")