def archiveDo(bot, position):
    """Archive the suggestion stored in sheet row ``position``.

    The suggestion occupies columns B:E of that row. Archiving it means:
    shifting the existing archive (columns F:L, starting at row 4) down by
    one row, writing today's date plus the suggestion into F4:L4, and then
    closing the gap in the suggestion list by shifting the rows below
    ``position`` up by one. A confirmation is sent through ``bot``.

    When the module-level ``debug`` flag is set nothing is touched; only a
    "(not really)" confirmation is sent.
    """
    if debug:
        send(bot, "Suggestion moved to the archive. (not really)")
        return

    # todo move bot to a test sheet
    wks = auth()
    now = getTime()

    # Count the filled rows of both lists; data starts on row 4.
    archive_count = len(list(filter(fNonEmpty, map(fValue, wks.range('G4:G1300')))))
    suggestion_count = len(list(filter(fNonEmpty, map(fValue, wks.range('B4:B100')))))
    last_suggestion_row = suggestion_count + 4
    archive_last_row = archive_count + 4

    rolled = [fValue(cell) for cell in wks.range('B' + str(position) + ':E' + str(position))]

    # Push the archive down one row. The range is a flat list of 7-column
    # rows, so "one row down" is +7; walk backwards so that no value is
    # overwritten before it has been copied.
    archive_width = 7
    archive_block = wks.range('F4:L' + str(archive_last_row + 1))
    for idx in range(len(archive_block) - archive_width - 1, -1, -1):
        archive_block[idx + archive_width].value = archive_block[idx].value
    wks.update_cells(archive_block)

    # Write the archived suggestion into the freed top row (F4:L4); the two
    # trailing columns are blanked.
    top_row = wks.range('F4:L4')
    new_values = [
        now.strftime("%d %b %y"),
        rolled[0],
        rolled[1],
        rolled[2],
        rolled[3],
        "",
        "",
    ]
    for col, value in enumerate(new_values):
        top_row[col].value = value
    wks.update_cells(top_row)

    # Pull the remaining suggestions up one row (4 columns per row).
    suggestion_width = 4
    suggestion_block = wks.range('B' + str(position) + ':E' + str(last_suggestion_row))
    for idx in range(len(suggestion_block) - suggestion_width):
        suggestion_block[idx].value = suggestion_block[idx + suggestion_width].value
    wks.update_cells(suggestion_block)

    send(bot, "Suggestion moved to the archive.")
############################################ #Main Block ############################################ infiles, outfilename = optParse(0) #print infiles, outfilename; print "# =======================================================================" print "# Combining files:\t\t\t", infiles print "# Writing output to:\t\t\t" + outfilename print "# Writing only possible one-to-one orthologs to output file." print "# -------------------------------------" print "# " + core.getTime() + " Reading files..." file_lines = {} key_ids = {} for each in infiles: infile = open(each, "r") inlines = infile.readlines() infile.close() file_lines[each] = [] key_ids[each] = [] i = 0 for line in inlines: if i == 0:
if script_outdir_initial != None: if not os.path.isdir(script_outdir_initial): core.errorOut(8, "-z must be a valid directory") optParse(1) script_outdir = os.path.join( script_outdir_initial, os.path.basename(os.path.normpath(script_outdir))) if outdir_suffix != None: if script_outdir[-1] == "/": script_outdir = script_outdir[:len(script_outdir) - 1] + "-" + outdir_suffix + "/" else: script_outdir = script_outdir + "-" + outdir_suffix + "/" print core.getTime() + " | Creating main output directory:\t" + script_outdir os.system("mkdir '" + script_outdir + "'") logfilename = script_outdir + "supertreemaker.log" logfile = open(logfilename, "w") logfile.write("") logfile.close() ##Pre-run prep: creating log files and output directories, etc... core.logCheck( l, logfilename, "=======================================================================") core.logCheck(l, logfilename, "\tSupertree making with SDM, R, newickutils, and r8s") core.logCheck(l, logfilename, "\t\t\t" + core.getDateTime()) core.logCheck(l, logfilename,
infilename, seqdir, speclist, remstart, outdir = optParse(0) print "# =======================================================================" print "# \t\t\tRetrieving sequences in FASTA format" print "# \t\t\t" + core.getDateTime() print "# Retrieving ortholog sequences from:\t", infilename print "# Sequence directory\t\t\t" + seqdir print "# Writing combined sequence files to:\t", outdir if remstart == 1: print "# Removing start Methionines (-m 1)" else: print "# NOT removing start Methionines (-m 0)" print "# Reminder: Please ensure your species dictionary was entered correctly." print "# Note: The script will skip any lines that do not have all species." print "# -------------------------------------" print "# " + core.getTime() + " Preparing species dictionary..." specdict = {} for each in speclist: current = each.split(":") specdict[current[0]] = current[1] #print specdict; print "# -------------------------------------" print "# " + core.getTime( ) + " Reading peptide source files and extracting protein IDs..." tmp_seq_dict = {} for spec in specdict: tmp_seq_dict[spec] = core.fastaGetDict(os.path.join( seqdir, specdict[spec]))
def convCheck(cur_c, c, ropt, targets, backgrounds, d, ins, outs):
    """Scan alignments for convergent or divergent sites, one report per replicate.

    Replicates ``cur_c + 1`` through ``c`` are run. For each replicate every
    file in ``ins`` whose name contains ".fa" is read with
    ``core.fastaGetDict``; alignments in which all target and all background
    species are present are scanned column by column and matching sites are
    written to a tab-separated report.

    Args:
        cur_c: Zero-based index of the first replicate to run.
        c: Replicate index at which to stop (exclusive).
        ropt: Number of background species to draw at random from the
            module-level ``all_specs`` for every replicate. 0 means "use
            ``backgrounds`` as passed in".
        targets: Species labels forming the target group.
        backgrounds: Species labels forming the background group; ignored
            and redrawn for every replicate when ``ropt`` is non-zero.
        d: 0 reports convergent sites (all targets share one state that no
            background species has); 1 reports divergent sites (targets
            differ among themselves, backgrounds all agree, and no target
            state occurs in the background).
        ins: Directory containing the alignment files.
        outs: Output path prefix. The report is ``<outs>_<replicate>.txt``
            when ``ropt`` is non-zero and ``<outs>.txt`` otherwise.

    Raises:
        ValueError: if ``ropt`` is larger than the number of distinct
            non-target species in ``all_specs`` (the sampling loop could
            otherwise never finish).
    """
    init_c = cur_c + 1

    species_pool = list(all_specs.values()) if ropt != 0 else []
    if ropt != 0 and cur_c < c:
        available = set(spec for spec in species_pool if spec not in targets)
        if ropt > len(available):
            raise ValueError(
                "Cannot draw " + str(ropt) + " background species: only " +
                str(len(available)) + " non-target species are available.")

    while cur_c < c:
        if ropt != 0:
            outfilename = outs + "_" + str(cur_c + 1) + ".txt"
            # Rejection sampling: keep drawing until ropt distinct
            # non-target species have been collected.
            backgrounds = []
            while len(backgrounds) < ropt:
                chosenspec = random.choice(species_pool)
                if chosenspec not in targets and chosenspec not in backgrounds:
                    backgrounds.append(chosenspec)
        else:
            outfilename = outs + ".txt"

        # The context manager guarantees the report is flushed and closed
        # even if reading one of the alignments fails.
        with open(outfilename, "w") as outfile:
            outfile.write(
                "# ==============================================================================================\n"
            )
            outfile.write("# \t\t\tConvergence testing\n")
            outfile.write("# \t\t\t" + core.getDateTime() + "\n")
            outfile.write("# Using alignments in:\t\t" + ins + "\n")
            outfile.write("# Target species:\t\t\t" + ", ".join(targets) + "\n")
            if ropt != 0:
                outfile.write("# Randomly choosing " + str(ropt) +
                              " background species and performing " + str(c) +
                              " replicate tests for convergence.\n")
                outfile.write("# This is replicate number " + str(cur_c + 1) + "\n")
            outfile.write("# Background species:\t\t" + ", ".join(backgrounds) + "\n")
            outfile.write("# Writing output to:\t\t\t" + outfilename + "\n")
            if d == 0:
                outfile.write("# Checking for convergent sites.\n")
            elif d == 1:
                outfile.write("# Checking for divergent sites.\n")
            outfile.write("# ---------------------------------------------\n")

            numsites = 0
            totgenes = 0
            outfile.write("# " + core.getTime() + " Starting Scan...\n")
            outfile.write(
                "# Site#\tChromosome\tGeneID\tAlignLen\tPosition\tTargetAlleles\tBackgroundAlleles\n"
            )

            for align in os.listdir(ins):
                if ".fa" not in align:
                    continue

                # os.path.join works whether or not ins ends with a separator.
                infilename = os.path.join(ins, align)
                gid = "_".join(align.split("_")[:2])
                chr_start = align.find("chr")
                chrome = align[chr_start:chr_start + 4]
                inseqs = core.fastaGetDict(infilename)

                # A species counts as present when its label is a substring
                # of a sequence title.
                num_targets_present = 0
                num_bg_present = 0
                for title in inseqs:
                    if any(t in title for t in targets):
                        num_targets_present = num_targets_present + 1
                    if any(b in title for b in backgrounds):
                        num_bg_present = num_bg_present + 1
                if (num_targets_present != len(targets)
                        or num_bg_present != len(backgrounds)):
                    continue

                totgenes = totgenes + 1
                seqlen = len(list(inseqs.values())[0])
                t_alleles = {}
                b_alleles = {}
                for x in range(seqlen):
                    for title in inseqs:
                        for t in targets:
                            if t in title:
                                t_alleles[t] = inseqs[title][x]
                        for b in backgrounds:
                            if b in title:
                                b_alleles[b] = inseqs[title][x]

                    t_final = remGapMiss(list(t_alleles.values()))
                    b_final = remGapMiss(list(b_alleles.values()))
                    if t_final == [] or b_final == []:
                        continue
                    # Only columns where every species kept a usable state
                    # are eligible.
                    if (len(t_final) != len(targets)
                            or len(b_final) != len(backgrounds)):
                        continue

                    targets_agree = t_final.count(t_final[0]) == len(t_final)
                    if d == 0:
                        hit = targets_agree and t_final[0] not in b_final
                    elif d == 1:
                        hit = (not targets_agree
                               and b_final.count(b_final[0]) == len(b_final)
                               and not any(t in b_final for t in t_final))
                    else:
                        hit = False

                    if hit:
                        numsites = numsites + 1
                        outfile.write("\t".join([
                            str(numsites), chrome, gid, str(seqlen),
                            str(x + 1), "".join(t_final), "".join(b_final)
                        ]) + "\n")

            outfile.write("\n# " + core.getTime() + " Done!\n")
            outfile.write("# Total sites found: " + str(numsites) + "\n")
            outfile.write("# Total genes checked: " + str(totgenes) + "\n")
            outfile.write(
                "# =============================================================================================="
            )
        cur_c = cur_c + 1

    if ropt != 0:
        print(core.getTime() + " Replicates " + str(init_c) + " to " + str(c) +
              " complete.")
i = i + 1; if each.find(".fa") == -1: continue; specpos = 0; infilename = ins + each; inseqs = core.fastaGetDict(infilename); for seq in inseqs: tot_pos = tot_pos + len(inseqs[seq]); if disp_file == 1: specpos = specpos + len(inseqs[seq]); print seq + "\t" + str(len(inseqs[seq])); if disp_file == 1: print "Total\t" + str(specpos); if disp_file == 0: pstring = "100.0% complete."; sys.stderr.write('\b' * len(pstring) + pstring); elif disp_file == 1: print "----------"; print "\n" + core.getTime() + " Done!"; print "-----"; print "Total residues:\t", tot_pos; print "=======================================================================";
ins, gb_path, seqtype, m, v, l = optParse(0) starttime = core.getLogTime() if os.path.isfile(ins): fileflag = 1 indir = os.path.dirname(os.path.realpath(ins)) + "/" indir, outdir = core.getOutdir(indir, "run_gblocks", starttime) filelist = [ins] else: fileflag = 0 indir, outdir = core.getOutdir(ins, "run_gblocks", starttime) filelist = os.listdir(indir) print core.getTime() + " | Creating main output directory..." os.system("mkdir " + outdir) logfilename = outdir + "run_gblocks.log" logfile = open(logfilename, "w") logfile.write("") logfile.close() core.logCheck( l, logfilename, "=======================================================================") core.logCheck(l, logfilename, "\t\t\tMasking alignments with GBlocks") core.logCheck(l, logfilename, "\t\t\t" + core.getDateTime()) if fileflag == 1: core.logCheck(l, logfilename, "INPUT | Masking alignment from file: " + ins)
elif rep != "" and rep[1] == ":": print "INFO | Replacing all " + rep[0] + " symbols in input sequences with a random AA symbol not present in that column."; suffix = suffix + ".r"; if os.path.isfile(ins): print "OUTPUT | Writing output to the following file:\t\t" + outs; else: if outs[len(outs)-1] != "/": outs = outs + "/"; print "OUTPUT | Writing output to the following directory:\t\t" + outs; if not os.path.exists(outs): print "+Creating output directory."; os.system("mkdir " + outs); print "---------------------------------------------"; #sys.exit(); print core.getTime() + " Starting..."; numfiles = len(filelist); numbars = 0; donepercent = []; i = 0; for each in filelist: if each.find(".fa") == -1: continue; if os.path.isfile(ins): print ins; infilename = ins; outfilename = outs;
indir, script_outdir = core.getOutdir(indir, "supertreemaker", starttime); print script_outdir; print os.path.basename(os.path.normpath(script_outdir)); if script_outdir_initial != None: if not os.path.isdir(script_outdir_initial): core.errorOut(8, "-z must be a valid directory"); optParse(1); script_outdir = os.path.join(script_outdir_initial, os.path.basename(os.path.normpath(script_outdir))); if outdir_suffix != None: if script_outdir[-1] == "/": script_outdir = script_outdir[:len(script_outdir)-1] + "-" + outdir_suffix + "/"; else: script_outdir = script_outdir + "-" + outdir_suffix + "/"; print core.getTime() + " | Creating main output directory:\t" + script_outdir; os.system("mkdir '" + script_outdir + "'"); logfilename = script_outdir + "supertreemaker.log"; logfile = open(logfilename, "w"); logfile.write(""); logfile.close(); ##Pre-run prep: creating log files and output directories, etc... core.logCheck(l, logfilename, "======================================================================="); core.logCheck(l, logfilename, "\tSupertree making with SDM, R, newickutils, and r8s"); core.logCheck(l, logfilename, "\t\t\t" + core.getDateTime()); core.logCheck(l, logfilename, "INPUT | Making tree from file:\t\t\t" + infilename); core.logCheck(l, logfilename, "INPUT | Input file located in:\t\t\t" + indir); if njopt == 1: core.logCheck(l, logfilename, "INFO | Using Average Consensus method in SDM to build distance matrix.");
def convCheck(cur_c, c, ropt, targets, backgrounds, d, ins, outs):
    """Scan alignments for convergent or divergent sites, one report per replicate.

    Replicates ``cur_c + 1`` through ``c`` are run. For each replicate every
    file in ``ins`` whose name contains ".fa" is read with
    ``core.fastaGetDict``; alignments in which all target and all background
    species are present are scanned column by column and matching sites are
    written to a tab-separated report.

    Args:
        cur_c: Zero-based index of the first replicate to run.
        c: Replicate index at which to stop (exclusive).
        ropt: Number of background species to draw at random from the
            module-level ``all_specs`` for every replicate. 0 means "use
            ``backgrounds`` as passed in".
        targets: Species labels forming the target group.
        backgrounds: Species labels forming the background group; ignored
            and redrawn for every replicate when ``ropt`` is non-zero.
        d: 0 reports convergent sites (all targets share one state that no
            background species has); 1 reports divergent sites (targets
            differ among themselves, backgrounds all agree, and no target
            state occurs in the background).
        ins: Directory containing the alignment files.
        outs: Output path prefix. The report is ``<outs>_<replicate>.txt``
            when ``ropt`` is non-zero and ``<outs>.txt`` otherwise.

    Raises:
        ValueError: if ``ropt`` is larger than the number of distinct
            non-target species in ``all_specs`` (the sampling loop could
            otherwise never finish).
    """
    init_c = cur_c + 1

    species_pool = list(all_specs.values()) if ropt != 0 else []
    if ropt != 0 and cur_c < c:
        available = set(spec for spec in species_pool if spec not in targets)
        if ropt > len(available):
            raise ValueError(
                "Cannot draw " + str(ropt) + " background species: only " +
                str(len(available)) + " non-target species are available.")

    while cur_c < c:
        if ropt != 0:
            outfilename = outs + "_" + str(cur_c + 1) + ".txt"
            # Rejection sampling: keep drawing until ropt distinct
            # non-target species have been collected.
            backgrounds = []
            while len(backgrounds) < ropt:
                chosenspec = random.choice(species_pool)
                if chosenspec not in targets and chosenspec not in backgrounds:
                    backgrounds.append(chosenspec)
        else:
            outfilename = outs + ".txt"

        # The context manager guarantees the report is flushed and closed
        # even if reading one of the alignments fails.
        with open(outfilename, "w") as outfile:
            outfile.write(
                "# ==============================================================================================\n"
            )
            outfile.write("# \t\t\tConvergence testing\n")
            outfile.write("# \t\t\t" + core.getDateTime() + "\n")
            outfile.write("# Using alignments in:\t\t" + ins + "\n")
            outfile.write("# Target species:\t\t\t" + ", ".join(targets) + "\n")
            if ropt != 0:
                outfile.write("# Randomly choosing " + str(ropt) +
                              " background species and performing " + str(c) +
                              " replicate tests for convergence.\n")
                outfile.write("# This is replicate number " + str(cur_c + 1) + "\n")
            outfile.write("# Background species:\t\t" + ", ".join(backgrounds) + "\n")
            outfile.write("# Writing output to:\t\t\t" + outfilename + "\n")
            if d == 0:
                outfile.write("# Checking for convergent sites.\n")
            elif d == 1:
                outfile.write("# Checking for divergent sites.\n")
            outfile.write("# ---------------------------------------------\n")

            numsites = 0
            totgenes = 0
            outfile.write("# " + core.getTime() + " Starting Scan...\n")
            outfile.write(
                "# Site#\tChromosome\tGeneID\tAlignLen\tPosition\tTargetAlleles\tBackgroundAlleles\n"
            )

            for align in os.listdir(ins):
                if ".fa" not in align:
                    continue

                # os.path.join works whether or not ins ends with a separator.
                infilename = os.path.join(ins, align)
                gid = "_".join(align.split("_")[:2])
                chr_start = align.find("chr")
                chrome = align[chr_start:chr_start + 4]
                inseqs = core.fastaGetDict(infilename)

                # A species counts as present when its label is a substring
                # of a sequence title.
                num_targets_present = 0
                num_bg_present = 0
                for title in inseqs:
                    if any(t in title for t in targets):
                        num_targets_present = num_targets_present + 1
                    if any(b in title for b in backgrounds):
                        num_bg_present = num_bg_present + 1
                if (num_targets_present != len(targets)
                        or num_bg_present != len(backgrounds)):
                    continue

                totgenes = totgenes + 1
                seqlen = len(list(inseqs.values())[0])
                t_alleles = {}
                b_alleles = {}
                for x in range(seqlen):
                    for title in inseqs:
                        for t in targets:
                            if t in title:
                                t_alleles[t] = inseqs[title][x]
                        for b in backgrounds:
                            if b in title:
                                b_alleles[b] = inseqs[title][x]

                    t_final = remGapMiss(list(t_alleles.values()))
                    b_final = remGapMiss(list(b_alleles.values()))
                    if t_final == [] or b_final == []:
                        continue
                    # Only columns where every species kept a usable state
                    # are eligible.
                    if (len(t_final) != len(targets)
                            or len(b_final) != len(backgrounds)):
                        continue

                    targets_agree = t_final.count(t_final[0]) == len(t_final)
                    if d == 0:
                        hit = targets_agree and t_final[0] not in b_final
                    elif d == 1:
                        hit = (not targets_agree
                               and b_final.count(b_final[0]) == len(b_final)
                               and not any(t in b_final for t in t_final))
                    else:
                        hit = False

                    if hit:
                        numsites = numsites + 1
                        outfile.write("\t".join([
                            str(numsites), chrome, gid, str(seqlen),
                            str(x + 1), "".join(t_final), "".join(b_final)
                        ]) + "\n")

            outfile.write("\n# " + core.getTime() + " Done!\n")
            outfile.write("# Total sites found: " + str(numsites) + "\n")
            outfile.write("# Total genes checked: " + str(totgenes) + "\n")
            outfile.write(
                "# =============================================================================================="
            )
        cur_c = cur_c + 1

    if ropt != 0:
        print(core.getTime() + " Replicates " + str(init_c) + " to " + str(c) +
              " complete.")
print "INFO | Replacing all " + rep[ 0] + " symbols in input sequences with a random AA symbol not present in that column." suffix = suffix + ".r" if os.path.isfile(ins): print "OUTPUT | Writing output to the following file:\t\t" + outs else: if outs[len(outs) - 1] != "/": outs = outs + "/" print "OUTPUT | Writing output to the following directory:\t\t" + outs if not os.path.exists(outs): print "+Creating output directory." os.system("mkdir " + outs) print "---------------------------------------------" #sys.exit(); print core.getTime() + " Starting..." numfiles = len(filelist) numbars = 0 donepercent = [] i = 0 for each in filelist: if each.find(".fa") == -1: continue if os.path.isfile(ins): print ins infilename = ins outfilename = outs
script_outdir = script_outdir[:script_outdir.index("-") + 1] + str(counter) counter += 1 if os.path.isfile(ins): fileflag = 1 filelist = [os.path.abspath(ins)] else: fileflag = 0 filelist = os.listdir(ins) ins = os.path.abspath(ins) script_outdir = os.path.abspath(script_outdir) bestdir = os.path.join(script_outdir, "raxml-best") outdir = os.path.join(script_outdir, "raxml-out") print core.getTime() + " | Creating main output directory:\t" + script_outdir os.system("mkdir '" + script_outdir + "'") logfilename = os.path.join(script_outdir, "run_raxml.log") core.filePrep(logfilename) core.logCheck( l, logfilename, "=======================================================================") core.logCheck(l, logfilename, "\t\t\tBuilding trees with RAxML") core.logCheck(l, logfilename, "\t\t\t" + core.getDateTime()) if fileflag == 1: core.logCheck(l, logfilename, "INPUT | Making tree from file:\t\t" + ins) else: core.logCheck(l, logfilename,
def convCheck(cur_c, c, number_specs, d, ins, outs):
    """Run replicate convergence/divergence scans on randomly chosen species.

    Replicates ``cur_c + 1`` through ``c`` are run. Each replicate draws
    ``number_specs`` distinct species from the module-level ``all_specs``
    and then, for every alignment file in ``ins`` whose name contains ".fa"
    and for every ordered pair of drawn species, treats the pair as targets
    and the remaining drawn species as background. Matching sites are
    written to ``<outs>_<replicate>.txt``.

    Args:
        cur_c: Zero-based index of the first replicate to run.
        c: Replicate index at which to stop (exclusive).
        number_specs: How many species to draw per replicate.
        d: 0 reports convergent sites (both targets share a state absent
            from the background); 1 reports divergent sites (targets
            differ, backgrounds all agree, no target state occurs in the
            background).
        ins: Directory containing the alignment files.
        outs: Output path prefix.
    """
    init_c = cur_c + 1
    while cur_c < c:
        # Draw without replacement: every pick is removed from the pool.
        spec_list = list(all_specs.values())
        rep_specs = []
        while len(rep_specs) < number_specs:
            picked = random.choice(spec_list)
            rep_specs.append(picked)
            spec_list.remove(picked)

        outfilename = outs + "_" + str(cur_c + 1) + ".txt"
        # The context manager guarantees the report is flushed and closed
        # even if reading one of the alignments fails.
        with open(outfilename, "w") as outfile:
            outfile.write(
                "# ==============================================================================================\n"
            )
            outfile.write("# \t\t\tConvergence testing\n")
            outfile.write("# \t\t\t" + core.getDateTime() + "\n")
            outfile.write("# Using alignments in:\t\t" + ins + "\n")
            outfile.write("# Randomly choosing " + str(number_specs) +
                          " species and performing " + str(c) +
                          " replicate tests for convergence.\n")
            outfile.write("# This is replicate number " + str(cur_c + 1) + "\n")
            outfile.write("# Writing output to:\t\t\t" + outfilename + "\n")
            if d == 0:
                outfile.write("# Checking for convergent sites.\n")
            elif d == 1:
                outfile.write("# Checking for divergent sites.\n")
            # The newline keeps this line from running into the separator.
            outfile.write("# Using species:\t" + ",".join(rep_specs) + "\n")
            outfile.write("# ---------------------------------------------\n")

            numsites = 0
            totgenes = 0
            outfile.write("# " + core.getTime() + " Starting Scan...\n")
            outfile.write(
                "# Site#\tTargetSpecs\tChromosome\tGeneID\tAlignLen\tPosition\tTargetAlleles\tBackgroundAlleles\n"
            )

            for align in os.listdir(ins):
                if ".fa" not in align:
                    continue

                infilename = os.path.join(ins, align)
                gid = "_".join(align.split("_")[:2])
                chr_start = align.find("chr")
                chrome = align[chr_start:chr_start + 4]
                inseqs = core.fastaGetDict(infilename)
                # Species label of each title (first character and newlines
                # stripped); computed once per alignment instead of once
                # per column.
                spec_of = dict(
                    (title, title[1:].replace("\n", "")) for title in inseqs)

                # NOTE(review): every unordered pair is visited twice, as
                # (t1, t2) and (t2, t1), so sites and "genes checked" are
                # counted once per ordering. Kept as-is to preserve the
                # report contents -- confirm whether this is intended.
                for t1 in rep_specs:
                    for t2 in rep_specs:
                        if t1 == t2:
                            continue
                        targets = [t1, t2]
                        backgrounds = [
                            spec for spec in rep_specs if spec not in targets
                        ]

                        # Presence is a substring match against the titles.
                        num_targets_present = 0
                        num_bg_present = 0
                        for title in inseqs:
                            if any(t in title for t in targets):
                                num_targets_present = num_targets_present + 1
                            if any(b in title for b in backgrounds):
                                num_bg_present = num_bg_present + 1
                        if (num_targets_present != len(targets)
                                or num_bg_present != len(backgrounds)):
                            continue

                        totgenes = totgenes + 1
                        seqlen = len(list(inseqs.values())[0])
                        t_alleles = {}
                        b_alleles = {}
                        for x in range(seqlen):
                            for title in inseqs:
                                cur_spec = spec_of[title]
                                if cur_spec in targets:
                                    t_alleles[cur_spec] = inseqs[title][x]
                                if cur_spec in backgrounds:
                                    b_alleles[cur_spec] = inseqs[title][x]

                            t_final = remGapMiss(list(t_alleles.values()))
                            b_final = remGapMiss(list(b_alleles.values()))
                            if t_final == [] or b_final == []:
                                continue
                            # Only columns where every species kept a
                            # usable state are eligible.
                            if (len(t_final) != len(targets)
                                    or len(b_final) != len(backgrounds)):
                                continue

                            targets_agree = (
                                t_final.count(t_final[0]) == len(t_final))
                            if d == 0:
                                hit = (targets_agree
                                       and t_final[0] not in b_final)
                            elif d == 1:
                                hit = (not targets_agree
                                       and b_final.count(b_final[0]) == len(b_final)
                                       and not any(t in b_final for t in t_final))
                            else:
                                hit = False
                            if not hit:
                                continue

                            numsites = numsites + 1
                            if d == 0:
                                # Convergent hits are also echoed to stdout.
                                print(core.getTime() + " Convergent site found!")
                                print("Filename:\t\t" + align)
                                print("Chromosome:\t\t" + chrome)
                                print("Gene ID:\t\t" + gid)
                                print("Alignment length\t" + " " + str(seqlen))
                                print("Target alleles:\t\t" + "".join(t_final))
                                print("Background alleles:\t" + "".join(b_final))
                                print("---------------")
                            outfile.write("\t".join([
                                str(numsites), ",".join(targets), chrome, gid,
                                str(seqlen), str(x + 1), "".join(t_final),
                                "".join(b_final)
                            ]) + "\n")

            outfile.write("\n# " + core.getTime() + " Done!\n")
            outfile.write("# Total sites found: " + str(numsites) + "\n")
            outfile.write("# Total genes checked: " + str(totgenes) + "\n")
            outfile.write(
                "# =============================================================================================="
            )
        cur_c = cur_c + 1

    # This variant always randomises, so completion is always reported; the
    # previous guard read `ropt`, which is not a parameter of this function.
    print(core.getTime() + " Replicates " + str(init_c) + " to " + str(c) +
          " complete.")
reps) + " replicate tests." print "The task will be split into " + str( num_t ) + " processes and species will be chosen as the processes are split." if cd == 0: print "Checking for convergent sites." elif cd == 1: print "Checking for divergent sites." print "---------------------------------------------" if num_t > 1: bgs = [] processes = [] output = mp.Queue() reps_per_t = reps / num_t print core.getTime() + " Generating function calls..." print "Function calls and arguments are as follows:" print "convCheck([start replicate], [stop replicate], [# random species], [target species], [background species], [convergence/divergence option], [input directory], [output prefix])" print "----------" x = 0 while x < num_t: start_rep = reps_per_t * x stop_rep = reps_per_t * (x + 1) print core.getTime() + " Call " + str(x + 1) + ": convCheck(" + str( start_rep) + " , " + str(stop_rep) + " , " + str( specs) + " , " + str(cd) + ", " + indir + " , " + outfix + ")" processes.append( mp.Process(target=convCheck, args=(start_rep, stop_rep, specs, cd, indir, outfix))) x = x + 1 print "----------"
filelist = [ins]; else: fileflag = 0; indir, script_outdir = core.getOutdir(ins, "run_codeml", starttime); if outdir_suffix != None: if script_outdir[-1] == "/": script_outdir = script_outdir[:len(script_outdir)-1] + "-" + outdir_suffix + "/"; else: script_outdir = script_outdir + "-" + outdir_suffix + "/"; outdir = script_outdir + "codeml_out/"; filelist = os.listdir(indir); if aopt == 1: ancdir = script_outdir + "anc_seqs_fa/"; print core.getTime() + " | Creating main output directory:\t" + script_outdir; os.system("mkdir " + script_outdir); logfilename = script_outdir + "run_codeml.log"; logfile = open(logfilename, "w"); logfile.write(""); logfile.close(); core.logCheck(l, logfilename, "======================================================================="); core.logCheck(l, logfilename, "\t\t\tRunning codeml"); core.logCheck(l, logfilename, "\t\t\t" + core.getDateTime()); if fileflag == 1: core.logCheck(l, logfilename, "INPUT | Making tree from file:\t\t" + ins); else: core.logCheck(l, logfilename, "INPUT | Running codeml on all files in:\t" + indir); core.logCheck(l, logfilename, "INFO | PAML path set to:\t\t\t" + ppath);
indir, script_outdir = core.getOutdir(indir, "run_codeml", starttime) outdir = os.path.join(script_outdir, "codeml_out") if aopt == 1: ancdir = os.path.join(script_outdir, "anc_seqs_fa") filelist = [ins] else: fileflag = 0 indir, script_outdir = core.getOutdir(ins, "run_codeml", outdir_suffix, starttime) outdir = os.path.join(script_outdir, "codeml_out") filelist = os.listdir(indir) if aopt == 1: ancdir = os.path.join(script_outdir, "anc_seqs_fa") print core.getTime() + " | Creating main output directory:\t" + script_outdir mk_cmd = "mkdir " + script_outdir os.system(mk_cmd) logfilename = os.path.join(script_outdir, "run_codeml.log") # logfile = open(logfilename, "w"); # logfile.write(""); # logfile.close(); core.logCheck( l, logfilename, "=======================================================================") core.logCheck(l, logfilename, "\t\t\tRunning codeml") core.logCheck(l, logfilename, "\t\t\t" + core.getDateTime()) if fileflag == 1: core.logCheck(l, logfilename,
print "\t\t\tSequence format conversion"; print "\t\t\t" + core.getDateTime(); if fileflag == 1: print "INPUT | Converting file: " + ins; else: print "INPUT | Converting all files from directory: " + ins; print "INFO | Input format: " + fr; print "INFO | Output format: " + to; if fileflag == 1: print "OUTPUT | Writing output to file: " + outs; else: print "OUTPUT | Writing output files to directory: " + outs; print "-------------------------------------"; if fileflag == 0: print core.getTime() + " | Creating output directory..."; if not os.path.exists(outs): os.system("mkdir " + outs); numfiles = len(filelist); numbars = 0; donepercent = []; i = 0; if fr == "f": init = ".fa"; elif fr == "p": init = ".ph"; elif fr == "n": init = ".nex";
print "\t\t\t" + core.getDateTime(); print "Using alignments in:\t\t" + indir print "Choosing " + str(specs) + " species randomly and performing " + str(reps) + " replicate tests."; print "The task will be split into " + str(num_t) + " processes and species will be chosen as the processes are split."; if cd == 0: print "Checking for convergent sites."; elif cd == 1: print "Checking for divergent sites."; print "---------------------------------------------"; if num_t > 1: bgs = []; processes = [] output = mp.Queue() reps_per_t = reps / num_t; print core.getTime() + " Generating function calls..."; print "Function calls and arguments are as follows:"; print "convCheck([start replicate], [stop replicate], [# random species], [target species], [background species], [convergence/divergence option], [input directory], [output prefix])"; print "----------"; x = 0; while x < num_t: start_rep = reps_per_t * x; stop_rep = reps_per_t * (x+1); print core.getTime() + " Call " + str(x+1) + ": convCheck(" + str(start_rep) + " , " + str(stop_rep) + " , " + str(specs) + " , " + str(cd) + ", " + indir + " , " + outfix + ")"; processes.append(mp.Process(target=convCheck,args=(start_rep,stop_rep,specs,cd,indir,outfix))); x = x + 1; print "----------"; ##print processes; ##convCheck( for p in processes: print core.getTime() + " start", p;
# Parse options, work out input/output locations, create the output
# directory and start the run log.
ins, gb_path, seqtype, m, v, l = optParse(0)

starttime = core.getLogTime()

if os.path.isfile(ins):
    # Single-file mode: the input directory is the directory holding the file.
    fileflag = 1
    indir = os.path.dirname(os.path.realpath(ins)) + "/"
    indir, outdir = core.getOutdir(indir, "run_gblocks", starttime)
    filelist = [ins]
else:
    # Directory mode: every entry of the input directory is a candidate.
    fileflag = 0
    indir, outdir = core.getOutdir(ins, "run_gblocks", starttime)
    filelist = os.listdir(indir)

print(core.getTime() + " | Creating main output directory...")
# Create the directory through the os module instead of a shell command, so
# paths containing spaces or shell metacharacters are handled correctly.
if not os.path.isdir(outdir):
    os.makedirs(outdir)

# Start with an empty log file.
logfilename = os.path.join(outdir, "run_gblocks.log")
with open(logfilename, "w") as logfile:
    logfile.write("")

core.logCheck(l, logfilename, "=======================================================================")
core.logCheck(l, logfilename, "\t\t\tMasking alignments with GBlocks")
core.logCheck(l, logfilename, "\t\t\t" + core.getDateTime())
if fileflag == 1:
    core.logCheck(l, logfilename, "INPUT    | Masking alignment from file: " + ins)
else:
    core.logCheck(l, logfilename, "INPUT    | Masking alignments from all files in: " + indir)
core.logCheck(l, logfilename, "INFO     | GBlocks path set to: " + gb_path)
def _convWriteHeader(outfile, outfilename, rep_num, c, number_specs, d, ins, rep_specs):
    # Write the commented run header for one replicate's output file.
    outfile.write("# ==============================================================================================\n")
    outfile.write("# \t\t\tConvergence testing\n")
    outfile.write("# \t\t\t" + core.getDateTime() + "\n")
    outfile.write("# Using alignments in:\t\t" + ins + "\n")
    outfile.write("# Randomly choosing " + str(number_specs) + " species and performing " + str(c) + " replicate tests for convergence.\n")
    outfile.write("# This is replicate number " + str(rep_num) + "\n")
    outfile.write("# Writing output to:\t\t\t" + outfilename + "\n")
    if d == 0:
        outfile.write("# Checking for convergent sites.\n")
    elif d == 1:
        outfile.write("# Checking for divergent sites.\n")
    # The newline keeps the species list from running into the separator line.
    outfile.write("# Using species:\t" + ",".join(rep_specs) + "\n")
    outfile.write("# ---------------------------------------------\n")


def convCheck(cur_c, c, number_specs, d, ins, outs):
    """Run replicates cur_c+1 .. c of the random-species convergence scan.

    For every replicate, number_specs distinct species are drawn at random
    from the values of the module-level all_specs mapping. Every ordered pair
    of distinct drawn species is then used as the "target" pair, with the
    remaining drawn species as "background", and every alignment in ins is
    scanned column by column. Results go to "<outs>_<replicate>.txt".

    Parameters:
        cur_c: zero-based index of the first replicate to run; replicates are
            reported one-based (cur_c + 1).
        c: stop index; the loop runs while cur_c < c.
        number_specs: how many species to draw per replicate.
        d: 0 records convergent sites (both targets share one allele that no
            background species has); 1 records divergent sites (targets
            differ, all backgrounds are identical, and no target allele
            occurs in the background). Any other value records nothing.
        ins: directory of alignments; only entries whose name contains ".fa"
            are read (via core.fastaGetDict).
        outs: prefix of the per-replicate output files.

    Returns:
        None. Output is written to the replicate files and to stdout.

    A column is only considered when remGapMiss leaves one state for every
    target and every background species (remGapMiss is defined elsewhere;
    presumably it drops gap/missing states -- confirm).
    """
    init_c = cur_c + 1
    while cur_c < c:
        # Draw number_specs distinct species; the pool is a copy, so the
        # module-level mapping is not modified.
        spec_list = list(all_specs.values())
        rep_specs = []
        while len(rep_specs) < number_specs:
            r = random.choice(spec_list)
            rep_specs.append(r)
            spec_list.remove(r)

        outfilename = outs + "_" + str(cur_c + 1) + ".txt"
        # "with" guarantees the replicate file is flushed and closed even if
        # the scan raises part-way through.
        with open(outfilename, "w") as outfile:
            _convWriteHeader(outfile, outfilename, cur_c + 1, c, number_specs, d, ins, rep_specs)

            numsites = 0
            totgenes = 0
            outfile.write("# " + core.getTime() + " Starting Scan...\n")
            outfile.write("# Site#\tTargetSpecs\tChromosome\tGeneID\tAlignLen\tPosition\tTargetAlleles\tBackgroundAlleles\n")

            for align in os.listdir(ins):
                if align.find(".fa") == -1:
                    continue

                infilename = os.path.join(ins, align)
                gid = "_".join(align.split("_")[:2])
                chr_pos = align.find("chr")
                chrome = align[chr_pos:chr_pos + 4]
                inseqs = core.fastaGetDict(infilename)

                # NOTE(review): ordered pairs are used, so (A, B) and (B, A)
                # are both scanned and counted -- confirm this is intended.
                for t1 in rep_specs:
                    for t2 in rep_specs:
                        if t1 == t2:
                            continue

                        targets = [t1, t2]
                        backgrounds = [spec for spec in rep_specs if spec not in targets]

                        # Presence is a substring match of the species label
                        # against each sequence title.
                        num_targets_present = 0
                        num_bg_present = 0
                        for title in inseqs:
                            if any(t in title for t in targets):
                                num_targets_present = num_targets_present + 1
                            if any(b in title for b in backgrounds):
                                num_bg_present = num_bg_present + 1

                        if num_targets_present != len(targets) or num_bg_present != len(backgrounds):
                            continue

                        totgenes = totgenes + 1
                        seqlen = len(inseqs[next(iter(inseqs))])

                        # The title -> species mapping does not depend on the
                        # column, so resolve it once per target pair.
                        target_seqs = []
                        background_seqs = []
                        for title in inseqs:
                            cur_spec = title[1:].replace("\n", "")
                            if cur_spec in targets:
                                target_seqs.append((cur_spec, inseqs[title]))
                            if cur_spec in backgrounds:
                                background_seqs.append((cur_spec, inseqs[title]))

                        t_alleles = {}
                        b_alleles = {}
                        for x in range(seqlen):
                            for cur_spec, seq in target_seqs:
                                t_alleles[cur_spec] = seq[x]
                            for cur_spec, seq in background_seqs:
                                b_alleles[cur_spec] = seq[x]

                            t_final = remGapMiss(list(t_alleles.values()))
                            b_final = remGapMiss(list(b_alleles.values()))

                            if t_final == [] or b_final == []:
                                continue
                            # Every target and background species must still
                            # have a state at this column.
                            if len(t_final) != len(targets) or len(b_final) != len(backgrounds):
                                continue

                            targets_agree = t_final.count(t_final[0]) == len(t_final)
                            if d == 0:
                                is_hit = targets_agree and t_final[0] not in b_final
                            elif d == 1:
                                is_hit = (not targets_agree
                                          and b_final.count(b_final[0]) == len(b_final)
                                          and not any(t in b_final for t in t_final))
                            else:
                                is_hit = False

                            if not is_hit:
                                continue

                            numsites = numsites + 1
                            if d == 0:
                                # Only convergent hits are echoed to stdout.
                                print(core.getTime() + " Convergent site found!")
                                print("Filename:\t\t" + align)
                                print("Chromosome:\t\t" + chrome)
                                print("Gene ID:\t\t" + gid)
                                print("Alignment length\t" + str(seqlen))
                                print("Target alleles:\t\t" + "".join(t_final))
                                print("Background alleles:\t" + "".join(b_final))
                                print("---------------")

                            outline = str(numsites) + "\t" + ",".join(targets) + "\t" + chrome + "\t" + gid + "\t" + str(seqlen) + "\t" + str(x+1) + "\t" + "".join(t_final) + "\t" + "".join(b_final) + "\n"
                            outfile.write(outline)

            outfile.write("\n# " + core.getTime() + " Done!\n")
            outfile.write("# Total sites found: " + str(numsites) + "\n")
            outfile.write("# Total genes checked: " + str(totgenes) + "\n")
            outfile.write("# ==============================================================================================")

        cur_c = cur_c + 1

    # NOTE(review): ropt is a module-level name not visible in this section.
    if ropt != 0:
        print(core.getTime() + " Replicates " + str(init_c) + " to " + str(c) + " complete.")
# Fragment of a per-file residue-counting loop. The loop header that binds
# `each` is outside this view, and the statements are collapsed onto one
# physical line, so the nesting described here is inferred from the syntax.
#
# Visible behaviour:
#   * entries whose name does not contain ".fa" are skipped;
#   * each remaining file (ins + each) is read with core.fastaGetDict and the
#     length of every sequence is added to the running total tot_pos;
#   * with disp_file == 1 each sequence name and length is printed, followed by
#     a per-file "Total" (specpos);
#   * with disp_file == 0, "100.0% complete." is written to stderr preceded by
#     the same number of backspaces;
#   * finally a completion timestamp and the total residue count are printed.
#
# NOTE(review): ins + each is plain string concatenation, so ins presumably
#   ends with a path separator -- verify against the caller.
print "Sequence\tLength" i = i + 1 if each.find(".fa") == -1: continue specpos = 0 infilename = ins + each inseqs = core.fastaGetDict(infilename) for seq in inseqs: tot_pos = tot_pos + len(inseqs[seq]) if disp_file == 1: specpos = specpos + len(inseqs[seq]) print seq + "\t" + str(len(inseqs[seq])) if disp_file == 1: print "Total\t" + str(specpos) if disp_file == 0: pstring = "100.0% complete." sys.stderr.write('\b' * len(pstring) + pstring) elif disp_file == 1: print "----------" print "\n" + core.getTime() + " Done!" print "-----" print "Total residues:\t", tot_pos print "======================================================================="
# Resolve the input and output locations for run_raxml, create the main
# output directory and start the log file.
#
# ins is either a single alignment file or a directory of alignments.
# fileflag records which (1 = single file, 0 = directory) for the code below.
if os.path.isfile(ins):
    fileflag = 1
    indir = os.path.dirname(os.path.realpath(ins)) + "/"
    indir, script_outdir = core.getOutdir(indir, "run_raxml", starttime)
    filelist = [ins]
else:
    fileflag = 0
    indir, script_outdir = core.getOutdir(ins, "run_raxml", starttime)
    filelist = os.listdir(indir)

# Both sub-directories hang off script_outdir regardless of the input kind.
bestdir = script_outdir + "raxml_best/"
outdir = script_outdir + "raxml_out/"

print(core.getTime() + " | Creating main output directory:\t" + script_outdir)
# Create the directory directly instead of shelling out to "mkdir '<path>'":
# a path containing a single quote would break the hand-made quoting.
# An already existing directory is reused.
if not os.path.isdir(script_outdir):
    os.makedirs(script_outdir)

# Create (or truncate) the log file so the logCheck calls start from an empty file.
logfilename = script_outdir + "run_raxml.log"
with open(logfilename, "w"):
    pass

core.logCheck(l, logfilename, "=======================================================================")
core.logCheck(l, logfilename, "\t\t\tBuilding trees with RAxML")
core.logCheck(l, logfilename, "\t\t\t" + core.getDateTime())
if fileflag == 1:
    # Report the input file itself; indir only holds its parent directory here.
    core.logCheck(l, logfilename, "INPUT | Making tree from file:\t\t" + ins)
else:
    core.logCheck(l, logfilename, "INPUT | Making trees from all files in:\t" + indir)
core.logCheck(l, logfilename, "INPUT | RAxML path set to:\t\t\t" + rax_path)