Esempio n. 1
0
def archiveDo(bot, position):
    """Move the suggestion on sheet row ``position`` to the top of the archive.

    The archive block (columns F-L, from row 4) is shifted down one row, the
    suggestion's B-E values are written into the freed row 4 together with a
    date stamp, and the suggestion rows from ``position`` downwards are
    shifted up one row. In debug mode only a notice is sent and the sheet is
    not touched.
    """
    if debug:
        send(bot, "Suggestion moved to the archive. (not really)")
        return  # todo move bot to a test sheet

    archive_width = 7     # cells per archive row (columns F..L)
    suggestion_width = 4  # cells per suggestion row (columns B..E)

    sheet = auth()
    stamp = getTime()

    # Count the non-empty name cells of both lists; both lists start at row 4.
    archive_names = [
        name for name in map(fValue, sheet.range('G4:G1300')) if fNonEmpty(name)
    ]
    suggestion_names = [
        name for name in map(fValue, sheet.range('B4:B100')) if fNonEmpty(name)
    ]
    last_suggestion_row = len(suggestion_names) + 4
    archive_end_row = len(archive_names) + 4

    picked = [
        fValue(cell) for cell in sheet.range('B{0}:E{0}'.format(position))
    ]

    # Shift the archive down by one row. Walking backwards guarantees every
    # cell is copied before it gets overwritten.
    archive_block = sheet.range('F4:L{0}'.format(archive_end_row + 1))
    for src in range(len(archive_block) - archive_width - 1, -1, -1):
        archive_block[src + archive_width].value = archive_block[src].value
    sheet.update_cells(archive_block)

    # Write the archived suggestion into the first archive row (F4:L4).
    top_row = sheet.range('F4:L4')
    new_values = (
        stamp.strftime("%d %b %y"),
        picked[0],
        picked[1],
        picked[2],
        picked[3],
        "",
        "",
    )
    for column, value in enumerate(new_values):
        top_row[column].value = value
    sheet.update_cells(top_row)

    # Close the gap: every suggestion cell takes the value one row below it.
    remaining = sheet.range(
        'B{0}:E{1}'.format(position, last_suggestion_row))
    for dst in range(len(remaining) - suggestion_width):
        remaining[dst].value = remaining[dst + suggestion_width].value
    sheet.update_cells(remaining)

    send(bot, "Suggestion moved to the archive.")
Esempio n. 2
0

############################################
#Main Block
############################################

# Read the input file list and the output file name from the command line.
infiles, outfilename = optParse(0)

# Run banner. Each print takes one pre-built string, so the text is the same
# as with the comma form (no separator is added after a trailing tab).
print("# =======================================================================")
print("# Combining files:\t\t\t" + str(infiles))
print("# Writing output to:\t\t\t" + outfilename)
print("# Writing only possible one-to-one orthologs to output file.")
print("# -------------------------------------")
print("# " + core.getTime() + " Reading files...")

# Per-input-file containers, keyed by input file name in the loop below.
file_lines, key_ids = {}, {}

for each in infiles:
    infile = open(each, "r")
    inlines = infile.readlines()
    infile.close()

    file_lines[each] = []
    key_ids[each] = []

    i = 0
    for line in inlines:
        if i == 0:
Esempio n. 3
0
# Optionally relocate the run directory below the directory given with -z,
# keeping the run directory's own base name.
if script_outdir_initial is not None:
    if not os.path.isdir(script_outdir_initial):
        core.errorOut(8, "-z must be a valid directory")
        optParse(1)  # NOTE(review): presumably prints usage and exits - confirm

    script_outdir = os.path.join(
        script_outdir_initial,
        os.path.basename(os.path.normpath(script_outdir)))

# Optionally append "-<suffix>" to the directory name; the result always ends
# with exactly the one trailing "/" added here.
if outdir_suffix is not None:
    if script_outdir.endswith("/"):
        script_outdir = script_outdir[:-1]
    script_outdir = script_outdir + "-" + outdir_suffix + "/"

print(core.getTime() + " | Creating main output directory:\t" + script_outdir)
# Create the directory through the os module rather than a shell command, so
# names containing quotes or other shell metacharacters are taken literally.
if not os.path.isdir(script_outdir):
    os.mkdir(script_outdir)

# Create (or truncate) the log file; the context manager closes it.
logfilename = script_outdir + "supertreemaker.log"
with open(logfilename, "w") as logfile:
    logfile.write("")
##Pre-run prep: creating log files and output directories, etc...

core.logCheck(
    l, logfilename,
    "=======================================================================")
core.logCheck(l, logfilename,
              "\tSupertree making with SDM, R, newickutils, and r8s")
core.logCheck(l, logfilename, "\t\t\t" + core.getDateTime())
core.logCheck(l, logfilename,
Esempio n. 4
0
# Command-line options: ortholog table, sequence directory, species list
# ("label:filename" entries), start-Methionine flag and output directory.
infilename, seqdir, speclist, remstart, outdir = optParse(0)

# Run banner. Each print takes one pre-built string, so the text is the same
# as with the comma form (no separator is added after a trailing tab).
print("# =======================================================================")
print("# \t\t\tRetrieving sequences in FASTA format")
print("# \t\t\t" + core.getDateTime())
print("# Retrieving ortholog sequences from:\t" + str(infilename))
print("# Sequence directory\t\t\t" + seqdir)
print("# Writing combined sequence files to:\t" + str(outdir))
print("# Removing start Methionines (-m 1)" if remstart == 1 else
      "# NOT removing start Methionines (-m 0)")
print("# Reminder: Please ensure your species dictionary was entered correctly.")
print("# Note: The script will skip any lines that do not have all species.")
print("# -------------------------------------")
print("# " + core.getTime() + " Preparing species dictionary...")

# Map the part before the first ":" of every entry to the part after it.
specdict = {}
for each in speclist:
    current = each.split(":")
    specdict[current[0]] = current[1]

print("# -------------------------------------")
print("# " + core.getTime() +
      " Reading peptide source files and extracting protein IDs...")

# Load each species' file from the sequence directory, keyed by the label.
tmp_seq_dict = {}
for spec in specdict:
    tmp_seq_dict[spec] = core.fastaGetDict(
        os.path.join(seqdir, specdict[spec]))
Esempio n. 5
0
def convCheck(cur_c, c, ropt, targets, backgrounds, d, ins, outs):
    """Scan every alignment in ``ins`` for convergent or divergent sites.

    One tab-delimited report is written per replicate, for replicates
    ``cur_c + 1`` through ``c``.  A site is reported only when ``remGapMiss``
    returns one state for every target and every background species, and:

    * ``d == 0`` (convergent): all targets share a single state and that
      state does not occur among the backgrounds;
    * ``d == 1`` (divergent): the targets do not all share one state, the
      backgrounds all share one state, and no target state occurs among
      the backgrounds.

    Args:
        cur_c: Number of replicates already done; the loop runs while
            ``cur_c < c``.
        c: Number of the last replicate to run.
        ropt: How many background species to draw at random from
            ``all_specs`` for each replicate.  With ``0`` the ``backgrounds``
            argument is used as given and the report goes to
            ``outs + ".txt"``.
        targets: Target species labels, matched as substrings of the
            sequence titles.
        backgrounds: Background species labels; replaced by the random draw
            when ``ropt != 0``.
        d: ``0`` to report convergent sites, ``1`` to report divergent sites.
        ins: Input directory.  File names are appended to it directly, so it
            has to end with a path separator.
        outs: Output file name without the ``.txt`` extension.

    Besides its arguments the function reads the module-level names
    ``core``, ``all_specs``, ``indir``, ``r`` and ``remGapMiss``.
    """
    init_c = cur_c + 1
    while cur_c < c:
        if ropt != 0:
            outfilename = outs + "_" + str(cur_c + 1) + ".txt"
            # Draw ropt distinct background species that are not targets.
            # NOTE(review): this loop cannot finish when all_specs holds
            # fewer than ropt distinct non-target species.
            spec_pool = list(all_specs.values())
            backgrounds = []
            while len(backgrounds) < ropt:
                chosenspec = random.choice(spec_pool)
                if chosenspec not in targets and chosenspec not in backgrounds:
                    backgrounds.append(chosenspec)
        else:
            outfilename = outs + ".txt"

        # The context manager flushes and closes the report, also when the
        # scan raises; previously the handle was never closed.
        with open(outfilename, "w") as outfile:
            outfile.write(
                "# ==============================================================================================\n"
            )
            outfile.write("# \t\t\tConvergence testing\n")
            outfile.write("# \t\t\t" + core.getDateTime() + "\n")
            outfile.write("# Using alignments in:\t\t" + indir + "\n")
            outfile.write("# Target species:\t\t\t" + ", ".join(targets) +
                          "\n")
            if ropt != 0:
                # NOTE(review): ``r`` is read from module scope, not from the
                # arguments - presumably it equals ropt; confirm.
                outfile.write("# Randomly choosing " + str(r) +
                              " background species and performing " + str(c) +
                              " replicate tests for convergence.\n")
                outfile.write("# This is replicate number " + str(cur_c + 1) +
                              "\n")
            outfile.write("# Background species:\t\t" +
                          ", ".join(backgrounds) + "\n")
            outfile.write("# Writing output to:\t\t\t" + outfilename + "\n")
            if d == 0:
                outfile.write("# Checking for convergent sites.\n")
            elif d == 1:
                outfile.write("# Checking for divergent sites.\n")
            outfile.write("# ---------------------------------------------\n")

            aligns = os.listdir(ins)
            numsites = 0
            totgenes = 0
            outfile.write("# " + core.getTime() + " Starting Scan...\n")
            outfile.write(
                "# Site#\tChromosome\tGeneID\tAlignLen\tPosition\tTargetAlleles\tBackgroundAlleles\n"
            )
            for align in aligns:
                if ".fa" not in align:
                    continue

                gid = "_".join(align.split("_")[:2])
                chr_start = align.find("chr")
                chrome = align[chr_start:chr_start + 4]

                inseqs = core.fastaGetDict(ins + align)

                # Only alignments in which the number of titles naming a
                # target (background) equals the number of targets
                # (backgrounds) are scanned.
                num_targets_present = sum(
                    1 for title in inseqs if any(t in title for t in targets))
                num_bg_present = sum(
                    1 for title in inseqs
                    if any(b in title for b in backgrounds))
                if num_targets_present != len(
                        targets) or num_bg_present != len(backgrounds):
                    continue
                totgenes = totgenes + 1

                # The species/title matches do not depend on the site, so
                # they are resolved once per alignment.  The order is the one
                # of the original nested loops, so a species matched by
                # several titles still takes the last matching title.
                target_titles = [(t, title) for title in inseqs
                                 for t in targets if t in title]
                bg_titles = [(b, title) for title in inseqs
                             for b in backgrounds if b in title]

                # The first sequence's length is used as alignment length.
                seqlen = len(inseqs[next(iter(inseqs))])

                t_alleles = {}
                b_alleles = {}
                for x in range(seqlen):
                    for t, title in target_titles:
                        t_alleles[t] = inseqs[title][x]
                    for b, title in bg_titles:
                        b_alleles[b] = inseqs[title][x]

                    t_final = remGapMiss(list(t_alleles.values()))
                    b_final = remGapMiss(list(b_alleles.values()))

                    if t_final == [] or b_final == []:
                        continue
                    # Every species must have kept its state.
                    if len(t_final) != len(targets) or len(b_final) != len(
                            backgrounds):
                        continue

                    targets_agree = t_final.count(t_final[0]) == len(t_final)
                    if d == 0:
                        is_hit = targets_agree and t_final[0] not in b_final
                    elif d == 1:
                        is_hit = (
                            not targets_agree
                            and b_final.count(b_final[0]) == len(b_final)
                            and not any(t in b_final for t in t_final))
                    else:
                        is_hit = False
                    if not is_hit:
                        continue

                    numsites = numsites + 1
                    outfile.write("\t".join([
                        str(numsites),
                        chrome,
                        gid,
                        str(seqlen),
                        str(x + 1),  # positions are reported 1-based
                        "".join(t_final),
                        "".join(b_final),
                    ]) + "\n")

            outfile.write("\n# " + core.getTime() + " Done!\n")
            outfile.write("# Total sites found: " + str(numsites) + "\n")
            outfile.write("# Total genes checked: " + str(totgenes) + "\n")
            outfile.write(
                "# =============================================================================================="
            )
        cur_c = cur_c + 1
    if ropt != 0:
        print(core.getTime() + " Replicates " + str(init_c) + " to " +
              str(c) + " complete.")
Esempio n. 6
0
		i = i + 1;

		if each.find(".fa") == -1:
			continue;

		specpos = 0;

		infilename = ins + each;

		inseqs = core.fastaGetDict(infilename);

		for seq in inseqs:
			tot_pos = tot_pos + len(inseqs[seq]);
			if disp_file == 1:
				specpos = specpos + len(inseqs[seq]);
				print seq + "\t" + str(len(inseqs[seq]));

		if disp_file == 1:
			print "Total\t" + str(specpos);

	if disp_file == 0:
		pstring = "100.0% complete.";
		sys.stderr.write('\b' * len(pstring) + pstring);
	elif disp_file == 1:
		print "----------";
	print "\n" + core.getTime() + " Done!";
	print "-----";
	print "Total residues:\t", tot_pos;
	print "=======================================================================";

Esempio n. 7
0
ins, gb_path, seqtype, m, v, l = optParse(0)

starttime = core.getLogTime()

# The input may be a single alignment file or a directory of alignments.
if os.path.isfile(ins):
    fileflag = 1
    indir = os.path.dirname(os.path.realpath(ins)) + "/"
    indir, outdir = core.getOutdir(indir, "run_gblocks", starttime)
    filelist = [ins]
else:
    fileflag = 0
    indir, outdir = core.getOutdir(ins, "run_gblocks", starttime)
    filelist = os.listdir(indir)

print(core.getTime() + " | Creating main output directory...")
# Create the directory through the os module rather than an unquoted shell
# command, which split the path at spaces and interpreted metacharacters.
if not os.path.isdir(outdir):
    os.mkdir(outdir)

# Create (or truncate) the log file; the context manager closes it.
logfilename = outdir + "run_gblocks.log"
with open(logfilename, "w") as logfile:
    logfile.write("")

core.logCheck(
    l, logfilename,
    "=======================================================================")
core.logCheck(l, logfilename, "\t\t\tMasking alignments with GBlocks")
core.logCheck(l, logfilename, "\t\t\t" + core.getDateTime())
if fileflag == 1:
    core.logCheck(l, logfilename,
                  "INPUT    | Masking alignment from file: " + ins)
Esempio n. 8
0
File: fa_edit.py Progetto: gwct/core
			elif rep != "" and rep[1] == ":":
				print "INFO     | Replacing all " + rep[0] + " symbols in input sequences with a random AA symbol not present in that column.";
		suffix = suffix + ".r";
# A file input writes to the single file `outs`; a directory input writes
# into the directory `outs`, which is created when it does not exist yet.
if os.path.isfile(ins):
	print("OUTPUT   | Writing output to the following file:\t\t" + outs)
else:
	if not outs.endswith("/"):
		outs = outs + "/"
	print("OUTPUT   | Writing output to the following directory:\t\t" + outs)
	if not os.path.exists(outs):
		print("+Creating output directory.")
		# os.mkdir takes the path literally; the former unquoted shell
		# command split it at spaces and interpreted metacharacters.
		os.mkdir(outs)
print("---------------------------------------------")

print(core.getTime() + " Starting...")

# Progress bookkeeping for the file loop below.
numfiles = len(filelist)
numbars = 0
donepercent = []
i = 0

for each in filelist:
	if each.find(".fa") == -1:
		continue;
	
	if os.path.isfile(ins):
		print ins;
		infilename = ins;
		outfilename = outs;
Esempio n. 9
0
indir, script_outdir = core.getOutdir(indir, "supertreemaker", starttime)
print(script_outdir)
print(os.path.basename(os.path.normpath(script_outdir)))

# Optionally relocate the run directory below the directory given with -z,
# keeping the run directory's own base name.
if script_outdir_initial is not None:
	if not os.path.isdir(script_outdir_initial):
		core.errorOut(8, "-z must be a valid directory")
		optParse(1)  # NOTE(review): presumably prints usage and exits - confirm

	script_outdir = os.path.join(script_outdir_initial, os.path.basename(os.path.normpath(script_outdir)))

# Optionally append "-<suffix>" to the directory name; the result always ends
# with exactly the one trailing "/" added here.
if outdir_suffix is not None:
	if script_outdir.endswith("/"):
		script_outdir = script_outdir[:-1]
	script_outdir = script_outdir + "-" + outdir_suffix + "/"

print(core.getTime() + " | Creating main output directory:\t" + script_outdir)
# Create the directory through the os module rather than a shell command, so
# names containing quotes or other shell metacharacters are taken literally.
if not os.path.isdir(script_outdir):
	os.mkdir(script_outdir)

# Create (or truncate) the log file; the context manager closes it.
logfilename = script_outdir + "supertreemaker.log"
with open(logfilename, "w") as logfile:
	logfile.write("")
##Pre-run prep: creating log files and output directories, etc...

core.logCheck(l, logfilename, "=======================================================================")
core.logCheck(l, logfilename, "\tSupertree making with SDM, R, newickutils, and r8s")
core.logCheck(l, logfilename, "\t\t\t" + core.getDateTime())
core.logCheck(l, logfilename, "INPUT    | Making tree from file:\t\t\t" + infilename)
core.logCheck(l, logfilename, "INPUT    | Input file located in:\t\t\t" + indir)
if njopt == 1:
	core.logCheck(l, logfilename, "INFO     | Using Average Consensus method in SDM to build distance matrix.");
Esempio n. 10
0
def convCheck(cur_c, c, ropt, targets, backgrounds, d, ins, outs):
	"""Scan every alignment in ``ins`` for convergent or divergent sites.

	One tab-delimited report is written per replicate, for replicates
	``cur_c + 1`` through ``c``.  A site is reported only when ``remGapMiss``
	returns one state for every target and every background species, and:

	* ``d == 0`` (convergent): all targets share a single state and that
	  state does not occur among the backgrounds;
	* ``d == 1`` (divergent): the targets do not all share one state, the
	  backgrounds all share one state, and no target state occurs among
	  the backgrounds.

	Args:
		cur_c: Number of replicates already done; the loop runs while
			``cur_c < c``.
		c: Number of the last replicate to run.
		ropt: How many background species to draw at random from
			``all_specs`` for each replicate.  With ``0`` the ``backgrounds``
			argument is used as given and the report goes to
			``outs + ".txt"``.
		targets: Target species labels, matched as substrings of the
			sequence titles.
		backgrounds: Background species labels; replaced by the random draw
			when ``ropt != 0``.
		d: ``0`` to report convergent sites, ``1`` to report divergent sites.
		ins: Input directory.  File names are appended to it directly, so it
			has to end with a path separator.
		outs: Output file name without the ``.txt`` extension.

	Besides its arguments the function reads the module-level names
	``core``, ``all_specs``, ``indir``, ``r`` and ``remGapMiss``.
	"""
	init_c = cur_c + 1
	while cur_c < c:
		if ropt != 0:
			outfilename = outs + "_" + str(cur_c + 1) + ".txt"
			# Draw ropt distinct background species that are not targets.
			# NOTE(review): this loop cannot finish when all_specs holds
			# fewer than ropt distinct non-target species.
			spec_pool = list(all_specs.values())
			backgrounds = []
			while len(backgrounds) < ropt:
				chosenspec = random.choice(spec_pool)
				if chosenspec not in targets and chosenspec not in backgrounds:
					backgrounds.append(chosenspec)
		else:
			outfilename = outs + ".txt"

		# The context manager flushes and closes the report, also when the
		# scan raises; previously the handle was never closed.
		with open(outfilename, "w") as outfile:
			outfile.write("# ==============================================================================================\n")
			outfile.write("# \t\t\tConvergence testing\n")
			outfile.write("# \t\t\t" + core.getDateTime() + "\n")
			outfile.write("# Using alignments in:\t\t" + indir + "\n")
			outfile.write("# Target species:\t\t\t" + ", ".join(targets) + "\n")
			if ropt != 0:
				# NOTE(review): ``r`` is read from module scope, not from the
				# arguments - presumably it equals ropt; confirm.
				outfile.write("# Randomly choosing " + str(r) + " background species and performing " + str(c) + " replicate tests for convergence.\n")
				outfile.write("# This is replicate number " + str(cur_c + 1) + "\n")
			outfile.write("# Background species:\t\t" + ", ".join(backgrounds) + "\n")
			outfile.write("# Writing output to:\t\t\t" + outfilename + "\n")
			if d == 0:
				outfile.write("# Checking for convergent sites.\n")
			elif d == 1:
				outfile.write("# Checking for divergent sites.\n")
			outfile.write("# ---------------------------------------------\n")

			aligns = os.listdir(ins)
			numsites = 0
			totgenes = 0
			outfile.write("# " + core.getTime() + " Starting Scan...\n")
			outfile.write("# Site#\tChromosome\tGeneID\tAlignLen\tPosition\tTargetAlleles\tBackgroundAlleles\n")
			for align in aligns:
				if ".fa" not in align:
					continue

				gid = "_".join(align.split("_")[:2])
				chr_start = align.find("chr")
				chrome = align[chr_start:chr_start + 4]

				inseqs = core.fastaGetDict(ins + align)

				# Only alignments in which the number of titles naming a
				# target (background) equals the number of targets
				# (backgrounds) are scanned.
				num_targets_present = sum(1 for title in inseqs if any(t in title for t in targets))
				num_bg_present = sum(1 for title in inseqs if any(b in title for b in backgrounds))
				if num_targets_present != len(targets) or num_bg_present != len(backgrounds):
					continue
				totgenes = totgenes + 1

				# The species/title matches do not depend on the site, so
				# they are resolved once per alignment.  The order is the one
				# of the original nested loops, so a species matched by
				# several titles still takes the last matching title.
				target_titles = [(t, title) for title in inseqs for t in targets if t in title]
				bg_titles = [(b, title) for title in inseqs for b in backgrounds if b in title]

				# The first sequence's length is used as alignment length.
				seqlen = len(inseqs[next(iter(inseqs))])

				t_alleles = {}
				b_alleles = {}
				for x in range(seqlen):
					for t, title in target_titles:
						t_alleles[t] = inseqs[title][x]
					for b, title in bg_titles:
						b_alleles[b] = inseqs[title][x]

					t_final = remGapMiss(list(t_alleles.values()))
					b_final = remGapMiss(list(b_alleles.values()))

					if t_final == [] or b_final == []:
						continue
					# Every species must have kept its state.
					if len(t_final) != len(targets) or len(b_final) != len(backgrounds):
						continue

					targets_agree = t_final.count(t_final[0]) == len(t_final)
					if d == 0:
						is_hit = targets_agree and t_final[0] not in b_final
					elif d == 1:
						is_hit = (not targets_agree
							and b_final.count(b_final[0]) == len(b_final)
							and not any(t in b_final for t in t_final))
					else:
						is_hit = False
					if not is_hit:
						continue

					numsites = numsites + 1
					outfile.write("\t".join([
						str(numsites),
						chrome,
						gid,
						str(seqlen),
						str(x + 1),  # positions are reported 1-based
						"".join(t_final),
						"".join(b_final),
					]) + "\n")

			outfile.write("\n# " + core.getTime() + " Done!\n")
			outfile.write("# Total sites found: " + str(numsites) + "\n")
			outfile.write("# Total genes checked: " + str(totgenes) + "\n")
			outfile.write("# ==============================================================================================")
		cur_c = cur_c + 1
	if ropt != 0:
		print(core.getTime() + " Replicates " + str(init_c) + " to " + str(c) + " complete.")
Esempio n. 11
0
                print "INFO     | Replacing all " + rep[
                    0] + " symbols in input sequences with a random AA symbol not present in that column."
        suffix = suffix + ".r"
# A file input writes to the single file `outs`; a directory input writes
# into the directory `outs`, which is created when it does not exist yet.
if os.path.isfile(ins):
    print("OUTPUT   | Writing output to the following file:\t\t" + outs)
else:
    if not outs.endswith("/"):
        outs = outs + "/"
    print("OUTPUT   | Writing output to the following directory:\t\t" + outs)
    if not os.path.exists(outs):
        print("+Creating output directory.")
        # os.mkdir takes the path literally; the former unquoted shell
        # command split it at spaces and interpreted metacharacters.
        os.mkdir(outs)
print("---------------------------------------------")

print(core.getTime() + " Starting...")

# Progress bookkeeping for the file loop below.
numfiles = len(filelist)
numbars = 0
donepercent = []
i = 0

for each in filelist:
    if each.find(".fa") == -1:
        continue

    if os.path.isfile(ins):
        print ins
        infilename = ins
        outfilename = outs
Esempio n. 12
0
            script_outdir = script_outdir[:script_outdir.index("-") +
                                          1] + str(counter)
        counter += 1
    if os.path.isfile(ins):
        fileflag = 1
        filelist = [os.path.abspath(ins)]
    else:
        fileflag = 0
        filelist = os.listdir(ins)
ins = os.path.abspath(ins)

# All output goes below the absolute run directory: one sub-directory named
# "raxml-best" and one named "raxml-out".
script_outdir = os.path.abspath(script_outdir)
bestdir = os.path.join(script_outdir, "raxml-best")
outdir = os.path.join(script_outdir, "raxml-out")

print(core.getTime() + " | Creating main output directory:\t" + script_outdir)
# Create the directory through the os module rather than a shell command, so
# names containing quotes or other shell metacharacters are taken literally.
if not os.path.isdir(script_outdir):
    os.mkdir(script_outdir)

logfilename = os.path.join(script_outdir, "run_raxml.log")
core.filePrep(logfilename)

core.logCheck(
    l, logfilename,
    "=======================================================================")
core.logCheck(l, logfilename, "\t\t\tBuilding trees with RAxML")
core.logCheck(l, logfilename, "\t\t\t" + core.getDateTime())
if fileflag == 1:
    core.logCheck(l, logfilename,
                  "INPUT    | Making tree from file:\t\t" + ins)
else:
    core.logCheck(l, logfilename,
Esempio n. 13
0
def _convSiteLine(numsites, targets, chrome, gid, seqlen, pos, t_final,
                  b_final):
    """Return one newline-terminated, tab-delimited result row."""
    fields = [
        str(numsites), ",".join(targets), chrome, gid,
        str(seqlen), str(pos), "".join(t_final), "".join(b_final)
    ]
    return "\t".join(fields) + "\n"


def _convScanAlign(outfile, ins, align, rep_specs, d, numsites, totgenes):
    """Scan one alignment for every ordered target pair and record the hits.

    Returns the updated ``(numsites, totgenes)`` counters.
    """
    infilename = os.path.join(ins, align)
    gid = "_".join(align.split("_")[:2])
    chr_start = align.find("chr")
    chrome = align[chr_start:chr_start + 4]

    inseqs = core.fastaGetDict(infilename)

    # NOTE(review): both (a, b) and (b, a) are visited, so every unordered
    # pair is scanned and counted twice -- confirm that this is intended.
    for t1 in rep_specs:
        for t2 in rep_specs:
            if t1 == t2:
                continue

            targets = [t1, t2]
            backgrounds = [spec for spec in rep_specs if spec not in targets]

            # Presence is judged by substring match against the titles.
            num_targets_present = 0
            num_bg_present = 0
            for title in inseqs:
                if any(t in title for t in targets):
                    num_targets_present += 1
                if any(b in title for b in backgrounds):
                    num_bg_present += 1

            if (num_targets_present != len(targets)
                    or num_bg_present != len(backgrounds)):
                continue
            totgenes += 1

            seqlen = len(inseqs[next(iter(inseqs))])

            # Work out once per pair which titles belong to which group,
            # instead of re-parsing every title for every column.
            t_titles = []
            b_titles = []
            for title in inseqs:
                cur_spec = title[1:].replace("\n", "")
                if cur_spec in targets:
                    t_titles.append((cur_spec, title))
                if cur_spec in backgrounds:
                    b_titles.append((cur_spec, title))

            t_alleles = {}
            b_alleles = {}
            for x in range(seqlen):
                for cur_spec, title in t_titles:
                    t_alleles[cur_spec] = inseqs[title][x]
                for cur_spec, title in b_titles:
                    b_alleles[cur_spec] = inseqs[title][x]

                t_final = remGapMiss(list(t_alleles.values()))
                b_final = remGapMiss(list(b_alleles.values()))

                if t_final == [] or b_final == []:
                    continue
                # Only columns where no state was filtered out are tested.
                if (len(t_final) != len(targets)
                        or len(b_final) != len(backgrounds)):
                    continue

                targets_agree = t_final.count(t_final[0]) == len(t_final)

                if d == 0:
                    # Convergent: targets share a state absent from the
                    # backgrounds.
                    if targets_agree and t_final[0] not in b_final:
                        numsites += 1
                        print(core.getTime() + " Convergent site found!")
                        print("Filename:\t\t" + align)
                        print("Chromosome:\t\t" + chrome)
                        print("Gene ID:\t\t" + gid)
                        print("Alignment length\t" + str(seqlen))
                        print("Target alleles:\t\t" + "".join(t_final))
                        print("Background alleles:\t" + "".join(b_final))
                        print("---------------")
                        outfile.write(
                            _convSiteLine(numsites, targets, chrome, gid,
                                          seqlen, x + 1, t_final, b_final))

                elif d == 1:
                    # Divergent: targets differ from each other, backgrounds
                    # all agree, and no target state occurs in the backgrounds.
                    bgs_agree = b_final.count(b_final[0]) == len(b_final)
                    if (not targets_agree and bgs_agree
                            and not any(t in b_final for t in t_final)):
                        numsites += 1
                        outfile.write(
                            _convSiteLine(numsites, targets, chrome, gid,
                                          seqlen, x + 1, t_final, b_final))

    return numsites, totgenes


def convCheck(cur_c, c, number_specs, d, ins, outs):
    """Run replicates ``cur_c + 1`` through ``c`` of the random-species scan.

    Each replicate draws ``number_specs`` species at random from the
    module-level ``all_specs`` mapping, scans every alignment in ``ins``
    whose name contains ".fa", and writes a report to
    ``<outs>_<replicate>.txt``.

    Arguments:
        cur_c: zero-based index of the first replicate to run.
        c: replicate index at which to stop (exclusive).
        number_specs: number of species drawn for each replicate.
        d: 0 to report convergent sites, 1 to report divergent sites.
        ins: directory holding the alignment files.
        outs: prefix of the per-replicate output files.

    The module-level ``ropt`` decides whether a completion message is
    printed once all replicates have run.
    """
    init_c = cur_c + 1
    while cur_c < c:
        # Draw the species for this replicate without replacement.
        rep_specs = random.sample(list(all_specs.values()), number_specs)

        outfilename = outs + "_" + str(cur_c + 1) + ".txt"
        # The with-block closes (and flushes) the report even if the scan
        # raises; previously the handle was never closed.
        with open(outfilename, "w") as outfile:
            outfile.write(
                "# ==============================================================================================\n"
            )
            outfile.write("# \t\t\tConvergence testing\n")
            outfile.write("# \t\t\t" + core.getDateTime() + "\n")
            # Report the directory that is actually scanned below.
            outfile.write("# Using alignments in:\t\t" + ins + "\n")
            outfile.write("# Randomly choosing " + str(number_specs) +
                          " species and performing " + str(c) +
                          " replicate tests for convergence.\n")
            outfile.write("# This is replicate number " + str(cur_c + 1) +
                          "\n")
            outfile.write("# Writing output to:\t\t\t" + outfilename + "\n")
            if d == 0:
                outfile.write("# Checking for convergent sites.\n")
            elif d == 1:
                outfile.write("# Checking for divergent sites.\n")
            # The newline keeps the species list on its own header line.
            outfile.write("# Using species:\t" + ",".join(rep_specs) + "\n")
            outfile.write("# ---------------------------------------------\n")

            numsites = 0
            totgenes = 0
            outfile.write("# " + core.getTime() + " Starting Scan...\n")
            outfile.write(
                "# Site#\tTargetSpecs\tChromosome\tGeneID\tAlignLen\tPosition\tTargetAlleles\tBackgroundAlleles\n"
            )
            for align in os.listdir(ins):
                if ".fa" not in align:
                    continue
                numsites, totgenes = _convScanAlign(
                    outfile, ins, align, rep_specs, d, numsites, totgenes)

            outfile.write("\n# " + core.getTime() + " Done!\n")
            outfile.write("# Total sites found: " + str(numsites) + "\n")
            outfile.write("# Total genes checked: " + str(totgenes) + "\n")
            outfile.write(
                "# ==============================================================================================\n"
            )
        cur_c = cur_c + 1
    if ropt != 0:
        print(core.getTime() + " Replicates " + str(init_c) + " to " +
              str(c) + " complete.")
Esempio n. 14
0
    reps) + " replicate tests."
print("The task will be split into " + str(num_t) +
      " processes and species will be chosen as the processes are split.")
if cd == 0:
    print("Checking for convergent sites.")
elif cd == 1:
    print("Checking for divergent sites.")
print("---------------------------------------------")

if num_t > 1:
    bgs = []
    processes = []
    output = mp.Queue()
    # Base number of replicates per process (floor division).
    reps_per_t = reps // num_t
    print(core.getTime() + " Generating function calls...")
    print("Function calls and arguments are as follows:")
    # The signature shown matches the six arguments actually passed below.
    print("convCheck([start replicate], [stop replicate], [# random species], [convergence/divergence option], [input directory], [output prefix])")
    print("----------")
    for x in range(num_t):
        start_rep = reps_per_t * x
        # The last process also takes the remainder, so all `reps`
        # replicates run even when reps is not a multiple of num_t.
        if x == num_t - 1:
            stop_rep = reps
        else:
            stop_rep = reps_per_t * (x + 1)
        print(core.getTime() + " Call " + str(x + 1) + ":  convCheck(" +
              str(start_rep) + " , " + str(stop_rep) + " , " + str(specs) +
              " , " + str(cd) + ", " + indir + " , " + outfix + ")")
        processes.append(
            mp.Process(target=convCheck,
                       args=(start_rep, stop_rep, specs, cd, indir, outfix)))
    print("----------")
Esempio n. 15
0
	filelist = [ins];

else:
	fileflag = 0;
	indir, script_outdir = core.getOutdir(ins, "run_codeml", starttime);
	if outdir_suffix != None:
		if script_outdir[-1] == "/":
			script_outdir = script_outdir[:len(script_outdir)-1] + "-" + outdir_suffix + "/";
		else:
			script_outdir = script_outdir + "-" + outdir_suffix + "/";
	outdir = script_outdir + "codeml_out/";
	filelist = os.listdir(indir);
	if aopt == 1:
		ancdir = script_outdir + "anc_seqs_fa/";

print(core.getTime() + " | Creating main output directory:\t" + script_outdir)
# Create the directory without going through the shell, so paths containing
# spaces or shell metacharacters are handled correctly.
if not os.path.isdir(script_outdir):
	os.makedirs(script_outdir)

logfilename = os.path.join(script_outdir, "run_codeml.log")
# Create (or truncate) the log file before the first core.logCheck call.
with open(logfilename, "w"):
	pass

# Log the run banner and the input settings.
core.logCheck(l, logfilename, "=======================================================================")
core.logCheck(l, logfilename, "\t\t\tRunning codeml")
core.logCheck(l, logfilename, "\t\t\t" + core.getDateTime())
if fileflag == 1:
	core.logCheck(l, logfilename, "INPUT    | Making tree from file:\t\t" + ins)
else:
	core.logCheck(l, logfilename, "INPUT    | Running codeml on all files in:\t" + indir)
core.logCheck(l, logfilename, "INFO     | PAML path set to:\t\t\t" + ppath)
Esempio n. 16
0
    indir, script_outdir = core.getOutdir(indir, "run_codeml", starttime)
    outdir = os.path.join(script_outdir, "codeml_out")
    if aopt == 1:
        ancdir = os.path.join(script_outdir, "anc_seqs_fa")
    filelist = [ins]

else:
    fileflag = 0
    indir, script_outdir = core.getOutdir(ins, "run_codeml", outdir_suffix,
                                          starttime)
    outdir = os.path.join(script_outdir, "codeml_out")
    filelist = os.listdir(indir)
    if aopt == 1:
        ancdir = os.path.join(script_outdir, "anc_seqs_fa")

print(core.getTime() + " | Creating main output directory:\t" + script_outdir)
# Create the directory without going through the shell, so paths containing
# spaces or shell metacharacters are handled correctly.
if not os.path.isdir(script_outdir):
    os.makedirs(script_outdir)

logfilename = os.path.join(script_outdir, "run_codeml.log")
# logfile = open(logfilename, "w");
# logfile.write("");
# logfile.close();

# Log the run banner.
core.logCheck(
    l, logfilename,
    "=======================================================================")
core.logCheck(l, logfilename, "\t\t\tRunning codeml")
core.logCheck(l, logfilename, "\t\t\t" + core.getDateTime())
if fileflag == 1:
    core.logCheck(l, logfilename,
Esempio n. 17
0
print("\t\t\tSequence format conversion")
print("\t\t\t" + core.getDateTime())
if fileflag == 1:
	print("INPUT    | Converting file: " + ins)
else:
	print("INPUT    | Converting all files from directory: " + ins)
print("INFO     | Input format:  " + fr)
print("INFO     | Output format: " + to)
if fileflag == 1:
	print("OUTPUT   | Writing output to file: " + outs)
else:
	print("OUTPUT   | Writing output files to directory:   " + outs)
print("-------------------------------------")

# Directory mode: make sure the output directory exists and initialise the
# per-file loop state.
if fileflag == 0:
	print(core.getTime() + " | Creating output directory...")
	if not os.path.exists(outs):
		# os.makedirs keeps the path out of the shell, so names containing
		# spaces or shell metacharacters are created correctly.
		os.makedirs(outs)

	numfiles = len(filelist)
	numbars = 0
	donepercent = []
	i = 0

# Pick the extension matching the input-format code
# (presumably the suffix expected on input files -- TODO confirm).
if fr == "f":
	init = ".fa"
elif fr == "p":
	init = ".ph"
elif fr == "n":
	init = ".nex"
Esempio n. 18
0
print("\t\t\t" + core.getDateTime())
print("Using alignments in:\t\t" + indir)
print("Choosing " + str(specs) + " species randomly and performing " + str(reps) + " replicate tests.")
print("The task will be split into " + str(num_t) + " processes and species will be chosen as the processes are split.")
if cd == 0:
	print("Checking for convergent sites.")
elif cd == 1:
	print("Checking for divergent sites.")
print("---------------------------------------------")

if num_t > 1:
	bgs = []
	processes = []
	output = mp.Queue()
	# Base number of replicates per process (floor division).
	reps_per_t = reps // num_t
	print(core.getTime() + " Generating function calls...")
	print("Function calls and arguments are as follows:")
	# The signature shown matches the six arguments actually passed below.
	print("convCheck([start replicate], [stop replicate], [# random species], [convergence/divergence option], [input directory], [output prefix])")
	print("----------")
	for x in range(num_t):
		start_rep = reps_per_t * x
		# The last process also takes the remainder, so all `reps`
		# replicates run even when reps is not a multiple of num_t.
		if x == num_t - 1:
			stop_rep = reps
		else:
			stop_rep = reps_per_t * (x + 1)
		print(core.getTime() + " Call " + str(x + 1) + ":  convCheck(" + str(start_rep) + " , " + str(stop_rep) + " , " + str(specs) + " , " + str(cd) + ", " + indir + " , " + outfix + ")")
		processes.append(mp.Process(target=convCheck, args=(start_rep, stop_rep, specs, cd, indir, outfix)))
	print("----------")
	for p in processes:
		print(core.getTime() + " start " + str(p))
Esempio n. 19
0
ins, gb_path, seqtype, m, v, l = optParse(0)

starttime = core.getLogTime()

# A single input file is processed on its own; otherwise every entry of the
# input directory is queued.
if os.path.isfile(ins):
	fileflag = 1
	indir = os.path.dirname(os.path.realpath(ins)) + "/"
	indir, outdir = core.getOutdir(indir, "run_gblocks", starttime)
	filelist = [ins]
else:
	fileflag = 0
	indir, outdir = core.getOutdir(ins, "run_gblocks", starttime)
	filelist = os.listdir(indir)

print(core.getTime() + " | Creating main output directory...")
# Create the directory without going through the shell, so paths containing
# spaces or shell metacharacters are handled correctly.
if not os.path.isdir(outdir):
	os.makedirs(outdir)

logfilename = os.path.join(outdir, "run_gblocks.log")
# Create (or truncate) the log file before the first core.logCheck call.
with open(logfilename, "w"):
	pass

# Log the run banner and the input settings.
core.logCheck(l, logfilename, "=======================================================================")
core.logCheck(l, logfilename, "\t\t\tMasking alignments with GBlocks")
core.logCheck(l, logfilename, "\t\t\t" + core.getDateTime())
if fileflag == 1:
	core.logCheck(l, logfilename, "INPUT    | Masking alignment from file: " + ins)
else:
	core.logCheck(l, logfilename, "INPUT    | Masking alignments from all files in: " + indir)
core.logCheck(l, logfilename, "INFO     | GBlocks path set to: " + gb_path)
Esempio n. 20
0
def _convSiteLine(numsites, targets, chrome, gid, seqlen, pos, t_final, b_final):
	"""Return one newline-terminated, tab-delimited result row."""
	fields = [
		str(numsites), ",".join(targets), chrome, gid,
		str(seqlen), str(pos), "".join(t_final), "".join(b_final)
	]
	return "\t".join(fields) + "\n"


def _convScanAlign(outfile, ins, align, rep_specs, d, numsites, totgenes):
	"""Scan one alignment for every ordered target pair and record the hits.

	Returns the updated ``(numsites, totgenes)`` counters.
	"""
	infilename = os.path.join(ins, align)
	gid = "_".join(align.split("_")[:2])
	chr_start = align.find("chr")
	chrome = align[chr_start:chr_start + 4]

	inseqs = core.fastaGetDict(infilename)

	# NOTE(review): both (a, b) and (b, a) are visited, so every unordered
	# pair is scanned and counted twice -- confirm that this is intended.
	for t1 in rep_specs:
		for t2 in rep_specs:
			if t1 == t2:
				continue

			targets = [t1, t2]
			backgrounds = [spec for spec in rep_specs if spec not in targets]

			# Presence is judged by substring match against the titles.
			num_targets_present = 0
			num_bg_present = 0
			for title in inseqs:
				if any(t in title for t in targets):
					num_targets_present += 1
				if any(b in title for b in backgrounds):
					num_bg_present += 1

			if num_targets_present != len(targets) or num_bg_present != len(backgrounds):
				continue
			totgenes += 1

			seqlen = len(inseqs[next(iter(inseqs))])

			# Work out once per pair which titles belong to which group,
			# instead of re-parsing every title for every column.
			t_titles = []
			b_titles = []
			for title in inseqs:
				cur_spec = title[1:].replace("\n", "")
				if cur_spec in targets:
					t_titles.append((cur_spec, title))
				if cur_spec in backgrounds:
					b_titles.append((cur_spec, title))

			t_alleles = {}
			b_alleles = {}
			for x in range(seqlen):
				for cur_spec, title in t_titles:
					t_alleles[cur_spec] = inseqs[title][x]
				for cur_spec, title in b_titles:
					b_alleles[cur_spec] = inseqs[title][x]

				t_final = remGapMiss(list(t_alleles.values()))
				b_final = remGapMiss(list(b_alleles.values()))

				if t_final == [] or b_final == []:
					continue
				# Only columns where no state was filtered out are tested.
				if len(t_final) != len(targets) or len(b_final) != len(backgrounds):
					continue

				targets_agree = t_final.count(t_final[0]) == len(t_final)

				if d == 0:
					# Convergent: targets share a state absent from the
					# backgrounds.
					if targets_agree and t_final[0] not in b_final:
						numsites += 1
						print(core.getTime() + " Convergent site found!")
						print("Filename:\t\t" + align)
						print("Chromosome:\t\t" + chrome)
						print("Gene ID:\t\t" + gid)
						print("Alignment length\t" + str(seqlen))
						print("Target alleles:\t\t" + "".join(t_final))
						print("Background alleles:\t" + "".join(b_final))
						print("---------------")
						outfile.write(_convSiteLine(numsites, targets, chrome, gid, seqlen, x + 1, t_final, b_final))

				elif d == 1:
					# Divergent: targets differ from each other, backgrounds
					# all agree, and no target state occurs in the backgrounds.
					bgs_agree = b_final.count(b_final[0]) == len(b_final)
					if not targets_agree and bgs_agree and not any(t in b_final for t in t_final):
						numsites += 1
						outfile.write(_convSiteLine(numsites, targets, chrome, gid, seqlen, x + 1, t_final, b_final))

	return numsites, totgenes


def convCheck(cur_c, c, number_specs, d, ins, outs):
	"""Run replicates ``cur_c + 1`` through ``c`` of the random-species scan.

	Each replicate draws ``number_specs`` species at random from the
	module-level ``all_specs`` mapping, scans every alignment in ``ins``
	whose name contains ".fa", and writes a report to
	``<outs>_<replicate>.txt``.

	Arguments:
		cur_c: zero-based index of the first replicate to run.
		c: replicate index at which to stop (exclusive).
		number_specs: number of species drawn for each replicate.
		d: 0 to report convergent sites, 1 to report divergent sites.
		ins: directory holding the alignment files.
		outs: prefix of the per-replicate output files.

	The module-level ``ropt`` decides whether a completion message is
	printed once all replicates have run.
	"""
	init_c = cur_c + 1
	while cur_c < c:
		# Draw the species for this replicate without replacement.
		rep_specs = random.sample(list(all_specs.values()), number_specs)

		outfilename = outs + "_" + str(cur_c + 1) + ".txt"
		# The with-block closes (and flushes) the report even if the scan
		# raises; previously the handle was never closed.
		with open(outfilename, "w") as outfile:
			outfile.write("# ==============================================================================================\n")
			outfile.write("# \t\t\tConvergence testing\n")
			outfile.write("# \t\t\t" + core.getDateTime() + "\n")
			# Report the directory that is actually scanned below.
			outfile.write("# Using alignments in:\t\t" + ins + "\n")
			outfile.write("# Randomly choosing " + str(number_specs) + " species and performing " + str(c) + " replicate tests for convergence.\n")
			outfile.write("# This is replicate number " + str(cur_c + 1) + "\n")
			outfile.write("# Writing output to:\t\t\t" + outfilename + "\n")
			if d == 0:
				outfile.write("# Checking for convergent sites.\n")
			elif d == 1:
				outfile.write("# Checking for divergent sites.\n")
			# The newline keeps the species list on its own header line.
			outfile.write("# Using species:\t" + ",".join(rep_specs) + "\n")
			outfile.write("# ---------------------------------------------\n")

			numsites = 0
			totgenes = 0
			outfile.write("# " + core.getTime() + " Starting Scan...\n")
			outfile.write("# Site#\tTargetSpecs\tChromosome\tGeneID\tAlignLen\tPosition\tTargetAlleles\tBackgroundAlleles\n")
			for align in os.listdir(ins):
				if ".fa" not in align:
					continue
				numsites, totgenes = _convScanAlign(outfile, ins, align, rep_specs, d, numsites, totgenes)

			outfile.write("\n# " + core.getTime() + " Done!\n")
			outfile.write("# Total sites found: " + str(numsites) + "\n")
			outfile.write("# Total genes checked: " + str(totgenes) + "\n")
			outfile.write("# ==============================================================================================\n")
		cur_c = cur_c + 1
	if ropt != 0:
		print(core.getTime() + " Replicates " + str(init_c) + " to " + str(c) + " complete.")
Esempio n. 21
0
            print "Sequence\tLength"
        i = i + 1

        if each.find(".fa") == -1:
            continue

        specpos = 0

        infilename = ins + each

        inseqs = core.fastaGetDict(infilename)

        for seq in inseqs:
            tot_pos = tot_pos + len(inseqs[seq])
            if disp_file == 1:
                specpos = specpos + len(inseqs[seq])
                print seq + "\t" + str(len(inseqs[seq]))

        if disp_file == 1:
            print "Total\t" + str(specpos)

    if disp_file == 0:
        pstring = "100.0% complete."
        sys.stderr.write('\b' * len(pstring) + pstring)
    elif disp_file == 1:
        print "----------"
    print "\n" + core.getTime() + " Done!"
    print "-----"
    print "Total residues:\t", tot_pos
    print "======================================================================="
Esempio n. 22
0
# A single input file is processed on its own; otherwise every entry of the
# input directory is queued.
if os.path.isfile(ins):
    fileflag = 1
    indir = os.path.dirname(os.path.realpath(ins)) + "/"
    indir, script_outdir = core.getOutdir(indir, "run_raxml", starttime)
    filelist = [ins]
else:
    fileflag = 0
    indir, script_outdir = core.getOutdir(ins, "run_raxml", starttime)
    filelist = os.listdir(indir)

# Sub-directories are the same in both modes.
bestdir = script_outdir + "raxml_best/"
outdir = script_outdir + "raxml_out/"

print(core.getTime() + " | Creating main output directory:\t" + script_outdir)
# Create the directory without going through the shell: the previous
# "mkdir '<path>'" command broke on paths containing a single quote.
if not os.path.isdir(script_outdir):
    os.makedirs(script_outdir)

logfilename = os.path.join(script_outdir, "run_raxml.log")
# Create (or truncate) the log file before the first core.logCheck call.
with open(logfilename, "w"):
    pass

# Log the run banner and the input settings.
core.logCheck(l, logfilename, "=======================================================================")
core.logCheck(l, logfilename, "\t\t\tBuilding trees with RAxML")
core.logCheck(l, logfilename, "\t\t\t" + core.getDateTime())
if fileflag == 1:
    # Report the input file itself rather than its parent directory.
    core.logCheck(l, logfilename, "INPUT    | Making tree from file:\t\t" + ins)
else:
    core.logCheck(l, logfilename, "INPUT    | Making trees from all files in:\t" + indir)
core.logCheck(l, logfilename, "INPUT    | RAxML path set to:\t\t\t" + rax_path)