def select_by(population, infile, outfile, nval, val):
    """Write the header and the rows whose ``nval`` column equals ``val``.

    The first row of ``infile`` that splits into more than one
    ``;``-separated field is taken as the header.  The column is looked up
    with ``qlibs.find`` using ``nval`` and, failing that, ``nval`` with its
    double quotes removed.

    Args:
        population: Passed through to ``qlibs.trace`` when the column is
            not found.
        infile: Path handed to ``qlibs.get_rows``.
        outfile: Path of the file to (over)write with the selection.
        nval: Header name of the column to compare.
        val: Exact string a cell must equal for its row to be kept.

    Returns:
        None.  When the column cannot be located the problem is traced and
        ``outfile`` is left untouched.
    """
    print(infile)
    rows = qlibs.get_rows(infile)
    header_seen = False
    col = -1
    kept = []
    for row in rows:
        cols = row.split(";")
        if len(cols) <= 1:
            # Not a data line (blank or without separators).
            continue
        if not header_seen:
            # A flag instead of the former magic index 1000, which collided
            # with a header that really sits on row 1000.
            header_seen = True
            kept.append(row)
            col = qlibs.find(nval, cols)
            if col == -1:
                col = qlibs.find(nval.replace('"', ''), cols)
            if col == -1:
                qlibs.trace("selectBy", infile, population)
                return
            continue
        # A row shorter than the header has no such cell, so it cannot match.
        if col < len(cols) and cols[col] == val:
            kept.append(row)
    with open(outfile, "w") as fout:
        fout.write("".join(line + "\n" for line in kept))
    return
def get_genes(population, datafile, xname, yname):
    """Build parallel x/y series from two columns of a ``;``-separated file.

    The x value of a row is a running group number that increases by one
    each time the ``xname`` cell differs from the previous row's cell.  The
    y value is the ``yname`` cell parsed as a float, with ``,`` read as the
    decimal separator; cells that are not numeric yield 0 and are traced.

    Args:
        population: Passed through to ``qlibs.trace``.
        datafile: Path handed to ``qlibs.get_rows``.
        xname: Header name of the grouping column.
        yname: Header name of the value column.

    Returns:
        ``[x, y]`` where both lists have one entry per data row.
    """
    rows = qlibs.get_rows(datafile)
    header_found = False
    x = []
    y = []
    xcol = -1
    ycol = -1
    prev = ""
    group_id = 0
    for row in rows:
        cols = row.split(";")
        if len(cols) <= 1:
            continue
        if not header_found:
            xcol = qlibs.find(xname, cols)
            ycol = qlibs.find(yname, cols)
            # NOTE(review): as in the original, a row counts as the header
            # when either name is found; if only one is found the other
            # index stays negative -- confirm this is intended.
            header_found = xcol >= 0 or ycol >= 0
            continue
        if cols[xcol] != prev:
            group_id += 1
            prev = cols[xcol]
        x.append(group_id)
        raw = cols[ycol]
        value = None
        if raw.replace(",", "").replace("-", "").isdecimal():
            try:
                value = float(raw.replace(",", "."))
            except ValueError:
                # E.g. "1-2" or "1,2,3": passes the digit filter above but
                # is not a number; handled like any other bad cell.
                value = None
        if value is None:
            y.append(0)
            qlibs.trace("graph_fields" + yname,
                        datafile + "\n" + str(cols), population)
        else:
            y.append(value)
    return [x, y]
def select_score(population, infile, outfile, nval, rel, threshold):
    """Write the header and the rows whose ``nval`` score passes a test.

    The first row of ``infile`` with more than one ``;``-separated field is
    the header.  Each later row's ``nval`` cell is converted with ``int``
    and compared against ``threshold``.

    Args:
        population: Passed through to ``qlibs.trace``.
        infile: Path handed to ``qlibs.get_rows``.
        outfile: Path of the file to (over)write with the selection.
        nval: Header name of the score column (also tried with its double
            quotes removed).
        rel: ``"greater_than"`` (score >= threshold), ``"less_than"``
            (score <= threshold) or ``"equal_to"`` (score == threshold).
            Both inequalities are inclusive.  Any other value selects no
            data rows.
        threshold: Value the integer score is compared with.

    Returns:
        None.  When the column cannot be located the problem is traced and
        ``outfile`` is left untouched.
    """
    print(infile)
    rows = qlibs.get_rows(infile)
    known_rel = rel in ("greater_than", "less_than", "equal_to")
    header_seen = False
    col = -1
    kept = []
    for row in rows:
        cols = row.split(";")
        if len(cols) <= 1:
            continue
        if not header_seen:
            # A flag instead of the former magic index 1000, which collided
            # with a header that really sits on row 1000.
            header_seen = True
            kept.append(row)
            col = qlibs.find(nval, cols)
            if col == -1:
                col = qlibs.find(nval.replace('"', ''), cols)
            if col == -1:
                qlibs.trace("selectS", infile, population)
                return
            continue
        if not known_rel:
            continue
        try:
            score = int(cols[col])
        except (IndexError, ValueError):
            # One malformed cell must not abort the whole selection.
            qlibs.trace("selectS", infile + "\n" + str(cols), population)
            continue
        if rel == "greater_than":
            keep = score >= threshold
        elif rel == "less_than":
            keep = score <= threshold
        else:
            keep = score == threshold
        if keep:
            kept.append(row)
    with open(outfile, "w") as fout:
        fout.write("".join(line + "\n" for line in kept))
    return
def start_file(population, algo):
    """Collect text-file paths mentioned in the error logs into a start file.

    Scans every regular file in the population's log directory whose name
    contains ``<algo lowercased>_error`` and ``.log``.  Each log line that
    contains ``C:/`` or ``./`` and also ``.txt`` is kept once (duplicates
    are detected with ``qlibs.find``), in order of first appearance.  The
    kept lines are written, one per line, to the file named by
    ``get_filename(population, algo, 0)``.

    Args:
        population: Sub-directory name under ``qlibs.get_logdir()``.
        algo: Algorithm name used to recognise the log files.

    Returns:
        None.
    """
    logdir = qlibs.get_logdir() + population + "/"
    startf = get_filename(population, algo, 0)
    marker = algo.lower() + "_error"

    logfiles = [
        name for name in os.listdir(logdir)
        if os.path.isfile(os.path.join(logdir, name))
        and marker in name
        and ".log" in name
    ]

    found_lines = []
    for logname in logfiles:
        with open(logdir + logname, "r") as f:
            content = f.read()
        for line in content.split("\n"):
            if ("C:/" in line or "./" in line) and ".txt" in line:
                # qlibs.find is kept for the duplicate test because its
                # matching rules are defined outside this function.
                if qlibs.find(line, found_lines) < 0:
                    found_lines.append(line)

    with open(startf, "w") as f:
        for line in found_lines:
            f.write(line + "\n")
    return
def txt_equals_csv(population, algo, reverse, count):
    """Check that every split text file has a matching csv file.

    For each set directory of the population, file names containing
    ``.csv`` are gathered from sub-directories whose name contains
    ``algo + reverse + "Splitted" * count``, and file names containing
    ``.txt`` from sub-directories whose name contains
    ``"Text" + reverse + algo + "Splitted" * count``.  A text file is
    missing its result when its name with ``.txt`` replaced by ``.csv`` is
    not found among the csv names of the same set.

    Args:
        population: Sub-directory name under ``qlibs.get_datadir()``.
        algo: Algorithm name used in the directory names.
        reverse: Suffix/infix used in the directory names.
        count: Number of times ``"Splitted"`` is repeated in the names;
            also passed to ``get_filename`` for the report file.

    Returns:
        True when nothing is missing.  Otherwise the paths of the
        unmatched text files are written, one per line, to
        ``get_filename(population, algo, count)`` and False is returned.
    """
    splitted = "Splitted" * count
    indir = qlibs.get_datadir() + population
    csv_marker = algo + reverse + splitted
    txt_marker = "Text" + reverse + algo + splitted

    missing = []
    for set_name in os.listdir(indir):
        set_dir = indir + "/" + set_name
        if not os.path.isdir(set_dir):
            continue
        csvfiles = []
        txtfiles = []
        for sub in os.listdir(set_dir):
            sub_dir = set_dir + "/" + sub + "/"
            if csv_marker in sub and os.path.isdir(sub_dir):
                # "> 0" (not ">= 0") is kept from the original: a name that
                # starts with the extension is ignored.
                csvfiles.extend(
                    f for f in os.listdir(sub_dir) if f.find(".csv") > 0)
            if txt_marker in sub and os.path.isdir(sub_dir):
                txtfiles.extend(
                    f for f in os.listdir(sub_dir) if f.find(".txt") > 0)
        for txt in txtfiles:
            if qlibs.find(txt.replace(".txt", ".csv"), csvfiles) < 0:
                missing.append(
                    indir + "/" + set_name + "/" + txt_marker + "/" + txt
                    + "\n")

    if not missing:
        return True
    outfile = get_filename(population, algo, count)
    with open(outfile, "w") as fout:
        fout.write("".join(missing))
    return False
def get_scores(population, datafile, nscore):
    """Return the integer values found in the ``nscore`` column.

    Rows come from ``qlibs.get_rows(datafile)`` and are split on ``;``;
    rows with a single field are ignored.  Rows are tested as header
    candidates until ``qlibs.find`` locates ``nscore`` (or ``nscore``
    without double quotes) in one of them.  Every later row contributes
    ``int(cell)`` when the cell is numeric according to ``str.isnumeric``;
    other cells are reported through ``qlibs.trace`` and skipped.

    Args:
        population: Passed through to ``qlibs.trace``.
        datafile: Path handed to ``qlibs.get_rows``.
        nscore: Header name of the score column.

    Returns:
        List of ints, in file order.
    """
    scores = []
    column = -1
    in_data = False
    for line in qlibs.get_rows(datafile):
        fields = line.split(";")
        if len(fields) <= 1:
            continue
        if in_data:
            cell = fields[column]
            if cell.isnumeric():
                scores.append(int(cell))
            else:
                qlibs.trace("graph_freq",
                            datafile + "\n" + str(fields), population)
            continue
        # Still looking for the header row.
        column = qlibs.find(nscore, fields)
        if column < 0:
            column = qlibs.find(nscore.replace('"', ''), fields)
        in_data = column >= 0
    return scores
def get_seq(population, argv):
    """Fetch a FASTA text for every row of the input file and save it.

    The input file (``params(argv)[0]``) holds ``code;gene[;set]`` rows.
    For each row the four lookup functions ``get_fasta``,
    ``get_fasta_gene``, ``get_fasta_alt`` and ``get_fasta_gene_alt`` are
    tried in that order until one returns a non-empty text, which is
    written to ``<datadir><gene>__<code>.fasta``.  Rows for which every
    lookup returns ``""`` are traced.

    A tab-separated counter file (``Countfile.log`` in the data directory:
    hit count, next row index, input path, date) is read at start and
    rewritten at the end.  It is used to stop after 5000 hits per day and
    to resume a partially processed input file.

    Args:
        population: Sub-directory name under ``qlibs.get_datadir()``; also
            passed to ``qlibs.trace``.
        argv: Raw arguments, decoded by ``params``.  Index 0 is the input
            path, index 1 the search argument given to the lookups, and
            index 3 tells whether the third column names the output set.

    Returns:
        List of set names: the input file's base name without its last
        four characters when the third column is not used, otherwise the
        distinct third-column values seen.
    """
    result = []
    basedir = qlibs.get_datadir() + population + "/"
    countfile = qlibs.get_datadir() + "Countfile.log"
    indata = params(argv)
    finput = indata[0]
    search = indata[1]
    col3rd = indata[3]

    if not col3rd:
        basename = os.path.basename(finput)
        stem = basename[:len(basename) - 4]
        result.append(stem)
        datadir = create_dir(basedir + stem, indata)

    with open(finput, "r") as fin:
        rows = fin.read().split("\n")

    # --- restore the hit counter and the resume position -----------------
    count = 0
    i = 0
    saved = None
    try:
        with open(countfile, "r") as fc:
            saved = fc.read().split("\t")
        saved_count = int(saved[0])
        saved_day = time.strptime(saved[3], "%Y-%m-%d")
    except (OSError, ValueError, IndexError):
        # Missing or malformed counter file: start from scratch.
        saved = None

    if saved is not None:
        today = time.strptime(str(datetime.date.today()), "%Y-%m-%d")
        print(saved_day, today)
        if today > saved_day:
            count = 0
        elif saved_count >= 5000:
            tk.messagebox.showinfo("Quad", "Daily hits limit reached!")
            return result
        else:
            count = saved_count
        if saved[2] == finput:
            try:
                i = int(saved[1])
            except ValueError:
                # Same recovery as for any other malformed counter file.
                count = 0
                i = 0
            else:
                if i >= len(rows):
                    res = tk.messagebox.askyesno(
                        "Quad",
                        "WARNING:\n\n File already processed \n\n Do you want to force Execution?"
                    )
                    if res:
                        i = 0

    # --- main loop ---------------------------------------------------------
    lookups = (get_fasta, get_fasta_gene, get_fasta_alt, get_fasta_gene_alt)
    set_dirs = {}
    while i < len(rows):
        cols = rows[i].split(";")
        # Advance first so that the index saved below always names the next
        # unprocessed row, even when the loop stops on the hit limit.
        i += 1
        if len(cols) <= 1:
            continue
        code = cols[0]
        gene = cols[1]
        if col3rd:
            set_name = cols[2]
            if qlibs.find(set_name, result) < 0:
                result.append(set_name)
            # One create_dir call per distinct set, and each row is written
            # to the directory of its own set.
            if set_name not in set_dirs:
                set_dirs[set_name] = create_dir(basedir + set_name, indata)
            datadir = set_dirs[set_name]
        count += 1
        print(count, gene, code)

        text = ""
        for lookup in lookups:
            # Every result is kept: the original discarded the value
            # returned by get_fasta_alt.
            text = lookup(gene, code, search)
            if text != "":
                break

        if text == "":
            qlibs.trace("fasta", gene + "\t" + code, population)
        else:
            with open(datadir + gene + "__" + code + ".fasta", "w") as fout:
                fout.write(text)

        if count % 5000 == 0:
            tk.messagebox.showinfo("Quad", "Daily hits limit reached!")
            break
        time.sleep(5)  # pause between consecutive lookups

    with open(countfile, "w") as fc:
        fc.write(
            str(count) + "\t" + str(i) + "\t" + finput + "\t"
            + str(datetime.date.today()))
    return result
def intersect(population, algo):
    """Report the elements shared by two or more sets of a population.

    For every set directory the lexicographically greatest file named
    ``dist_<algo>....csv`` is selected.  Its unique elements come from
    ``qlibs.get_uniques`` and its raw text is kept.  Sets without such a
    file are traced and left out.  Two reports are written into the
    population directory, both stamped with today's date:

    * ``intersect_genes_<population><date>.csv`` -- one line per shared
      element: the element followed by the names of the sets containing
      it, or the text ``No intersection found``.
    * ``intersect_dists_<algo>_<population><date>.csv`` -- for each shared
      element, every row of the first containing set's dist file that
      contains the element, followed by the set names.

    Args:
        population: Sub-directory name under ``qlibs.get_datadir()``; also
            passed to ``qlibs.trace``.
        algo: Algorithm name used in the dist file names.

    Returns:
        None.
    """
    indir = qlibs.get_datadir() + population + "/"
    stamp = str(datetime.date.today()).replace("-", "")
    genes_out = indir + "intersect_genes_" + population + stamp + ".csv"
    dists_out = (indir + "intersect_dists_" + algo + "_" + population
                 + stamp + ".csv")
    prefix = "dist_" + algo

    lists = []   # unique elements per set
    sets = []    # set names, parallel to ``lists``
    dists = []   # raw dist-file text, parallel to ``lists``
    for set_name in os.listdir(indir):
        set_dir = indir + set_name
        if not os.path.isdir(set_dir):
            continue
        lastfile = ""
        for fname in os.listdir(set_dir):
            if not os.path.isfile(set_dir + "/" + fname):
                continue
            # Same test as the original index() calls, without relying on
            # a swallowed ValueError: the name starts with the prefix and
            # its first ".csv" is the final extension.
            if (fname.startswith(prefix)
                    and fname.find(".csv") == len(fname) - 4
                    and fname > lastfile):
                lastfile = fname
        if lastfile == "":
            qlibs.trace("intersect", set_name, pop=population)
            continue
        dist_path = set_dir + "/" + lastfile
        lists.append(qlibs.get_uniques(dist_path))
        sets.append(set_name)
        with open(dist_path, "r") as f:
            dists.append(f.read())

    elems = []     # shared elements
    setreps = []   # per element: names of the sets containing it
    setidxs = []   # per element: indexes of those sets
    for i in range(len(sets) - 1):
        for j in range(i + 1, len(lists)):
            for elem in lists[i]:
                if qlibs.find(elem, lists[j]) < 0:
                    continue
                k = qlibs.find(elem, elems)
                if k < 0:
                    elems.append(elem)
                    setreps.append([sets[i], sets[j]])
                    setidxs.append([i, j])
                elif qlibs.find(sets[j], setreps[k]) < 0:
                    setreps[k].append(sets[j])
                    setidxs[k].append(j)

    print(sets)
    print(setreps)
    print(setidxs)

    gene_lines = []
    for elem, names in zip(elems, setreps):
        gene_lines.append(
            elem + ";" + "".join(name + ";" for name in names) + "\n")
    text = "".join(gene_lines)
    if text == "":
        text = "No intersection found"
    with open(genes_out, "w") as fout:
        fout.write(text)

    dist_lines = []
    for elem, names, idxs in zip(elems, setreps, setidxs):
        first = idxs[0]
        print(first)
        print(elem)
        suffix = "".join(name + ";" for name in names)
        for row in dists[first].split("\n"):
            if elem in row:
                dist_lines.append(row + suffix + "\n")
    with open(dists_out, "w") as fout:
        fout.write("".join(dist_lines))
    return
def union(indir, set, dir, infile, population, algo, reverse):
    """Merge the csv results of the split parts of one input file.

    ``split_dir(infile)`` yields the base name (index 0) and two directory
    components (indexes 1 and 2).  When the final csv does not exist yet,
    the set directory is scanned for split csv files and split text files
    whose names contain the base name.  If every split csv has its text
    counterpart, each part's position inside the complete text file is
    found with ``str.find`` and added to the first column of that part's
    rows.  The concatenation is written to the ``...Union/`` tree, and the
    output of ``qlibs.get_uniquerows`` on it is written as the final csv.

    Args:
        indir: Base data directory.
        set: Name of the set directory (shadows the builtin; kept because
            it is part of the signature).
        dir: Unused.
        infile: Path decomposed by ``split_dir``.
        population: Passed through to ``qlibs.trace`` and
            ``qlibs.get_uniquerows``.
        algo: Algorithm name used in the directory names.
        reverse: Suffix used in the directory names.

    Returns:
        None.
    """
    fgr = split_dir(infile)
    name = fgr[0]

    # The two trees are spelled exactly as in the original (with and
    # without "/" between ``indir`` and ``set``).
    csvudir = (indir + set + "/" + algo + reverse + "/" + fgr[2] + "/"
               + fgr[1] + "/")
    os.makedirs(csvudir, exist_ok=True)
    ucsvdir = (indir + "/" + set + "/" + algo + reverse + "Union/" + fgr[2]
               + "/" + fgr[1] + "/")
    os.makedirs(ucsvdir, exist_ok=True)

    # The original left these names unbound (NameError later on) when
    # ``name`` already contained the extension; use the name as it is then.
    csv_name = name + ".csv" if name.find(".csv") < 0 else name
    csvufile = csvudir + csv_name
    ucsvfile = ucsvdir + csv_name

    if os.path.exists(csvufile):
        return

    split_files = []
    split_texts = []
    split_dirs = []
    set_dir = indir + "/" + set + "/"
    csv_marker = algo + reverse + "Splitted"
    txt_marker = "Text" + reverse + algo + "Splitted"
    if os.path.isdir(set_dir):  # was os.path.sdir, which does not exist
        for bufdir in os.listdir(set_dir):
            sub = indir + "/" + set + "/" + bufdir
            if csv_marker in bufdir and os.path.isdir(sub):
                for fname in os.listdir(sub):
                    if name in fname and ".csv" in fname:
                        split_files.append(fname)
                        split_dirs.append(sub + "/")
            if txt_marker in bufdir and os.path.isdir(sub):
                for fname in os.listdir(sub):
                    # The original used find(".txt") as a truth value,
                    # which is also true (-1) when ".txt" is absent.
                    if name in fname and ".txt" in fname:
                        split_texts.append(fname)

    founds = True
    for part_dir, part_csv in zip(split_dirs, split_files):
        part_txt = part_csv.replace(".csv", ".txt")
        if qlibs.find(part_txt, split_texts) < 0:
            founds = False
            qlibs.trace("union", part_dir + part_txt, population)
    if not founds:
        return

    textdir = (indir + "/" + set + "/Text" + reverse + "/" + fgr[2] + "/"
               + fgr[1] + "/")
    textfile = textdir + (name + ".txt" if name.find(".txt") < 0 else name)
    with open(textfile, "r") as fin:
        text = fin.read()

    merged = []
    header_written = False
    print(split_files)
    for part_dir, part_csv in zip(split_dirs, split_files):
        part_txt = part_csv.replace(".csv", ".txt")
        # NOTE(review): the text directory is derived by replacing ``algo``
        # with "Text" anywhere in the path, which does not produce the
        # "Text" + reverse + algo + "Splitted" name scanned above --
        # confirm the directory naming.
        with open(part_dir.replace(algo, "Text") + part_txt, "r") as fin:
            part_text = fin.read()
        # Position of this part inside the complete text; -1 if not found.
        offset = text.find(part_text)
        print(part_csv, offset)
        with open(part_dir + part_csv, "r") as fin:
            rows = fin.read().split("\n")
        if not header_written:
            merged.append(rows[0] + "\n")
            header_written = True
        for row in rows[1:]:
            cols = row.split(";")
            if len(cols) > 1:
                cols[0] = str(int(cols[0]) + offset)
                merged.append("".join(col + ";" for col in cols) + "\n")

    merged_csv = "".join(merged)
    print(ucsvfile)
    print(merged_csv)
    with open(ucsvfile, "w") as fout:
        fout.write(merged_csv)
    data = qlibs.get_uniquerows(ucsvfile, population)
    with open(csvufile, "w") as fout:
        fout.write(data)
    return