Example #1
0
def select_by(population, infile, outfile, nval, val):
    """Write the header plus every data row whose ``nval`` column equals ``val``.

    Rows are obtained from ``qlibs.get_rows(infile)`` and split on ``;``.
    Rows with fewer than two fields are ignored. The first remaining row is
    treated as the header, and the column is located in it with
    ``qlibs.find`` (a second attempt is made with double quotes stripped
    from ``nval``).

    Args:
        population: Passed to ``qlibs.trace`` when the column is missing.
        infile: Path of the semicolon-separated input file.
        outfile: Path of the output file; it is overwritten.
        nval: Header name of the column to filter on.
        val: String a cell must equal exactly for its row to be kept.

    Returns:
        None. If the column cannot be located, the failure is reported
        through ``qlibs.trace`` and ``outfile`` is not written.
    """
    print(infile)
    rows = qlibs.get_rows(infile)
    kept = []
    # None means "header not seen yet". The previous magic sentinel (1000)
    # collided with a header located at row index 1000.
    posgs = None
    for row in rows:
        cols = row.split(";")
        if len(cols) <= 1:
            continue
        if posgs is None:
            posgs = qlibs.find(nval, cols)
            if posgs == -1:
                posgs = qlibs.find(nval.replace('"', ''), cols)
                if posgs == -1:
                    qlibs.trace("selectBy", infile, population)
                    return
            kept.append(row)
            continue
        # A row too short to contain the column cannot match; skip it
        # rather than failing with IndexError.
        if posgs < len(cols) and cols[posgs] == val:
            kept.append(row)
    with open(outfile, "w") as fout:
        fout.write("".join(row + "\n" for row in kept))
    return
Example #2
0
def get_genes(population, datafile, xname, yname):
    """Build parallel x/y series from two columns of a ``;``-separated file.

    Rows come from ``qlibs.get_rows(datafile)``; rows with fewer than two
    fields are ignored. Rows are scanned until one contains ``xname`` or
    ``yname`` (per ``qlibs.find``); that row is the header and every later
    row is data.

    For each data row, x is a running group number that increases by one
    each time the x-column value differs from the previous row's, and y is
    the y-column parsed as a float (a decimal comma is accepted). Cells that
    cannot be parsed yield ``0`` and are reported through ``qlibs.trace``.

    Args:
        population: Passed to ``qlibs.trace`` for unparsable cells.
        datafile: Path of the input file.
        xname: Header name of the grouping column.
        yname: Header name of the numeric column.

    Returns:
        ``[x, y]`` where both are lists of equal length.
    """
    rows = qlibs.get_rows(datafile)
    header_found = False
    x = []
    y = []
    xcol = -1
    ycol = -1
    prev = ""
    group_id = 0
    for row in rows:
        cols = row.split(";")
        if len(cols) <= 1:
            continue
        if not header_found:
            xcol = qlibs.find(xname, cols)
            ycol = qlibs.find(yname, cols)
            # NOTE(review): the header is accepted when either name is found;
            # if only one is present the other index stays negative and
            # silently addresses a column from the end. Confirm intent.
            header_found = xcol >= 0 or ycol >= 0
            continue
        if cols[xcol] != prev:
            group_id += 1
            prev = cols[xcol]
        x.append(group_id)
        raw = cols[ycol]
        value = None
        if raw.replace(",", "").replace("-", "").isdecimal():
            # The digit pre-check lets through strings such as "1-2" or
            # "1,2,3" that float() rejects; treat those as unparsable
            # instead of crashing.
            try:
                value = float(raw.replace(",", "."))
            except ValueError:
                value = None
        if value is None:
            y.append(0)
            qlibs.trace("graph_fields" + yname, datafile + "\n" + str(cols), population)
        else:
            y.append(value)
    return [x, y]
Example #3
0
def select_score(population, infile, outfile, nval, rel, threshold):
    """Write the header plus every data row whose ``nval`` column passes a threshold test.

    Rows are obtained from ``qlibs.get_rows(infile)`` and split on ``;``.
    Rows with fewer than two fields are ignored. The first remaining row is
    treated as the header, and the column is located in it with
    ``qlibs.find`` (a second attempt is made with double quotes stripped
    from ``nval``).

    Args:
        population: Passed to ``qlibs.trace`` when the column is missing.
        infile: Path of the semicolon-separated input file.
        outfile: Path of the output file; it is overwritten.
        nval: Header name of the column holding the integer score.
        rel: ``"greater_than"`` (implemented as ``>=``), ``"less_than"``
            (implemented as ``<=``) or ``"equal_to"``. Any other value
            selects no data rows, so only the header is written.
        threshold: Value the integer score is compared against.

    Returns:
        None. If the column cannot be located, the failure is reported
        through ``qlibs.trace`` and ``outfile`` is not written.

    Raises:
        ValueError: If a compared cell is not a valid integer literal.
    """
    print(infile)
    rows = qlibs.get_rows(infile)
    kept = []
    # None means "header not seen yet". The previous magic sentinel (1000)
    # collided with a header located at row index 1000.
    posgs = None
    for row in rows:
        cols = row.split(";")
        if len(cols) <= 1:
            continue
        if posgs is None:
            posgs = qlibs.find(nval, cols)
            if posgs == -1:
                posgs = qlibs.find(nval.replace('"', ''), cols)
                if posgs == -1:
                    qlibs.trace("selectS", infile, population)
                    return
            kept.append(row)
            continue
        if posgs >= len(cols):
            # Row too short to contain the score column; nothing to compare.
            continue
        # The cell is converted only for a recognised relation, so an
        # unknown ``rel`` never raises on non-numeric data.
        if rel == "greater_than":
            keep = int(cols[posgs]) >= threshold
        elif rel == "less_than":
            keep = int(cols[posgs]) <= threshold
        elif rel == "equal_to":
            keep = int(cols[posgs]) == threshold
        else:
            keep = False
        if keep:
            kept.append(row)
    with open(outfile, "w") as fout:
        fout.write("".join(row + "\n" for row in kept))
    return
Example #4
0
def start_file(population, algo):
    """Collect the ``.txt`` paths mentioned in a population's error logs into one file.

    The log directory is ``qlibs.get_logdir() + population + "/"``. Every
    regular file there whose name contains ``<algo lowercased>_error`` and
    ``.log`` is read. Each line containing ``C:/`` or ``./`` and also
    ``.txt`` is collected once (duplicates are detected with ``qlibs.find``),
    in order of first appearance. The collected lines are written, one per
    line, to the file named by ``get_filename(population, algo, 0)``.

    Args:
        population: Population name; selects the log sub-directory.
        algo: Algorithm name; lowercased to match log file names.

    Returns:
        None.
    """
    logdir = qlibs.get_logdir() + population + "/"
    startf = get_filename(population, algo, 0)
    marker = algo.lower() + "_error"
    log_names = [
        name for name in os.listdir(logdir)
        if marker in name
        and ".log" in name
        and os.path.isfile(os.path.join(logdir, name))
    ]

    paths = []
    for log_name in log_names:
        # Context manager closes the handle even if the read fails.
        with open(logdir + log_name, "r") as f:
            buf = f.read()
        for line in buf.split("\n"):
            if ("C:/" in line or "./" in line) and ".txt" in line:
                if qlibs.find(line, paths) < 0:
                    paths.append(line)

    with open(startf, "w") as f:
        f.writelines(line + "\n" for line in paths)

    return
Example #5
0
def txt_equals_csv(population, algo, reverse, count):
    """Check that every split ``.txt`` file has a matching ``.csv`` file.

    For each sub-directory ("set") of ``qlibs.get_datadir() + population``,
    file names are gathered from directories whose name contains
    ``algo + reverse + "Splitted" * count`` (names containing ``.csv``) and
    from directories whose name contains
    ``"Text" + reverse + algo + "Splitted" * count`` (names containing
    ``.txt``). A text file is reported when ``qlibs.find`` cannot locate its
    name, with ``.txt`` replaced by ``.csv``, among the set's csv names.

    Args:
        population: Population name; selects the data sub-directory.
        algo: Algorithm name used in the directory names.
        reverse: Suffix/infix used in the directory names.
        count: Number of times ``"Splitted"`` is repeated in directory names.

    Returns:
        True when no text file lacks a csv counterpart. Otherwise False,
        after writing the paths of the unmatched text files (one per line)
        to ``get_filename(population, algo, count)``.
    """
    splitted = "Splitted" * count
    indir = qlibs.get_datadir() + population
    csv_marker = algo + reverse + splitted
    txt_marker = "Text" + reverse + algo + splitted

    missing = []
    for set_name in os.listdir(indir):
        set_path = indir + "/" + set_name
        if not os.path.isdir(set_path):
            continue
        csvfiles = []
        txtfiles = []
        for sub in os.listdir(set_path):
            sub_path = set_path + "/" + sub + "/"
            # A directory name may contain both markers, so both checks run.
            if csv_marker in sub and os.path.isdir(sub_path):
                # find() > 0 (not >= 0): a name that starts with the
                # extension is deliberately not counted.
                csvfiles.extend(
                    name for name in os.listdir(sub_path)
                    if name.find(".csv") > 0)
            if txt_marker in sub and os.path.isdir(sub_path):
                txtfiles.extend(
                    name for name in os.listdir(sub_path)
                    if name.find(".txt") > 0)
        for txt_name in txtfiles:
            if qlibs.find(txt_name.replace(".txt", ".csv"), csvfiles) < 0:
                missing.append(set_path + "/" + txt_marker + "/" + txt_name + "\n")

    if not missing:
        return True
    outfile = get_filename(population, algo, count)
    with open(outfile, "w") as fout:
        fout.write("".join(missing))
    return False
Example #6
0
def get_scores(population, datafile, nscore):
    """Return the integer values of one column of a ``;``-separated file.

    Rows come from ``qlibs.get_rows(datafile)``; rows with fewer than two
    fields are ignored. Rows are scanned until one contains ``nscore``
    (per ``qlibs.find``, retried with double quotes stripped); that row is
    the header and every later row is data.

    Args:
        population: Passed to ``qlibs.trace`` for rejected cells.
        datafile: Path of the input file.
        nscore: Header name of the score column.

    Returns:
        List of ints, one per data row whose cell consists solely of decimal
        digits. Other cells (including negative numbers) are reported through
        ``qlibs.trace`` and omitted.
    """
    rows = qlibs.get_rows(datafile)
    header_found = False
    result = []
    ncol = -1
    for row in rows:
        cols = row.split(";")
        if len(cols) <= 1:
            continue
        if not header_found:
            ncol = qlibs.find(nscore, cols)
            if ncol < 0:
                ncol = qlibs.find(nscore.replace('"', ''), cols)
            header_found = ncol >= 0
            continue
        cell = cols[ncol]
        # isdecimal() rather than isnumeric(): the latter also accepts
        # characters such as superscripts and vulgar fractions, which int()
        # rejects with ValueError.
        if cell.isdecimal():
            result.append(int(cell))
        else:
            qlibs.trace("graph_freq", datafile + "\n" + str(cols), population)
    return result
Example #7
0
def get_seq(population, argv):
    """Fetch a FASTA file for each ``code;gene`` row of an input file.

    ``params(argv)`` supplies the input file path (index 0), a search
    argument forwarded to the fetchers (index 1) and a flag (index 3) that
    says whether the third column of each row names the output directory.
    Each row with at least two ``;``-separated fields is fetched by trying
    ``get_fasta``, ``get_fasta_gene``, ``get_fasta_alt`` and
    ``get_fasta_gene_alt`` in that order. The first non-empty result is
    written to ``<datadir><gene>__<code>.fasta``; if all are empty the row
    is reported through ``qlibs.trace``.

    Progress is persisted in ``Countfile.log`` under ``qlibs.get_datadir()``
    as ``count<TAB>row index<TAB>input file<TAB>date``. It is used to resume
    the same input file and to stop after 5000 hits per day.

    Args:
        population: Population name; selects the data sub-directory.
        argv: Raw arguments, interpreted by ``params``.

    Returns:
        List of directory names touched: the input file's base name minus
        its last four characters when the flag is False, otherwise the
        distinct third-column values encountered. Returned early, without
        processing, when the daily limit is already reached.
    """
    result = []
    basedir = qlibs.get_datadir() + population + "/"
    countfile = qlibs.get_datadir() + "Countfile.log"

    indata = params(argv)
    finput = indata[0]
    search = indata[1]
    col3rd = indata[3]

    # Explicit comparisons against True/False are kept on purpose: a flag
    # value that equals neither (e.g. None) must select neither branch.
    if col3rd == False:
        basename = os.path.basename(finput)
        stem = basename[:len(basename) - 4]
        result.append(stem)
        datadir = create_dir(basedir + stem, indata)

    with open(finput, "r") as fin:
        rows = fin.read().split("\n")

    # Restore the saved counter. A missing or malformed counter file means
    # "start from scratch"; only those failures are absorbed, so dialog
    # errors and KeyboardInterrupt are no longer swallowed.
    count = 0
    i = 0
    try:
        with open(countfile, "r") as fc:
            saved = fc.read().split("\t")
        saved_count = int(saved[0])
        saved_day = time.strptime(saved[3], "%Y-%m-%d")
        same_input = saved[2] == finput
        saved_index = int(saved[1]) if same_input else 0
    except (OSError, ValueError, IndexError):
        pass
    else:
        today = time.strptime(str(datetime.date.today()), "%Y-%m-%d")
        print(saved_day, today)
        if today > saved_day:
            count = 0
        else:
            count = saved_count
            if count >= 5000:
                tk.messagebox.showinfo("Quad", "Daily hits limit reached!")
                return result
        i = saved_index
        if same_input and i >= len(rows):
            res = tk.messagebox.askyesno(
                "Quad",
                "WARNING:\n\n File already processed \n\n Do you want to force Execution?"
            )
            if res == True:
                i = 0

    fetchers = (get_fasta, get_fasta_gene, get_fasta_alt, get_fasta_gene_alt)

    while i < len(rows):
        cols = rows[i].split(";")
        if len(cols) > 1:
            code = cols[0]
            gene = cols[1]
            if col3rd == True:
                if qlibs.find(cols[2], result) < 0:
                    result.append(cols[2])
                datadir = create_dir(basedir + cols[2], indata)
            count = count + 1
            print(count, gene, code)

            # Try each source in turn and stop at the first non-empty answer.
            # Previously the get_fasta_alt result was discarded, so that
            # fallback could never succeed.
            text = ""
            for fetch in fetchers:
                text = fetch(gene, code, search)
                if text != "":
                    break

            if text == "":
                qlibs.trace("fasta", gene + "\t" + code, population)
            else:
                with open(datadir + gene + "__" + code + ".fasta", "w") as fout:
                    fout.write(text)

            if count % 5000 == 0:
                tk.messagebox.showinfo("Quad", "Daily hits limit reached!")
                # NOTE(review): breaking before the increment saves the index
                # of the row just processed, so it is fetched again on resume.
                break
            time.sleep(5)
        i = i + 1

    with open(countfile, "w") as fc:
        fc.write(
            str(count) + "\t" + str(i) + "\t" + finput + "\t" +
            str(datetime.date.today()))
    return result
Example #8
0
def intersect(population, algo):
    """Report the elements shared by two or more sets of a population.

    For every sub-directory ("set") of ``qlibs.get_datadir() + population``
    the lexicographically greatest file whose name starts with
    ``dist_<algo>`` and ends in ``.csv`` is selected. Sets without such a
    file are reported through ``qlibs.trace`` and skipped. The elements of
    each selected file come from ``qlibs.get_uniques``.

    Two files are written into the population directory, both stamped with
    today's date (``YYYYMMDD``):

    * ``intersect_genes_<population><date>.csv`` - one line per shared
      element: the element followed by the names of the sets containing it,
      each terminated by ``;``. Contains ``No intersection found`` when
      nothing is shared.
    * ``intersect_dists_<algo>_<population><date>.csv`` - for each shared
      element, every row of the first containing set's file that includes
      the element as a substring, followed by the same set names.

    Args:
        population: Population name; selects the data sub-directory.
        algo: Algorithm name used in the ``dist_`` file prefix.

    Returns:
        None.
    """
    indir = qlibs.get_datadir() + population + "/"
    stamp = str(datetime.date.today()).replace("-", "")
    prefix = "dist_" + algo

    lists = []   # unique elements per set
    sets = []    # set (directory) names, parallel to ``lists``
    dists = []   # raw text of each set's selected file, parallel to ``lists``
    for set_name in os.listdir(indir):
        set_dir = indir + set_name
        if not os.path.isdir(set_dir):
            continue
        lastfile = ""
        for fname in os.listdir(set_dir):
            if not os.path.isfile(set_dir + "/" + fname):
                continue
            # Prefix match, and the first ".csv" must be the trailing
            # extension. Explicit test replaces str.index inside a bare
            # ``except: pass``.
            if fname.startswith(prefix) and fname.find(".csv") == len(fname) - 4:
                if fname > lastfile:
                    lastfile = fname
        if lastfile == "":
            qlibs.trace("intersect", set_name, pop=population)
            continue
        path = set_dir + "/" + lastfile
        lists.append(qlibs.get_uniques(path))
        sets.append(set_name)
        with open(path, "r") as f:
            dists.append(f.read())

    elems = []     # shared elements, in discovery order
    setreps = []   # per element: names of the sets containing it
    setidxs = []   # per element: indexes of those sets
    for i in range(len(sets) - 1):
        for j in range(i + 1, len(lists)):
            for elem in lists[i]:
                if qlibs.find(elem, lists[j]) < 0:
                    continue
                k = qlibs.find(elem, elems)
                if k >= 0:
                    if qlibs.find(sets[j], setreps[k]) < 0:
                        setreps[k].append(sets[j])
                        setidxs[k].append(j)
                else:
                    elems.append(elem)
                    setreps.append([sets[i], sets[j]])
                    setidxs.append([i, j])

    print(sets)
    print(setreps)
    print(setidxs)

    gene_lines = [
        elem + ";" + "".join(name + ";" for name in reps) + "\n"
        for elem, reps in zip(elems, setreps)
    ]
    text = "".join(gene_lines)
    if text == "":
        text = "No intersection found"
    outfile = indir + "intersect_genes_" + population + stamp + ".csv"
    with open(outfile, "w") as fout:
        fout.write(text)

    dist_lines = []
    for elem, reps, idxs in zip(elems, setreps, setidxs):
        j = idxs[0]
        print(j)
        print(elem)
        tail = "".join(name + ";" for name in reps) + "\n"
        for row in dists[j].split("\n"):
            if row.find(elem) >= 0:
                dist_lines.append(row + tail)
    outfile = indir + "intersect_dists_" + algo + "_" + population + stamp + ".csv"
    with open(outfile, "w") as fout:
        fout.write("".join(dist_lines))

    return
Example #9
0
def union(indir, set, dir, infile, population, algo, reverse):
    """Merge the per-split csv results of one input file into a single csv.

    ``split_dir(infile)`` yields the file name (index 0) and two directory
    components (indexes 1 and 2). Output directories are created if missing.
    If the final csv already exists nothing else is done.

    Otherwise the split csv files (in directories whose name contains
    ``algo + reverse + "Splitted"``) and split text files (in directories
    whose name contains ``"Text" + reverse + algo + "Splitted"``) whose names
    contain the file name are listed. If any split csv lacks a text
    counterpart (per ``qlibs.find``), each gap is reported through
    ``qlibs.trace`` and nothing is written. If all are present, the first
    field of every data row is shifted by the position of that split's text
    within the full text, the rows are concatenated under the first split's
    header and written to the ``...Union/`` csv, and the result of
    ``qlibs.get_uniquerows`` on that file is written to the final csv.

    Args:
        indir: Base data directory.
        set: Name of the set sub-directory.
        dir: Unused.
        infile: Path passed to ``split_dir``.
        population: Passed to ``qlibs.trace`` and ``qlibs.get_uniquerows``.
        algo: Algorithm name used in directory names.
        reverse: Suffix/infix used in directory names.

    Returns:
        None.

    Raises:
        ValueError: If the first field of a data row is not an integer.
    """
    fgr = split_dir(infile)
    name = fgr[0]

    # NOTE(review): this path joins indir and set without "/" while every
    # other path below inserts one; kept as-is, confirm whether indir is
    # expected to end with a separator.
    csvudir = indir + set + "/" + algo + reverse + "/" + fgr[2] + "/" + fgr[1] + "/"
    os.makedirs(csvudir, exist_ok=True)
    # The file variables were previously left unbound when the name already
    # carried the extension; use the name unchanged in that case.
    csvufile = csvudir + name if ".csv" in name else csvudir + name + ".csv"

    ucsvdir = (indir + "/" + set + "/" + algo + reverse + "Union/" + fgr[2] +
               "/" + fgr[1] + "/")
    os.makedirs(ucsvdir, exist_ok=True)
    ucsvfile = ucsvdir + name if ".csv" in name else ucsvdir + name + ".csv"

    if os.path.exists(csvufile):
        return

    set_dir = indir + "/" + set + "/"
    split_files = []
    split_texts = []
    split_dirs = []
    # os.path.isdir replaces the non-existent os.path.sdir, which raised
    # AttributeError on every run that reached this point.
    if os.path.isdir(set_dir):
        for bufdir in os.listdir(set_dir):
            sub = set_dir + bufdir
            if bufdir.find(algo + reverse + "Splitted") >= 0 and os.path.isdir(sub):
                for fname in os.listdir(sub):
                    if fname.find(name) >= 0 and fname.find(".csv") >= 0:
                        split_files.append(fname)
                        split_dirs.append(sub + "/")
            if bufdir.find("Text" + reverse + algo + "Splitted") >= 0 and os.path.isdir(sub):
                for fname in os.listdir(sub):
                    # find() must be compared with 0: its bare result is
                    # truthy (-1) when ".txt" is absent.
                    if fname.find(name) >= 0 and fname.find(".txt") >= 0:
                        split_texts.append(fname)

    # Every split csv needs its text counterpart; report all gaps.
    complete = True
    for piece_dir, csv_name in zip(split_dirs, split_files):
        txt_name = csv_name.replace(".csv", ".txt")
        if qlibs.find(txt_name, split_texts) < 0:
            complete = False
            qlibs.trace("union", piece_dir + txt_name, population)
    if not complete:
        return

    textdir = indir + "/" + set + "/Text" + reverse + "/" + fgr[2] + "/" + fgr[1] + "/"
    textfile = textdir + name if ".txt" in name else textdir + name + ".txt"
    with open(textfile, "r") as fin:
        text = fin.read()

    merged = []
    header_written = False
    print(split_files)
    for piece_dir, csv_name in zip(split_dirs, split_files):
        textsplit = csv_name.replace(".csv", ".txt")
        # NOTE(review): replacing algo with "Text" yields a directory name
        # without algo, which differs from the "Text"+reverse+algo+"Splitted"
        # pattern listed above - confirm the on-disk layout.
        with open(piece_dir.replace(algo, "Text") + textsplit, "r") as fin:
            piece = fin.read()
        # NOTE(review): offset is -1 when the split text is not found in the
        # full text; positions are then shifted by -1 without warning.
        offset = text.find(piece)
        print(csv_name, offset)
        with open(piece_dir + csv_name, "r") as fin:
            rows = fin.read().split("\n")
        if not header_written:
            merged.append(rows[0] + "\n")
            header_written = True
        for row in rows[1:]:
            cols = row.split(";")
            if len(cols) > 1:
                cols[0] = str(int(cols[0]) + offset)
                # Every field, including a trailing empty one, is re-emitted
                # with its own ";" terminator.
                merged.append("".join(col + ";" for col in cols) + "\n")
    merged_csv = "".join(merged)

    print(ucsvfile)
    print(merged_csv)
    with open(ucsvfile, "w") as fout:
        fout.write(merged_csv)

    data = qlibs.get_uniquerows(ucsvfile, population)

    with open(csvufile, "w") as fout:
        fout.write(data)

    return