Beispiel #1
0
def routine(gui, DATA_FILE, ANNOT_FILE):
    """Run the whole analysis pipeline, reporting progress through *gui*.

    The steps are executed in this order: read ``DATA_FILE``, look for
    alternatives, calculate fractions, read ``ANNOT_FILE``, check annotated
    shifts, find shifts, apply ``correct_fdr``, draw the p-value/fold
    scatter plot, run ``check_cds``, compute the length-by-regulation
    groups and finally draw the fold-change/p-value plot.

    Args:
        gui: Object providing ``write_to_output(text)``; it is also handed
            to the helpers that accept a ``gui`` argument.
        DATA_FILE: Forwarded unchanged to ``readTheFile``.
        ANNOT_FILE: Forwarded unchanged to ``readAnnotation``.

    Raises:
        SystemExit: When ``findAlternatives`` returns an empty collection.

    Side effects:
        Creates (or truncates) ``utrs_all_alt.fa`` in the current working
        directory; nothing is written to it while the motif step below is
        disabled.
    """
    # Threshold shared by the high-shift filter and the scatter plot.
    shift_threshold = 1.5

    gui.write_to_output("Reading the file...\n")
    grph = Graphics()
    fromFile = readTheFile(DATA_FILE, gui)
    alternatives = findAlternatives(fromFile)
    gui.write_to_output("Total transcripts: " + str(len(fromFile)) +
                        " | APA: " + str(len(alternatives)) + "\n")

    # Guard clause: nothing to analyse without alternatives.
    if len(alternatives) == 0:
        gui.write_to_output("No alternatives, check the arguments\n")
        raise SystemExit
    gui.write_to_output("Found alternatives...\n")

    fracs = calculateFractions(alternatives)
    # NOTE: a PCA visualisation of the fractions (PCAVisual) used to run
    # here; it is currently disabled.

    gui.write_to_output("Read the annotations file...\n")
    annotations = readAnnotation(ANNOT_FILE, gui)
    gui.write_to_output("Checks the annotations...\n")
    findAnnotatedShifts(fracs, annotations, gui)

    shifts = findShifts(fracs)
    shifts = correct_fdr(shifts)

    # NOTE(review): this selection is only consumed by the disabled heat map
    # (grph.data_to_heat_map(..., filename="str_above1d5.pdf")). It is kept
    # because it is not visible here whether getMaxShift() has side effects.
    high_shift_genes = [
        gene for gene in shifts if gene.getMaxShift() > shift_threshold
    ]

    # sorted() returns a new list, so the result has to be bound; the
    # original call discarded it and the genes were never ordered by name.
    topdf2 = sorted(
        grph.scatter_pval_to_fold(shifts, shift=shift_threshold, gui=gui),
        key=lambda gene: gene.getName(),
    )

    # NOTE(review): unused while the motif step is disabled; kept because the
    # constructor's side effects (if any) are not visible here.
    fm = SimpleMotifsFinder.Family()

    # Preserve the original side effect of creating/truncating the FASTA
    # file, but close the handle right away instead of leaking it.
    with open("utrs_all_alt.fa", 'w'):
        pass

    alt_exon = check_cds(topdf2, annotations, gui)
    long_up, long_down = grph.length_by_regulation(
        [gene for gene in topdf2 if gene not in alt_exon], SAMPLES_PARTS)

    # Disabled motif step: for every gene in ``fracs`` write its sequence to
    # the FASTA file and feed it to ``fm.hash_sequence`` (one thread per
    # gene), then call ``fm.write_motifs()``.
    # It can be executed using multiprocessing or multithreading.
    # These are some times for each approach (run on a set of 37 genes):
    #   Threading: 50 seconds
    #   Processing: 137.8 seconds
    #   Neither: 57.1 seconds

    grph.fold_change_and_pvalue(shifts)
    gui.write_to_output("Writing the output...\n")
    gui.write_to_output("Done, press 'Exit' to close the window.\n")