Beispiel #1
0
 def addREnzyme(self):
     """Toggle the highlighted enzyme in the Restrict-tab selection.

     In debug mode (sh.debug) the global userChoices list is replaced by a
     fixed set of enzyme names.  Otherwise the current item of REnzList is
     added to userChoices, or removed from it when already present.  The
     REnzSelect box is then rewritten from userChoices, and the detect and
     clear buttons are enabled only while at least one enzyme is chosen.
     """
     global userChoices
     sh.log("\nstart addREnzyme")
     sh.click()
     if sh.debug:
         userChoices = [
             "AanI", "BmeDI", "ZraI", "AquIII", "YkrI", "Bau1417V", "XmnI",
             "Ble402II"
         ]
     else:
         picked = self.REnzList.currentItem().text()
         sh.log("choice:" + picked)
         if picked not in userChoices:
             userChoices.append(picked)  # newly chosen enzyme
         else:
             userChoices.remove(picked)  # second click de-selects it
         sh.log("user choices:" + str(userChoices))
     # Redraw the selection box, one enzyme name per line
     self.REnzSelect.clear()
     for name in userChoices:
         self.REnzSelect.insertPlainText("{:s}\n".format(name))
     anyChosen = len(userChoices) > 0
     self.detectPushButton.setEnabled(anyChosen)
     self.clearArrayPushButton.setEnabled(anyChosen)
Beispiel #2
0
 def resetFasta(self):
     """Forget the loaded fasta file and reset everything that depends on it.

     Clears the sequence and result widgets, empties the global enzyme
     selection (userChoices), sets the global fastaRead flag to False and,
     among the buttons handled here, leaves only lookupButton enabled.
     """
     global fastaRead, userChoices
     sh.click()
     # Every dependent widget is cleared exactly once (the original cleared
     # submit_nWindow twice).
     for widget in (
             self.genTextEdit,
             self.submit_nWindow,
             self.transTextEdit,
             self.protTextEdit,
             self.phageLabel,
             self.restrictResults,
             self.numeralResults,
             self.REnzSelect,
             self.posnResults,
             self.picture_results,
             self.nwindow_results,
             self.nwindow_results_2,
             self.target_seq,
     ):
         widget.clear()
     userChoices = []
     # Only a new lookup makes sense until another fasta file is loaded
     self.lookupButton.setEnabled(True)
     for button in (
             self.removeButton,
             self.clearArrayPushButton,
             self.detectPushButton,
             self.nPosPushButton,
             self.find_nWindows,
             self.acceptButton,
     ):
         button.setEnabled(False)
     fastaRead = False
     sh.log("Erased fasta file")
Beispiel #3
0
 def putFasta(self):
     """Load a fasta file and fill the sequence, RNA, protein and GC widgets.

     Reads the file through fd.readFasta(), truncates the nucleotide string
     to a multiple of three, stores it as self.sequence (a Seq), shows its
     transcription and translation, and displays the GC percentage and the
     AT/GC ratio.  Sets the global fastaRead flag to True when done.

     When a ratio has a zero denominator (empty sequence, or no G/C at all)
     the corresponding label shows "n/a" instead of raising
     ZeroDivisionError.
     """
     global fastaRead
     sh.log("\nstart putFasta")
     sh.click()
     self.removeButton.setEnabled(True)
     self.lookupButton.setEnabled(False)
     self.find_nWindows.setEnabled(True)
     fastaName, fasta = fd.readFasta()
     self.phageLabel.setText("{:s}:  {:s}".format(fastaName, fasta.phage))
     # Ensure that the purine length is a multiple of 3.  Truncate if needed.
     purnz = fasta.purines[:3 * (len(fasta.purines) // 3)]
     self.genTextEdit.insertPlainText(purnz)
     self.submit_nWindow.setText(purnz)
     self.rawLabel_2.setText(
         "Use {:s} sequence or paste a new one below".format(fastaName))
     self.sequence = Seq(purnz)
     RNA = self.sequence.transcribe()  # DNA sequence -> RNA sequence
     self.transTextEdit.insertPlainText(str(RNA))
     protein = RNA.translate("Standard",
                             "#")  # nucleotide sequence -> protein sequence
     self.protTextEdit.insertPlainText(str(protein))
     act, gct, cct, tct = (self.sequence.count(x)
                           for x in ('A', 'G', 'C', 'T'))
     gcTotal = gct + cct
     atTotal = act + tct
     allTotal = atTotal + gcTotal
     if allTotal:
         self.gcLabel.setText("{:5.2f}%".format(gcTotal / allTotal * 100))
     else:
         self.gcLabel.setText("n/a")  # no A/C/G/T bases to measure
     if gcTotal:
         self.atgcLabel.setText("{:4.2f}".format(atTotal / gcTotal))
     else:
         self.atgcLabel.setText("n/a")  # ratio undefined without G or C
     fastaRead = True
Beispiel #4
0
def selectedFile():
    """Record the .fasta file picked in the `son` dialog, then close it.

    The text of the current item of son.sonAList is stored in the global
    `choice` and written to the log.
    """
    global son, choice
    sh.click()
    pickedItem = son.sonAList.currentItem()
    choice = pickedItem.text()
    message = "son choice: {:s}".format(choice)
    sh.log(message)
    son.close()  # the dialog has done its job
Beispiel #5
0
 def clearArray(self):
     """Empty the global enzyme selection and disable the dependent buttons.

     Resets userChoices to an empty list, clears the REnzSelect box, and
     disables both the clear and the detect buttons.
     """
     global userChoices
     sh.click()
     userChoices = []
     self.REnzSelect.clear()
     sh.log("Clear choices")
     # The selection is empty by construction, so both buttons are simply
     # switched off (the original also evaluated len(userChoices) > 0 here,
     # which is always False at this point).
     self.clearArrayPushButton.setEnabled(False)
     self.detectPushButton.setEnabled(False)
Beispiel #6
0
 def addFile(self):
     """Toggle the highlighted data file in the list of chosen .fasta files.

     The current item of dataList is appended to self.fastaChoices, or
     removed when it is already there.  fastaSelects is rewritten with one
     file name per line and acceptButton is enabled only while the list is
     non-empty.
     """
     sh.click()
     sh.log("\nstart addFile")
     picked = self.dataList.currentItem().text()
     sh.log("choice: " + picked)
     chosen = self.fastaChoices
     if picked not in chosen:
         chosen.append(picked)  # newly chosen file
     else:
         chosen.remove(picked)  # second click de-selects it
     sh.log(".fasta choices: " + str(chosen))
     self.fastaSelects.clear()
     for name in chosen:
         self.fastaSelects.insertPlainText("{:s}\n".format(name))
     self.acceptButton.setEnabled(bool(chosen))
Beispiel #7
0
 def acceptFastas(self):
     """Summarise the chosen fasta files and concatenate them into one file.

     For every name in self.fastaChoices the file data/<name> is loaded with
     fd.fasta (reset=True only for the first file), a short summary is
     written to showSelects, and a header line plus the file's data are
     appended to data/all.fasta.  Any previous data/all.fasta is deleted
     first.
     """
     sh.log("\nstart acceptFastas")
     sh.click()
     self.acceptButton.setEnabled(False)
     savFile = "data/all.fasta"
     if Path(savFile).is_file():
         Path(savFile).unlink()  # delete old all.fasta
     self.showSelects.clear()
     # The context manager closes all.fasta even when reading or formatting
     # one of the chosen files raises.
     with open(savFile, "a") as sav:  # open for append
         for position, choice in enumerate(self.fastaChoices):
             # reset is True for the first file only (first file ID = alfa)
             fast = fd.fasta("data/" + choice, reset=(position == 0))
             self.showSelects.insertPlainText("{:s}: {:s} -> {:s}\n".format(
                 fast.ID, choice, fast.phage))
             self.showSelects.insertPlainText(
                 "length = {:,d} > {:s} ... {:s}\n\n".format(
                     len(fast.purines), fast.purines[:20],
                     fast.purines[-20:]))
             print("> {:s} {:s}".format(fast.ID, fast.phage), file=sav)
             print(fast.data, file=sav)  # copy choice to all.fasta
     self.showSelects.insertPlainText(
         "The concatenation of these choices saved as {:s}".format(savFile))
Beispiel #8
0
def exitApp():
    """Close the log and terminate the application with exit status 0."""
    sh.click()
    sh.logClose()
    # Same effect as sys.exit(0), which raises SystemExit(0)
    raise SystemExit(0)
Beispiel #9
0
    def markov(self):
        """Report and plot Markov transition probabilities of self.sequence.

        Markov Model Algorithm gathered from Drexel University
        https://faculty.coe.drexel.edu/gailr/ECE-S690-503/markov_models.ppt.pdf
        Equation used: aBA = Pr(xi=B | xi-1=A)

        Over the alphabet 'ATGC' this computes
          * monograms: count(x) / len(sequence)
          * bigrams:   count(xy)  / count(x having a successor)
          * trigrams:  count(xyz) / count(xy having a successor)
        All n-gram counts include overlapping occurrences (for example "AAA"
        contains two "AA" transitions); a zero denominator yields a
        probability of 0.0 instead of raising ZeroDivisionError.

        The text report replaces the content of posnResults.  A 2x2 bar
        chart is saved to pictures/Results.png and shown in picture_results.
        """
        # Function-scope import: the file's import block is not touched.
        from collections import Counter

        sh.log("\nstart markov")
        sh.click()
        self.nPosPushButton.setEnabled(
            False)  # Need new fasta to run this again
        seq = 'ATGC'
        single = list(seq)
        double = [x + y for x in seq for y in seq]
        triple = [x + y + z for x in seq for y in seq for z in seq]

        text = str(self.sequence)
        size = len(text)

        def tally(width, followed=False):
            # Count every width-mer of the sequence, overlaps included.  With
            # followed=True only occurrences that still have a successor
            # base are counted (the valid "from" states of a transition).
            starts = size - width + (0 if followed else 1)
            return Counter(text[i:i + width] for i in range(max(starts, 0)))

        def ratio(hits, outOf):
            # Relative frequency; an unseen context gives probability 0.0.
            return hits / outOf if outOf else 0.0

        monograms = {mono: ratio(text.count(mono), size) for mono in single}

        # ---- Bigrams: p(y | x) -------------------------------------------
        fromSingles = tally(1, followed=True)
        pairCounts = tally(2)
        bi_monograms = {item: fromSingles[item] for item in single}
        bigrams = {
            item: ratio(pairCounts[item], bi_monograms[item[0]])
            for item in double
        }
        sh.log("monograms: " + str(monograms))
        sh.log("bi monograms " + str(bi_monograms))
        sh.log("bigrams " + str(bigrams))
        sh.log("double = " + str(double))

        # ---- Trigrams: p(z | xy) -----------------------------------------
        shortened = text[:-2]
        fromPairs = tally(2, followed=True)
        tripleCounts = tally(3)
        tri_monograms = {item: shortened.count(item) for item in single}
        tri_bigrams = {item: fromPairs[item] for item in double}
        trigrams = {
            item: ratio(tripleCounts[item], tri_bigrams[item[:-1]])
            for item in triple
        }
        sh.log("tri_monograms " + str(tri_monograms))
        sh.log("tri_bigrams   " + str(tri_bigrams))
        sh.log("trigrams      " + str(trigrams))

        # Generate report on the probabilities
        sep = '-----------------------------------------'
        rpt = "{:s}\nMONOGRAM PROBABILITIES\n\n".format(sep)
        for mon in seq:
            rpt += "{:s}: {:11.9f} \n".format(mon, monograms[mon])
        rpt += "\n\nTotal = {:3.1f}\n{:s}\nBIGRAM PROBABILITIES\n\n".format(
            sum(monograms.values()), sep)
        for duo in double:
            rpt += "{:2s}: {:11.9f} \n".format(duo, bigrams[duo])
            if duo[1] == "C":
                rpt += "\n"  # 'C' is the last base of each group
        rpt += "\nTotal = {:3.1f}\n{:s}\nTRIGRAM PROBABILITIES\n\n".format(
            sum(bigrams.values()), sep)
        spc = 4
        for tre in triple:
            rpt += "{:s}: {:11.9f} \n".format(tre, trigrams[tre])
            if tre[2] == "C":
                rpt += "\n"
                spc -= 1
                if spc <= 0:
                    rpt += "\n"  # blank line between groups of 4
                    spc = 4
        rpt += "Total = {:3.1f}\n{:s}\n".format(sum(trigrams.values()), sep)

        self.posnResults.clear()  # Ensure we print to a blank window
        self.posnResults.insertPlainText(rpt)

        # ---------------------------------------------------------
        # Create a Bar Graph of all transition probabilities.
        # prob starts as one pair of empty lists per base; each pair is
        # later used as (bar labels, bar heights).
        # NOTE(review): getStates is defined elsewhere; presumably it
        # distributes the n-gram probabilities into these lists - confirm.
        # ---------------------------------------------------------
        prob = {xx: [[], []] for xx in seq}

        getStates(prob, monograms)
        getStates(prob, bigrams)
        getStates(prob, trigrams)
        for key, value in prob.items():
            sh.log("prob[{:s}]: {:s}".format(key, str(value[0])))
            sh.log("         {:s}".format(str(value[1])))

        fig, a = plt.subplots(2, 2)
        fig.set_size_inches(10, 8)
        panels = ((a[0][0], "A"), (a[0][1], "T"), (a[1][0], "C"),
                  (a[1][1], "G"))
        for axis, base in panels:
            axis.bar(prob[base][0], prob[base][1])
            axis.set_title('p({:s}) Transition States'.format(base),
                           fontsize=14)
            plt.setp(axis.xaxis.get_majorticklabels(), rotation=90)
            axis.set_ylim(0, 1)
        plt.tight_layout(pad=1.5)
        if not Path('pictures').exists():
            Path('pictures').mkdir()  # create the directory if it is missing
        plt.savefig('pictures/Results.png', dpi=100)
        plt.close(fig)  # release the figure; otherwise one leaks per run
        self.picture_results.setPixmap(QPixmap('pictures/Results.png'))
Beispiel #10
0
 def run_p(self):
     """Run the restriction analysis and show cut sites and hit counts.

     Requires a loaded fasta file (global fastaRead) and a non-empty global
     userChoices; otherwise a hint is shown in restrictResults and nothing
     else happens.  The formatted analysis output goes to restrictResults,
     and the number of matching sites per chosen enzyme is written to
     numeralResults.  Afterwards the detect button is disabled and the
     nPos button enabled.

     Failures are logged through sh.log.  If the Analysis object cannot be
     built the method reports that in restrictResults and returns early.
     The global userChoices list is never modified here.
     """
     global userChoices, enzymes, fastaRead
     sh.log("\nstart run_p")
     sh.click()
     self.restrictResults.clear()
     self.numeralResults.clear()
     if not fastaRead:
         self.restrictResults.setPlainText(
             "You must select a fasta file first")
         return
     if not userChoices:
         self.restrictResults.setPlainText(
             "You must select R.Enzymes first")
         return
     self.detectPushButton.setEnabled(False)  # can't run twice
     try:
         linear = self.linearCheckBox.isChecked()
         analysis = Analysis(userChoices, self.sequence, linear=linear)
     except Exception as err:
         # Without an analysis object nothing below can work.
         sh.log("analysis failed " + repr(err))
         self.restrictResults.setPlainText("Restriction analysis failed")
         return
     # print each enzyme with a list of its matching sites
     cutSites = str(
         analysis.format_output(
             dct=None,
             title='',
             s1='\n  Enzymes which do not cut the sequence\n'))
     self.restrictResults.setPlainText(cutSites)
     # ------------------------------- FIND PALINDROME HIT COUNTS -----------------------------------------------
     endMarker = "END"
     palin = None
     try:
         # NOTE(review): this appends the marker to the global `enzymes` on
         # every run; kept as in the original - confirm it is intended.
         enzymes.append(endMarker)
         # Extract enzymes and the index of their cutSites from cutSites:
         # the text before "Enzymes" becomes a flat token list of names
         # and positions, terminated by endMarker.
         palin = cutSites[:cutSites.find("Enzymes")].replace(
             '.', "").replace(':', "").split()
         palin.append(endMarker)
         sh.log("palin: " + str(palin))
     except Exception as err:
         sh.log("palin NG " + repr(err))
     if palin is not None:
         try:
             # Calculate and display the number of matching sites for each
             # enzyme.  enzPosn initially holds [name, index in palin];
             # the index is later replaced by the number of matches.
             enzPosn = []
             enzNone = []
             sh.log("len palin " + str(len(palin)))
             sh.log("user choices " + str(userChoices))
             # Work on a copy so the marker never leaks into the user's
             # selection.
             allChoices = userChoices + [endMarker]
             sh.log("allChoices " + str(allChoices))
             for enz in allChoices:
                 if enz in palin:
                     enzPosn.append([enz, palin.index(enz)])
                 else:
                     sh.log(enz + " not in palin")
                     enzNone.append(enz)
             sh.log("enzPosn = " + str(enzPosn))
             enzPosn.sort(key=lambda x: x[1])  # order of names in palin
             # The tokens between two consecutive names are the positions
             # of the first one, so their number is the match count.
             for here, after in zip(enzPosn, enzPosn[1:]):
                 here[1] = after[1] - here[1] - 1
             del enzPosn[-1]  # delete endMarker
             # add in enzymes not found; count = 0
             enzPosn.extend([enz, 0] for enz in enzNone)
             enzPosn.sort(key=lambda x: x[0])  # sort on name
             sh.log("enzPosn = " + str(enzPosn))
             for name, hits in enzPosn:
                 self.numeralResults.insertPlainText(
                     "{0:7,d} : {1:s}\n\n".format(hits, name))
         except Exception as err:
             sh.log('I cannot do that. ' + repr(err))
     self.detectPushButton.setEnabled(False)
     self.nPosPushButton.setEnabled(True)
Beispiel #11
0
    def find_windows(self):
        """Cut the submitted sequence into fixed-length windows and find a motif.

        Steps:
          1. Take the text of submit_nWindow in the reading frame chosen in
             frame_select.  Frames 2/3 drop one/two leading characters; the
             negative frames reverse the text (starting at the last,
             second-to-last or third-to-last character).
          2. Convert it with Biopython according to type_nwindow:
             'basic' -> complement(), 'transcribe' -> transcribe(),
             'translate' -> translate().  The result is stored in the
             global nwindow.
          3. Split the result into consecutive windows of nWindow_length
             characters and list in nwindow_results_2 every distinct window
             containing the upper-cased target_seq text (the "MTT"), in
             order of first appearance.
          4. Show all full-length windows, separated by blanks, in
             nwindow_results.  This happens only when the MTT is not longer
             than the window length.

        A non-positive window length or an unknown frame/type selection is
        reported in nwindow_results_2 and aborts the run.
        """
        global nwindow
        sh.click()
        sh.log("\nStart find_windows")
        length = int(self.nWindow_length.value())
        if length <= 0:
            # range() below would raise ValueError for a step of 0
            self.nwindow_results_2.clear()
            self.nwindow_results_2.insertPlainText(
                "Window length must be > 0")
            return

        # First, identify user's FRAME reference.
        frames = {
            'Frame 1': slice(None),
            'Frame 2': slice(1, None),
            'Frame 3': slice(2, None),
            'Frame -1': slice(None, None, -1),
            'Frame -2': slice(-2, None, -1),
            'Frame -3': slice(-3, None, -1),
        }
        # Second, use BIOPYTHON to translate the initial text using the
        # central dogma
        conversions = {
            'basic': Seq.complement,
            'transcribe': Seq.transcribe,
            'translate': Seq.translate,
        }
        frame = self.frame_select.currentText()
        kind = self.type_nwindow.currentText()
        if frame not in frames or kind not in conversions:
            sh.log("find_windows: unknown selection {!r} / {!r}".format(
                frame, kind))
            self.nwindow_results_2.clear()
            self.nwindow_results_2.insertPlainText(
                "Unknown frame or window type")
            return
        temp_hold = self.submit_nWindow.text()[frames[frame]]
        nwindow = conversions[kind](Seq(temp_hold))
        converted = str(nwindow)
        window_out = [
            converted[i:i + length] for i in range(0, len(converted), length)
        ]
        sh.log("window_out ({:n}): {:s}".format(len(window_out),
                                                str(window_out)[:80]))

        # Third, identify the USER requested MOTIF target and display it.
        # Check that the user MTT entry is valid
        self.nwindow_results_2.clear()
        tgtMTT = self.target_seq.text().upper()  # ensure MTT is all capitals
        if len(tgtMTT) > length:
            self.nwindow_results_2.insertPlainText(
                "MTT length must be <= {:n}".format(length))
            return
        if not tgtMTT:
            self.nwindow_results_2.insertPlainText("No MTT selected")
        else:
            # Distinct matching windows, in order of first appearance
            seen = set()
            matches = []
            for window in window_out:
                if tgtMTT in window and window not in seen:
                    seen.add(window)
                    matches.append(window)
            if not matches:
                self.nwindow_results_2.insertPlainText(
                    'No matches found for {:s} ({:s})'.format(
                        tgtMTT, kind.upper()))
            else:
                self.nwindow_results_2.insertPlainText(
                    'Target Spotted! - Displaying your {:s} ({:s}) report below\n\n'
                    .format(tgtMTT, kind.upper()))
                for window in matches:
                    self.nwindow_results_2.insertPlainText(window + '\n')
        # Only a trailing window can be shorter than `length`; build a new
        # list instead of removing items while iterating.
        fullWindows = [w for w in window_out if len(w) >= length]
        self.nwindow_results.setPlainText(" ".join(fullWindows))