def __init__(self):
        """Load the chromosome-selection dialog and prepare its widgets.

        Loads ``cspr_chromesome_selection.ui``, wires the cancel / submit /
        select buttons to their handlers, configures both single-column
        tables for non-editable, row-wise multi-selection, and resets the
        state attributes used by the rest of the class.
        """
        # qt stuff
        super(cspr_chromesome_selection, self).__init__()
        uic.loadUi("cspr_chromesome_selection.ui", self)
        self.setWindowTitle("Choose which chromesomes to pull data from!")
        self.setWindowIcon(Qt.QIcon("cas9image.png"))

        # button connections
        for button, handler in (
                (self.cancel_button, self.cancel_function),
                (self.submit_button, self.submit_function),
                (self.select_button, self.load_chrom_names),
        ):
            button.clicked.connect(handler)

        # both tables share the same configuration apart from the header text
        item_view = QtWidgets.QAbstractItemView
        for table, header_spec in (
                (self.chromesome_table, "Chromesome;"),
                (self.cspr_files_available_table, "Files;"),
        ):
            table.setColumnCount(1)
            table.setShowGrid(True)
            table.setHorizontalHeaderLabels(header_spec.split(";"))
            table.setSelectionBehavior(item_view.SelectRows)
            table.setEditTriggers(item_view.NoEditTriggers)
            table.setSelectionMode(item_view.MultiSelection)

        # variables
        self.cspr_file = ''
        self.gene = ''
        self.misc = ''
        self.orgName = ''
        self.avail_cspr = dict()
        # the parser starts out bound to the (still empty) file name
        self.myParser = CSPRparser(self.cspr_file)
Beispiel #2
0
    def __init__(self):
        """Load the "Generate Library" prompt and reset its state.

        Loads ``library_prompt.ui``, connects the cancel / browse / submit
        buttons, zeroes the progress bar, initialises the attributes used
        while generating a library, and fills the two numeric combo boxes.
        """
        # qt stuff
        super(genLibrary, self).__init__()
        uic.loadUi('library_prompt.ui', self)
        self.setWindowTitle('Generate Library')
        self.setWindowIcon(Qt.QIcon('cas9image.png'))

        # button connections
        for button, handler in (
                (self.cancel_button, self.cancel_function),
                (self.BrowseButton, self.browse_function),
                (self.submit_button, self.submit_data),
        ):
            button.clicked.connect(handler)
        self.progressBar.setValue(0)

        # helper objects
        self.parser = CSPRparser('')
        self.S = SeqTranslate()

        # data containers
        self.anno_data = {}
        self.gen_lib_dict = {}
        self.cspr_data = {}
        self.Output = {}

        # scalar settings
        self.cspr_file = ''
        self.kegg_nonKegg = ''
        self.off_tol = 0.05
        self.off_max_misMatch = 4
        self.off_target_running = False

        # the num-genes combo box offers 1 through 10
        for gene_count in range(1, 11):
            self.numGenescomboBox.addItem(str(gene_count))

        # the minOn combo box offers 20 through 70
        for min_on in range(20, 71):
            self.minON_comboBox.addItem(str(min_on))
Beispiel #3
0
    def __init__(self, parent=None):
        """Build the multitargeting window from its .ui file and reset state.

        ``parent`` is accepted for signature compatibility but is not
        forwarded to the Qt base-class constructor.
        """
        super(Multitargeting, self).__init__()
        uic.loadUi('multitargetingwindow.ui', self)
        self.setWindowIcon(QtGui.QIcon("cas9image.png"))

        # helper objects: sequence translator and .cspr parser
        self.sq = SeqTranslate()
        self.parser = CSPRparser("")

        # the three chart views
        self.chart_view_chro_bar = QChartView()
        self.chart_view_repeat_bar = QChartView()
        self.chart_view_repeat_line = QChartView()

        # organism / endonuclease lookups and chromosome lengths
        self.data = ""
        self.shortHand = ""
        self.chromo_length = []

        # statistics storage
        self.max_repeats = 1
        self.average = self.median = self.mode = 0
        self.average_unique = self.average_rep = 0
        self.bar_coords = []
        self.seed_id_seq_pair = {}
        self.positions = []

        # flags that let the signal handlers skip programmatic combo updates
        self.ready_chromo_min_max = True
        self.ready_chromo_make_graph = True
        self.directory = 'Cspr files'
        self.info_path = os.getcwd()

        # changing either repeat bound refills the seed dropdown
        for bound_combo in (self.max_chromo, self.min_chromo):
            bound_combo.currentIndexChanged.connect(self.fill_seed_id_chrom)
        self.chromo_seed.currentIndexChanged.connect(self.chro_bar_data)
        self.Analyze_Button.clicked.connect(self.make_graphs)

        # navigation back to the main window (button and tool bar entry)
        self.back_button.clicked.connect(self.go_back)
        self.actionCASPER.triggered.connect(self.changeto_main)

        # graphics scenes; mouse events on the first view are filtered by
        # this window (see eventFilter)
        self.scene = QtWidgets.QGraphicsScene()
        self.graphicsView.setScene(self.scene)
        self.scene2 = QtWidgets.QGraphicsScene()
        self.graphicsView_2.setScene(self.scene2)
        self.graphicsView.viewport().installEventFilter(self)
Beispiel #4
0
    def __init__(self):
        """Load the "Generate Library" prompt, style it and reset its state.

        The .ui file and icon are resolved relative to
        ``GlobalSettings.appdir``. The four ``Step`` group boxes share one
        style sheet that differs only in the object name it targets.
        """
        # qt stuff
        super(genLibrary, self).__init__()
        uic.loadUi(GlobalSettings.appdir + 'library_prompt.ui', self)
        self.setWindowTitle('Generate Library')
        self.setWindowIcon(Qt.QIcon(GlobalSettings.appdir + 'cas9image.png'))

        groupbox_style = """
        QGroupBox:title{subcontrol-origin: margin;
                        left: 10px;
                        padding: 0 5px 0 5px;}
        QGroupBox#Step1{border: 2px solid rgb(111,181,110);
                        border-radius: 9px;
                        font: 14pt "Arial";
                        font: bold;
                        margin-top: 10px;}"""
        # the template names Step1; retarget it for each group box in turn
        for step_name in ("Step1", "Step2", "Step3", "Step4"):
            step_box = getattr(self, step_name)
            step_box.setStyleSheet(groupbox_style.replace("Step1", step_name))

        # button connections
        for button, handler in (
                (self.cancel_button, self.cancel_function),
                (self.BrowseButton, self.browse_function),
                (self.submit_button, self.submit_data),
        ):
            button.clicked.connect(handler)
        self.progressBar.setValue(0)

        # data containers
        self.anno_data = {}
        self.gen_lib_dict = {}
        self.cspr_data = {}
        self.Output = {}

        # scalar settings
        self.kegg_nonKegg = ''
        self.off_tol = 0.05
        self.off_max_misMatch = 4
        self.off_target_running = False
        self.parser = CSPRparser("")

        # the num-genes combo box offers 1 through 10
        for gene_count in range(1, 11):
            self.numGenescomboBox.addItem(str(gene_count))

        # the minOn combo box offers 20 through 70
        for min_on in range(20, 71):
            self.minON_comboBox.addItem(str(min_on))
Beispiel #5
0
class Multitargeting(QtWidgets.QMainWindow):

    BAD_instances = {}
    sorted_instances = []

    def __init__(self, parent=None):
        """Build the multitargeting window from its .ui file and reset state.

        ``parent`` is accepted for signature compatibility but is not
        forwarded to the Qt base-class constructor.
        """
        super(Multitargeting, self).__init__()
        uic.loadUi('multitargetingwindow.ui', self)
        self.setWindowIcon(QtGui.QIcon("cas9image.png"))

        # helper objects: sequence translator and .cspr parser
        self.sq = SeqTranslate()
        self.parser = CSPRparser("")

        # the three chart views
        self.chart_view_chro_bar = QChartView()
        self.chart_view_repeat_bar = QChartView()
        self.chart_view_repeat_line = QChartView()

        # organism / endonuclease lookups and chromosome lengths
        self.data = ""
        self.shortHand = ""
        self.chromo_length = []

        # statistics storage
        self.max_repeats = 1
        self.average = self.median = self.mode = 0
        self.average_unique = self.average_rep = 0
        self.bar_coords = []
        self.seed_id_seq_pair = {}
        self.positions = []

        # flags that let the signal handlers skip programmatic combo updates
        self.ready_chromo_min_max = True
        self.ready_chromo_make_graph = True
        self.directory = 'Cspr files'
        self.info_path = os.getcwd()

        # changing either repeat bound refills the seed dropdown
        for bound_combo in (self.max_chromo, self.min_chromo):
            bound_combo.currentIndexChanged.connect(self.fill_seed_id_chrom)
        self.chromo_seed.currentIndexChanged.connect(self.chro_bar_data)
        self.Analyze_Button.clicked.connect(self.make_graphs)

        # navigation back to the main window (button and tool bar entry)
        self.back_button.clicked.connect(self.go_back)
        self.actionCASPER.triggered.connect(self.changeto_main)

        # graphics scenes; mouse events on the first view are filtered by
        # this window (see eventFilter)
        self.scene = QtWidgets.QGraphicsScene()
        self.graphicsView.setScene(self.scene)
        self.scene2 = QtWidgets.QGraphicsScene()
        self.graphicsView_2.setScene(self.scene2)
        self.graphicsView.viewport().installEventFilter(self)

    def eventFilter(self, source, event):
        """Show details for the bar markers under the mouse cursor.

        On a mouse-move event coming from the first graphics view's
        viewport, every entry of ``self.bar_coords`` is tested against the
        cursor position (x within one unit of the marker, y inside its
        vertical extent). For a hit, all markers sharing the same
        coordinates are collected and their records are written, one per
        line, into a fresh scene on the second graphics view.

        The return value always comes from ``Qt.QWidget.eventFilter``.
        """
        hovering_view = (event.type() == QtCore.QEvent.MouseMove
                         and source is self.graphicsView.viewport())
        if hovering_view:
            cursor = self.graphicsView.mapToScene(event.pos())
            for marker in self.bar_coords:
                bar_x, bar_top, bar_bottom = marker[1], marker[2], marker[3]
                under_cursor = (
                    cursor.x() in (bar_x, bar_x + 1, bar_x - 1)
                    and bar_top <= cursor.y() <= bar_bottom)
                if not under_cursor:
                    continue

                # every marker drawn at exactly the same place
                stacked = [
                    other for other in self.bar_coords
                    if bar_x == other[1] and bar_top == other[2]
                    and bar_bottom == other[3]
                ]
                self.scene2 = QtWidgets.QGraphicsScene()
                self.graphicsView_2.setScene(self.scene2)

                rows = []
                for other in stacked:
                    idx = other[0]
                    seq = str(self.seq_data[idx])
                    seed_id = self.seed_id_seq_pair[seq]
                    record = self.parser.dec_tup_data[seed_id][idx]
                    rows.append('Location: ' + str(record[0])
                                + ' | Seq: ' + str(record[1])
                                + ' | PAM: ' + str(record[2])
                                + ' | SCR: ' + str(record[3])
                                + ' | DIRA: ' + str(record[4]))
                # one record per line, no trailing newline
                label = self.scene2.addText('\n'.join(rows))
                font = QtGui.QFont()
                font.setBold(True)
                font.setPointSize(9)
                label.setFont(font)

        return Qt.QWidget.eventFilter(self, source, event)

    def launch(self, path):
        """Point the window at ``path``, then load its data and draw the graphs.

        Note that this changes the process-wide working directory to
        ``path`` before scanning it.
        """
        os.chdir(path)
        self.directory = path
        self.get_data()
        self.make_graphs()

    def get_data(self):
        """Scan ``self.directory`` for .cspr files and fill the dropdowns.

        For every file whose name contains ``.cspr`` the first line is read
        and the text from column 8 up to the newline is used as the species
        name. The file name is split on ``_``: the first part becomes the
        species' short name, the second the endonuclease. Results are stored
        in ``self.data`` (species -> endonucleases) and ``self.shortHand``
        (species -> short name); new species are added to the organism
        dropdown and the endonuclease dropdown is refilled for the currently
        selected organism.
        """
        onlyfiles = [
            f for f in os.listdir(self.directory)
            if os.path.isfile(os.path.join(self.directory, f))
        ]
        print(onlyfiles)
        orgsandendos = {}
        shortName = {}
        self.endo_drop.clear()
        for file in onlyfiles:
            if file.find('.cspr') == -1:
                continue
            # Only four characters are cut off, so the '.' of '.cspr' stays
            # on the endonuclease part; it is stripped further down.
            s = file[0:-4].split('_')
            if len(s) < 2:
                # not a "<short name>_<endonuclease>.cspr" file
                continue
            # Open through the scanned directory (not the cwd) and make sure
            # the handle is closed again.
            with open(os.path.join(self.directory, file)) as hold:
                buf = hold.readline()
            species = buf[8:buf.find('\n')]
            endo = str(s[1])
            if species not in shortName:
                shortName[species] = s[0]
            if species in orgsandendos:
                orgsandendos[species].append(endo)
            else:
                orgsandendos[species] = [endo]
                if self.organism_drop.findText(species) == -1:
                    self.organism_drop.addItem(species)
        self.data = orgsandendos
        self.shortHand = shortName
        # An empty dropdown (no .cspr files found) yields no endonucleases
        # instead of a KeyError.
        current = self.data.get(str(self.organism_drop.currentText()), [])
        self.endo_drop.addItems([endo.strip('.') for endo in current])
        # Connect the handler only once, however often the data is reloaded.
        if not getattr(self, '_endo_signal_connected', False):
            self.organism_drop.currentIndexChanged.connect(self.changeEndos)
            self._endo_signal_connected = True

    def changeEndos(self):
        """Refill the endonuclease dropdown for the selected organism.

        Each stored endonuclease name has leading/trailing dots stripped
        before it is printed and added to the dropdown.
        """
        self.endo_drop.clear()
        organism = str(self.organism_drop.currentText())
        endo_names = [raw.strip('.') for raw in self.data[organism]]
        for name in endo_names:
            print(name)
        print(endo_names)
        self.endo_drop.addItems(endo_names)

    def make_graphs(self):
        """Parse the selected .cspr file and refresh every graph and label.

        The file name is built from the selected organism's short name and
        the selected endonuclease. The parser is pointed at that file, its
        repeats, chromosome data and header lines are read, and the plots,
        the min/max dropdowns and the statistics labels are updated.
        """
        self.chromo_length.clear()
        endo = self.endo_drop.currentText()
        file_name = self.shortHand[self.organism_drop.currentText()] + "_" + endo
        # Keep "/" when the directory already uses it; otherwise fall back to
        # the platform separator rather than a hard-coded backslash, which
        # produced an unusable path on POSIX systems.
        separator = "/" if self.directory.find("/") != -1 else os.sep
        file = self.directory + separator + file_name + ".cspr"

        # set up parser, and get the repeats and carry stats
        self.parser.fileName = file
        print(endo)
        self.parser.read_repeats(endo)
        self.parser.read_chromesome(endo)
        self.parser.read_first_lines()
        self.chromo_length = self.parser.karystatsList

        # calculations and setting the windows
        if self.parser.multiCount:
            self.average_rep = self.parser.multiSum / self.parser.multiCount
        else:
            # nothing was counted; avoid a ZeroDivisionError
            self.average_rep = 0
        self.plot_repeats_vs_seeds()
        self.bar_seeds_vs_repeats()
        self.fill_min_max()
        self.nbr_seq.setText(str(len(self.parser.seeds)))
        self.nbr_unq.setText(str(self.parser.uniq_seq_count()))
        self.avg_rep.setText(str(self.average))
        self.med_rep.setText(str(self.median))
        self.mode_rep.setText(str(self.mode))
        self.scr_lbl.setText(str(self.average_rep))

    #fill in chromo bar visualization
    def chro_bar_data(self):
        """Collect per-seed location data and redraw the chromosome views.

        Builds ``{decompressed seed: {repeat[0]: [decompressed repeat[1],
        ...]}}`` from ``self.parser.seeds`` and records which raw seed each
        decompressed sequence came from in ``self.seed_id_seq_pair``. The
        result is handed to ``chro_bar_create`` and ``fill_Chromo_Text``,
        which treat the inner keys as chromosomes. Does nothing while the
        seed dropdown is being refilled (``ready_chromo_make_graph`` unset).
        """
        if not self.ready_chromo_make_graph:
            return

        seqLength = int(self.sq.endo_info[self.endo_drop.currentText()][1])
        dic_info = {}
        for seed in self.parser.seeds:
            seed_seq = str(
                self.sq.decompress64(seed, slength=seqLength, toseq=True))
            self.seed_id_seq_pair[seed_seq] = seed
            by_chromo = dic_info[seed_seq] = {}
            for repeat in self.parser.seeds[seed]:
                by_chromo.setdefault(repeat[0], []).append(
                    self.sq.decompress64(repeat[1]))

        self.chro_bar_create(dic_info)
        self.fill_Chromo_Text(dic_info)

    #fill in chromo bar visualization
    def fill_Chromo_Text(self, info):
        """Draw one labelled bar per chromosome with a red tick per location.

        ``info`` maps seed sequence -> chromosome -> list of positions. For
        the seed currently selected in ``self.chromo_seed`` each position is
        scaled by the chromosome's length to a 0..485 offset. The scene of
        the first graphics view is rebuilt, and ``self.bar_coords`` receives
        one ``[index, x, y1, y2]`` entry per tick.
        """
        seed_text = self.chromo_seed.currentText()
        chromo_pos = {}
        self.seq_data = []
        self.positions.clear()
        for chromo in info[seed_text]:
            offsets = []
            for position in info[seed_text][chromo]:
                self.seq_data.append(seed_text)
                # chromosome keys are 1-based indices into chromo_length
                fraction = position / self.chromo_length[int(chromo) - 1]
                offset = int(fraction * 485)
                self.positions.append(offset)
                offsets.append(offset)
            chromo_pos[chromo] = offsets

        self.scene = QtWidgets.QGraphicsScene()
        self.graphicsView.setScene(self.scene)
        self.bar_coords.clear()  # start from an empty marker list

        pen_blk = QtGui.QPen(QtCore.Qt.black)
        pen_red = QtGui.QPen(QtCore.Qt.red)
        pen_blk.setWidth(3)
        pen_red.setWidth(3)

        marker_index = 0
        for row, chromo in enumerate(chromo_pos):
            # each row is 25 units tall with a 10 unit gap below it
            top = (row * 25) + 10 * row

            label = self.scene.addText(str(chromo))
            font = QtGui.QFont()
            font.setBold(True)
            font.setPointSize(10)
            label.setFont(font)
            label.setPos(0, top)
            self.scene.addRect(40, top, 525, 25, pen_blk)

            for offset in chromo_pos[chromo]:
                x = offset + 40
                y1 = top + 3
                y2 = top + 22
                self.scene.addLine(x, y1, x, y2, pen_red)
                # index value, x, y1 and y2 of the tick, used by eventFilter
                self.bar_coords.append([marker_index, x, y1, y2])
                marker_index += 1

    #creates bar graph num of repeats vs. chromsome
    #this graphs is connected to the repeats_vs_chromo.py file
    #to represent the widget space in the UI file
    def chro_bar_create(self, info):
        """Plot the number of repeats per chromosome for the selected seed.

        ``info`` maps seed sequence -> chromosome -> list of positions; the
        bar height for a chromosome is the length of its list. When more
        than 20 bars are drawn, some tick labels are blanked so they do not
        run together.
        """
        per_chromo = info[self.chromo_seed.currentText()]
        tick_labels = list(per_chromo)
        heights = [len(per_chromo[chromo]) for chromo in tick_labels]
        # largest chromosome number; its digit count sets the label spacing
        largest = max([0] + [int(chromo) for chromo in tick_labels])

        axes = self.repeats_vs_chromo.canvas.axes
        # clear the old graph
        axes.clear()
        # one bar slot per chromosome
        x_pos = list(range(len(tick_labels)))

        # too many labels start running together, so blank out runs of them
        if len(x_pos) > 20:
            run_limit = len(str(largest)) + 2
            run = 0
            for slot in x_pos:
                if slot == 0:
                    run += 1
                elif run < run_limit:
                    tick_labels[slot] = ""
                    run += 1
                else:
                    run = 0

        # plotting / formatting
        axes.bar(x_pos, heights, align='center')
        axes.yaxis.set_major_locator(MaxNLocator(integer=True))
        axes.set_ylim(0, max(heights) + 1)
        axes.set_xticks(x_pos)
        axes.set_xticklabels(tick_labels)
        axes.set_xlabel('Chromosome')
        axes.set_ylabel('Number of Repeats')

        self.repeats_vs_chromo.canvas.draw()

    #plots the sequences per Number Repeats bar graph
    #this graph is connected to the seeds_vs_repeats_bar.py file
    #to represent the wdiget space in the UI file
    def bar_seeds_vs_repeats(self):
        """Plot how many seed sequences have each repeat count.

        Counts the values of ``self.parser.repeats``, stores their rounded
        mean in ``self.average`` (0 when there are no repeats), and draws a
        bar per repeat count, most frequent first. Counts whose frequency is
        at most 1% of the largest frequency are left out. Hovering over a
        bar shows its height as a text annotation.
        """
        repeats = self.parser.repeats
        frequency = {}
        total = 0
        for seed in repeats:
            number = repeats[seed]
            total += int(number)
            frequency[number] = frequency.get(number, 0) + 1
        frequency = self.order_high_low_rep(frequency)
        # An empty repeats table used to raise ZeroDivisionError here.
        self.average = round(total / len(repeats)) if len(repeats) else 0

        bar_heights = []
        bar_labels = []
        tallest = 0
        for number in frequency:
            if frequency[number] > tallest:
                tallest = frequency[number]
            if (frequency[number] / tallest) > .01:
                bar_heights.append(frequency[number])
                bar_labels.append(number)

        canvas = self.seeds_vs_repeats_bar.canvas
        axes = canvas.axes
        # clear graph space
        axes.clear()
        x_pos = list(range(len(bar_labels)))
        # plotting / formatting
        axes.bar(x_pos, bar_heights)
        axes.set_xticks(x_pos)
        axes.set_xticklabels(bar_labels)
        axes.set_xlabel('Number of Repeats')
        axes.set_ylabel('Number of Sequences')
        axes.set_title('Number of Sequences per Number of Repeats')

        # Snapshot the bars so the hover handler always pairs each bar with
        # its own annotation.
        rects = list(axes.patches)
        rect_vals = []
        for rect in rects:
            height = rect.get_height()
            annotation = axes.text(
                rect.get_x() + rect.get_width() / 2,
                height,
                '%d' % int(height),
                ha='center',
                va='bottom')
            annotation.set_visible(False)
            rect_vals.append(annotation)

        def on_plot_hover(event):
            # show the annotation of the bar under the cursor, hide the rest
            for rect, annotation in zip(rects, rect_vals):
                annotation.set_visible(bool(rect.contains(event)[0]))
            canvas.draw()

        # Drop the handler left over from a previous plot; otherwise every
        # call adds another callback that redraws on each mouse move.
        previous_cid = getattr(self, '_seeds_hover_cid', None)
        if previous_cid is not None:
            canvas.mpl_disconnect(previous_cid)
        self._seeds_hover_cid = canvas.mpl_connect('motion_notify_event',
                                                   on_plot_hover)
        # must redraw after every change
        canvas.draw()

    #plots the repeats per ID number graph as line graph
    #this graph is connected to the repeats_vs_seeds_line.py file
    #to represent the widget space in the UI file
    def plot_repeats_vs_seeds(self):
        """Plot repeat counts as a line and record the mode and median.

        The repeat counts from ``self.parser.repeats`` are tallied, walked
        in the order returned by ``self.order`` and expanded into one point
        per seed. ``self.mode`` becomes the most frequent count and
        ``self.median`` the count found at the rounded middle index.
        """
        tally = {}
        for seed in self.parser.repeats:
            number = self.parser.repeats[seed]
            tally[number] = tally.get(number, 0) + 1

        highest_frequency = 0
        xs = []
        ys = []
        position = 0
        for number in self.order(tally):
            occurrences = tally[number]
            if int(occurrences) > highest_frequency:
                highest_frequency = int(occurrences)
                self.mode = number

            # one point per seed that has this repeat count
            for _ in range(occurrences):
                if position == int(round(len(self.parser.repeats) / 2)):
                    self.median = number
                xs.append(position)
                ys.append(number)
                position += 1

        axes = self.repeats_vs_seeds_line.canvas.axes
        # clear axes
        axes.clear()
        # plotting / formatting
        axes.plot(xs, ys)
        axes.set_xlabel('Seed ID Number')
        axes.set_ylabel('Number of Repeats')
        axes.set_title('Number of Repeats per Seed ID Number')
        # always redraw at the end
        self.repeats_vs_seeds_line.canvas.draw()

    #fills min and max dropdown windows
    def fill_min_max(self, run_seed_fill=True):
        """Refill the min and max repeat dropdowns.

        The min dropdown lists 1..``self.max_repeats`` ascending and the max
        dropdown lists the same values descending. While the dropdowns are
        rebuilt ``ready_chromo_min_max`` is cleared so that
        ``fill_seed_id_chrom`` ignores the signals this causes.

        :param run_seed_fill: when true, refill the seed dropdown afterwards.
        """
        self.ready_chromo_min_max = False
        self.max_chromo.clear()
        self.min_chromo.clear()
        for ascending in range(1, self.max_repeats + 1):
            descending = self.max_repeats + 1 - ascending
            self.min_chromo.addItem(str(ascending))
            self.max_chromo.addItem(str(descending))
        self.ready_chromo_min_max = True

        if run_seed_fill:
            self.fill_seed_id_chrom()

    #fill_seed_id_chrom will fill the seed ID dropdown, and create the chromosome graph
    def fill_seed_id_chrom(self):
        """Fill the seed dropdown for the chosen repeat range and plot it.

        Every seed in ``self.parser.repeats`` whose repeat count lies within
        the selected minimum and maximum is decompressed and added to
        ``self.chromo_seed``; then ``chro_bar_data`` draws the chromosome
        graph. If the minimum exceeds the maximum, or no seed matches, the
        user is told, the dropdowns are reset to the full range and the fill
        is retried.

        If nothing matches even with the full range selected, no retry is
        made (it would repeat forever); the seed dropdown stays empty and
        ``ready_chromo_make_graph`` stays unset.
        """
        if not self.ready_chromo_min_max:
            return

        low = int(self.min_chromo.currentText())
        high = int(self.max_chromo.currentText())
        if low > high:
            # fill_min_max clears and refills both dropdowns with the
            # ready flag lowered, so no separate clearing is needed
            self.fill_min_max(False)
            QtWidgets.QMessageBox.question(
                self, "Maximum cant be less than Minimum",
                "The Minimum number of repeats cant be more than the Maximum",
                QtWidgets.QMessageBox.Ok)
            self.fill_seed_id_chrom()
            return

        # keep chro_bar_data quiet while the seed dropdown is rebuilt
        self.ready_chromo_make_graph = False
        self.chromo_seed.clear()
        found = False
        seqLength = int(self.sq.endo_info[self.endo_drop.currentText()][1])
        for seed in self.parser.repeats:
            if low <= self.parser.repeats[seed] <= high:
                found = True
                self.chromo_seed.addItem(
                    str(
                        self.sq.decompress64(seed,
                                             slength=seqLength,
                                             toseq=True)))

        if not found:
            QtWidgets.QMessageBox.question(
                self, "No matches found",
                "No seed that is within the specifications could be found",
                QtWidgets.QMessageBox.Ok)
            # After the reset the dropdowns select 1 and max_repeats. If that
            # was already the selection, retrying cannot find anything new.
            was_full_range = (low == 1 and high == self.max_repeats)
            self.fill_min_max(False)
            if not was_full_range:
                self.fill_seed_id_chrom()
            return

        self.ready_chromo_make_graph = True
        self.chro_bar_data()

    def order(self, data_par):
        """Return the keys of ``data_par`` sorted from highest to lowest.

        As a side effect ``self.max_repeats`` is set to the highest key;
        it is left untouched when ``data_par`` is empty.

        :param data_par: mapping whose keys are mutually comparable.
        :return: list of the keys in descending order.
        """
        # The built-in sort replaces a quadratic selection loop that started
        # its search at 0 and therefore failed on negative keys.
        ordered = sorted(dict(data_par), reverse=True)
        if ordered:
            self.max_repeats = ordered[0]
        return ordered

    def order_high_low_rep(self, dictionary):
        """Return a copy of ``dictionary`` ordered by value, highest first.

        Entries with equal values keep their original relative order. The
        input is not modified.

        :param dictionary: mapping whose values are mutually comparable.
        :return: new dict with the same items in descending value order.
        """
        # A stable sort replaces a quadratic selection loop that started its
        # search at 0 and raised KeyError once only values <= 0 remained.
        items = sorted(dict(dictionary).items(),
                       key=lambda pair: pair[1],
                       reverse=True)
        return dict(items)

    #connects to view->CASPER to switch back to the main CASPER window
    def changeto_main(self):
        """Show the main window stored in GlobalSettings and hide this one."""
        GlobalSettings.mainWindow.show()
        self.hide()

    #connects to go back button in bottom left to switch back to the main CASPER window
    def go_back(self):
        """Show the main window stored in GlobalSettings and hide this one."""
        GlobalSettings.mainWindow.show()
        self.hide()

    #-----------------------NOT USED----------------------------#
    def get_instances(self):
        """Read the REPEATS section of a file into the class-level containers.

        After the ``REPEATS`` marker the file is read two lines at a time:
        a compressed seed key, then a tab-separated list of
        ``chrom,location,sequence`` entries. Decoded entries are stored in
        ``BAD_instances`` (key -> list of tuples), and one
        ``(key, locations, count)`` tuple per key is appended to
        ``sorted_instances``.

        NOTE(review): marked as not used. ``path`` and ``self.file_name``
        are not defined anywhere in the visible code, so this presumably
        fails unless they are provided elsewhere - confirm before reviving.
        """
        ST = SeqTranslate()
        # NOTE(review): `path` is neither a parameter nor a local here.
        os.chdir(path)
        f = open(self.file_name, 'r')
        # skip ahead to the line that starts the repeats section
        # NOTE(review): readline() returns '' at end of file, so this loop
        # never ends if the marker line is missing.
        while True:
            x = f.readline()
            if x == 'REPEATS\n':
                print("reached repeat sequences")
                break
        # each iteration consumes a key line followed by a locations line
        while True:
            t = f.readline()
            if t == 'END_OF_FILE':
                print("reached end of repeat sequences")
                break
            ukey = t[:-1]  # takes away the "\n" in the string
            key = ST.decompress64(ukey, slength=20, toseq=True)
            key = ST.fill_As(key, 16)
            self.BAD_instances[key] = list()
            # Add sequences and locations to the list
            # the final tab-separated field is dropped
            v = f.readline().split('\t')[:-1]
            for item in v:
                loctup = item.split(',')
                chrom = loctup[0]
                location = ST.decompress64(loctup[1])
                # the first character of the sequence field is skipped
                seq = ST.decompress64(loctup[2][1:], slength=20, toseq=True)
                seq = ST.fill_As(
                    seq, 4
                )  # when A's get lost in the compression this fills them back in
                mytup = (chrom, location, seq)
                self.BAD_instances[key].append(mytup)
        f.close()
        print("currently sorting")
        # despite the message nothing is sorted here; return_sorted does that
        for key in self.BAD_instances:
            size = len(self.BAD_instances[key])
            newtuple = (key, self.BAD_instances[key], size
                        )  # sequence, location, size
            self.sorted_instances.append(newtuple)

    #not used
    # Returns the container self.sorted_instances but removes all "single" repeats. Old Code to fix an off-by-1 error
    def return_all_seqs(self):
        """Return the entries of ``self.sorted_instances`` with a count above 1.

        Each entry's third element is taken as its count; "single" repeats
        are dropped and the remaining entries keep their order.
        """
        return [entry for entry in self.sorted_instances if entry[2] > 1]

    #not used
    def return_sorted(self):
        """Print the repeated instances by size, then a tally per size.

        Instances with a count above 1 are printed as
        ``sequence,count,locations`` from largest to smallest count,
        followed by one summary line per distinct count. Returns nothing.
        """
        by_size = sorted(self.sorted_instances,
                         key=operator.itemgetter(2),
                         reverse=True)
        tally = {}
        for entry in by_size:
            size = entry[2]
            if not size > 1:
                continue
            tally[size] = tally.get(size, 0) + 1
            print(",".join([str(entry[0]), str(size), str(entry[1])]))

        for size in tally:
            print("Number of seed sequences with " + str(size) +
                  " appearances: " + str(tally[size]))

    #not used
    def return_positions(self):
        """Group the locations of repeated instances into windows.

        Each window is a list ``[chromosome, start, end, hits, sequences]``.
        A location that lies strictly inside an existing window on the same
        chromosome increments that window's hit count; otherwise a new
        window reaching 1000 either side of the location is opened. The
        windows are printed and returned sorted by hit count, highest first.
        """
        windows = []
        for entry in self.sorted_instances:
            if not entry[2] > 1:
                continue
            for pos in entry[1]:
                chrom = pos[0]
                loc = int(pos[1])
                # a location may fall into several existing windows
                matched = False
                for window in windows:
                    if chrom == window[0] and window[1] < loc < window[2]:
                        window[3] += 1
                        window[4].append(entry[0])
                        matched = True
                        print("position added")
                if not matched:
                    windows.append(
                        [chrom, loc - 1000, loc + 1000, 1, [" ", entry[0]]])

        sorted_positions = sorted(windows,
                                  key=operator.itemgetter(3),
                                  reverse=True)
        for window in sorted_positions:
            print(",".join(str(field) for field in window[:4]))
        for window in sorted_positions:
            # every sequence is followed by a comma, including the last
            print("".join(sequence + "," for sequence in window[4]))
        return sorted_positions

    #not used
    def int_to_char(self, i):
        """Translate a nucleotide code to its letter (0=A, 1=T, 2=C, 3=G).

        Raises KeyError for any value outside 0-3.
        """
        letters = dict(enumerate("ATCG"))
        return letters[i]

    # ----------------------------------------------------------#

    # this function calls the closingWindow class.
    def closeEvent(self, event):
        """Run the main window's close routine, then accept the close event."""
        main_window = GlobalSettings.mainWindow
        main_window.closeFunction()
        event.accept()
    def __init__(self):
        """Load the population-analysis UI, wire its signals and set up its tables.

        NOTE(review): this body is line-for-line the same as
        ``Pop_Analysis.__init__`` defined right after it, and it calls
        ``super(Pop_Analysis, self)`` even though it sits before the
        ``class Pop_Analysis`` header. It looks like a stray copy that ended
        up inside the preceding class - confirm and remove if so.
        """
        super(Pop_Analysis, self).__init__()
        uic.loadUi('populationanalysis.ui', self)
        self.setWindowIcon(QtGui.QIcon("cas9image.png"))

        # button / checkbox signal connections
        self.goBackButton.clicked.connect(self.go_back)
        self.analyze_button.clicked.connect(self.pre_analyze)
        self.clear_Button.clicked.connect(self.clear)
        self.ncbi_search_button.clicked.connect(self.launch_ncbi_seacher)
        # toggling the meta-genomic checkbox rebuilds the organism table
        self.meta_genomic_cspr_checkbox.stateChanged.connect(self.get_data)

        # state used by the analysis methods
        self.parser = CSPRparser("")
        self.Endos = dict()  # combo-box label -> endonuclease info tuple (filled by fillEndo)
        self.fna_files = dict()  # displayed name -> file name (filled by get_data)
        self.cspr_files = {}
        self.sq = Algorithms.SeqTranslate()
        self.ref_para_list = list()  # second value returned by parser.popParser
        self.mode = 0
        self.find_locs_button.clicked.connect(self.find_locations)
        self.clear_loc_button.clicked.connect(self.clear_loc_table)
        self.directory = ""  # empty until launch() is called
        self.names = []  # organism names collected by plot_3D_graph
        self.names_venn = []  # organism names collected by plot_venn
        self.show_names.clicked.connect(self.show_names_func)
        self.show_names2.clicked.connect(self.show_names_func2)
        self.name_form = show_nams_ui.show_names_table()
        self.name_form2 = show_names2_ui.show_names_table2()

        # organism table: one column, whole-row multi-selection, read-only
        self.org_Table.setColumnCount(1)
        self.org_Table.setShowGrid(False)
        self.org_Table.setHorizontalHeaderLabels(["Organism"])
        self.org_Table.horizontalHeader().setSectionsClickable(True)
        self.org_Table.setSelectionBehavior(
            QtWidgets.QAbstractItemView.SelectRows)
        self.org_Table.setEditTriggers(
            QtWidgets.QAbstractItemView.NoEditTriggers)
        self.org_Table.setSelectionMode(
            QtWidgets.QAbstractItemView.MultiSelection)
        self.org_Table.setSizeAdjustPolicy(
            QtWidgets.QAbstractScrollArea.AdjustToContents)

        # top right table: one row per seed, nine columns, header click sorts
        self.table2.setColumnCount(9)
        self.table2.setShowGrid(False)
        self.table2.setHorizontalHeaderLabels([
            "Seed", "% Coverage", "Total Repeats", "Avg. Repeats/Chromosome",
            "Consensus Sequence", "% Consensus", "Score", "PAM", "Strand"
        ])
        self.table2.horizontalHeader().setSectionsClickable(True)
        self.table2.horizontalHeader().sectionClicked.connect(
            self.table2_sorting)
        self.table2.setEditTriggers(QtWidgets.QAbstractItemView.NoEditTriggers)
        self.table2.setSizeAdjustPolicy(
            QtWidgets.QAbstractScrollArea.AdjustToContents)
        self.table2.setSelectionBehavior(QtWidgets.QTableView.SelectRows)
        self.table2.setSelectionMode(
            QtWidgets.QAbstractItemView.MultiSelection)
        self.table2.resizeColumnsToContents()

        # Finder table: filled by find_locations, header click sorts
        self.loc_finder_table.setColumnCount(5)
        self.loc_finder_table.setShowGrid(False)
        self.loc_finder_table.setHorizontalHeaderLabels(
            ["Seed ID", "Sequence", "Organism", "Chromosome", "Location"])
        self.loc_finder_table.horizontalHeader().setSectionsClickable(True)
        self.loc_finder_table.horizontalHeader().sectionClicked.connect(
            self.loc_table_sorter)
        self.loc_finder_table.setEditTriggers(
            QtWidgets.QAbstractItemView.NoEditTriggers)
        self.loc_finder_table.setSizeAdjustPolicy(
            QtWidgets.QAbstractScrollArea.AdjustToContents)
        self.loc_finder_table.setSelectionBehavior(
            QtWidgets.QTableView.SelectRows)
        self.loc_finder_table.setSelectionMode(
            QtWidgets.QAbstractItemView.MultiSelection)
        self.loc_finder_table.resizeColumnsToContents()

        # action buttons
        self.actionMetaGenome_Parser.triggered.connect(
            self.launch_chrom_selector)

        # window launched by pre_analyze with the selected FASTA files
        self.combinerWindow = fna_and_cspr_combiner()

        self.total_org_number = 0

        # one entry per table2 column; multiplied by -1 on each header click
        self.switcher_table2 = [
            1, 1, 1, 1, 1, 1, 1, 1, 1
        ]  # for keeping track of where we are in the sorting clicking for each column
        # same idea for the five loc_finder_table columns
        self.switcher_loc_table = [1, 1, 1, 1, 1]
class Pop_Analysis(QtWidgets.QMainWindow):
    def __init__(self):
        """Load the population-analysis UI, wire its signals and set up its tables.

        The layout comes from ``populationanalysis.ui``. All analysis state
        (parser, file dictionaries, organism name lists, sort toggles) starts
        empty and is filled in by the other methods.
        """
        super(Pop_Analysis, self).__init__()
        uic.loadUi('populationanalysis.ui', self)
        self.setWindowIcon(QtGui.QIcon("cas9image.png"))

        # button / checkbox signal connections
        self.goBackButton.clicked.connect(self.go_back)
        self.analyze_button.clicked.connect(self.pre_analyze)
        self.clear_Button.clicked.connect(self.clear)
        self.ncbi_search_button.clicked.connect(self.launch_ncbi_seacher)
        # toggling the meta-genomic checkbox rebuilds the organism table
        self.meta_genomic_cspr_checkbox.stateChanged.connect(self.get_data)

        # state used by the analysis methods
        self.parser = CSPRparser("")
        self.Endos = dict()  # combo-box label -> endonuclease info tuple (filled by fillEndo)
        self.fna_files = dict()  # displayed name -> file name (filled by get_data)
        self.cspr_files = {}
        self.sq = Algorithms.SeqTranslate()
        self.ref_para_list = list()  # second value returned by parser.popParser
        self.mode = 0
        self.find_locs_button.clicked.connect(self.find_locations)
        self.clear_loc_button.clicked.connect(self.clear_loc_table)
        self.directory = ""  # empty until launch() is called
        self.names = []  # organism names collected by plot_3D_graph
        self.names_venn = []  # organism names collected by plot_venn
        self.show_names.clicked.connect(self.show_names_func)
        self.show_names2.clicked.connect(self.show_names_func2)
        self.name_form = show_nams_ui.show_names_table()
        self.name_form2 = show_names2_ui.show_names_table2()

        # organism table: one column, whole-row multi-selection, read-only
        self.org_Table.setColumnCount(1)
        self.org_Table.setShowGrid(False)
        self.org_Table.setHorizontalHeaderLabels(["Organism"])
        self.org_Table.horizontalHeader().setSectionsClickable(True)
        self.org_Table.setSelectionBehavior(
            QtWidgets.QAbstractItemView.SelectRows)
        self.org_Table.setEditTriggers(
            QtWidgets.QAbstractItemView.NoEditTriggers)
        self.org_Table.setSelectionMode(
            QtWidgets.QAbstractItemView.MultiSelection)
        self.org_Table.setSizeAdjustPolicy(
            QtWidgets.QAbstractScrollArea.AdjustToContents)

        # top right table: one row per seed, nine columns, header click sorts
        self.table2.setColumnCount(9)
        self.table2.setShowGrid(False)
        self.table2.setHorizontalHeaderLabels([
            "Seed", "% Coverage", "Total Repeats", "Avg. Repeats/Chromosome",
            "Consensus Sequence", "% Consensus", "Score", "PAM", "Strand"
        ])
        self.table2.horizontalHeader().setSectionsClickable(True)
        self.table2.horizontalHeader().sectionClicked.connect(
            self.table2_sorting)
        self.table2.setEditTriggers(QtWidgets.QAbstractItemView.NoEditTriggers)
        self.table2.setSizeAdjustPolicy(
            QtWidgets.QAbstractScrollArea.AdjustToContents)
        self.table2.setSelectionBehavior(QtWidgets.QTableView.SelectRows)
        self.table2.setSelectionMode(
            QtWidgets.QAbstractItemView.MultiSelection)
        self.table2.resizeColumnsToContents()

        # Finder table: filled by find_locations, header click sorts
        self.loc_finder_table.setColumnCount(5)
        self.loc_finder_table.setShowGrid(False)
        self.loc_finder_table.setHorizontalHeaderLabels(
            ["Seed ID", "Sequence", "Organism", "Chromosome", "Location"])
        self.loc_finder_table.horizontalHeader().setSectionsClickable(True)
        self.loc_finder_table.horizontalHeader().sectionClicked.connect(
            self.loc_table_sorter)
        self.loc_finder_table.setEditTriggers(
            QtWidgets.QAbstractItemView.NoEditTriggers)
        self.loc_finder_table.setSizeAdjustPolicy(
            QtWidgets.QAbstractScrollArea.AdjustToContents)
        self.loc_finder_table.setSelectionBehavior(
            QtWidgets.QTableView.SelectRows)
        self.loc_finder_table.setSelectionMode(
            QtWidgets.QAbstractItemView.MultiSelection)
        self.loc_finder_table.resizeColumnsToContents()

        # action buttons
        self.actionMetaGenome_Parser.triggered.connect(
            self.launch_chrom_selector)

        # window launched by pre_analyze with the selected FASTA files
        self.combinerWindow = fna_and_cspr_combiner()

        self.total_org_number = 0

        # one entry per table2 column; multiplied by -1 on each header click
        self.switcher_table2 = [
            1, 1, 1, 1, 1, 1, 1, 1, 1
        ]  # for keeping track of where we are in the sorting clicking for each column
        # same idea for the five loc_finder_table columns
        self.switcher_loc_table = [1, 1, 1, 1, 1]

    def launch_ncbi_seacher(self):
        """Reset the NCBI search dialog's progress bar to 0 and show the dialog."""
        search_dialog = GlobalSettings.mainWindow.ncbi_search_dialog
        search_dialog.searchProgressBar.setValue(0)
        search_dialog.show()

    # launches the chromosome selector window
    def launch_chrom_selector(self):
        """Open the main window's CSPR selector, handing it ``self.cspr_files``."""
        selector = GlobalSettings.mainWindow.cspr_selector
        selector.launch(self.cspr_files)

    def launch(self, path):
        """Point the window at ``path`` and rebuild the organism table.

        The process working directory is changed to ``path``, the path is
        remembered in ``self.directory`` and ``get_data`` is run.
        """
        # chdir comes first: if it raises, self.directory is left untouched
        os.chdir(path)
        self.directory = path
        self.get_data()

    # this function clears the loc_finder_table
    def clear_loc_table(self):
        """Empty the location-finder table: drop its items, then its rows."""
        finder = self.loc_finder_table
        finder.clearContents()
        finder.setRowCount(0)

    def find_locations(self):
        """Fill the location-finder table for the seeds selected in table2.

        Every hit stored in ``self.parser.popData`` for each selected seed
        becomes one row: seed, sequence, organism, chromosome, location.
        The chromosome stored in ``popData`` is shifted down by the number of
        ``self.ref_para_list`` entries that belong to the organisms listed
        before the hit's organism, which turns it into a per-organism number.

        Shows a message box and empties the table when nothing is selected.
        """
        selected_items = self.table2.selectedItems()

        # error checking
        if len(selected_items) == 0:
            QtWidgets.QMessageBox.question(
                self, "Error",
                "Please select at least 1 seed to find locations of.",
                QtWidgets.QMessageBox.Ok)
            self.loc_finder_table.setRowCount(0)
            return

        # Count how many ref_para_list entries follow each organism's first
        # appearance (up to the next new organism). A repeated organism name
        # is credited to the most recently opened organism, as before.
        org_position = dict()
        org_chrom_num = list()
        for item in self.ref_para_list:
            if item[0] not in org_position:
                org_position[item[0]] = len(org_chrom_num)
                org_chrom_num.append(1)
            else:
                org_chrom_num[-1] += 1

        # Offset to subtract per organism: total entries of all organisms
        # that come before it (dicts keep insertion order, i.e. index order).
        chrom_offset = dict()
        running_total = 0
        for org_name, position in org_position.items():
            chrom_offset[org_name] = running_total
            running_total += org_chrom_num[position]

        # loop through and get the data
        row = 0
        for selected in selected_items:
            # Only the "Seed" cell (column 0) holds the popData key. Checking
            # the column explicitly avoids relying on the order in which Qt
            # returns the selected cells or on the table having 9 columns.
            if selected.column() != 0:
                continue
            current_seed = selected.text()

            for item in self.parser.popData[current_seed]:
                self.loc_finder_table.setRowCount(row + 1)
                organism = item[0]
                # calculate the true chromosome number
                chromosome = int(item[1]) - chrom_offset[organism]

                cell_values = (
                    current_seed,     # Seed ID
                    item[3],          # Sequence
                    organism,         # Organism
                    int(chromosome),  # Chromosome
                    int(item[2]),     # Location
                )
                for column, value in enumerate(cell_values):
                    cell = QtWidgets.QTableWidgetItem()
                    # EditRole keeps numbers numeric so column sorting works
                    cell.setData(QtCore.Qt.EditRole, value)
                    self.loc_finder_table.setItem(row, column, cell)
                row += 1

        self.loc_finder_table.resizeColumnsToContents()

    # this function builds the Select Organisms table

    def get_data(self):
        """Rebuild the organism table from the files in ``self.directory``.

        With the meta-genomic checkbox ticked, only ``.cspr`` files whose
        first line contains ``(meta)`` are listed. Otherwise every file whose
        name contains ``.fna`` or ``.fasta`` is listed. The displayed name is
        cut out of the file's first line, and ``self.fna_files`` maps that
        name to the bare file name. Does nothing while ``self.directory`` is
        empty. Finishes by refreshing the endonuclease drop-down.
        """
        if self.directory == '':
            return

        onlyfiles = [
            f for f in os.listdir(self.directory)
            if os.path.isfile(os.path.join(self.directory, f))
        ]
        self.fna_files.clear()

        meta_mode = self.meta_genomic_cspr_checkbox.isChecked()

        # show/hide the stuff that isn't needed
        if meta_mode:
            self.endoBox.hide()
            self.ncbi_search_button.hide()
            self.label_3.hide()
            self.label_2.setText('Select 1 Meta Genomic CSPR File')
        else:
            self.endoBox.show()
            self.ncbi_search_button.show()
            self.label_3.show()
            self.label_2.setText('Select organism(s) and endonuclease:')

        # one entry per listed file, in directory-listing order
        display_names = []
        for file in onlyfiles:
            if meta_mode:
                wanted = '.cspr' in file
            else:
                wanted = '.fna' in file or '.fasta' in file
            if not wanted:
                continue

            # Open through the directory path so this keeps working even if
            # the process working directory has changed since launch().
            with open(os.path.join(self.directory, file), 'r') as handle:
                hold = handle.readline()

            if meta_mode:
                # only show the files that are metagenomic
                if '(meta)' not in hold:
                    continue
                # the name sits between the first ':' and the first ','
                start = hold.find(':') + 1
            else:
                # the name sits between the first ' ' and the first ','
                start = hold.find(' ') + 1
            name = hold[start:hold.find(',')]

            # key the name with the (bare) file name
            self.fna_files[name] = file
            display_names.append(name)

        # Resetting first means an empty result clears stale rows in both
        # modes, not only in meta mode.
        self.org_Table.clearContents()
        self.org_Table.setRowCount(len(display_names))
        for row, name in enumerate(display_names):
            self.org_Table.setItem(row, 0, QtWidgets.QTableWidgetItem(name))

        self.org_Table.resizeColumnsToContents()

        self.fillEndo()

    # this function opens CASPERinfo and builds the dropdown menu of selectable endonucleases
    def fillEndo(self):
        """Read the endonuclease list from CASPERinfo and fill the drop-down.

        The file ``CASPERinfo`` in ``GlobalSettings.appdir`` is scanned for
        the line starting with ``ENDONUCLEASES``. Each following line, up to
        the first line starting with ``-``, is split on ``;`` into
        endonuclease name, PAM list, default seed length and default total
        length. Only the first PAM of a comma-separated list is kept.
        ``self.Endos`` maps ``"<endo> PAM: <pam>"`` to the tuple
        ``(endo, pam, seed_length, total_length)``.

        The drop-down is rebuilt from scratch on every call and the
        previously selected entry is re-selected when it still exists.
        """
        info_path = os.path.join(GlobalSettings.appdir, "CASPERinfo")
        # Iterating the file (instead of readline() inside `while True`)
        # stops at end-of-file even when the section header or the closing
        # '-' line is missing, and `with` closes the handle on any error.
        with open(info_path) as info_file:
            in_section = False
            for line in info_file:
                if not in_section:
                    in_section = line.startswith('ENDONUCLEASES')
                    continue
                if line[0] == "-":
                    break
                line_tokened = line.split(";")
                endo = line_tokened[0]
                # keep only the first PAM when several are listed
                p_pam = line_tokened[1].split(",")[0]
                default_seed_length = line_tokened[2]
                default_tot_length = line_tokened[3]
                self.Endos[endo + " PAM: " + p_pam] = (endo, p_pam,
                                                       default_seed_length,
                                                       default_tot_length)

        # get_data() calls this on every refresh; without clearing first the
        # same entries would pile up in the drop-down.
        previous_choice = self.endoBox.currentText()
        self.endoBox.clear()
        self.endoBox.addItem("None Selected")
        self.endoBox.addItems(self.Endos.keys())
        restored = self.endoBox.findText(previous_choice)
        if restored != -1:
            self.endoBox.setCurrentIndex(restored)

    # this function displays all of the organisms of which the user has that endo in their DB
    def changeEndos(self):
        """List the organisms in ``self.cspr_files`` that match the chosen endonuclease.

        The endonuclease is the drop-down text up to its first space. A file
        matches when the part of its name after the first ``_`` (with
        ``.cspr`` removed) equals it. When the drop-down text starts with
        ``None``, every organism is listed.
        """
        chosen_text = str(self.endoBox.currentText())
        chosen_endo = chosen_text[:chosen_text.find(" ")]

        self.org_Table.setRowCount(0)
        row = 0
        for organism, cspr_file in self.cspr_files.items():
            file_name = str(cspr_file)
            file_endo = file_name[file_name.find("_") + 1:].replace(".cspr", "")
            if file_endo != chosen_endo and chosen_endo != "None":
                continue
            self.org_Table.setRowCount(row + 1)
            self.org_Table.setItem(row, 0,
                                   QtWidgets.QTableWidgetItem(str(organism)))
            row += 1

        self.org_Table.resizeColumnsToContents()

    # this function calls the popParser function and fills all the tables
    def pre_analyze(self):
        """Validate the organism selection and start the analysis.

        Meta-genomic mode (checkbox ticked): exactly one CSPR file must be
        selected. The endonuclease is taken from the file name (the part
        between the first ``_`` and the following ``.``), the file is parsed
        with ``self.parser.popParser`` and the tables/plots are filled.

        Otherwise: at least one organism and an endonuclease must be
        selected. A single organism needs explicit confirmation. The selected
        FASTA files are handed to the combiner window.
        """
        selectedList = self.org_Table.selectedItems()

        # if the user is wanting to go with 1 meta genomic cspr file
        if self.meta_genomic_cspr_checkbox.isChecked():
            # These two checks (and their messages) used to sit in a branch
            # that could never run; the reachable check reported "no more
            # than 1" even when nothing was selected.
            if len(selectedList) == 0:
                QtWidgets.QMessageBox.question(
                    self, "Nothing Seleted",
                    "No items selected. Please select one meta genome for Population Analysis.",
                    QtWidgets.QMessageBox.Ok)
                return
            if len(selectedList) > 1:
                QtWidgets.QMessageBox.question(
                    self, "Too many Selected",
                    "Only 1 meta genomic CSPR file is allowed to be selected",
                    QtWidgets.QMessageBox.Ok)
                return

            # get the cspr_file name, the endochoice, and call the popParser
            orgName = selectedList[0].text()
            cspr_file_name = self.fna_files[orgName]
            # split the file name by '_', take the second piece, split that
            # by '.', and take the first piece: that is the endo choice
            endoChoice = cspr_file_name.split('_')[1].split('.')[0]
            # call the parser and then fill_data
            cspr_file_name = GlobalSettings.CSPR_DB + '/' + cspr_file_name
            self.total_org_number, self.ref_para_list = self.parser.popParser(
                cspr_file_name, endoChoice)
            self.fill_data(endoChoice)
            return

        # the user is wanting to go with creating a new meta genomic cspr file
        # rules for selecting FNA/Fasta files:
        # at least 1 organism and 1 endonuclease must be selected
        if len(selectedList) < 1 or self.endoBox.currentText(
        ) == 'None Selected':
            QtWidgets.QMessageBox.question(
                self, "Nothing Seleted",
                "No items selected. Please select at least 1 organism, and only 1 endonuclease",
                QtWidgets.QMessageBox.Ok)
            return
        if len(selectedList) == 1:
            # a single organism is allowed, but only after a confirmation
            answer = QtWidgets.QMessageBox.question(
                self, "Only 1 Organism Selected",
                "Population Analysis works with multiple organisms, or a meta genome. If the file selected it not a meta genome, the program may not function correctly.\n\n"
                "Do you wish to continue?",
                QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No,
                QtWidgets.QMessageBox.No)
            if answer == QtWidgets.QMessageBox.No:
                return

        submitList = [self.fna_files[item.text()] for item in selectedList]
        self.combinerWindow.launch(submitList)

    # this function calculates the percentConserved for the table
    # it runs through and finds out how many different organisms each seed is repeated in
    # if it's equal to the total_org_number, it then returns 1, otherwise it returns a double
    def findPercentConserved(self, seed):
        """Return the fraction of organisms in which ``seed`` occurs.

        Counts the distinct organisms (``hit[0]``) among the seed's hits in
        ``self.parser.popData``. Returns the integer 1 when that count equals
        ``self.total_org_number``, otherwise the count divided by
        ``self.total_org_number`` as a float.
        """
        organisms = {hit[0] for hit in self.parser.popData[seed]}
        found = len(organisms)
        # the equality check comes first so a full match yields exactly 1
        # (and no division happens in that case)
        return 1 if found == self.total_org_number else found / self.total_org_number

    # this function calculates the average repeats per chromosome for a seed
    # runs through the sequences in a seed and calculates it
    # returns the average
    def findAvgRepeats(self, seed):
        """Return the seed's hit count divided by its number of chromosome runs.

        Walks the seed's hits in ``self.parser.popData`` in order. The
        divisor starts at 1 and grows by one each time a hit's chromosome
        (``hit[1]``) differs from the previous hit's chromosome, unless that
        previous chromosome compares equal to 0.
        """
        hits = self.parser.popData[seed]
        chromosome_runs = 1
        previous = 0
        for hit in hits:
            current = hit[1]
            # a previous value of 0 (the initial state, or a chromosome that
            # equals 0) never counts as a change
            if previous != 0 and current != previous:
                chromosome_runs += 1
            previous = current
        return len(hits) / chromosome_runs

    # this function fills the top-right table
    def fill_data(self, endoChoice):
        """Fill the top-right table from ``self.parser.popData`` and redraw the plots.

        One row is written per seed: seed, % coverage, total repeats, average
        repeats per chromosome, consensus sequence, % consensus, and the
        score, PAM and strand of the first hit carrying the consensus
        sequence. ``endoChoice`` is only forwarded to the plotting methods.
        """
        pop_data = self.parser.popData

        # reset, then size the table once instead of growing it row by row
        self.table2.setRowCount(0)
        self.table2.setRowCount(len(pop_data))

        for index, seeds in enumerate(pop_data):
            hits = pop_data[seeds]

            seed = QtWidgets.QTableWidgetItem()
            seed.setData(QtCore.Qt.EditRole, str(seeds))
            self.table2.setItem(index, 0, seed)

            percent_conserved = self.findPercentConserved(seeds) * 100
            self.table2.setItem(
                index, 1,
                QtWidgets.QTableWidgetItem(str(percent_conserved) + '%'))

            total_repeats = QtWidgets.QTableWidgetItem()
            total_repeats.setData(QtCore.Qt.EditRole, len(hits))
            self.table2.setItem(index, 2, total_repeats)

            # get the avg repeats per chromosome, rounded to 2 decimals
            rounded = float("%.2f" % self.findAvgRepeats(seeds))
            avgTab = QtWidgets.QTableWidgetItem()
            avgTab.setData(QtCore.Qt.EditRole, rounded)
            self.table2.setItem(index, 3, avgTab)

            # sequence of every hit of this seed
            sequences = [hit[3] for hit in hits]

            # Consensus = most frequent sequence. Counting into a dict keeps
            # first-seen order, so a tie always resolves to the sequence that
            # appears first (iterating a set made the winner vary per run).
            occurrences = {}
            for sequence in sequences:
                occurrences[sequence] = occurrences.get(sequence, 0) + 1
            con_seq_temp = str(max(occurrences, key=occurrences.get))
            conIndex = sequences.index(con_seq_temp)
            consensus_seq = QtWidgets.QTableWidgetItem()
            consensus_seq.setData(QtCore.Qt.EditRole, con_seq_temp)
            self.table2.setItem(index, 4, consensus_seq)

            # set consensus percentage (stored as a 2-decimal string)
            consensus_percentage = float(
                sequences.count(con_seq_temp) / len(sequences) * 100)
            consensus_percentage = round(consensus_percentage, 2)
            consensus_percentage = format(consensus_percentage, '.2f')
            consensus_perc = QtWidgets.QTableWidgetItem()
            consensus_perc.setData(QtCore.Qt.EditRole, consensus_percentage)
            self.table2.setItem(index, 5, consensus_perc)

            # score, PAM and strand come from the first consensus hit
            consensus_hit = hits[conIndex]
            tabScore = QtWidgets.QTableWidgetItem()
            tabScore.setData(QtCore.Qt.EditRole, int(consensus_hit[5]))
            self.table2.setItem(index, 6, tabScore)
            self.table2.setItem(index, 7,
                                QtWidgets.QTableWidgetItem(consensus_hit[4]))
            self.table2.setItem(index, 8,
                                QtWidgets.QTableWidgetItem(consensus_hit[6]))

        self.table2.resizeColumnsToContents()
        self.plot_repeats_vs_seeds(endoChoice)
        self.plot_3D_graph(endoChoice)
        self.plot_venn()

    def clear(self):
        """Remove every row from the top-right results table."""
        results_table = self.table2
        results_table.setRowCount(0)

    # this function graphs the repeats_vs_seeds graph
    def plot_repeats_vs_seeds(self, endoChoice):
        """Plot the number of repeats of every seed, highest first.

        Seeds are grouped by how many hits they have in
        ``self.parser.popData``. The groups are visited in the order returned
        by ``self.order`` and each seed contributes one point
        ``(running index, repeat count)``.

        Side effects: ``self.mode`` becomes the repeat count shared by the
        most seeds (the first such count on a tie). ``self.median`` becomes
        the repeat count found at index ``round(len(popData) / 2)`` and is
        left untouched when that index is never reached. ``endoChoice`` is
        accepted for interface compatibility and is not used.
        """
        pop_data = self.parser.popData

        # repeat count -> number of seeds having exactly that many hits
        seeds_per_count = {}
        for seed in pop_data:
            repeats = len(pop_data[seed])
            seeds_per_count[repeats] = seeds_per_count.get(repeats, 0) + 1

        median_position = int(round(len(pop_data) / 2))
        largest_group = 0
        x1 = []
        y1 = []

        for repeats in self.order(seeds_per_count):
            group_size = seeds_per_count[repeats]
            if group_size > largest_group:
                largest_group = group_size
                self.mode = repeats

            for _ in range(group_size):
                # len(x1) is the running seed index
                if len(x1) == median_position:
                    self.median = repeats
                x1.append(len(x1))
                y1.append(repeats)

        # now plot the stuff
        axes = self.pop_analysis_repeats_graph.canvas.axes
        axes.clear()
        axes.plot(x1, y1)
        axes.set_xlabel('Seed ID Number')
        axes.set_ylabel('Number of Repeats')
        axes.set_title('Number of Repeats per Seed ID Number')
        # now draw
        self.pop_analysis_repeats_graph.canvas.draw()

    #this function is for the 3D bar graph
    def plot_3D_graph(self, endoChoice):
        """Draw the 3D bar chart of seeds shared between each pair of organisms.

        ``self.names`` is rebuilt as the list of organisms found in
        ``self.parser.popData`` in first-seen order; an organism's position
        in that list is its axis index. For each pair ``(i, j)`` with
        ``i > j``, one bar of height "number of seeds that occur in both" is
        drawn at ``x=i, y=j``. ``endoChoice`` is accepted for interface
        compatibility and is not used.

        Everything is sized from the organisms actually present in
        ``popData``. The previous version sized it from
        ``self.total_org_number`` and ran into an IndexError whenever fewer
        organisms than that had any seed.
        """
        pop_data = self.parser.popData

        # organisms in first-seen order, plus a name -> index lookup
        self.names = []
        name_to_index = {}
        for seed in pop_data:
            for hit in pop_data[seed]:
                if hit[0] not in name_to_index:
                    name_to_index[hit[0]] = len(self.names)
                    self.names.append(hit[0])

        org_count = len(self.names)
        shared = [[0] * org_count for _ in range(org_count)]

        # count every unordered organism pair once per seed, symmetrically
        for seed in pop_data:
            seed_orgs = []
            for hit in pop_data[seed]:
                if hit[0] not in seed_orgs:
                    seed_orgs.append(hit[0])
            for position, first in enumerate(seed_orgs):
                a = name_to_index[first]
                for second in seed_orgs[position + 1:]:
                    b = name_to_index[second]
                    shared[a][b] += 1
                    shared[b][a] += 1

        # one bar per pair below the diagonal, highest row index first
        x3 = []
        y3 = []
        dz = []
        for j in range(org_count):
            for i in range(org_count - 1, j, -1):
                x3.append(i)
                y3.append(j)
                dz.append(shared[i][j])

        # bars start at z=0 and have a 1x1 footprint
        z3 = np.zeros(len(x3))
        dx = np.ones(len(x3))
        dy = np.ones(len(x3))

        axes = self.pop_analysis_3dgraph.canvas.axes
        axes.clear()
        axes.bar3d(x3, y3, z3, dx, dy, dz)

        # ticks are labelled with organism indices; see self.names for names
        new_names = list(range(org_count))

        axes.set_xlabel('x')
        axes.set_ylabel('y')
        axes.set_zlabel('z')

        axes.set_xticks(np.arange(1, org_count + 1, 1))
        axes.set_yticks(np.arange(0, org_count, 1))

        axes.tick_params(labelsize=8)
        axes.set_xticklabels(new_names, rotation=45)
        axes.set_yticklabels(new_names, rotation=-45)

        self.pop_analysis_3dgraph.canvas.draw()

    def plot_venn(self):
        """Draw the Venn diagram for the first three organisms in ``popData``.

        ``self.names_venn`` is rebuilt as the organisms found in
        ``self.parser.popData`` in first-seen order. With fewer than three
        organisms the figure is simply left cleared. Otherwise the seven
        regions are fed with: seeds found in only one organism, seeds shared
        by each pair, and seeds found in all of the first three organisms.
        The sets are labelled '0', '1', '2', matching the indices into
        ``self.names_venn``.
        """
        canvas = self.pop_analysis_venn_diagram.canvas
        canvas.figure.clf()

        pop_data = self.parser.popData

        # organisms in first-seen order, plus a name -> index lookup
        self.names_venn = []
        name_to_index = {}
        for seed in pop_data:
            for hit in pop_data[seed]:
                if hit[0] not in name_to_index:
                    name_to_index[hit[0]] = len(self.names_venn)
                    self.names_venn.append(hit[0])

        if len(self.names_venn) < 3:
            # nothing to draw; show the cleared figure
            canvas.draw()
            return

        org_count = len(self.names_venn)
        pair_counts = [[0] * org_count for _ in range(org_count)]
        singles = [0] * org_count
        all_3 = 0
        first_three = self.names_venn[:3]

        for seed in pop_data:
            seed_orgs = []
            for hit in pop_data[seed]:
                if hit[0] not in seed_orgs:
                    seed_orgs.append(hit[0])

            if len(seed_orgs) >= 2:
                # count every unordered organism pair once, symmetrically
                for position, first in enumerate(seed_orgs):
                    a = name_to_index[first]
                    for second in seed_orgs[position + 1:]:
                        b = name_to_index[second]
                        pair_counts[a][b] += 1
                        pair_counts[b][a] += 1
            elif seed_orgs:
                # seed found in exactly one organism (a seed with no hits at
                # all is skipped instead of raising IndexError)
                singles[name_to_index[seed_orgs[0]]] += 1

            # all 3 orgs
            if all(name in seed_orgs for name in first_three):
                all_3 += 1

        # venn3 subset order: (A, B, A&B, C, A&C, B&C, A&B&C).
        # The B&C slot must be pair_counts[1][2]; the diagonal entry [1][1]
        # used before is never incremented, so that region always showed 0.
        venn3_unweighted(subsets=(singles[0], singles[1], pair_counts[0][1],
                                  singles[2], pair_counts[0][2],
                                  pair_counts[1][2], all_3),
                         set_labels=('0', '1', '2'))

        canvas.draw()

    def show_names_func(self):
        """Fill the name form's table with ``self.names`` and show the form."""
        form = self.name_form
        form.fill_table(self.names)
        form.show()

    def show_names_func2(self):
        """Show the first three entries of ``self.names_venn`` in the second form.

        With fewer than three names the form is not shown; its ``name_table2``
        is reset to zero rows instead.
        """
        if len(self.names_venn) < 3:
            self.name_form2.name_table2.setRowCount(0)
            return

        form = self.name_form2
        form.fill_table(self.names_venn[:3])
        form.show()

    def order(self, data_par):
        """Return the keys of ``data_par`` sorted from largest to smallest.

        Parameters:
            data_par: a dict (or anything ``dict()`` accepts, e.g. a sequence
                of key/value pairs) whose keys can be compared to each other.

        Returns:
            A new list with every distinct key in descending order.

        Side effect:
            When there is at least one key, the largest one is stored in
            ``self.max_repeats``. With no keys the attribute is left untouched.
        """
        # Going through dict() keeps the old contract: pair sequences are
        # accepted and duplicate keys collapse into a single entry.
        ordered_keys = sorted(dict(data_par), reverse=True)
        if ordered_keys:
            self.max_repeats = ordered_keys[0]
        return ordered_keys

    def go_back(self):
        """Call the main window's ``getData``, show it, then hide this window."""
        main_window = GlobalSettings.mainWindow
        main_window.getData()
        main_window.show()
        self.hide()

    # this function calls the close window class. Allows the user to choose what files they want to keep/delete
    def closeEvent(self, event):
        """Run the main window's ``closeFunction`` and accept the close event."""
        main_window = GlobalSettings.mainWindow
        main_window.closeFunction()
        event.accept()

    def table_sorting(self, logicalIndex):
        """Flip the stored direction for a column and re-sort ``table2`` by it.

        ``self.switcher[logicalIndex]`` is negated on every call; a value of
        -1 sorts descending, anything else ascending.
        """
        # NOTE(review): this toggles self.switcher but sorts self.table2, the
        # same widget table2_sorting uses -- confirm that is intended.
        direction = self.switcher[logicalIndex] * -1
        self.switcher[logicalIndex] = direction
        sort_order = (QtCore.Qt.DescendingOrder
                      if direction == -1 else QtCore.Qt.AscendingOrder)
        self.table2.sortItems(logicalIndex, sort_order)

    # sorting to table2: IE the table in top-right
    def table2_sorting(self, logicalIndex):
        """Flip the stored direction for a column and re-sort ``table2`` by it.

        ``self.switcher_table2[logicalIndex]`` is negated on every call; a
        value of -1 sorts descending, anything else ascending.
        """
        direction = self.switcher_table2[logicalIndex] * -1
        self.switcher_table2[logicalIndex] = direction
        sort_order = (QtCore.Qt.DescendingOrder
                      if direction == -1 else QtCore.Qt.AscendingOrder)
        self.table2.sortItems(logicalIndex, sort_order)

    # sorting for location table: IE table in bottom right
    def loc_table_sorter(self, logicalIndex):
        """Flip the stored direction for a column and re-sort the location table.

        ``self.switcher_loc_table[logicalIndex]`` is negated on every call; a
        value of -1 sorts descending, anything else ascending.
        """
        direction = self.switcher_loc_table[logicalIndex] * -1
        self.switcher_loc_table[logicalIndex] = direction
        sort_order = (QtCore.Qt.DescendingOrder
                      if direction == -1 else QtCore.Qt.AscendingOrder)
        self.loc_finder_table.sortItems(logicalIndex, sort_order)
Beispiel #8
0
    def __init__(self, parent=None):
        """Load the multitargeting window from its .ui file and set it up.

        Connects the widget signals, initialises the statistics attributes,
        attaches two graphics scenes, centers the window on the available
        desktop geometry and leaves it hidden.

        Parameters:
            parent: accepted for callers that pass one, but not used here
                (it is not forwarded to the base-class constructor).
        """
        super(Multitargeting, self).__init__()
        uic.loadUi(GlobalSettings.appdir + 'multitargetingwindow.ui', self)
        self.setWindowIcon(QtGui.QIcon(GlobalSettings.appdir +
                                       "cas9image.png"))

        self.sq = SeqTranslate()  # SeqTranslate object used in class

        # Initializes the three graphs
        self.chart_view_chro_bar = QChartView()
        self.chart_view_repeat_bar = QChartView()
        self.chart_view_repeat_line = QChartView()

        self.data = ""
        self.shortHand = ""
        self.chromo_length = list()

        # Listeners for changing the seed sequence or the .cspr file
        self.chromo_seed.currentIndexChanged.connect(self.seed_chromo_changed)
        self.update_min_max.clicked.connect(self.update)
        self.Analyze_Button.clicked.connect(self.make_graphs)

        # go back to main button
        self.back_button.clicked.connect(self.go_back)

        # Tool Bar options
        self.actionCASPER.triggered.connect(self.changeto_main)

        # Statistics storage variables
        self.max_repeats = 1
        self.average = 0
        self.median = 0
        self.mode = 0
        self.average_unique = 0
        self.average_rep = 0
        self.bar_coords = []
        self.seed_id_seq_pair = {}

        # parser object
        self.parser = CSPRparser("")

        self.ready_chromo_min_max = True
        self.ready_chromo_make_graph = True
        self.directory = 'Cspr files'
        self.info_path = os.getcwd()

        # Graphics scenes for the two views; this object also filters the
        # events of the first view's viewport.
        self.scene = QtWidgets.QGraphicsScene()
        self.graphicsView.setScene(self.scene)
        self.scene2 = QtWidgets.QGraphicsScene()
        self.graphicsView_2.setScene(self.scene2)
        self.graphicsView.viewport().installEventFilter(self)

        self.loading_window = loading_window()

        # Center the window on the available desktop geometry. (The previous
        # screen lookup under the cursor was computed but never used.)
        self.mwfg = self.frameGeometry()
        self.cp = QtWidgets.QDesktopWidget().availableGeometry().center()
        self.mwfg.moveCenter(self.cp)
        self.move(self.mwfg.topLeft())
        self.hide()
Beispiel #9
0
class genLibrary(QtWidgets.QDialog):
    def __init__(self):
        """Load the library prompt dialog and initialise its default state."""
        # window set-up
        super(genLibrary, self).__init__()
        uic.loadUi('library_prompt.ui', self)
        self.setWindowTitle('Generate Library')
        self.setWindowIcon(Qt.QIcon('cas9image.png'))

        # wire the buttons to their handlers and reset the progress bar
        self.cancel_button.clicked.connect(self.cancel_function)
        self.BrowseButton.clicked.connect(self.browse_function)
        self.submit_button.clicked.connect(self.submit_data)
        self.progressBar.setValue(0)

        # state filled in later by launch() and the submit/generate steps
        self.anno_data = {}
        self.cspr_file = ''
        self.parser = CSPRparser('')
        self.kegg_nonKegg = ''
        self.gen_lib_dict = {}
        self.S = SeqTranslate()
        self.cspr_data = {}
        self.Output = {}
        self.off_tol = 0.05
        self.off_max_misMatch = 4
        self.off_target_running = False

        # number-of-targets choices: 1 through 10
        for count in range(1, 11):
            self.numGenescomboBox.addItem(str(count))

        # minimum on-target score choices: 20 through 70
        for min_score in range(20, 71):
            self.minON_comboBox.addItem(str(min_score))

    # this function launches the window
    # Parameters:
    #       annotation_data: a dictionary that has the data for the annotations searched for
    #           currently MainWindow's searches dict is passed into this
    #       org_file: the cspr_file that pertains to the organism that user is using at the time
    #       anno_type: whether the user is using KEGG or another type of annotation file
    def launch(self, annotation_data, org_file, anno_type):
        """Prepare the dialog for one organism/annotation set and show it.

        Parameters:
            annotation_data: dictionary with the data for the annotations
                that were searched for
            org_file: path of the cspr file of the organism in use
            anno_type: 'kegg' selects the KEGG dictionary builder; any other
                value selects the non-KEGG builder
        """
        self.cspr_file = org_file
        self.anno_data = annotation_data
        self.kegg_nonKegg = anno_type
        self.parser.fileName = self.cspr_file
        self.process = QtCore.QProcess()

        # Default output name: the file's base name up to its first '.'.
        # Only the part after the last '/' is searched, so a dot in a
        # directory name (or a name without any dot) cannot produce an empty
        # or truncated default.
        base_name = self.cspr_file[self.cspr_file.rfind('/') + 1:]
        stem = base_name.split('.', 1)[0]
        self.filename_input.setText(stem + '_lib.txt')
        self.output_path.setText(GlobalSettings.CSPR_DB + "/")

        # depending on the type of file, build the dictionary accordingly
        if self.kegg_nonKegg == 'kegg':
            self.build_dict_kegg_version()
        else:
            self.build_dict_non_kegg()

        # get the data from the cspr file
        self.cspr_data = self.parser.gen_lib_parser(
            self.gen_lib_dict,
            GlobalSettings.mainWindow.endoChoice.currentText())

        self.show()

    # this is here in case the user clicks 'x' instead of cancel. Just calls the cancel function
    def closeEvent(self, event):
        """Route a window close through ``cancel_function``.

        ``cancel_function`` returns -2 when an off-target run is active and
        the user chose not to abort it; in that case the close is ignored.
        Any other result accepts the close.
        """
        if self.cancel_function() == -2:
            event.ignore()
            return
        event.accept()

    # this function takes all of the cspr data and compresses it again for off-target usage
    def compress_file_off(self):
        """Write every target in ``self.cspr_data`` to ``off_compressed.txt``.

        The file is created in ``GlobalSettings.CSPR_DB``. Each target becomes
        one line of the form ``<loc>,<seq><strand><pam>,<score>`` where loc,
        seq, pam and score (indices 0-3) go through ``self.S.compress(..., 64)``
        and strand (index 4) is written as-is.
        """
        out_path = GlobalSettings.CSPR_DB + "/off_compressed.txt"
        # The with-block closes the file even if compressing a target raises.
        with open(out_path, 'w') as f:
            for gene in self.cspr_data:
                for target in self.cspr_data[gene]:
                    loc = self.S.compress(target[0], 64)
                    seq = self.S.compress(target[1], 64)
                    pam = self.S.compress(target[2], 64)
                    score = self.S.compress(target[3], 64)
                    strand = target[4]

                    # str() on every field, including score, so a non-string
                    # compress result cannot break the concatenation.
                    f.write(str(loc) + ',' + str(seq) + str(strand) +
                            str(pam) + ',' + str(score) + '\n')

    # this function parses the temp_off file, which holds the off-target analysis results
    # it also updates each target in the cspr_data dictionary to replace the endo with the target's results in off-target
    def parse_off_file(self):
        """Read ``temp_off.txt`` and attach its scores to ``self.cspr_data``.

        The file is read from ``GlobalSettings.CSPR_DB``. Every non-empty line
        other than ``AVG OUTPUT`` is split on ':'; the first field is used as
        the key and the second as the score. Each target in ``self.cspr_data``
        is then replaced by a 6-tuple made of its first five fields plus the
        score stored under its index-1 field.

        Raises:
            IndexError: a data line contains no ':'.
            KeyError: a target's index-1 field has no entry in the file.
        """
        # The with-block closes the file even if reading fails.
        with open(GlobalSettings.CSPR_DB + '/temp_off.txt') as f:
            file_data = f.read().split('\n')

        # get the data from the file
        scoreDict = dict()
        for line in file_data:
            if line == 'AVG OUTPUT' or line == '':
                continue
            buffer = line.split(':')
            scoreDict[buffer[0]] = buffer[1]

        # update cspr_data: the sixth field becomes the off-target result
        for gene in self.cspr_data:
            targets = self.cspr_data[gene]
            for i, target in enumerate(targets):
                targets[i] = (target[0], target[1], target[2], target[3],
                              target[4], scoreDict[target[1]])

    # this function runs the off_target command
    # NOTE: some changes may be needed to get it to work with other OS besides windows
    def get_offTarget_data(self, num_targets, minScore, spaceValue,
                           output_file, fiveseq):
        """Start the external off-target executable and generate afterwards.

        Builds the command line for ``OffTargetFolder\\OT`` (Windows-style
        paths), starts it on ``self.process`` after a 100 ms delay, updates
        the progress bar from the process output, and, once the process has
        finished, parses its results and calls ``generate`` with the
        arguments given here.

        Parameters:
            num_targets, minScore, spaceValue, output_file, fiveseq:
                forwarded unchanged to ``generate``.
        """
        self.perc = False
        self.bool_temp = False
        self.running = False

        # when finished, parse the off file, and then generate the lib
        def finished():
            if self.off_target_running:
                self.progressBar.setValue(100)
                self.parse_off_file()
                did_work = self.generate(num_targets, minScore, spaceValue,
                                         output_file, fiveseq)
                self.off_target_running = False
                if did_work != -1:
                    self.cancel_function()
                    os.remove(GlobalSettings.CSPR_DB + '/off_compressed.txt')
                    os.remove(GlobalSettings.CSPR_DB + '/temp_off.txt')

        # as off-targeting outputs things, update the off-target progress bar
        def progUpdate(p):
            # str() of the byte array gives its repr (b'...'); the slices drop
            # the leading b' and the trailing quote. Line breaks therefore
            # show up as the literal characters \r\n, which is what the split
            # looks for.
            line = str(p.readAllStandardOutput())
            line = line[2:]
            line = line[:len(line) - 1]
            marker = "Running Off Target Algorithm for"
            for lines in filter(None, line.split(r'\r\n')):
                is_marker = lines.find(marker) != -1
                if is_marker and not self.perc:
                    self.perc = True
                if self.perc and not self.bool_temp and not is_marker:
                    # the text after the first 32 characters is treated as the
                    # percentage
                    lines = lines[32:]
                    lines = lines.replace("%", "")
                    try:
                        num = float(lines)
                    except ValueError:
                        # Not a progress line; skip it rather than raise
                        # inside a Qt slot.
                        continue
                    if num <= 99.5:
                        # setValue takes an int
                        self.progressBar.setValue(int(num))
                    else:
                        self.bool_temp = True

        app_path = GlobalSettings.appdir
        # raw string: '\O' is not a valid escape sequence
        exe_path = app_path + r'\OffTargetFolder\OT'
        exe_path = '"' + exe_path + '" '
        data_path = '"' + GlobalSettings.CSPR_DB.replace(
            '/', '\\') + "\\off_compressed.txt" + '" '
        compressed = r' True '
        cspr_path = '"' + self.cspr_file.replace('/', '\\') + '" '
        output_path = '"' + GlobalSettings.CSPR_DB.replace(
            '/', '\\') + '\\temp_off.txt" '
        CASPER_info_path = r' "' + app_path + '\\CASPERinfo' + '" '
        num_of_mismathes = self.off_max_misMatch
        tolerance = self.off_tol

        detailed_output = " False "
        avg_output = "True"
        # set the off_target_running to true, to keep the user from closing the window while it is running
        self.off_target_running = True
        # create command string
        cmd = exe_path + data_path + compressed + cspr_path + output_path + CASPER_info_path + str(
            num_of_mismathes) + ' ' + str(
                tolerance) + detailed_output + avg_output

        # NOTE(review): both signals are connected on every call; if this runs
        # twice on the same QProcess the handlers accumulate -- confirm.
        self.process.readyReadStandardOutput.connect(
            partial(progUpdate, self.process))
        self.progressBar.setValue(0)
        QtCore.QTimer.singleShot(100, partial(self.process.start, cmd))
        self.process.finished.connect(finished)

    # submit function
    # this function takes all of the input from the window, and calls the generate function
    # Still need to add the checks for 5' seq, and the percentage thing
    def submit_data(self):
        """Validate the dialog input and start library generation.

        Does nothing while an off-target run is active. Each validation
        failure shows a message box and returns without generating. With the
        off-target box checked, the data is compressed and the off-target
        process is started (generation happens when it finishes); otherwise
        ``generate`` is called directly and the dialog is reset on success.
        """
        if self.off_target_running:
            return
        output_file = self.output_path.text() + self.filename_input.text()
        minScore = int(self.minON_comboBox.currentText())
        num_targets = int(self.numGenescomboBox.currentText())
        fiveseq = ''

        # Force the extension that matches the csv checkbox. Only the suffix
        # is swapped, so '.txt'/'.csv' elsewhere in the path is left alone.
        if self.to_csv_checkbox.isChecked():
            wanted, other = '.csv', '.txt'
        else:
            wanted, other = '.txt', '.csv'
        if output_file.endswith(other):
            output_file = output_file[:-len(other)] + wanted
        elif not output_file.endswith(wanted):
            output_file = output_file + wanted

        # error checking for the space value
        # if they enter nothing, default to 15 and also make sure it's actually a digit
        space_text = self.space_line_edit.text()
        if space_text == '':
            spaceValue = 15
        elif space_text.isdecimal():
            # decimal digits only, so the value is never negative;
            # anything above 200 defaults to 200
            spaceValue = min(int(space_text), 200)
        else:
            QtWidgets.QMessageBox.question(
                self, "Error",
                "Please enter integers only for space between guides.",
                QtWidgets.QMessageBox.Ok)
            return

        # get the fiveprimseq data and error check it
        five_text = self.fiveprimeseq.text()
        if five_text != '':
            if not five_text.isalpha():
                QtWidgets.QMessageBox.question(
                    self, "Error",
                    "Please make sure only the letters A, T, G, or C are added into 5' End specificity box.",
                    QtWidgets.QMessageBox.Ok)
                return
            fiveseq = five_text

        # get the targeting range data, and error check it here
        start_text = self.start_target_range.text()
        end_text = self.end_target_range.text()
        if not start_text.isdecimal() or not end_text.isdecimal():
            QtWidgets.QMessageBox.question(
                self, "Error",
                "Error: Please make sure that the start and end target ranges are numbers only."
                " Please make sure that start is 0 or greater, and end is 100 or less. ",
                QtWidgets.QMessageBox.Ok)
            return
        if int(start_text) >= int(end_text):
            QtWidgets.QMessageBox.question(
                self, "Error",
                "Please make sure that the start number is always less than the end number",
                QtWidgets.QMessageBox.Ok)
            return

        # if they check Off-Targeting
        if self.find_off_Checkbox.isChecked():
            # Let float() decide whether the text is a number; blank or
            # non-numeric text gets the error message instead of an
            # unhandled ValueError.
            try:
                max_off = float(self.maxOFF_comboBox.text())
            except ValueError:
                QtWidgets.QMessageBox.question(
                    self, "Error",
                    "Please enter only numbers for Maximum Off-Target Score. It cannot be left blank",
                    QtWidgets.QMessageBox.Ok)
                return
            # make sure it between 0 and .5
            if not 0.0 < max_off < .5:
                QtWidgets.QMessageBox.question(
                    self, "Error",
                    "Please enter a max off target score between 0 and .5!",
                    QtWidgets.QMessageBox.Ok)
                return
            # compress the data (once, after all validation passed), and then
            # run off-targeting
            self.compress_file_off()
            self.get_offTarget_data(num_targets, minScore, spaceValue,
                                    output_file, fiveseq)
        else:
            # actually call the generate function
            did_work = self.generate(num_targets, minScore, spaceValue,
                                     output_file, fiveseq)

            if did_work != -1:
                self.cancel_function()

    # cancel function
    # clears everything and hides the window
    def cancel_function(self):
        """Reset the dialog to its defaults and hide it.

        While an off-target run is active the user is asked to confirm first.
        Answering "No" leaves everything untouched and returns -2; answering
        "Yes" clears the running flag, kills the process and carries on with
        the reset. In every other case nothing is returned.
        """
        if self.off_target_running:
            answer = QtWidgets.QMessageBox.question(
                self, "Off-Targeting is running",
                "Off-Targetting is running. Closing this window will cancel that process, and return to the main window. .\n\n"
                "Do you wish to continue?",
                QtWidgets.QMessageBox.Yes | QtWidgets.QMessageBox.No,
                QtWidgets.QMessageBox.No)
            if answer == QtWidgets.QMessageBox.No:
                return -2
            self.off_target_running = False
            self.process.kill()

        # forget the current organism / annotation selection
        self.cspr_file = ''
        self.anno_data = {}
        self.kegg_nonKegg = ''

        self.filename_input.setText('')
        self.output_path.setText('')

        # empty the working dictionaries in place
        for working_dict in (self.gen_lib_dict, self.cspr_data, self.Output):
            working_dict.clear()

        # put every input widget back to its default
        self.start_target_range.setText('0')
        self.end_target_range.setText('100')
        self.space_line_edit.setText('15')
        self.to_csv_checkbox.setChecked(False)
        self.find_off_Checkbox.setChecked(False)
        self.modifyParamscheckBox.setChecked(False)
        self.maxOFF_comboBox.setText('')
        self.fiveprimeseq.setText('')
        self.off_target_running = False
        self.progressBar.setValue(0)
        self.output_all_checkbox.setChecked(False)

        self.hide()

    # browse function
    # allows the user to browse for a folder
    # stores their selection in the output_path line edit
    def browse_function(self):
        """Let the user pick a folder and store it in the output path field.

        Ignored while an off-target run is active. If the dialog result is not
        an existing directory the field is left unchanged; otherwise the
        chosen path is stored with a trailing '/'.
        """
        if self.off_target_running:
            return

        dialog_parent = QtWidgets.QFileDialog()
        chosen_dir = QtWidgets.QFileDialog.getExistingDirectory(
            dialog_parent, "Open a Folder", GlobalSettings.CSPR_DB,
            QtWidgets.QFileDialog.ShowDirsOnly)

        if os.path.isdir(chosen_dir):
            # make sure to append the '/' to the folder path
            self.output_path.setText(chosen_dir + "/")

    # this function builds the dictionary that is used in the generate function
    # this is the version that builds it from the KEGG data
    # builds it exactly as Brian built it in the files given
    def build_dict_kegg_version(self):
        """Fill ``self.gen_lib_dict`` from KEGG-style annotation data.

        For every gene of every search in ``self.anno_data`` the entry is set
        to fields 0, 2, 3 and 1 (in that order) of the gene's record.
        """
        for genes in self.anno_data.values():
            for gene, records in genes.items():
                # NOTE(review): each record overwrites the previous one, so
                # only the last record of a gene is kept -- confirm intended.
                for record in records:
                    self.gen_lib_dict[gene] = [
                        record[0], record[2], record[3], record[1]
                    ]

    # this function builds the dictionary that is used in the generate function
    # this is the version that builds it from data from feature_table, gbff, or gff
    # builds it exactly as Brian built it in the files given
    def build_dict_non_kegg(self):
        """Fill ``self.gen_lib_dict`` from non-KEGG annotation data.

        Each gene key is split on ';'. The dictionary key is the first piece
        and the last piece joined by '||'; a first piece equal to
        'hypothetical protein' additionally gets field 3 of the gene's first
        record appended after a space. The value is fields 1, 3, 4 and 5 of
        that first record.
        """
        for genes in self.anno_data.values():
            for gene, records in genes.items():
                first_record = records[0]
                pieces = gene.split(';')

                label = pieces[0]
                if label == 'hypothetical protein':
                    label = label + " " + str(first_record[3])
                label = label + '||' + pieces[-1]

                self.gen_lib_dict[label] = [
                    first_record[1], first_record[3], first_record[4],
                    first_record[5]
                ]

    # generate function taken from Brian's code
    def generate(self, num_targets_per_gene, score_limit, space, output_file,
                 fiveseq):
        """Select targets for every gene in ``gen_lib_dict`` and write them out.

        Targets are indexed as (location, sequence, pam, score, strand, sixth
        field). For each gene the candidates from ``self.cspr_data`` are
        filtered, up to ``num_targets_per_gene`` of them are picked, and the
        picks are stored in ``self.Output`` and written to ``output_file``.

        Parameters:
            num_targets_per_gene: maximum number of targets picked per gene
            score_limit: targets with a score below this are filtered out
            space: minimum location distance used when picking targets
            output_file: path of the file to write
            fiveseq: required start of the target sequence (compared in upper
                case); '' disables the check

        Returns:
            -1 when writing the output file failed, otherwise None.
        """
        keep_deleted = self.modifyParamscheckBox.isChecked()
        use_off_target = self.find_off_Checkbox.isChecked()
        output_all = self.output_all_checkbox.isChecked()
        deletedDict = dict()
        poly_t = re.compile("T{5,10}")  # a run of five or more T's
        no_previous = (0, "xyz", 'abc', 1, "-")  # placeholder "previous" pick

        # check and see if we need to search based on target_range
        startNum = float(self.start_target_range.text())
        endNum = float(self.end_target_range.text())
        checkStartandEndBool = False
        if startNum != 0.0 or endNum != 100.0:
            startNum = startNum / 100
            endNum = endNum / 100
            checkStartandEndBool = True

        for gene in self.gen_lib_dict:
            # Work on a copy: reversing and filtering self.cspr_data in place
            # would corrupt the data for a retry after a failed write.
            target_list = list(self.cspr_data[gene])

            # Reverse the target list if the gene is on negative strand:
            if self.gen_lib_dict[gene][3] == "-":
                target_list.reverse()

            # Filter out the guides with low scores and long strings of T's
            # also store the ones deleted if the user selects 'modify search parameters'
            if keep_deleted:
                deletedDict[gene] = list()
            for i in range(len(target_list) - 1, -1, -1):
                if (target_list[i][3] < score_limit
                        or poly_t.search(target_list[i][1]) is not None):
                    if keep_deleted:
                        deletedDict[gene].append(target_list[i])
                    target_list.pop(i)

            # check for the fiveseq
            if fiveseq != '':
                required_start = fiveseq.upper()
                for i in range(len(target_list) - 1, -1, -1):
                    if not target_list[i][1].startswith(required_start):
                        if keep_deleted:
                            deletedDict[gene].append(target_list[i])
                        target_list.pop(i)

            # check the target range here: keep targets whose relative
            # position between the gene's fields 1 and 2 is inside the range
            if checkStartandEndBool:
                gene_start = self.gen_lib_dict[gene][1]
                totalDistance = self.gen_lib_dict[gene][2] - gene_start
                for i in range(len(target_list) - 1, -1, -1):
                    target_loc = target_list[i][0] - gene_start
                    myRatio = target_loc / totalDistance

                    if not (startNum <= myRatio <= endNum):
                        if keep_deleted:
                            deletedDict[gene].append(target_list[i])
                        target_list.pop(i)

            # if the user selected off-targetting, check to see that the targets do not exceed the selected max score
            if use_off_target:
                maxScore = float(self.maxOFF_comboBox.text())
                for i in range(len(target_list) - 1, -1, -1):
                    if maxScore < float(target_list[i][5]):
                        if keep_deleted:
                            deletedDict[gene].append(target_list[i])
                        target_list.pop(i)

            # Now generating the targets
            # NOTE(review): the selection loop below is kept exactly as it
            # was, including its end-of-list handling -- confirm before
            # changing it.
            self.Output[gene] = list()
            i = 0
            vec_index = 0
            prev_target = no_previous
            while i < num_targets_per_gene:
                # select the first targets with the score and space filter that is set in the beginning
                if len(target_list) == 0 or vec_index >= len(target_list):
                    break
                while abs(target_list[vec_index][0] - prev_target[0]) < space:
                    if target_list[vec_index][3] > prev_target[
                            3] and prev_target != no_previous:
                        self.Output[gene].remove(prev_target)
                        self.Output[gene].append(target_list[vec_index])
                        prev_target = target_list[vec_index]
                    vec_index += 1
                    # check and see if there will be a indexing error
                    if vec_index >= len(target_list) - 1:
                        vec_index = vec_index - 1
                        break
                # Add the new target to the output and add another to i
                self.Output[gene].append(target_list[vec_index])
                prev_target = target_list[vec_index]
                i += 1
                vec_index += 1

        # if the user selects modify search parameters, go through and check to see if each one has the number of targets that the user wanted
        # if not, append from the deletedDict until they do
        if keep_deleted:
            for gene, chosen in self.Output.items():
                for removed in deletedDict.get(gene, []):
                    if len(chosen) >= num_targets_per_gene:
                        break
                    # the '*' on the strand marks a target that did not meet
                    # the search parameters
                    chosen.append((removed[0], removed[1], removed[2],
                                   removed[3], removed[4] + '*', removed[5]))

        # The header depends on which of the two output options are checked.
        if use_off_target and output_all:
            header = 'Gene Name,Sequence,On-Target Score,Off-Target Score,Location,PAM,Strand\n'
        elif output_all:
            header = 'Gene Name,Sequence,On-Target Score,Location,PAM,Strand\n'
        elif use_off_target:
            header = 'Gene Name,Sequence,Off-Target Score\n'
        else:
            header = 'Gene Name,Sequence\n'
        strip_commas = self.to_csv_checkbox.isChecked()

        # Now output to the file
        try:
            # The with-block closes the file even if a write fails.
            with open(output_file, 'w') as f:
                f.write(header)

                for essential in self.Output:
                    for number, target in enumerate(self.Output[essential],
                                                    start=1):
                        # check to see if the target did not match the user's parameters and they selected 'modify'
                        # if the target has an error, put 2 asterisks in front of the tag
                        tag_id = essential + "-" + str(number)
                        if '*' in target[4]:
                            tag_id = "**" + tag_id

                        if strip_commas:
                            tag_id = tag_id.replace(',', '')

                        if use_off_target and output_all:
                            fields = [tag_id, target[1], str(target[3]),
                                      str(target[5]), str(target[0]),
                                      target[2], target[4][0]]
                        elif output_all:
                            fields = [tag_id, target[1], str(target[3]),
                                      str(target[0]), target[2],
                                      target[4][0]]
                        elif use_off_target:
                            fields = [tag_id, target[1], str(target[5])]
                        else:
                            fields = [tag_id, target[1]]
                        f.write(','.join(fields) + '\n')
        except PermissionError:
            QtWidgets.QMessageBox.question(
                self, "File Cannot Open",
                "This file cannot be opened. Please make sure that the file is not opened elsewhere and try again.",
                QtWidgets.QMessageBox.Ok)
            return -1
        except Exception as e:
            # Report failure: callers treat any result other than -1 as
            # success and would reset the dialog as if the file was written.
            print(e)
            return -1
class cspr_chromesome_selection(QtWidgets.QDialog):
    """Dialog that writes a new CSPR file containing only selected chromesomes.

    The user first picks one of the available CSPR files, then selects rows in
    the chromesome table; submitting copies those sections (and the repeats
    that reference them) into a new ``.cspr`` file and registers that file
    with ``GlobalSettings.pop_Analysis``.
    """

    def __init__(self):
        """Load the .ui layout, connect the buttons and configure both tables."""
        # qt stuff
        super(cspr_chromesome_selection, self).__init__()
        uic.loadUi("cspr_chromesome_selection.ui", self)
        self.setWindowTitle("Choose which chromesomes to pull data from!")
        self.setWindowIcon(Qt.QIcon("cas9image.png"))

        # button connections
        self.cancel_button.clicked.connect(self.cancel_function)
        self.submit_button.clicked.connect(self.submit_function)
        self.select_button.clicked.connect(self.load_chrom_names)

        # both tables share the same setup: a single read-only column with
        # whole-row multi-selection; only the header text differs
        table_setup = (
            (self.chromesome_table, "Chromesome;"),
            (self.cspr_files_available_table, "Files;"),
        )
        for table, header in table_setup:
            table.setColumnCount(1)
            table.setShowGrid(True)
            table.setHorizontalHeaderLabels(header.split(";"))
            table.setSelectionBehavior(QtWidgets.QAbstractItemView.SelectRows)
            table.setEditTriggers(QtWidgets.QAbstractItemView.NoEditTriggers)
            table.setSelectionMode(QtWidgets.QAbstractItemView.MultiSelection)

        # variables
        self.cspr_file = ''
        self.gene = ''
        self.misc = ''
        self.avail_cspr = dict()
        self.myParser = CSPRparser(self.cspr_file)
        self.orgName = ''

    def launch(self, cspr_file_data):
        """Fill the file table from ``cspr_file_data`` and show the dialog.

        cspr_file_data: dictionary whose keys are organism names and whose
            values are the matching CSPR file names (the cspr_files dict of
            the popAnalysis class).
        """
        self.avail_cspr = cspr_file_data
        self.cspr_files_available_table.setRowCount(len(self.avail_cspr))

        # one row per organism name (the dictionary keys)
        for row, org_name in enumerate(self.avail_cspr):
            self.cspr_files_available_table.setItem(
                row, 0, QtWidgets.QTableWidgetItem(org_name))

        self.cspr_files_available_table.resizeColumnsToContents()
        self.show()

    def load_chrom_names(self):
        """List the chromesomes of the single selected file in chromesome_table.

        Shows a message box and does nothing else unless exactly one row of
        the file table is selected.
        """
        selected = self.cspr_files_available_table.selectedItems()

        # error checking
        if len(selected) == 0:
            QtWidgets.QMessageBox.question(self, "Nothing Selected",
                                           "No organism selected. Please selected 1 File",
                                           QtWidgets.QMessageBox.Ok)
            return
        elif len(selected) > 1:
            QtWidgets.QMessageBox.question(self, "Too Many Selected",
                                           "Please only choose 1 File",
                                           QtWidgets.QMessageBox.Ok)
            return

        # get the data from the cspr file
        self.chromesome_table.clearContents()
        self.orgName = selected[0].text()
        file_name = self.avail_cspr[self.orgName]
        self.file_name_line_edit.setText(file_name)
        self.cspr_file = file_name
        self.myParser.fileName = file_name
        self.gene, self.misc = self.myParser.get_chromesome_names()

        # loop through and set the table
        chrom_names = self.myParser.chromesomesSelectedList
        self.chromesome_table.setRowCount(len(chrom_names))
        for row, chrom_name in enumerate(chrom_names):
            self.chromesome_table.setItem(
                row, 0, QtWidgets.QTableWidgetItem(chrom_name))
        self.chromesome_table.resizeColumnsToContents()

    def submit_function(self):
        """Write the abridged CSPR file for the selected chromesomes.

        The new file holds the genome line (with the user's subtext appended),
        the KARYSTATS entries of the selected rows, the misc line, the data
        of every selected chromesome, and the repeats whose entries point at a
        selected chromesome. Afterwards the new file is added to the
        pop_Analysis table/dict and the dialog is reset via cancel_function().
        """
        selectedList = self.chromesome_table.selectedItems()
        # make sure at least one chrom is selected
        if len(selectedList) == 0:
            QtWidgets.QMessageBox.question(self, "Nothing Selected",
                                           "No items selected, please select at least 1 chromesome to pull out.",
                                           QtWidgets.QMessageBox.Ok)
            return

        # now make sure they have something in the subtext field. if not, throw a warning
        if self.output_file_name.text() == '':
            QtWidgets.QMessageBox.question(self, "Error! No file subtext given.",
                                           "Please give a subtext for the new CSPR file to differentiate the files.",
                                           QtWidgets.QMessageBox.Ok)
            return

        # get the output file name info
        # NOTE(review): everything from the FIRST '.' of the path onwards is
        # dropped, so a dot in a directory name shortens the path - confirm
        # that this is the intended naming rule.
        dotIndex = self.cspr_file.find('.')
        outputFile = self.cspr_file[:dotIndex] + self.output_file_name.text() + '.cspr'

        output_list = [item.text() for item in selectedList]
        keep_rows = set(item.row() for item in selectedList)

        # remove the ones the user does not want from the karystats list
        # (walk backwards so popping does not shift the indexes still to visit)
        for position in reversed(range(len(self.myParser.karystatsList))):
            if position not in keep_rows:
                self.myParser.karystatsList.pop(position)

        # context managers make sure every handle is closed, even on errors
        with open(outputFile, 'w') as outputStream:
            # write the first three lines
            self.gene = self.gene.replace('\n', '')
            outputStream.write(self.gene + self.output_file_name.text())
            outputStream.write('\n')
            outputStream.write('KARYSTATS: ')
            # write all of the karystats data
            for stat in self.myParser.karystatsList:
                outputStream.write(str(stat))
                outputStream.write(',')
            outputStream.write('\n')
            outputStream.write(self.misc)

            # now go through and write the chromesomes
            for chrom_header in output_list:
                outputStream.write(chrom_header)
                with open(self.cspr_file, 'r') as csprData:
                    buffer = csprData.readline()
                    # an empty string from readline() means end of file; stop
                    # there instead of looping forever on a missing header
                    while buffer:
                        if buffer == chrom_header:
                            # copy lines until the next '>' line, the REPEATS
                            # marker, or the end of the file
                            buffer = csprData.readline()
                            while (buffer and '>' not in buffer
                                   and buffer != 'REPEATS\n'):
                                outputStream.write(buffer)
                                buffer = csprData.readline()
                            break
                        buffer = csprData.readline()
            outputStream.write("REPEATS\n")

            # now to go through and write all of the repeats
            with open(self.cspr_file, 'r') as csprFile:
                csprLines = csprFile.read().split('\n')

            # skip to the repeats part of the file
            index = csprLines.index('REPEATS') + 1

            # the rest of the file is read as pairs of lines:
            # a seed line followed by a tab-separated line of repeats
            while index + 1 < len(csprLines):
                seed = csprLines[index]
                repeats = csprLines[index + 1].split('\t')

                # keep the repeats whose first comma-separated field, minus
                # one, is the row index of a selected chromesome
                storeRepeats = []
                for repeat in repeats:
                    checkData = repeat.split(',')
                    if len(checkData) > 1 and int(checkData[0]) - 1 in keep_rows:
                        storeRepeats.append(repeat)

                # if i found repeats I want, print it all to the file
                if storeRepeats:
                    outputStream.write(seed)
                    outputStream.write('\n')
                    for repeat in storeRepeats:
                        outputStream.write(repeat)
                        outputStream.write('\t')
                    outputStream.write('\n')
                index += 2

            # write the end of file marker
            outputStream.write('END_OF_FILE')

        # set the table and dict in the pop Analysis class
        new_name = self.orgName + self.output_file_name.text()
        org_table = GlobalSettings.pop_Analysis.org_Table
        GlobalSettings.pop_Analysis.cspr_files[new_name] = outputFile
        org_table.setRowCount(org_table.rowCount() + 1)
        org_table.setItem(org_table.rowCount() - 1, 0,
                          QtWidgets.QTableWidgetItem(new_name))
        # call the cancel function to close out the window and clear everything
        self.cancel_function()

    def cancel_function(self):
        """Clear both tables, reset the text fields/variables and hide the dialog."""
        self.chromesome_table.clearContents()
        self.chromesome_table.setRowCount(0)
        self.cspr_files_available_table.clearContents()
        self.cspr_files_available_table.setRowCount(0)
        self.avail_cspr = dict()
        self.file_name_line_edit.setText(" ")
        self.cspr_file = " "
        self.output_file_name.setText("_Abridged")
        self.myParser.fileName = ' '
        self.gene = ' '
        self.misc = ' '
        self.orgName = ' '
        self.hide()
Beispiel #11
0
    def __init__(self):
        """Build the Generate Library window.

        Loads the .ui file, styles the four step group boxes, connects the
        buttons, initialises the instance variables, fills the combo boxes and
        finally scales the window. Any exception is logged, reported in a
        message box and ends the program with exit code -1.
        """
        try:
            # qt stuff
            super(genLibrary, self).__init__()
            uic.loadUi(GlobalSettings.appdir + 'generate_library.ui', self)
            self.setWindowTitle('Generate Library')
            self.setWindowIcon(
                Qt.QIcon(GlobalSettings.appdir + 'cas9image.ico'))

            groupbox_style = """
            QGroupBox:title{subcontrol-origin: margin;
                            left: 10px;
                            padding: 0 5px 0 5px;}
            QGroupBox#Step1{border: 2px solid rgb(111,181,110);
                            border-radius: 9px;
                            font: bold 14pt 'Arial';
                            margin-top: 10px;}"""
            # the template is written for Step1; swap in each box's own name
            for box_name in ("Step1", "Step2", "Step3", "Step4"):
                getattr(self, box_name).setStyleSheet(
                    groupbox_style.replace("Step1", box_name))

            # button connections
            self.cancel_button.clicked.connect(self.cancel_function)
            self.BrowseButton.clicked.connect(self.browse_function)
            self.submit_button.clicked.connect(self.submit_data)
            self.progressBar.setValue(0)

            # variables
            self.anno_data = dict()
            self.kegg_nonKegg = ''
            self.gen_lib_dict = dict()
            self.cspr_data = dict()
            self.Output = dict()
            self.off_tol = .05
            self.off_max_misMatch = 4
            self.off_target_running = False
            self.parser = CSPRparser("")

            # set the numbers for the num genes combo box item (1..10)
            for value in range(1, 11):
                self.numGenescomboBox.addItem(str(value))

            # set the numbers for the minOn combo box (20..70)
            for value in range(20, 71):
                self.minON_comboBox.addItem(str(value))

            #scale UI
            self.scaleUI()

        except Exception as e:
            logger.critical("Error initializing generate library class.")
            logger.critical(e)
            logger.critical(traceback.format_exc())
            # self.fontSize is only assigned inside scaleUI(); fall back to
            # the same default when the failure happened before that, so the
            # handler itself cannot fail and hide the original error
            font_size = getattr(self, "fontSize", 12)
            msgBox = QtWidgets.QMessageBox()
            msgBox.setStyleSheet("font: " + str(font_size) + "pt 'Arial'")
            msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
            msgBox.setWindowTitle("Fatal Error")
            msgBox.setText(
                "Fatal Error:\n" + str(e) +
                "\n\nFor more information on this error, look at CASPER.log in the application folder."
            )
            msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Close)
            msgBox.exec()

            exit(-1)
Beispiel #12
0
class genLibrary(QtWidgets.QMainWindow):
    def __init__(self):
        """Build the Generate Library window.

        Loads the .ui file, styles the four step group boxes, connects the
        buttons, initialises the instance variables, fills the combo boxes and
        finally scales the window. Any exception is logged, reported in a
        message box and ends the program with exit code -1.
        """
        try:
            # qt stuff
            super(genLibrary, self).__init__()
            uic.loadUi(GlobalSettings.appdir + 'generate_library.ui', self)
            self.setWindowTitle('Generate Library')
            self.setWindowIcon(
                Qt.QIcon(GlobalSettings.appdir + 'cas9image.ico'))

            groupbox_style = """
            QGroupBox:title{subcontrol-origin: margin;
                            left: 10px;
                            padding: 0 5px 0 5px;}
            QGroupBox#Step1{border: 2px solid rgb(111,181,110);
                            border-radius: 9px;
                            font: bold 14pt 'Arial';
                            margin-top: 10px;}"""
            # the template is written for Step1; swap in each box's own name
            for box_name in ("Step1", "Step2", "Step3", "Step4"):
                getattr(self, box_name).setStyleSheet(
                    groupbox_style.replace("Step1", box_name))

            # button connections
            self.cancel_button.clicked.connect(self.cancel_function)
            self.BrowseButton.clicked.connect(self.browse_function)
            self.submit_button.clicked.connect(self.submit_data)
            self.progressBar.setValue(0)

            # variables
            self.anno_data = dict()
            self.kegg_nonKegg = ''
            self.gen_lib_dict = dict()
            self.cspr_data = dict()
            self.Output = dict()
            self.off_tol = .05
            self.off_max_misMatch = 4
            self.off_target_running = False
            self.parser = CSPRparser("")

            # set the numbers for the num genes combo box item (1..10)
            for value in range(1, 11):
                self.numGenescomboBox.addItem(str(value))

            # set the numbers for the minOn combo box (20..70)
            for value in range(20, 71):
                self.minON_comboBox.addItem(str(value))

            #scale UI
            self.scaleUI()

        except Exception as e:
            logger.critical("Error initializing generate library class.")
            logger.critical(e)
            logger.critical(traceback.format_exc())
            # self.fontSize is only assigned inside scaleUI(); fall back to
            # the same default when the failure happened before that, so the
            # handler itself cannot fail and hide the original error
            font_size = getattr(self, "fontSize", 12)
            msgBox = QtWidgets.QMessageBox()
            msgBox.setStyleSheet("font: " + str(font_size) + "pt 'Arial'")
            msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
            msgBox.setWindowTitle("Fatal Error")
            msgBox.setText(
                "Fatal Error:\n" + str(e) +
                "\n\nFor more information on this error, look at CASPER.log in the application folder."
            )
            msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Close)
            msgBox.exec()

            exit(-1)

    #scale UI based on current screen
    def scaleUI(self):
        """Set the default fonts and size/centre the window on the cursor's screen.

        The window is scaled relative to the screen geometry (950x500 on a
        1920x1080 screen) but never made smaller than the size Qt computes in
        adjustSize(). Any exception is logged, reported in a message box and
        ends the program with exit code -1.
        """
        try:
            self.repaint()
            QtWidgets.QApplication.processEvents()

            # the screen that currently contains the mouse cursor
            desktop = QtWidgets.QApplication.desktop()
            screen_index = desktop.screenNumber(desktop.cursor().pos())
            screen_geometry = QtWidgets.QApplication.screens()[
                screen_index].geometry()
            width = screen_geometry.width()
            height = screen_geometry.height()

            # font scaling
            fontSize = 12
            self.fontSize = fontSize
            self.centralWidget().setStyleSheet("font: " + str(fontSize) +
                                               "pt 'Arial';")

            #scale title
            fontSize = 30
            self.label.setStyleSheet("font: bold " + str(fontSize) +
                                     "pt 'Arial';")

            self.adjustSize()

            # window scaling
            # 1920x1080 => 950x500, but never below the adjusted size
            scaledWidth = max(int((width * 950) / 1920), self.size().width())
            scaledHeight = max(int((height * 500) / 1080),
                               self.size().height())

            # centre the scaled window on that screen
            centerPoint = desktop.screenGeometry(screen_index).center()
            x = centerPoint.x() - math.ceil(scaledWidth / 2)
            y = centerPoint.y() - math.ceil(scaledHeight / 2)
            self.setGeometry(x, y, scaledWidth, scaledHeight)

            self.repaint()
            QtWidgets.QApplication.processEvents()

        except Exception as e:
            logger.critical("Error in scaleUI() in generate library.")
            logger.critical(e)
            logger.critical(traceback.format_exc())
            # the failure may have happened before self.fontSize was assigned
            # above; fall back to the same default so this handler cannot
            # fail and hide the original error
            font_size = getattr(self, "fontSize", 12)
            msgBox = QtWidgets.QMessageBox()
            msgBox.setStyleSheet("font: " + str(font_size) + "pt 'Arial'")
            msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
            msgBox.setWindowTitle("Fatal Error")
            msgBox.setText(
                "Fatal Error:\n" + str(e) +
                "\n\nFor more information on this error, look at CASPER.log in the application folder."
            )
            msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Close)
            msgBox.exec()

            exit(-1)

    #center UI on current screen
    def centerUI(self):
        """Move the window, at its current size, to the centre of the cursor's screen.

        Any exception is logged, reported in a message box and ends the
        program with exit code -1.
        """
        try:
            self.repaint()
            QtWidgets.QApplication.processEvents()

            # keep the current size, only the position changes
            win_w = self.width()
            win_h = self.height()

            # centre point of the screen that holds the mouse cursor
            desktop = QtWidgets.QApplication.desktop()
            cursor_screen = desktop.screenNumber(desktop.cursor().pos())
            middle = desktop.screenGeometry(cursor_screen).center()

            left = middle.x() - math.ceil(win_w / 2)
            top = middle.y() - math.ceil(win_h / 2)
            self.setGeometry(left, top, win_w, win_h)

            self.repaint()
            QtWidgets.QApplication.processEvents()
        except Exception as e:
            logger.critical("Error in centerUI() in generate library.")
            logger.critical(e)
            logger.critical(traceback.format_exc())

            details = ("Fatal Error:\n" + str(e) +
                       "\n\nFor more information on this error, look at CASPER.log in the application folder.")
            msgBox = QtWidgets.QMessageBox()
            msgBox.setStyleSheet("font: " + str(self.fontSize) + "pt 'Arial'")
            msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
            msgBox.setWindowTitle("Fatal Error")
            msgBox.setText(details)
            msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Close)
            msgBox.exec()

            exit(-1)

    # this function launches the window
    # Parameters:
    #       annotation_data: a dictionary that has the data for the annotations searched for
    #           currently MainWindow's searches dict is passed into this
    #       org_file: the cspr_file that pertains to the organism that user is using at the time
    #       anno_type: whether the user is using KEGG or another type of annotation file
    def launch(self, annotation_data, org_file, anno_type):
        """Prepare the window for ``org_file`` and show it.

        annotation_data: dictionary with the data for the searched annotations
        org_file: path of the CSPR file of the organism currently in use
        anno_type: stored in ``self.kegg_nonKegg`` (KEGG or another type of
            annotation file)

        Any exception is logged, reported in a message box and ends the
        program with exit code -1.
        """
        try:
            self.cspr_file = org_file
            # the repeats database shares the path up to the first '.'
            first_dot = org_file.find('.')
            self.db_file = org_file[:first_dot] + '_repeats.db'
            self.anno_data = annotation_data
            self.kegg_nonKegg = anno_type
            self.process = QtCore.QProcess()
            self.parser.fileName = org_file

            # setting the path and file name fields; the path separator
            # depends on the operating system
            separator = '\\' if platform.system() == "Windows" else '/'
            last_separator = self.cspr_file.rfind(separator)
            base_name = self.cspr_file[last_separator + 1:first_dot]
            self.filename_input.setText(base_name + '_lib')
            self.output_path.setText(GlobalSettings.CSPR_DB + separator)

            # build the gene dictionary used by the parser below
            self.build_dict_non_kegg()

            # get the gRNA data from the cspr file
            endo_choice = GlobalSettings.mainWindow.endoChoice.currentText()
            self.cspr_data = self.parser.gen_lib_parser(self.gen_lib_dict,
                                                        endo_choice)
            self.get_endo_data()

            #center UI
            self.centerUI()
            self.show()
            self.activateWindow()
        except Exception as e:
            logger.critical("Error in launch() in generate library.")
            logger.critical(e)
            logger.critical(traceback.format_exc())

            details = ("Fatal Error:\n" + str(e) +
                       "\n\nFor more information on this error, look at CASPER.log in the application folder.")
            msgBox = QtWidgets.QMessageBox()
            msgBox.setStyleSheet("font: " + str(self.fontSize) + "pt 'Arial'")
            msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
            msgBox.setWindowTitle("Fatal Error")
            msgBox.setText(details)
            msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Close)
            msgBox.exec()

            exit(-1)

    def get_endo_data(self):
        """Read the ENDONUCLEASES section of the CASPERinfo file into ``self.endo_data``.

        The section starts after the first line beginning with
        'ENDONUCLEASES' and ends at the first line beginning with '-' (or at
        the end of the file). Every line in between that has exactly 10
        ';'-separated fields adds one entry:

            endo_data[field 0] = [int(field 2) + int(field 4) + int(field 3),
                                  field 5,
                                  "MATRIX:" + field 9]

        Any exception is logged, reported in a message box and ends the
        program with exit code -1.
        """
        try:
            # the context manager closes the file even if parsing fails
            with open(GlobalSettings.appdir + "CASPERinfo") as f:
                self.endo_data = {}
                in_section = False
                # iterating the file stops at EOF, so a file without the
                # section header (or without the closing '-' line) can no
                # longer keep this loop running forever
                for raw_line in f:
                    if not in_section:
                        in_section = raw_line.startswith('ENDONUCLEASES')
                        continue

                    line = raw_line.replace("\n", "")
                    # startswith() is also safe on a blank line
                    if line.startswith("-"):
                        break

                    line_tokened = line.split(";")
                    if len(line_tokened) == 10:
                        endo = line_tokened[0]
                        five_length = line_tokened[2]
                        seed_length = line_tokened[3]
                        three_length = line_tokened[4]
                        prime = line_tokened[5]
                        hsu = line_tokened[9]
                        self.endo_data[endo] = [
                            int(five_length) + int(three_length) +
                            int(seed_length), prime, "MATRIX:" + hsu
                        ]
        except Exception as e:
            logger.critical("Error in get_endo_data() in generate library.")
            logger.critical(e)
            logger.critical(traceback.format_exc())
            msgBox = QtWidgets.QMessageBox()
            msgBox.setStyleSheet("font: " + str(self.fontSize) + "pt 'Arial'")
            msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
            msgBox.setWindowTitle("Fatal Error")
            msgBox.setText(
                "Fatal Error:\n" + str(e) +
                "\n\nFor more information on this error, look at CASPER.log in the application folder."
            )
            msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Close)
            msgBox.exec()

            exit(-1)

    # this is here in case the user clicks 'x' instead of cancel. Just calls the cancel function
    def closeEvent(self, event):
        """Route a window close (the 'x' button) through cancel_function().

        The close is ignored when cancel_function() returns -2 (off-target
        analysis is running and the user decided not to cancel it); in every
        other case the event is accepted. Any exception is logged, reported
        in a message box and ends the program with exit code -1.
        """
        try:
            keep_open = self.cancel_function() == -2
            if not keep_open:
                event.accept()
            else:
                event.ignore()
        except Exception as e:
            logger.critical("Error in closeEvent() in generate library.")
            logger.critical(e)
            logger.critical(traceback.format_exc())

            details = ("Fatal Error:\n" + str(e) +
                       "\n\nFor more information on this error, look at CASPER.log in the application folder.")
            msgBox = QtWidgets.QMessageBox()
            msgBox.setStyleSheet("font: " + str(self.fontSize) + "pt 'Arial'")
            msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
            msgBox.setWindowTitle("Fatal Error")
            msgBox.setText(details)
            msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Close)
            msgBox.exec()

            exit(-1)

    # this function takes all of the cspr data and compresses it again for off-target usage
    def compress_file_off(self):
        """Write every target in ``self.cspr_data`` to off_input.txt for off-target analysis.

        The file is created in ``GlobalSettings.CSPR_DB``. Each target becomes
        one line made of its first five fields (location, sequence, PAM,
        score, strand) joined with ';'. Any exception is logged, reported in
        a message box and ends the program with exit code -1.
        """
        try:
            if platform.system() == "Windows":
                file = GlobalSettings.CSPR_DB + "\\off_input.txt"
            else:
                file = GlobalSettings.CSPR_DB + "/off_input.txt"

            # the context manager closes the file even if a target is
            # malformed and writing stops half way
            with open(file, 'w') as f:
                for gene in self.cspr_data:
                    for target in self.cspr_data[gene]:
                        # loc;seq;pam;score;strand
                        output = ';'.join(
                            str(target[field]) for field in range(5))
                        f.write(output + '\n')
        except Exception as e:
            logger.critical(
                "Error in compress_file_off() in generate library.")
            logger.critical(e)
            logger.critical(traceback.format_exc())
            msgBox = QtWidgets.QMessageBox()
            msgBox.setStyleSheet("font: " + str(self.fontSize) + "pt 'Arial'")
            msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
            msgBox.setWindowTitle("Fatal Error")
            msgBox.setText(
                "Fatal Error:\n" + str(e) +
                "\n\nFor more information on this error, look at CASPER.log in the application folder."
            )
            msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Close)
            msgBox.exec()

            exit(-1)

    # this function parses the temp_off file, which holds the off-target analysis results
    # it also updates each target in the cspr_data dictionary to replace the endo with the target's results in off-target
    def parse_off_file(self):
        """Read temp_off.txt and append each target's off-target result to ``self.cspr_data``.

        Every non-empty line of the file other than 'AVG OUTPUT' is split on
        ':'; the part before the first ':' is the key and the part after it
        the value. Each target in ``self.cspr_data`` is then replaced by a
        6-tuple: its first five fields plus the value stored under its
        sequence (field 1). Any exception is logged, reported in a message
        box and ends the program with exit code -1.
        """
        try:
            file_name = ("\\temp_off.txt" if platform.system() == "Windows"
                         else "/temp_off.txt")
            handle = open(GlobalSettings.CSPR_DB + file_name, "r")
            lines = handle.read().split('\n')
            handle.close()

            # get the data from the file
            scores = dict()
            for entry in lines:
                # skip the header line and blank lines
                if entry in ('AVG OUTPUT', ''):
                    continue
                parts = entry.split(':')
                scores[parts[0]] = parts[1]

            # update cspr_Data
            for gene in self.cspr_data:
                targets = self.cspr_data[gene]
                for position in range(len(targets)):
                    current = targets[position]
                    first_five = tuple(current[field] for field in range(5))
                    targets[position] = first_five + (scores[current[1]],)
        except Exception as e:
            logger.critical("Error in parse_off_file() in generate library.")
            logger.critical(e)
            logger.critical(traceback.format_exc())

            details = ("Fatal Error:\n" + str(e) +
                       "\n\nFor more information on this error, look at CASPER.log in the application folder.")
            msgBox = QtWidgets.QMessageBox()
            msgBox.setStyleSheet("font: " + str(self.fontSize) + "pt 'Arial'")
            msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
            msgBox.setWindowTitle("Fatal Error")
            msgBox.setText(details)
            msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Close)
            msgBox.exec()

            exit(-1)

    # this function runs the off_target command
    # NOTE: some changes may be needed to get it to work with other OS besides windows
    def get_offTarget_data(self, num_targets, minScore, spaceValue,
                           output_file, fiveseq):
        try:
            self.perc = False
            self.bool_temp = False
            self.running = False

            # when finished, parse the off file, and then generate the lib
            def finished():
                if self.off_target_running:
                    self.progressBar.setValue(100)
                    self.parse_off_file()
                    did_work = self.generate(num_targets, minScore, spaceValue,
                                             output_file, fiveseq)
                    self.off_target_running = False
                    #self.process.kill()
                    if did_work != -1:
                        self.cancel_function()
                        msgBox = QtWidgets.QMessageBox()
                        msgBox.setStyleSheet("font: " + str(self.fontSize) +
                                             "pt 'Arial'")
                        msgBox.setIcon(QtWidgets.QMessageBox.Icon.Information)
                        msgBox.setWindowTitle("Library Generated!")
                        msgBox.setText(
                            "CASPER has finished generating your library!")
                        msgBox.addButton(
                            QtWidgets.QMessageBox.StandardButton.Ok)
                        msgBox.exec()

                        os.remove(GlobalSettings.CSPR_DB + '/off_input.txt')
                        os.remove(GlobalSettings.CSPR_DB + '/temp_off.txt')

            # as off-targeting outputs things, update the off-target progress bar
            def progUpdate(p):
                line = str(self.process.readAllStandardOutput())
                line = line[2:]
                line = line[:len(line) - 1]
                if platform.system() == 'Windows':
                    for lines in filter(None, line.split(r'\r\n')):
                        if (lines.find("Running Off Target Algorithm for") !=
                                -1 and self.perc == False):
                            self.perc = True
                        if (self.perc == True and self.bool_temp == False and
                                lines.find("Running Off Target Algorithm for")
                                == -1):
                            lines = lines[32:]
                            lines = lines.replace("%", "")
                            if (float(lines) <= 99.5):
                                num = float(lines)
                                self.progressBar.setValue(num)
                            else:
                                self.bool_temp = True
                else:
                    for lines in filter(None, line.split(r'\n')):
                        if (lines.find("Running Off Target Algorithm for") !=
                                -1 and self.perc == False):
                            self.perc = True
                        if (self.perc == True and self.bool_temp == False and
                                lines.find("Running Off Target Algorithm for")
                                == -1):
                            lines = lines[32:]
                            lines = lines.replace("%", "")
                            if (float(lines) <= 99.5):
                                num = float(lines)
                                self.progressBar.setValue(num)
                            else:
                                self.bool_temp = True

            if platform.system() == 'Windows':
                app_path = GlobalSettings.appdir
                exe_path = app_path + 'OffTargetFolder\\OT_Win.exe'
                output_path = '"' + GlobalSettings.CSPR_DB + '\\temp_off.txt" '
                data_path = '"' + GlobalSettings.CSPR_DB + "\\off_input.txt" + '" '
            elif platform.system() == 'Linux':
                app_path = GlobalSettings.appdir.replace('\\', '/')
                exe_path = app_path + r'OffTargetFolder/OT_Lin'
                output_path = '"' + GlobalSettings.CSPR_DB + '/temp_off.txt" '
                data_path = '"' + GlobalSettings.CSPR_DB + "/off_input.txt" + '" '
            else:
                app_path = GlobalSettings.appdir.replace('\\', '/')
                exe_path = app_path + r'OffTargetFolder/OT_Mac'
                output_path = '"' + GlobalSettings.CSPR_DB + '/temp_off.txt" '
                data_path = '"' + GlobalSettings.CSPR_DB + "/off_input.txt" + '" '
            exe_path = '"' + exe_path + '" '
            cspr_path = '"' + self.cspr_file + '" '
            db_path = '"' + self.db_file + '" '
            filename = output_path
            filename = filename[:len(filename) - 1]
            filename = filename[1:]
            filename = filename.replace('"', '')
            CASPER_info_path = '"' + app_path + 'CASPERinfo' + '" '
            num_of_mismathes = self.off_max_misMatch
            tolerance = self.off_tol  # create command string
            endo = '"' + GlobalSettings.mainWindow.endoChoice.currentText(
            ) + '" '
            detailed_output = " False "
            avg_output = "True"
            hsu = ' "' + self.endo_data[
                GlobalSettings.mainWindow.endoChoice.currentText()][2] + '"'

            # set the off_target_running to true, to keep the user from closing the window while it is running
            self.off_target_running = True

            cmd = exe_path + data_path + endo + cspr_path + db_path + output_path + CASPER_info_path + str(
                num_of_mismathes) + ' ' + str(
                    tolerance) + detailed_output + avg_output + hsu

            if platform.system() == 'Windows':
                cmd = cmd.replace('/', '\\')
            self.process.readyReadStandardOutput.connect(
                partial(progUpdate, self.process))
            self.process.readyReadStandardError.connect(
                partial(progUpdate, self.process))
            self.progressBar.setValue(0)
            QtCore.QTimer.singleShot(100, partial(self.process.start, cmd))
            self.process.finished.connect(finished)
        except Exception as e:
            logger.critical(
                "Error in get_offTarget_data() in generate library.")
            logger.critical(e)
            logger.critical(traceback.format_exc())
            msgBox = QtWidgets.QMessageBox()
            msgBox.setStyleSheet("font: " + str(self.fontSize) + "pt 'Arial'")
            msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
            msgBox.setWindowTitle("Fatal Error")
            msgBox.setText(
                "Fatal Error:\n" + str(e) +
                "\n\nFor more information on this error, look at CASPER.log in the application folder."
            )
            msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Close)
            msgBox.exec()

            exit(-1)

    # submit function
    # this function takes all of the input from the window, and calls the generate function
    # Still need to add the checks for 5' seq, and the percentage thing
    def submit_data(self):
        """Validate the form and start library generation.

        Reads the output location, the score / target-count selections and
        the optional filters from the window's widgets.  On the first invalid
        value an error dialog is shown and the method returns without
        generating anything.

        When the off-target checkbox is checked, the data is compressed and
        get_offTarget_data() is started.  Otherwise generate() is called
        directly and, unless it returns -1, the window is reset through
        cancel_function() and a completion dialog is shown.

        Does nothing while an off-target run is in progress.  Any unexpected
        exception is logged, reported in a dialog, and ends the application
        with exit(-1).
        """
        try:
            if self.off_target_running:
                return

            def show_error(message):
                # Every validation failure is reported with the same modal
                # dialog; only the text differs.
                box = QtWidgets.QMessageBox()
                box.setStyleSheet("font: " + str(self.fontSize) +
                                  "pt 'Arial'")
                box.setIcon(QtWidgets.QMessageBox.Icon.Critical)
                box.setWindowTitle("Error")
                box.setText(message)
                box.addButton(QtWidgets.QMessageBox.StandardButton.Ok)
                box.exec()

            output_file = self.output_path.text() + self.filename_input.text()

            minScore = int(self.minON_comboBox.currentText())
            num_targets = int(self.numGenescomboBox.currentText())
            fiveseq = ''

            # Force a .csv extension.  Only the trailing suffix is swapped:
            # str.replace() would also rewrite a '.txt' that appears earlier
            # in the directory or file name.
            if output_file.endswith('.txt'):
                output_file = output_file[:-len('.txt')] + '.csv'
            elif not output_file.endswith('.csv'):
                output_file = output_file + '.csv'

            # error checking for the space value
            # if they enter nothing, default to 15 and also make sure it's actually a digit
            space_text = self.space_line_edit.text()
            if space_text == '':
                spaceValue = 15
            elif space_text.isdigit():
                spaceValue = int(space_text)
            else:
                show_error(
                    "Please enter integers only for space between guides.")
                return
            # if space value is more than 200, default to 200
            if spaceValue > 200:
                spaceValue = 200
            elif spaceValue < 0:
                show_error("Please enter a space-value that is 0 or greater.")
                return

            # get the fiveprimseq data and error check it
            five_text = self.fiveprimeseq.text()
            if five_text != '':
                if not five_text.isalpha():
                    show_error(
                        "Please make sure only the letters A, T, G, or C are added into 5' End specificity box."
                    )
                    return
                fiveseq = five_text

            # get the targeting range data, and error check it here.
            # The upper bound is checked here as well, so an out-of-range
            # value is rejected before any off-target run is started instead
            # of only being caught later inside generate().
            start_text = self.start_target_range.text()
            end_text = self.end_target_range.text()
            if (not start_text.isdigit() or not end_text.isdigit()
                    or int(end_text) > 100):
                show_error(
                    "Error: Please make sure that the start and end target ranges are numbers only. Please make sure that start is 0 or greater, and end is 100 or less. "
                )
                return
            elif int(start_text) >= int(end_text):
                show_error(
                    "Please make sure that the start number is always less than the end number"
                )
                return

            # if they check Off-Targeting
            if self.find_off_Checkbox.isChecked():
                # Parse with float() directly: it is the same conversion
                # generate() applies later, and it rejects blank or
                # malformed input (e.g. "1.2.3") without raising out of
                # this method.
                try:
                    max_off = float(self.maxOFF_comboBox.text())
                except ValueError:
                    show_error(
                        "Please enter only numbers for Maximum Off-Target Score. It cannot be left blank"
                    )
                    return

                # make sure it between 0 and .5
                if not 0.0 < max_off <= .5:
                    show_error(
                        "Please enter a max off-target score between 0 and 0.5!"
                    )
                    return

                # compress the data (once, after all validation has passed),
                # and then run off-targeting
                self.compress_file_off()
                self.get_offTarget_data(num_targets, minScore, spaceValue,
                                        output_file, fiveseq)
            else:
                # actually call the generate function
                did_work = self.generate(num_targets, minScore, spaceValue,
                                         output_file, fiveseq)

                if did_work != -1:
                    self.cancel_function()
                    msgBox = QtWidgets.QMessageBox()
                    msgBox.setStyleSheet("font: " + str(self.fontSize) +
                                         "pt 'Arial'")
                    # Success notice, so use the informational icon.
                    msgBox.setIcon(QtWidgets.QMessageBox.Icon.Information)
                    msgBox.setWindowTitle("Library Generated!")
                    msgBox.setText(
                        "CASPER has finished generating your library!")
                    msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Ok)
                    msgBox.exec()

        except Exception as e:
            logger.critical("Error in submit_data() in generate library.")
            logger.critical(e)
            logger.critical(traceback.format_exc())
            msgBox = QtWidgets.QMessageBox()
            msgBox.setStyleSheet("font: " + str(self.fontSize) + "pt 'Arial'")
            msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
            msgBox.setWindowTitle("Fatal Error")
            msgBox.setText(
                "Fatal Error:\n" + str(e) +
                "\n\nFor more information on this error, look at CASPER.log in the application folder."
            )
            msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Close)
            msgBox.exec()

            exit(-1)

    # cancel function
    # clears everything and hides the window
    def cancel_function(self):
        """Reset the window to its default state and hide it.

        If an off-target run is in progress the user is asked to confirm
        first.  Declining leaves everything untouched; confirming kills the
        running process before the reset.

        Returns:
            -2 if the user declined to cancel a running off-target process,
            otherwise None.

        Any unexpected exception is logged, reported in a dialog, and ends
        the application with exit(-1).
        """
        try:
            if self.off_target_running:
                msgBox = QtWidgets.QMessageBox()
                msgBox.setStyleSheet("font: " + str(self.fontSize) +
                                     "pt 'Arial'")
                msgBox.setIcon(QtWidgets.QMessageBox.Icon.Question)
                msgBox.setWindowTitle("Off-Targeting is running")
                msgBox.setText(
                    "Off-Targetting is running. Closing this window will cancel that process, and return to the main window. .\n Do you wish to continue?"
                )
                msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Yes)
                msgBox.addButton(QtWidgets.QMessageBox.StandardButton.No)
                msgBox.exec()

                # Use the fully scoped enum name, like the addButton() calls
                # above; the unscoped QMessageBox.No spelling is not
                # available in every PyQt version.
                if msgBox.result() == QtWidgets.QMessageBox.StandardButton.No:
                    return -2

                # Clear the flag before killing the process.
                self.off_target_running = False
                self.process.kill()

            # Forget the loaded data.
            self.cspr_file = ''
            self.anno_data = []

            self.filename_input.setText('')
            self.output_path.setText('')

            self.gen_lib_dict.clear()
            self.cspr_data.clear()
            self.Output.clear()

            # Put every input widget back to its default value.
            self.start_target_range.setText('0')
            self.end_target_range.setText('100')
            self.space_line_edit.setText('15')
            self.find_off_Checkbox.setChecked(False)
            self.modifyParamscheckBox.setChecked(False)
            self.maxOFF_comboBox.setText('')
            self.fiveprimeseq.setText('')
            self.off_target_running = False
            self.progressBar.setValue(0)

            self.hide()
        except Exception as e:
            logger.critical("Error in cancel_function() in generate library.")
            logger.critical(e)
            logger.critical(traceback.format_exc())
            msgBox = QtWidgets.QMessageBox()
            msgBox.setStyleSheet("font: " + str(self.fontSize) + "pt 'Arial'")
            msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
            msgBox.setWindowTitle("Fatal Error")
            msgBox.setText(
                "Fatal Error:\n" + str(e) +
                "\n\nFor more information on this error, look at CASPER.log in the application folder."
            )
            msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Close)
            msgBox.exec()

            exit(-1)

    # browse function
    # allows the user to browse for a folder
    # stores their selection in the output_path line edit
    def browse_function(self):
        """Let the user pick an output folder and store it in output_path.

        Opens a directory chooser starting at GlobalSettings.CSPR_DB.  If the
        result is not an existing directory (for example the dialog was
        cancelled) nothing is changed.  Otherwise the chosen path, with a
        trailing path separator appended, is written to the output_path line
        edit.

        Does nothing while an off-target run is in progress.  Any unexpected
        exception is logged, reported in a dialog, and ends the application
        with exit(-1).
        """
        try:
            if self.off_target_running:
                return
            # get the folder
            filed = QtWidgets.QFileDialog()
            mydir = QtWidgets.QFileDialog.getExistingDirectory(
                filed, "Open a Folder", GlobalSettings.CSPR_DB,
                QtWidgets.QFileDialog.Option.ShowDirsOnly)
            if not os.path.isdir(mydir):
                return

            # Append the separator so the file name can be concatenated
            # directly onto this text.  (The platform name was previously
            # misspelled, so the Windows branch could never be taken.)
            if platform.system() == "Windows":
                self.output_path.setText(mydir + "\\")
            else:
                self.output_path.setText(mydir + "/")
        except Exception as e:
            logger.critical("Error in browse_function() in generate library.")
            logger.critical(e)
            logger.critical(traceback.format_exc())
            msgBox = QtWidgets.QMessageBox()
            msgBox.setStyleSheet("font: " + str(self.fontSize) + "pt 'Arial'")
            msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
            msgBox.setWindowTitle("Fatal Error")
            msgBox.setText(
                "Fatal Error:\n" + str(e) +
                "\n\nFor more information on this error, look at CASPER.log in the application folder."
            )
            msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Close)
            msgBox.exec()

            exit(-1)

    # this function builds the dictionary that is used in the generate function
    # this is the version that builds it from data from feature_table, gbff, or gff
    # builds it exactly as Brian built it in the files given
    def build_dict_non_kegg(self):
        """Fill self.gen_lib_dict from the parsed annotation records.

        Each entry of self.anno_data is read as (chromosome, feature).  The
        feature's name is used as the dictionary key, and the stored value is
        the list
        [chromosome, start, end, strand, description, name].

        Any unexpected exception is logged, reported in a dialog, and ends
        the application with exit(-1).
        """
        try:
            for record in self.anno_data:
                chrom = record[0]
                feature = record[1]
                # Look each value up once and reuse it for both the key and
                # the stored row.
                feature_name = get_name(feature)
                feature_desc = get_description(feature)
                ### Order: chromosome number, gene start, gene end, dir of gene, gene description, gene name/locus tag
                self.gen_lib_dict[feature_name] = [
                    chrom,
                    int(feature.location.start),
                    int(feature.location.end),
                    get_strand(feature),
                    feature_desc,
                    feature_name,
                ]
        except Exception as e:
            logger.critical(
                "Error in build_dict_non_kegg() in generate library.")
            logger.critical(e)
            logger.critical(traceback.format_exc())
            msgBox = QtWidgets.QMessageBox()
            msgBox.setStyleSheet("font: " + str(self.fontSize) + "pt 'Arial'")
            msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
            msgBox.setWindowTitle("Fatal Error")
            msgBox.setText(
                "Fatal Error:\n" + str(e) +
                "\n\nFor more information on this error, look at CASPER.log in the application folder."
            )
            msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Close)
            msgBox.exec()

            exit(-1)

    # generate function taken from Brian's code
    def generate(self, num_targets_per_gene, score_limit, space, output_file,
                 fiveseq):
        """Select gRNA targets for every gene and write them to a CSV file.

        For each gene in self.gen_lib_dict the candidate list stored in
        self.cspr_data[gene] is filtered IN PLACE: low on-target score,
        poly-T runs, optional 5' prefix, optional position range inside the
        gene, and (when off-targeting is checked) maximum off-target score.
        Up to num_targets_per_gene of the remaining candidates are stored in
        self.Output[gene].  When "modify search parameters" is checked, genes
        that end up short are topped up with rejected candidates; those get a
        '*' appended to their strand field and a '**' prefix on their row id
        in the file.

        Candidates are indexed as (location, sequence, PAM, on-target score,
        strand, [5]).  Index 5 is compared against the maximum off-target
        score and written to the "Off-Target Score" column.

        Args:
            num_targets_per_gene: maximum number of targets kept per gene.
            score_limit: minimum on-target score a candidate must reach.
            space: minimum distance between consecutively selected targets.
            output_file: path of the CSV file to write.
            fiveseq: required 5' prefix of the guide sequence ('' for none);
                it is upper-cased before comparison.

        Returns:
            -1 if the target range is invalid or the file could not be
            written, otherwise None.

        Any other unexpected exception is logged, reported in a dialog, and
        ends the application with exit(-1).
        """
        try:
            deletedDict = dict()

            # The checkbox states do not change while this method runs, so
            # read them once.
            keep_rejected = self.modifyParamscheckBox.isChecked()
            off_target_checked = self.find_off_Checkbox.isChecked()

            def drop_matching(targets, rejected, should_drop):
                # Remove every target for which should_drop() is true.
                # Walks backwards so pop() never shifts an unvisited index;
                # removed targets are remembered when a list is supplied.
                for idx in range(len(targets) - 1, -1, -1):
                    if should_drop(targets[idx]):
                        if rejected is not None:
                            rejected.append(targets[idx])
                        targets.pop(idx)

            # check and see if we need to search based on target_range
            startNum = float(self.start_target_range.text())
            endNum = float(self.end_target_range.text())
            checkStartandEndBool = False
            if startNum != 0.0 or endNum != 100.0:
                if startNum >= 0.0 and endNum <= 100.0:
                    # Percentages -> fractions of the gene length.
                    startNum = startNum / 100
                    endNum = endNum / 100
                    checkStartandEndBool = True
                else:
                    msgBox = QtWidgets.QMessageBox()
                    msgBox.setStyleSheet("font: " + str(self.fontSize) +
                                         "pt 'Arial'")
                    msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
                    msgBox.setWindowTitle("Invalid Targeting Range:")
                    msgBox.setText(
                        "Please select a targeting range between 0 and 100.")
                    msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Ok)
                    msgBox.exec()

                    return -1

            for gene in self.gen_lib_dict:
                gene_info = self.gen_lib_dict[gene]
                target_list = self.cspr_data[
                    gene]  # Gets the gRNAs for given gene

                # Reverse the target list if the gene is on negative strand:
                if gene_info[3] == "-":
                    target_list.reverse()

                # also store the ones deleted if the user selects 'modify search parameters'
                rejected = None
                if keep_rejected:
                    rejected = deletedDict[gene] = list()

                # Filter out the guides with low on-target scores and the
                # ones containing a run of five or more T's.
                drop_matching(
                    target_list, rejected,
                    lambda t: int(t[3]) < int(score_limit) or re.search(
                        "T{5,10}", t[1]) is not None)

                # check for the fiveseq
                if fiveseq != '':
                    prefix = fiveseq.upper()
                    drop_matching(target_list, rejected,
                                  lambda t: not t[1].startswith(prefix))

                # check the target range here: keep only targets whose
                # relative position inside the gene lies in the chosen range
                if checkStartandEndBool:
                    gene_start = int(gene_info[1])
                    totalDistance = gene_info[2] - gene_info[1]
                    drop_matching(
                        target_list, rejected,
                        lambda t: not (startNum <=
                                       (abs(int(t[0])) - gene_start) /
                                       totalDistance <= endNum))

                # if the user selected off-targeting, check to see that the targets do not exceed the selected max score
                if off_target_checked:
                    maxScore = float(self.maxOFF_comboBox.text())
                    drop_matching(target_list, rejected,
                                  lambda t: maxScore < float(t[5]))

                # Now generating the targets
                self.Output[gene] = list()
                chosen = self.Output[gene]
                # Placeholder "previous target" used before anything has
                # been selected for this gene.
                placeholder = (0, "xyz", 'abc', 1, "-")
                prev_target = placeholder
                picked = 0
                vec_index = 0
                while picked < num_targets_per_gene:
                    if len(target_list) == 0 or vec_index >= len(target_list):
                        break
                    # Skip candidates that are closer than `space` to the
                    # previous pick; a closer candidate with a higher score
                    # replaces the previous pick instead.
                    while abs(
                            int(target_list[vec_index][0]) -
                            int(prev_target[0])) < int(space):
                        if (target_list[vec_index][3] > prev_target[3]
                                and prev_target != placeholder):
                            chosen.remove(prev_target)
                            chosen.append(target_list[vec_index])
                            prev_target = target_list[vec_index]
                        vec_index += 1
                        # check and see if there will be a indexing error
                        if vec_index >= len(target_list) - 1:
                            vec_index = vec_index - 1
                            break
                    # NOTE(review): when the scan above reaches the end of
                    # the list the index is stepped back, so this append can
                    # add a target that is closer than `space` or that was
                    # already selected -- confirm whether that is intended.
                    # Add the new target to the output and count it
                    chosen.append(target_list[vec_index])
                    prev_target = target_list[vec_index]
                    picked += 1
                    vec_index += 1

            # if the user selects modify search parameters, go through and check to see if each one has the number of targets that the user wanted
            # if not, append from the deletedDict until they do
            if keep_rejected:
                for gene in self.Output:
                    chosen = self.Output[gene]
                    if len(chosen) < num_targets_per_gene:
                        for cand in deletedDict[gene]:
                            if len(chosen) == num_targets_per_gene:
                                break
                            # '*' on the strand marks a target that did not
                            # meet the search parameters.
                            chosen.append((cand[0], cand[1], cand[2], cand[3],
                                           cand[4] + '*', cand[5]))

            # Now output to the file
            try:
                # 'with' guarantees the file is closed even if a write fails.
                with open(output_file, 'w') as f:
                    if off_target_checked:
                        f.write(
                            'Gene Name,Sequence,On-Target Score,Off-Target Score,Location,PAM,Strand\n'
                        )
                    else:
                        f.write(
                            'Gene Name,Sequence,On-Target Score,Location,PAM,Strand\n'
                        )

                    for gene in self.Output:
                        gene_name = self.gen_lib_dict[gene][-1]
                        for number, target in enumerate(self.Output[gene],
                                                        start=1):
                            # check to see if the target did not match the user's parameters and they selected 'modify'
                            # if the target has an error, put 2 asterisks in front of the row id
                            if '*' in target[4]:
                                tag_id = "**" + gene_name + "-" + str(number)
                            else:
                                tag_id = gene_name + "-" + str(number)

                            # Commas would break the CSV columns.
                            tag_id = tag_id.replace(',', '')

                            fields = [tag_id, target[1], str(target[3])]
                            if off_target_checked:
                                fields.append(str(target[5]))
                            # target[4][0] drops the '*' marker from the strand.
                            fields += [
                                str(abs(int(target[0]))), target[2],
                                target[4][0]
                            ]
                            f.write(','.join(fields) + '\n')
            except PermissionError:
                msgBox = QtWidgets.QMessageBox()
                msgBox.setStyleSheet("font: " + str(self.fontSize) +
                                     "pt 'Arial'")
                msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
                msgBox.setWindowTitle("File Cannot Open")
                msgBox.setText(
                    "This file cannot be opened. Please make sure that the file is not opened elsewhere and try again."
                )
                msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Ok)
                msgBox.exec()

                return -1
            except Exception as e:
                # Report the failure and return -1.  Returning None here
                # would be indistinguishable from success for callers that
                # test "!= -1".
                logger.error(
                    "Error writing the library file in generate() in generate library."
                )
                logger.error(e)
                logger.error(traceback.format_exc())
                msgBox = QtWidgets.QMessageBox()
                msgBox.setStyleSheet("font: " + str(self.fontSize) +
                                     "pt 'Arial'")
                msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
                msgBox.setWindowTitle("File Cannot Be Written")
                msgBox.setText(
                    "The library could not be written to the output file:\n" +
                    str(e))
                msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Ok)
                msgBox.exec()

                return -1
        except Exception as e:
            logger.critical("Error in generate() in generate library.")
            logger.critical(e)
            logger.critical(traceback.format_exc())
            msgBox = QtWidgets.QMessageBox()
            msgBox.setStyleSheet("font: " + str(self.fontSize) + "pt 'Arial'")
            msgBox.setIcon(QtWidgets.QMessageBox.Icon.Critical)
            msgBox.setWindowTitle("Fatal Error")
            msgBox.setText(
                "Fatal Error:\n" + str(e) +
                "\n\nFor more information on this error, look at CASPER.log in the application folder."
            )
            msgBox.addButton(QtWidgets.QMessageBox.StandardButton.Close)
            msgBox.exec()

            exit(-1)