# routine_2(fn, k, tree, numOfLines): feed every length-k window of every line of
# file `fn` into `tree`, then run TreeClass.check_tree and return `tree`.
#
# Parameters
#   fn         -- path of the text file to read; opened in 'r' mode.
#   k          -- window (k-mer) length used for slicing `myline[j:j+k]`.
#   tree       -- object exposing `find_in_tree(...)`; returned unchanged as an object.
#   numOfLines -- multiplied by `training_perc` and truncated with int() to get the
#                 last 1-based line number that is handled with flag False.
#
# What the code does
#   * Lines are counted from 1 (`count` is incremented before use) and a progress
#     message 'seq = <count>, k = <k>' is printed for each line.
#   * For each j in range(len(myline) - k) the running total `kmers_examined` is
#     incremented and the slice myline[j:j+k] is passed to tree.find_in_tree.
#     The second positional argument is False while
#     count <= int(numOfLines * training_perc) and True for later lines; the
#     zero-based line index is passed as sequenceIndex=count-1.
#   * Lines are sliced exactly as read, so a trailing newline is counted in
#     len(myline): for a newline-terminated line the loop produces every window of
#     the text before the newline, whereas a final line without a newline yields
#     one window fewer.
#   * `training_perc` and `root` are neither parameters nor locals; they are read
#     from an enclosing/module scope that is not visible here.
#
# NOTE(review): this definition has lost its line breaks and indentation, so it is
# not visible whether `TreeClass.found_kmers = False` and
# `TreeClass.check_tree(root, kmers_examined, k)` run once per input line or once
# after the whole file has been read. Confirm against the upstream source before
# re-indenting.
def routine_2(fn, k, tree, numOfLines): kmers_examined = 0 with open(fn, 'r') as fh: count = 0 for myline in fh: count += 1 print('seq = {0}, k = {1}'.format(count,k)) for j in range(len(myline) - k): kmers_examined += 1 this_kmer = myline[j:j+k] if count <= int(numOfLines * training_perc): tree.find_in_tree(this_kmer, False , kmers_examined, k, True, sequenceIndex=count-1) else: tree.find_in_tree(this_kmer, True, kmers_examined, k, True, sequenceIndex=count-1) TreeClass.found_kmers = False TreeClass.check_tree(root, kmers_examined, k) return tree
def add_all_nodes(current, depth):
    """Attach one child per letter 'A', 'C', 'G', 'T' to ``current``.

    For each letter, in that order, a ``TreeClass.Node`` is constructed from the
    letter, ``current`` and ``depth + 1`` and handed to ``current.add_child``.

    Args:
        current: node that receives the four children.
        depth: value for ``current``'s level; children are created with
            ``depth + 1``.

    Returns:
        The same ``current`` object that was passed in.
    """
    for base in "ACGT":
        child = TreeClass.Node(base, current, depth + 1)
        current.add_child(child)
    return current
def predict(self, method, age, sex, days, criteri, rhytmc, rhytm):
    """Forward the inputs to the ``Predict`` function of the selected model.

    ``method`` is compared against four labels (Russian UI strings):
    logistic regression, support vector machine, decision tree and random
    forest. The remaining arguments are passed through unchanged, in order.

    Returns:
        Whatever the chosen module's ``Predict`` returns, or ``None`` when
        ``method`` matches none of the four labels.
    """
    # Pick the backend lazily so only the matching module name is looked up.
    if method == "Логистическая регрессия":
        backend = LogReg
    elif method == "Метод опорных векторов":
        backend = SVMFIle
    elif method == "Дерево решений":
        backend = TreeClass
    elif method == "Случайный лес":
        backend = RandomForestFile
    else:
        return None
    return backend.Predict(age, sex, days, criteri, rhytmc, rhytm)
def initialize_tree():
    """Create a ``TreeClass.Tree`` and pre-populate its first levels.

    ``add_all_nodes`` is applied to the root (depth argument 0) and then,
    depth-first, to every node reached through ``.children`` with depth
    arguments 1, 2 and 3. Nodes created by the depth-3 calls are not expanded
    further.

    Returns:
        A ``(root, tree)`` tuple, where ``root`` is the value returned by
        ``add_all_nodes`` for ``tree.root``.
    """
    deepest_expanded = 3

    def _expand(node, depth):
        # Expand this node first, then descend into the children it now has.
        node = add_all_nodes(node, depth)
        if depth < deepest_expanded:
            for child in node.children:
                _expand(child, depth + 1)
        return node

    tree = TreeClass.Tree()
    root = _expand(tree.root, 0)
    return root, tree
if __name__ == "__main__":
    def log_uncaught_exceptions(ex_cls, ex, tb):
        """Print an uncaught exception, show it in an error dialog, then quit.

        Installed as ``sys.excepthook`` below, so it receives the exception
        class, the exception instance and the traceback object.
        """
        import traceback

        text = '{}: {}:\n'.format(ex_cls.__name__, ex)
        text += ''.join(traceback.format_tb(tb))
        print(text)
        # NOTE(review): this hook is installed before QApplication is created
        # further down; confirm the dialog can be shown if an error occurs
        # during the training phase.
        QtWidgets.QMessageBox.critical(None, 'Error', text)
        quit()

    sys.excepthook = log_uncaught_exceptions

    # Train all four models when any of these files is missing
    # (presumably the files are written by the train() functions -- confirm).
    model_files = ('logReg', 'SVM', 'randomForest', 'Tree')
    if not all(os.path.isfile(name) for name in model_files):
        # Pass the callables themselves. Writing ``target=LogReg.train()``
        # would run the training immediately on the main thread and hand its
        # return value to Thread, so nothing would actually run in a thread.
        trainers = (
            LogReg.train,
            SVMFIle.train,
            TreeClass.train,
            RandomForestFile.train,
        )
        threads = [Thread(target=train) for train in trainers]
        for worker in threads:
            worker.start()
        # Wait for every training run before the UI starts.
        # Exceptions raised inside a worker thread are reported by the
        # threading module's own hook, not by sys.excepthook.
        for worker in threads:
            worker.join()

    app = QtWidgets.QApplication(sys.argv)
    w = Ui()
    w.show()
    sys.exit(app.exec_())
### Tree
# Build the taxonomy tree from nodes.dmp. Each input line is split on "|" and
# the first three stripped fields are used: [0] the node id (also the TTree
# key), [1] the parent id, [2] the third TaxonTerm argument (presumably the
# rank in an NCBI-taxdump-style file -- confirm).
TTree = {}
# The context manager closes the file; iterating a bare open() leaked the handle.
with open("../Data/nodes.dmp") as nodes_file:
    for line in nodes_file:
        line = [i.strip() for i in line.split("|")][0:3]
        Node = TaxonTerm(line[0], "", line[2])
        Node.Parent = line[1]  # parent id for now; replaced by the node object below
        Node.Childs = set()
        Node.seqs = []
        TTree[line[0]] = Node

# Second pass, once every node exists: turn parent ids into node references
# and register each node in its parent's child set.
for ID, Node in TTree.items():
    Node.Parent = TTree[Node.Parent]
    Node.Parent.Childs.add(Node)

### genus Data
# For every row of the spreadsheet, collect the ids returned by
# tc.getAllNodeofGenus for the row's "Tid" node and write them, with name and
# rank, to one Excel file per row.
Data = pd.read_excel("../Data/ZYName.xlsx", 0, index_col=0)
for i in Data.index:
    # Single-argument call form prints the same text on Python 2 and Python 3.
    print("%s %s" % (i, Data.loc[i, "Name"]))
    IDs = set()
    IDs = tc.getAllNodeofGenus(TTree[str(Data.loc[i, "Tid"])], IDs)
    out = [[ID, ID2Name[ID], TTree[ID].Rank] for ID in IDs]
    out = pd.DataFrame(np.array(out), columns=["ID", "Name", "Rank"])
    out.to_excel(
        "../Result0923/Sepcies/"
        + str(i)
        + "_"
        + str(Data.loc[i, "Tid"])
        + "_"
        + str(Data.loc[i, "TName"])
        + ".xls"
    )
print("Done!")