Esempio n. 1
0
def routine_2(fn, k, tree, numOfLines):
    """Feed every k-mer of every line of a file into ``tree``.

    The file ``fn`` is read line by line and each line is treated as one
    sequence.  Every length-``k`` window of a sequence is passed to
    ``tree.find_in_tree``.  Sequences whose 1-based position is at most
    ``int(numOfLines * training_perc)`` are submitted with ``False`` as the
    second argument, all later ones with ``True``.  Once the file has been
    consumed, ``TreeClass.found_kmers`` is reset to ``False`` and
    ``TreeClass.check_tree`` is called.

    Args:
        fn: Path of the text file to read, one sequence per line.
        k: Length of the windows (k-mers) to extract.
        tree: Object exposing ``find_in_tree``.
        numOfLines: Line count that, multiplied by the free variable
            ``training_perc``, gives the split point between the two
            kinds of ``find_in_tree`` call.

    Returns:
        The ``tree`` argument itself.
    """
    kmers_examined = 0

    with open(fn, 'r') as fh:
        # Loop-invariant: position of the last sequence submitted with False.
        last_training_seq = int(numOfLines * training_perc)

        for count, myline in enumerate(fh, start=1):
            print('seq = {0}, k = {1}'.format(count, k))

            # Strip the line terminator explicitly instead of relying on
            # "len(line) - k" to skip it: a final line without a trailing
            # newline would otherwise lose its last k-mer, and a "\r\n"
            # terminator would leak "\r" into the k-mers.
            sequence = myline.rstrip('\r\n')
            past_training = count > last_training_seq

            for j in range(len(sequence) - k + 1):
                kmers_examined += 1
                tree.find_in_tree(
                    sequence[j:j + k],
                    past_training,
                    kmers_examined,
                    k,
                    True,
                    sequenceIndex=count - 1,
                )

        TreeClass.found_kmers = False
        # NOTE(review): ``root`` is not a parameter of this function; it is
        # resolved from the enclosing module scope -- confirm it refers to
        # the root of ``tree``.
        TreeClass.check_tree(root, kmers_examined, k)

    return tree
Esempio n. 2
0
def add_all_nodes(current, depth):
    """Attach one new child to ``current`` for each letter A, C, G, T.

    For every letter, in that order, a ``TreeClass.Node`` is created with
    the letter, ``current`` and ``depth + 1`` as arguments and handed to
    ``current.add_child``.

    Args:
        current: Node that receives the new children.
        depth: Depth value of ``current``; children get ``depth + 1``.

    Returns:
        ``current``, the same object that was passed in.
    """
    child_depth = depth + 1
    for letter in 'ACGT':
        new_child = TreeClass.Node(letter, current, child_depth)
        current.add_child(new_child)

    return current
Esempio n. 3
0
 def predict(self, method, age, sex, days, criteri, rhytmc, rhytm):
     """Forward the inputs to the ``Predict`` function chosen by ``method``.

     ``method`` is compared against four fixed labels; the matching
     module's ``Predict`` is called with the remaining six arguments and
     its result is returned.  Any other label yields ``None``.
     """
     if method == "Логистическая регрессия":
         backend = LogReg
     elif method == "Метод опорных векторов":
         backend = SVMFIle
     elif method == "Дерево решений":
         backend = TreeClass
     elif method == "Случайный лес":
         backend = RandomForestFile
     else:
         # Unrecognised label: nothing to call.
         return None

     return backend.Predict(age, sex, days, criteri, rhytmc, rhytm)
Esempio n. 4
0
def initialize_tree():
    """Create a ``TreeClass.Tree`` and pre-populate it via ``add_all_nodes``.

    ``add_all_nodes`` is applied to the tree's root (depth 0), then to each
    of its children (depth 1), their children (depth 2) and the children of
    those (depth 3), visiting each subtree completely before moving on to
    the next sibling.

    Returns:
        A ``(root, tree)`` tuple, where ``root`` is the value returned by
        ``add_all_nodes`` for the tree's root.
    """
    deepest_expanded = 3  # last depth whose nodes are given children

    def _expand(node, depth):
        # Add the children first, then descend into each of them.
        node = add_all_nodes(node, depth)
        if depth < deepest_expanded:
            for child in node.children:
                _expand(child, depth + 1)
        return node

    tree = TreeClass.Tree()
    root = _expand(tree.root, 0)

    return root, tree
Esempio n. 5
0
if __name__ == "__main__":

    def log_uncaught_exceptions(ex_cls, ex, tb):
        """Print an uncaught exception, show it in an error dialog, then quit.

        Installed as ``sys.excepthook`` below, so it receives the exception
        class, the exception instance and its traceback.
        """
        import traceback

        text = '{}: {}:\n'.format(ex_cls.__name__, ex)
        text += ''.join(traceback.format_tb(tb))

        print(text)
        QtWidgets.QMessageBox.critical(None, 'Error', text)
        quit()

    sys.excepthook = log_uncaught_exceptions

    # Files whose presence is required to skip training; presumably these
    # are what the train() functions write -- TODO confirm.
    required_files = ('logReg', 'SVM', 'randomForest', 'Tree')
    if not all(os.path.isfile(name) for name in required_files):
        # Pass the callables themselves as targets. The previous
        # ``Thread(target=X.train())`` form called train() immediately on
        # the main thread and handed its *return value* to Thread, so the
        # four trainings ran one after another and the threads did no work.
        # NOTE(review): exceptions raised inside these worker threads are
        # reported by the threading module, not by sys.excepthook.
        trainers = (
            LogReg.train,
            SVMFIle.train,
            TreeClass.train,
            RandomForestFile.train,
        )
        threads = [Thread(target=trainer) for trainer in trainers]

        for thread in threads:
            thread.start()
        # Wait for every training run to finish before the UI starts.
        for thread in threads:
            thread.join()

    app = QtWidgets.QApplication(sys.argv)
    w = Ui()
    w.show()
    sys.exit(app.exec_())
Esempio n. 6
0
### Tree
# Build TTree, a dict mapping the first field of each nodes.dmp row to a
# TaxonTerm object.  Each row is split on "|"; only the first three fields
# are used: field 0 is the dict key, field 1 is the key of the parent row,
# field 2 is passed to TaxonTerm as its third argument.
TTree = {}
# Use a context manager so the file handle is closed once parsing is done
# (the bare open() in the loop header never closed it).
with open("../Data/nodes.dmp") as nodes_file:
    for line in nodes_file:
        line = [i.strip() for i in line.split("|")][0:3]
        Node = TaxonTerm(line[0], "", line[2])
        Node.Parent = line[1]  # parent key for now; resolved to an object below
        Node.Childs = set()
        Node.seqs = []
        TTree[line[0]] = Node
# Second pass: every node exists now, so replace each parent key with the
# parent object itself and register the node among that parent's children.
for ID, Node in TTree.items():
    Node.Parent = TTree[Node.Parent]
    Node.Parent.Childs.add(Node)
# print len(TTree)

### genus Data
Data = pd.read_excel("../Data/ZYName.xlsx", 0, index_col=0)
for i in Data.index:
    print i, Data.loc[i, "Name"]
    IDs = set()
    IDs = tc.getAllNodeofGenus(TTree[str(Data.loc[i, "Tid"])], IDs)
    out = []
    for ID in IDs:
        out.append([ID, ID2Name[ID], TTree[ID].Rank])
    # print out
    out = pd.DataFrame(np.array(out), columns=["ID", "Name", "Rank"])
    out.to_excel(
        "../Result0923/Sepcies/" + str(i) + "_" + str(Data.loc[i, "Tid"]) + "_" + str(Data.loc[i, "TName"]) + ".xls"
    )

print "Done!"