def main():
    """Fit and apply the Bayes classifier in both directions between two cohorts.

    Two inputs, ``net_a`` and ``net_b``, are derived from the metastatic table
    by ``process`` (called with and without ``rb1=True``).  A classifier is
    then fitted on the OSU mutation table and applied to the TCGA table, and
    a second classifier is fitted on TCGA and applied to OSU.  The fitted
    ``attributes`` of each classifier are printed; the return values of
    ``predict`` are not used here.
    """
    meta_path = 'df_OSU_LMS_METASTATIC.csv'
    osu_path = 'LMS_OSU_MUTATION.csv'
    tcga_path = 'LMS_TCGA_MUTATION.csv'

    meta_frame = pd.read_csv(meta_path)
    net_a = process(meta_frame, rb1=True)
    net_b = process(meta_frame)

    # Each table is re-read from disk before every fit/predict call, exactly
    # as before, so no call ever sees a frame another call may have modified.

    # Direction 1: fit on OSU, predict on TCGA.
    osu_frame = pd.read_csv(osu_path)
    osu_model = bae.BayesClassifier()
    osu_model.fit(net_a, net_b, osu_frame)
    print(osu_model.attributes)
    tcga_frame = pd.read_csv(tcga_path)
    osu_model.predict(net_a, net_b, tcga_frame)

    # Direction 2: fit on TCGA, predict on OSU.
    tcga_frame = pd.read_csv(tcga_path)
    tcga_model = bae.BayesClassifier()
    tcga_model.fit(net_a, net_b, tcga_frame)
    print(tcga_model.attributes)
    osu_frame = pd.read_csv(osu_path)
    tcga_model.predict(net_a, net_b, osu_frame)
def run_bayes(data_path):
    """Train a Bayes classifier and evaluate it on training and testing data.

    Every sub-directory of ``<data_path>/TRAINING`` is taken as one
    classification name.  The classifier is trained via ``train_bayes`` on the
    TRAINING tree and then passed to ``test_bayes`` twice: first against the
    TRAINING tree again, then against ``<data_path>/TESTING``.

    Args:
        data_path: Directory containing the ``TRAINING`` and ``TESTING``
            sub-directories.
    """
    training_path = os.path.join(data_path, "TRAINING")
    classifications = [
        entry
        for entry in os.listdir(training_path)
        if os.path.isdir(os.path.join(training_path, entry))
    ]

    classifier = BayesClassifier.BayesClassifier()
    train_bayes(classifier, classifications, training_path)

    testing_path = os.path.join(data_path, "TESTING")

    # Single-argument print(...) emits the same text under Python 2 and 3,
    # unlike the former print statements, which Python 3 rejects.
    print("Running on Training Data (asterisk means incorrect)...")
    test_bayes(classifier, classifications, training_path)

    print("Running on Testing Data (asterisk means incorrect)...")
    test_bayes(classifier, classifications, testing_path)
def eval(sText):
    """Run ten rounds of five-fold evaluation and print the accuracy figures.

    Each round calls ``split(sText, "output")`` and then, for each of five
    folds, trains a fresh ``BayesClassifier`` on four of the files
    ``output.train0`` .. ``output.train4`` and scores it on the remaining one.
    Rows whose ``risklength`` equals 1 are reported as invalid and skipped;
    rows classified as ``"HOLD"`` are counted separately and excluded from
    the accuracy figures.

    NOTE(review): ``split`` is presumably what writes the ``output.train*``
    files -- confirm against its definition.
    NOTE(review): this function's name shadows the builtin ``eval`` within
    its module; it is kept because callers use it.

    Args:
        sText: Passed through unchanged to ``split``.

    Raises:
        ZeroDivisionError: If a fold contains no scored document (every row
            was invalid or classified as ``"HOLD"``).
    """
    totalaccuracy_numer = 0
    totalaccuracy_denom = 0

    for _round in range(10):
        thisaccuracy_numer = 0
        thisaccuracy_denom = 0
        split(sText, "output")

        for doc in range(5):
            # Every print below is a single-argument print(...) whose text is
            # what the former Python 2 print statement produced (items joined
            # by one space), so the output is unchanged on Python 2 and the
            # function is also valid on Python 3.
            print("i is:  %s" % (doc,))

            totaldic = defaultdict(int)         # scored documents per true label
            totalcorrectdic = defaultdict(int)  # correctly classified per true label

            # Train on folds doc .. doc+3 (mod 5); fold doc+4 (mod 5) is held out.
            bc = BayesClassifier()
            for offset in range(4):
                bc.train("output.train{0}".format((doc + offset) % 5))
            reader = DataReader("output.train{0}".format((doc + 4) % 5))

            correct = 0
            total = 0
            hold = 0
            for label, tokens, _company, date, _price, risklength in reader:
                print(label)
                tokenstring = " ".join(tokens)
                print(date)

                if risklength == 1:
                    print("invalid document; ignore")
                    continue

                # Classify once and reuse the answer for both the HOLD check
                # and the correctness check; previously the identical call was
                # made twice.  Assumes classify() returns the same answer for
                # the same arguments -- TODO confirm.
                guess = bc.classify(tokenstring, risklength, date)
                if guess == "HOLD":
                    hold += 1
                    continue

                totaldic[label] += 1
                total += 1
                if guess == label:
                    correct += 1
                    totalcorrectdic[label] += 1

            fold_accuracy = correct / float(total)
            print("Holds:  %s" % (hold,))
            print("Accuracy: %s" % (fold_accuracy,))
            thisaccuracy_numer += fold_accuracy
            thisaccuracy_denom += 1

            for key in totaldic:
                print("%s %s" % (totalcorrectdic[key], totaldic[key]))
                # The ratio is correct / scored documents of this *true* label
                # (i.e. per-label recall); the printed word "precision" is
                # kept as-is so the output format does not change.
                print("%s  precision:  %s"
                      % (key, totalcorrectdic[key] / float(totaldic[key])))

        print("This Round Accuracy:  %s"
              % (thisaccuracy_numer / thisaccuracy_denom,))
        totalaccuracy_numer += thisaccuracy_numer
        totalaccuracy_denom += thisaccuracy_denom

    print("Total Accuracy:  %s" % (totalaccuracy_numer / totalaccuracy_denom,))
def testClassifier(outputLabel):
    """Train on ``<outputLabel>.train``, score on ``<outputLabel>.test``, print results.

    For every true label seen in the test data this prints the fraction of
    the classifier's guesses of that label that were correct, followed by
    the overall accuracy.

    A label that appears in the test data but was never guessed is reported
    as ``0.0`` (previously a ``KeyError``), and an empty test set reports an
    overall accuracy of ``0.0`` (previously a ``ZeroDivisionError``).

    Args:
        outputLabel: Common path prefix of the ``.train`` and ``.test`` files.
    """
    bc = BayesClassifier()
    bc.train(outputLabel + ".train")
    reader = DataReader(outputLabel + ".test")

    correctLabel = {}   # true label -> number of correct guesses of it
    numberGuess = {}    # guessed label -> number of times it was guessed
    correct = 0.0
    total = 0.0

    for label, tokens in reader:
        correctLabel.setdefault(label, 0.0)
        guess = bc.classify(" ".join(tokens))
        numberGuess[guess] = numberGuess.get(guess, 0.0) + 1
        if guess == label:
            correctLabel[guess] += 1
            correct += 1
        total += 1

    # Single-argument print(...) reproduces the text of the former Python 2
    # print statements and is also valid on Python 3.
    for label in correctLabel:
        # The classifier may never have produced this label at all, in which
        # case there is no entry in numberGuess to divide by.
        guesses = numberGuess.get(label, 0.0)
        precision = correctLabel[label] / guesses if guesses else 0.0
        print("%s - %s" % ("Correct " + label, precision))

    accuracy = correct / total if total else 0.0
    print("Total accuracy - %s" % (accuracy,))
# Distance classifier, trained and evaluated with the 'euclidean' metric.
dc = DC.DistanceClassifier()
dc.train(trainInput, trainOutput, 'euclidean')
resultsDcTrain = dc.test(trainInput, trainOutput, 'euclidean')
resultsDcTest = dc.test(testInput, testOutput, 'euclidean')

# Accuracy = sum of the first elements / sum of the second elements of the
# entries in results[0] (presumably (correct, total) pairs -- TODO confirm).
print(
    "Distance classifier accuracy for train set : "
    + str(
        np.sum([pair[0] for pair in resultsDcTrain[0]])
        / np.sum([pair[1] for pair in resultsDcTrain[0]])
    )
)
print(
    "Distance classifier accuracy for test set : "
    + str(
        np.sum([pair[0] for pair in resultsDcTest[0]])
        / np.sum([pair[1] for pair in resultsDcTest[0]])
    )
)

# results[1] is handed to the confusion-matrix plotter and saved to file.
IO.PlotCM(resultsDcTrain[1], save=True, fileName="distanceConfusionTrain")
IO.PlotCM(resultsDcTest[1], save=True, fileName="distanceConfusionTest")

# Assignment 3:
# Bayes classifier constructed with (7, 5).
bc = BC.BayesClassifier(7, 5)
bc.train(trainInput, trainOutput)
resultsBcTrain = bc.test(trainInput, trainOutput)
resultsBcTest = bc.test(testInput, testOutput)
print("Bayes classifier accuracy for train set for 5 & 7 : " + str(resultsBcTrain))
print("Bayes classifier accuracy for test set for 5 & 7: " + str(resultsBcTest))

# Bayes classifier constructed with (1, 4).
bc = BC.BayesClassifier(1, 4)
bc.train(trainInput, trainOutput)
resultsBcTrain = bc.test(trainInput, trainOutput)
resultsBcTest = bc.test(testInput, testOutput)
print("Bayes classifier accuracy for train set for 1 & 4: " + str(resultsBcTrain))
def _timestamp():
    """Return the current local time formatted as ``YYYY-MM-DD HH:MM:SS``."""
    # Same output as the '%F %T' shorthand used before, but spelled with
    # directives that strftime supports on every platform.
    return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')


def receive_server(socklink, address):
    """Serve a single client connection: read one request, reply, then close.

    As implemented here, the client first sends the textual length of its
    payload and then the payload itself, which is the text of a dict with
    an ``'action'`` key.  Supported actions:

    * ``'request-result'`` -- classify ``data['content']`` and reply with a
      list holding 1 (``'ham'``), -1 (``'spam'``) or 0 (anything else) per item.
    * ``'request-info'`` -- reply with ``Filter_operation.search_owner(...)``.
    * ``'post'`` -- reply with ``Filter_operation.add_one_rule(...)``.
    * ``'delete'`` -- reply with ``Filter_operation.delete_one_rule(...)``.

    Any other action is only logged; no reply is sent.  Every exception is
    caught, logged with a traceback, and the socket is always closed.

    Args:
        socklink: Connected socket for this client.
        address: Client address; used only in log messages.
    """
    try:
        # `sem` is defined outside this function; presumably a semaphore that
        # bounds the number of concurrent handlers -- TODO confirm.
        with sem:
            # The client first announces the length of the string it is about
            # to send.  (This used to read from the undefined name
            # `socketlink`, which failed with NameError on every request.)
            length = socklink.recv(102400)
            # SECURITY: eval() executes whatever the peer sends.  Both eval()
            # calls in this function act on untrusted network input and
            # should be replaced by a data-only parser (e.g. json, or
            # ast.literal_eval on the decoded text) once all clients are
            # confirmed to send plain literals.
            length = eval(length.decode())

            chunks = []  # raw byte chunks sent by the client
            current_length = 0
            while current_length < length:
                chunk = socklink.recv(102400)
                if not chunk:
                    # recv() returns b'' once the peer has closed the
                    # connection; without this check the loop never ends.
                    break
                current_length += len(chunk)
                chunks.append(chunk)

            # Decode the payload text (raises if it is not valid text).
            data = b''.join(chunks)
            text = data.decode()

            if not data:
                # Nothing was received.
                print('No data received from ' + str(address)
                      + "! Request cancelled." + " " + _timestamp())
                raise Exception("No data received.")

            # Per the agreed protocol, turn the received text into a dict.
            # SECURITY: see the note on eval() above.
            data_dic = eval(text)
            action = data_dic['action']

            if action == 'request-result':
                # Run the model and map each prediction to a number:
                # 1 = not spam, -1 = spam, 0 = something went wrong.
                bayes = Classifier.BayesClassifier()
                predictions = bayes.classify(data_dic['content'])
                content = []
                for result_item in predictions:
                    if result_item == 'ham':
                        content.append(1)
                    elif result_item == 'spam':
                        content.append(-1)
                    else:
                        content.append(0)

                # Reply to the client with the predictions.
                # NOTE(review): 'response-reslut' is misspelled, but it is part
                # of the wire format and clients may match on it, so it is
                # left untouched.
                response_data = {
                    'action': 'response-reslut',
                    'content': content
                }
                socklink.sendall(repr(response_data).encode())
                print('Request-result from ' + str(address)
                      + " handled successfully." + " " + _timestamp())

            elif action == 'request-info':
                # Look up the filter configuration belonging to the username
                # (the user's current e-mail address) carried in the request.
                DB_operation = filter_rule_DB_operation.Filter_operation()
                # Reply to the client with the matching configuration.
                response_data = DB_operation.search_owner(data_dic)
                socklink.sendall(repr(response_data).encode())
                print('Request-info from ' + str(address)
                      + " handled successfully." + " " + _timestamp())

            elif action == 'post':
                # Store the filter rule carried in the request; per the
                # original note the result signals success (1) or failure (-1).
                DB_operation = filter_rule_DB_operation.Filter_operation()
                # Reply to the client with whether the upload succeeded.
                response_data = DB_operation.add_one_rule(data_dic)
                socklink.sendall(repr(response_data).encode())
                print('post from ' + str(address)
                      + " handled successfully." + " " + _timestamp())

            elif action == 'delete':
                # Request to delete a filter rule.
                DB_operation = filter_rule_DB_operation.Filter_operation()
                # Reply with the result of the database delete operation.
                response_data = DB_operation.delete_one_rule(data_dic)
                socklink.sendall(repr(response_data).encode())
                print('delete request from ' + str(address)
                      + " handled successfully." + " " + _timestamp())

            else:
                # None of the known request types matched: log it.
                print('Unknown request type from ' + str(address)
                      + '. Request cancelled' + " " + _timestamp())

    except Exception as error:
        print('Error happened in processing thread. \nClient: ' + str(address)
              + "\nError: " + str(error) + " " + _timestamp())
        traceback.print_exc()
    finally:
        socklink.close()
def bayesClassifier(x, y):
    """Cross-validate a freshly constructed ``bc.BayesClassifier`` on ``x`` and ``y``.

    Returns whatever ``crossValidation`` returns for the new classifier.
    """
    return crossValidation(bc.BayesClassifier(), x, y)