def main():
    """Fit and apply a ``bae.BayesClassifier`` in both directions (OSU <-> TCGA).

    The metastatic CSV is passed through ``process`` twice (with and without
    ``rb1=True``) to obtain the two inputs handed to every ``fit``/``predict``
    call. A classifier is then fitted on the OSU mutation table and applied to
    the TCGA table, and a second classifier is fitted on TCGA and applied to
    OSU. The ``attributes`` of each fitted classifier are printed.
    """
    metastatic_csv = 'df_OSU_LMS_METASTATIC.csv'
    osu_csv = 'LMS_OSU_MUTATION.csv'
    tcga_csv = 'LMS_TCGA_MUTATION.csv'

    metastatic = pd.read_csv(metastatic_csv)
    net_a = process(metastatic, rb1=True)
    net_b = process(metastatic)

    # First pass trains on OSU and predicts TCGA; second pass swaps the roles.
    for train_csv, test_csv in ((osu_csv, tcga_csv), (tcga_csv, osu_csv)):
        training = pd.read_csv(train_csv)
        classifier = bae.BayesClassifier()
        classifier.fit(net_a, net_b, training)
        print(classifier.attributes)
        held_out = pd.read_csv(test_csv)
        classifier.predict(net_a, net_b, held_out)
def eval(sText): totalaccuracy_numer = 0 totalaccuracy_denom = 0 for test in range(0, 10): thisaccuracy_numer = 0 thisaccuracy_denom = 0 split(sText, "output") for doc in range(0, 5): print "i is: ", doc totaldic = defaultdict(lambda: 0) totalcorrectdic = defaultdict(lambda: 0) bc = BayesClassifier() bc.train("output.train{0}".format(doc % 5)) bc.train("output.train{0}".format((doc + 1) % 5)) bc.train("output.train{0}".format((doc + 2) % 5)) bc.train("output.train{0}".format((doc + 3) % 5)) reader = DataReader("output.train{0}".format((doc + 4) % 5)) correct = 0 total = 0 hold = 0 for label, tokens, company, date, price, risklength in reader: print label tokenstring = " " tokenstring = tokenstring.join(tokens) print date if risklength == 1: print "invalid document; ignore" elif bc.classify(tokenstring, risklength, date) == "HOLD": #elif bc.classify(tokenstring, risklength) == "HOLD": hold += 1 else: totaldic[label] += 1 total += 1 if bc.classify(tokenstring, risklength, date) == label: #if bc.classify(tokenstring, risklength) == label: correct += 1 totalcorrectdic[label] += 1 print "Holds: ", hold print "Accuracy:", correct / float(total) thisaccuracy_numer += correct / float(total) thisaccuracy_denom += 1 for key in totaldic: print totalcorrectdic[key], totaldic[key] print key, " precision: ", totalcorrectdic[key] / float( totaldic[key]) print "This Round Accuracy: ", thisaccuracy_numer / thisaccuracy_denom totalaccuracy_numer += thisaccuracy_numer totalaccuracy_denom += thisaccuracy_denom print "Total Accuracy: ", totalaccuracy_numer / totalaccuracy_denom
def main():
    """Train a ``BayesClassifier`` on the file named by ``sys.argv[1]`` and
    print whether a fixed sample sentence scores above 0.5.

    The input is first passed to ``split`` with the output prefix
    ``output/SimpleData``; the classifier is trained on the resulting
    ``.train`` file.
    """
    source_file = str(sys.argv[1])
    prefix = "output/SimpleData"
    split(source_file, prefix)

    classifier = BayesClassifier()
    classifier.train(prefix + ".train")

    score = classifier.classify("I hate my AI class")
    verdict = "positive" if score > 0.5 else "negative"
    print(verdict)
def eval(sText): totalaccuracy_numer = 0; totalaccuracy_denom = 0; for test in range(0,10): thisaccuracy_numer = 0 thisaccuracy_denom = 0; split(sText, "output") for doc in range (0,5): print "i is: ", doc totaldic = defaultdict(lambda: 0) totalcorrectdic = defaultdict(lambda: 0) bc = BayesClassifier() bc.train("output.train{0}".format(doc%5)) bc.train("output.train{0}".format((doc+1)%5)) bc.train("output.train{0}".format((doc+2)%5)) bc.train("output.train{0}".format((doc+3)%5)) reader = DataReader("output.train{0}".format((doc+4)%5)) correct = 0 total = 0 hold = 0 for label, tokens, company, date, price, risklength in reader: print label tokenstring = " " tokenstring = tokenstring.join(tokens) print date if risklength == 1: print "invalid document; ignore" elif bc.classify(tokenstring, risklength, date) == "HOLD": #elif bc.classify(tokenstring, risklength) == "HOLD": hold += 1 else: totaldic[label] +=1 total += 1 if bc.classify(tokenstring, risklength, date) == label: #if bc.classify(tokenstring, risklength) == label: correct += 1 totalcorrectdic[label] += 1 print "Holds: ", hold print "Accuracy:", correct/float(total) thisaccuracy_numer += correct/float(total) thisaccuracy_denom += 1 for key in totaldic: print totalcorrectdic[key], totaldic[key] print key, " precision: ", totalcorrectdic[key]/float(totaldic[key]) print "This Round Accuracy: ", thisaccuracy_numer/thisaccuracy_denom totalaccuracy_numer += thisaccuracy_numer totalaccuracy_denom += thisaccuracy_denom print "Total Accuracy: ", totalaccuracy_numer/totalaccuracy_denom
def sendOk(request):
    """Store the e-mail submitted in ``request.POST`` and render the sent list.

    The sender is read from ``request.session['username']``. The message body
    is passed to ``BayesClassifier.getClassify`` and the result is stored as
    the e-mail's ``flag``. The response renders ``send.htm`` with every e-mail
    whose sender is the current user.
    """
    sender = request.session['username']
    form = request.POST
    body = form.get('message')

    Email.objects.create(
        sender=sender,
        receiver=form.get('receiver'),
        theme=form.get('theme'),
        message=body,
        flag=BayesClassifier.getClassify(body),
    )

    context = {
        'username': sender,
        'emails': Email.objects.filter(sender=sender),
    }
    return render(request, 'send.htm', context)
def run_bayes(data_path):
    """Train a Bayes classifier on ``<data_path>/TRAINING`` and evaluate it.

    Every sub-directory of the training directory is treated as one
    classification. After training, ``test_bayes`` is run first on the
    training directory and then on ``<data_path>/TESTING``.
    """
    train_dir = os.path.join(data_path, "TRAINING")
    test_dir = os.path.join(data_path, "TESTING")

    # One classification per sub-directory of the training directory.
    labels = []
    for entry in os.listdir(train_dir):
        if os.path.isdir(os.path.join(train_dir, entry)):
            labels.append(entry)

    model = BayesClassifier.BayesClassifier()
    train_bayes(model, labels, train_dir)

    runs = (
        ("Running on Training Data (asterisk means incorrect)...", train_dir),
        ("Running on Testing Data (asterisk means incorrect)...", test_dir),
    )
    for banner, directory in runs:
        print(banner)
        test_bayes(model, labels, directory)
def testClassifier(outputLabel): bc = BayesClassifier() bc.train(outputLabel + ".train") reader = DataReader(outputLabel + ".test") correctLabel = {} numberGuess = {} correct = 0.0 total = 0.0 for label, tokens in reader: if not label in correctLabel: correctLabel[label] = 0.0 guess = bc.classify(" ".join(tokens)) if not guess in numberGuess: numberGuess[guess] = 0.0 if guess == label: correctLabel[guess] += 1 correct += 1 numberGuess[guess] += 1 total += 1 for label in correctLabel: print "Correct " + label, "-", correctLabel[label] / numberGuess[label] print "Total accuracy -", correct / total
def sendOk(request):
    """Persist the posted e-mail and show the current user's sent messages.

    Reads ``receiver``, ``theme`` and ``message`` from ``request.POST``,
    stores them together with the value returned by
    ``BayesClassifier.getClassify(message)`` as ``flag``, and renders
    ``send.htm`` with all e-mails sent by ``request.session['username']``.
    """
    username = request.session['username']

    # Collect the posted fields in the order the model expects them.
    fields = {}
    for name in ('receiver', 'theme', 'message'):
        fields[name] = request.POST.get(name)
    fields['flag'] = BayesClassifier.getClassify(fields['message'])

    Email.objects.create(sender=username, **fields)

    sent = Email.objects.filter(sender=username)
    return render(request, 'send.htm', {'username': username, 'emails': sent})
C2_x.append(float(rowItem)) firstItemAppended = True elif( firstItemAppended == True): C2_y.append(float(rowItem)) C1_features = list() C2_features = list() C1_z = np.array(C1_x)**2 + np.array(C1_y)**2 C2_z = np.array(C2_x)**2 + np.array(C2_y)**2 C1_features.append(C1_x) C1_features.append(C1_y) #C1_features.append(C1_z) C2_features.append(C2_x) C2_features.append(C2_y) #C2_features.append(C2_z) classifier = BayesClassifier.GaussianBayesClassifier() C1_distribution = classifier.getClassDistribution(C1_features) C2_distribution = classifier.getClassDistribution(C2_features) C1_x_variance = C1_distribution[1][0] * C1_distribution[1][0] C1_y_variance = C1_distribution[1][1] * C1_distribution[1][1] C1_cov_matrix = classifier.calc_2d_covariance_matrix(C1_x, C1_y, C1_distribution[0][0],C1_distribution[0][1],C1_x_variance,C1_y_variance) C2_x_variance = C2_distribution[1][0] * C2_distribution[1][0] C2_y_variance = C2_distribution[1][1] * C2_distribution[1][1] C2_cov_matrix = classifier.calc_2d_covariance_matrix(C2_x, C2_y, C2_distribution[0][0],C2_distribution[0][1],C2_x_variance,C2_y_variance) x_est50 = list(np.arange(-6, 6, 0.1)) y_est50 = []
dt.PrintAccuracy(Y_test, Y_pred_Gini)
print("\n\n")

# Prediction using the entropy-based decision tree
print("Results Using Entropy:")
Y_pred_Entropy = dt.Predict(X_test, DT_Entropy)
dt.PrintAccuracy(Y_test, Y_pred_Entropy)
print("\n\n")

# Bayes Classifier
print("\n\n")
print("Bayes Classifier on Iris dataset:")

# Building Phase: load the Iris data, split it, and train the classifier.
# NOTE: X_test / Y_test are rebound here, replacing the values used by the
# decision-tree section above.
Dataset = bc.ImportDataset_Iris()
X, Y, X_train, X_test, Y_train, Y_test = bc.SplitDataset(Dataset)
# NOTE(review): this variable is named like a class; it holds whatever
# bc.Train returns.
BayesClassifier = bc.Train(X_train, Y_train)
print("\n\n")

# Operational Phase
# Prediction using the trained Bayes classifier
print("Results Using Bayes Classifier:")
Y_pred = bc.Predict(BayesClassifier, X_test)
bc.PrintAccuracy(Y_test, Y_pred)
print("\n\n")

# - Part C - Predictive Analytics ------------------------------------------------------------------------------------------------
# c = BayesClassifier() # c.train("data/movies_test.train") # print c.classify("coyote ugly has no plot . the storyline is as skimpy as the costumes the women at the bar wear . it's a poor attempt at a remake of flashdance and its major audience seems to be lonely lonely teenage boys with no access to p**n .") # print c.classify("so stupid and juvenile this film is ! i can't believe that this was based on shakespeare ! this movie was too cheesy for me to withstand ! ( and i usually like bad b-grade movies too ! )") # print c.classify("and nasty film by a director whose stock in trade is over the top unpleasantness e . g . the fury . de palma has to be one of the worst film makers of all time , right down there with tarantino and david lynch . if you want to see how gangster films should be done stick with coppola and scorsese .") # print c.classify("this movie is smart , funny , and hits the spirit of corporate america right in the gonads . in the lines of catch-22 , this movie is brilliant , i loved it .") # print c.classify("beautifully depicted the life of the french indian war . good battle scenes , with a romantic and heroic fame to it .") # 3. Use these four lines to test classifier on a large subset of data. Output will be logged to ouputFile.txt # This gives us our accuracy rate. from BayesClassifier import * c = BayesClassifier() c.train("data/20.train") print c.test("data/20.test", "outputFile.txt") # MISC TESTING LINES # split("data/movies.data", "data/movies_test") # c.train("data/simple.data") # c.train("data/movies.data") # c.load("data/movies.data.pickle") # c.train("data/20news.data") # c.load("data/20news.data.pickle")
def print_gambit_message(self, message, intent_index, message_type,
                         pre_feature_key):
    """Build the bot's reply for ``message`` given the previous message type.

    Dispatches on ``message_type`` (two-letter codes such as 'BG', 'BS',
    'BF', 'BN', 'BR', 'BA', 'BJ', 'BE'), fills ``response_text`` and picks
    the type code of the reply, then records the reply through
    ``self.save_conversation_list`` and returns the reply.

    ``intent_index`` is not used in this method. An unrecognised
    ``message_type`` yields an empty reply with an empty type code.

    Returns the reply as built by the matching branch (a list of strings
    where this method builds it itself; otherwise whatever
    ``find_matching_rule`` / ``response_quibble_message`` returned).
    """
    # TODO: clean up this logic
    print(message_type)

    response_message_type = ''
    feature_key = ''
    response_text = []

    if message_type == 'BG':
        response_text.append(
            PresenTALKUtil.get_random_message('bot_starting'))
        # BS : Bot Start, BQ : Bot Question
        response_message_type = 'BS'

    elif message_type == 'BS':
        # The user's reply is classified as positive
        if BayesClassifier.get_message_class(message) == 'POS':
            temp_dic = self.print_feature_message()
            feature_key = temp_dic['feature_key']
            response_text.append(temp_dic['feature_message'])
            response_message_type = 'BF'
        # The reply is not positive
        else:
            # Ask the user directly what they want
            response_text.append(
                PresenTALKUtil.get_random_message('reject'))
            response_message_type = 'BJ'

    elif message_type == 'BF':
        # NOTE: is_find is assigned but never read in this method.
        is_find = False
        if pre_feature_key != '':
            # Extract the feature from the user's message
            is_find = self.find_feature(message, pre_feature_key)

        temp_dic = self.print_feature_message()
        feature_key = temp_dic['feature_key']

        # An empty feature_key means every feature has been extracted
        if feature_key == '':
            # rec = recommand_present.Recommandation()
            # response_text.append(rec.get_recommand_message(self.feature_dict))
            # who, age, price, why
            recommend_list = recommend_data2.recommend_in_DB(
                self.feature_dict['object'], self.feature_dict['age'],
                '10', self.feature_dict['purpose'])
            # Join the recommended presents into one comma-separated string
            present_list = ''
            for present in recommend_list:
                if present_list == '':
                    present_list = present
                else:
                    present_list = present_list + ', ' + present
            recommend_text = '선물 추천 결과입니다. ' + str(
                present_list) + ' 어떠신가요?'
            response_text.append(recommend_text)
            response_message_type = 'BR'
        else:
            response_text.append(temp_dic['feature_message'])
            response_message_type = 'BF'

    # TODO: add later (increases the complexity of the logic)
    # elif message_type == 'BI':
    #     if BayesClassifier.get_message_class(message) == 'POS':
    #         pass
    #     else:
    #         pass
    elif message_type == 'BN':
        response_text = self.find_matching_rule(message)
        # NOTE(review): when a rule matches, response_message_type stays ''
        # in this branch -- confirm that is intended.
        if len(response_text) == 0:
            response_message_type = 'BN'
            response_text = self.response_quibble_message(message)

        # TODO: add later (increases the complexity of the logic)
        # When no matching rule is found, steer the talk to a specific topic
        # if len(response_text) == 0:
        #     user_info_text = self.get_user_info_message()
        #
        #     if user_info_text == '':
        #         response_message_type = 'BN'
        #         response_text = self.response_quibble_message(message)
        #     else:
        #         response_text.append('제가 공부한 분야로 이야기해 보시겠어요?')
        #         response_text.append(user_info_text)
        #         response_message_type = 'BI'
    elif message_type == 'BR':
        # TODO: this logic needs cleaning up
        if BayesClassifier.get_message_class(message) == 'POS':
            response_message_type = 'BE'
            response_text.append(
                PresenTALKUtil.get_random_message('bot_ending'))
        else:
            # Ask the user directly what they want
            response_text.append(
                PresenTALKUtil.get_random_message('reject'))
            response_message_type = 'BJ'

    # TODO: add the logic for what happens after answering
    elif message_type == 'BA':
        return_message = self.find_matching_rule(message)
        if len(return_message) != 0:
            response_text = return_message
        else:
            response_text = self.response_quibble_message(message)
        # NOTE(review): nesting reconstructed -- assumed to apply to both
        # outcomes above; confirm against the original layout.
        response_message_type = 'BN'

    elif message_type == 'BJ':
        if BayesClassifier.get_message_class(message) == 'POS':
            response_message_type = 'BE'
            response_text.append(
                PresenTALKUtil.get_random_message('bot_ending'))
        else:
            response_message_type = 'BN'
            response_text.append(
                PresenTALKUtil.get_random_message('bot_restart'))

    elif message_type == 'BE':
        if BayesClassifier.get_message_class(message) == 'POS':
            # Positive reply ends the conversation; the reply type stays ''.
            self.isFinish = True
            response_text.append(
                PresenTALKUtil.get_random_message('bot_bye'))
        # TODO: design the scenario
        else:
            return_message = self.find_matching_rule(message)
            if len(return_message) != 0:
                response_text = return_message
            else:
                response_text = self.response_quibble_message(message)
            # NOTE(review): nesting reconstructed -- assumed to apply to
            # both outcomes above; confirm against the original layout.
            response_message_type = 'BN'

    self.save_conversation_list(response_text, response_message_type,
                                feature_key)
    return response_text
# Distance classifier: train with the euclidean metric, then evaluate on both
# the training and the test set.
dc = DC.DistanceClassifier()
dc.train(trainInput, trainOutput, 'euclidean')
resultsDcTrain = dc.test(trainInput, trainOutput, 'euclidean')
resultsDcTest = dc.test(testInput, testOutput, 'euclidean')
# Accuracy = sum of the first elements / sum of the second elements of
# results[0]. Presumably each entry is a (correct, total) pair -- TODO confirm
# against DC.DistanceClassifier.test.
print("Distance classifier accuracy for train set : " + str(np.sum([x[0] for x in resultsDcTrain[0]])/np.sum([x[1] for x in resultsDcTrain[0]])))
print("Distance classifier accuracy for test set : " + str(np.sum([x[0] for x in resultsDcTest[0]])/np.sum([x[1] for x in resultsDcTest[0]])))
# results[1] is handed to the plotting helper (presumably a confusion matrix,
# judging by the helper and file names -- confirm).
IO.PlotCM(resultsDcTrain[1], save = True, fileName = "distanceConfusionTrain")
IO.PlotCM(resultsDcTest[1], save = True, fileName = "distanceConfusionTest")
''' Assignment 3: '''
# Bayes classifier constructed with the arguments (7, 5); evaluated on the
# training and the test set.
bc = BC.BayesClassifier(7,5)
bc.train(trainInput, trainOutput)
resultsBcTrain = bc.test(trainInput, trainOutput)
resultsBcTest = bc.test(testInput, testOutput)
print("Bayes classifier accuracy for train set for 5 & 7 : " + str(resultsBcTrain))
print("Bayes classifier accuracy for test set for 5 & 7: " + str(resultsBcTest))
# Same experiment with the arguments (1, 4); bc and the result variables are
# rebound.
bc = BC.BayesClassifier(1,4)
bc.train(trainInput, trainOutput)
resultsBcTrain = bc.test(trainInput, trainOutput)
resultsBcTest = bc.test(testInput, testOutput)
print("Bayes classifier accuracy for train set for 1 & 4: " + str(resultsBcTrain))
def receive_server(socklink, address):
    """Serve one client request on an accepted connection, then close it.

    Protocol as implemented here: the client first sends the payload length,
    then the payload itself, which evaluates to a dict with an ``'action'``
    key. Supported actions:

    * ``'request-result'`` -- classify ``data['content']`` with the Bayes
      classifier and reply with a list of 1 (ham), -1 (spam) or 0 (anything
      else).
    * ``'request-info'`` / ``'post'`` / ``'delete'`` -- look up, add or delete
      a filter rule through ``Filter_operation`` and reply with its result.

    Replies are sent as ``repr(...)`` of the response object. All errors are
    logged with a traceback and swallowed; the socket is always closed.

    :param socklink: connected socket for this client.
    :param address: client address, used only for log messages.
    """

    def stamp():
        # Timestamp suffix shared by every log line below.
        return str(datetime.datetime.now().strftime('%F %T'))

    try:
        with sem:
            # The first message from the client is the length of the payload
            # that follows. (The original read from the undefined name
            # ``socketlink`` here, which raised NameError on every request.)
            # SECURITY: eval() on bytes received from the network executes
            # arbitrary code; ast.literal_eval would be the safe replacement
            # for this repr()-based protocol. Left unchanged on purpose --
            # review before exposing this server to untrusted clients.
            length = socklink.recv(102400)
            length = eval(length.decode())

            total_data = []  # raw byte chunks sent by the client
            current_length = 0
            while current_length < length:
                chunk = socklink.recv(102400)
                if not chunk:
                    # recv() returns b'' once the peer has closed the
                    # connection; without this check the loop never ends.
                    break
                current_length += len(chunk)
                total_data.append(chunk)
            if 0 < current_length < length:
                raise Exception(
                    "Connection closed before the full payload was received.")

            # Decode the payload (the original discarded the decoded value).
            data = b''.join(total_data).decode()
            if not data:
                # Nothing was received
                print('No data received from ' + str(address) +
                      "! Request cancelled." + " " + stamp())
                raise Exception("No data received.")

            # Convert the string into a dict according to the agreed format.
            # SECURITY: same eval() concern as above.
            data_dic = eval(data)
            action = data_dic['action']

            if action == 'request-result':
                # Run the model; each item maps to 1 (not spam) or -1 (spam).
                bayes = Classifier.BayesClassifier()
                results = bayes.classify(data_dic['content'])
                # 0 marks an unexpected classifier output.
                content = [
                    1 if item == 'ham' else -1 if item == 'spam' else 0
                    for item in results
                ]
                # Reply to the client with the prediction results.
                # NOTE: 'response-reslut' is misspelled but is part of the
                # wire protocol; clients may match on it, so it is kept.
                response_data = {
                    'action': 'response-reslut',
                    'content': content
                }
                socklink.sendall(repr(response_data).encode())
                print('Request-result from ' + str(address) +
                      " handled successfully." + " " + stamp())

            elif action == 'request-info':
                # Look up the filter configuration for the request
                # (presumably keyed by the username in data['content']).
                DB_operation = filter_rule_DB_operation.Filter_operation()
                response_data = DB_operation.search_owner(data_dic)
                socklink.sendall(repr(response_data).encode())
                print('Request-info from ' + str(address) +
                      " handled successfully." + " " + stamp())

            elif action == 'post':
                # Store the filter rule carried by the request and reply with
                # the storage result.
                DB_operation = filter_rule_DB_operation.Filter_operation()
                response_data = DB_operation.add_one_rule(data_dic)
                socklink.sendall(repr(response_data).encode())
                print('post from ' + str(address) +
                      " handled successfully." + " " + stamp())

            elif action == 'delete':
                # Delete a filter rule and reply with the operation result.
                DB_operation = filter_rule_DB_operation.Filter_operation()
                response_data = DB_operation.delete_one_rule(data_dic)
                socklink.sendall(repr(response_data).encode())
                print('delete request from ' + str(address) +
                      " handled successfully." + " " + stamp())

            else:
                # None of the known request types matched.
                print('Unknown request type from ' + str(address) +
                      '. Request cancelled' + " " + stamp())
    except Exception as error:
        print('Error happened in processing thread. \nClient: ' +
              str(address) + "\nError: " + str(error) + " " + stamp())
        traceback.print_exc()
    finally:
        socklink.close()
def bayesClassifier(x, y):
    """Cross-validate a freshly constructed ``bc.BayesClassifier`` on ``x``/``y``.

    Returns whatever ``crossValidation`` returns for that classifier.
    """
    return crossValidation(bc.BayesClassifier(), x, y)