コード例 #1
0
def main():
    """Cross-evaluate a Bayes classifier between the OSU and TCGA mutation sets.

    Two inputs (``netA`` with ``rb1=True`` and ``netB`` with the defaults) are
    derived from the metastatic table via ``process``. A fresh classifier is
    then fitted on one mutation CSV and asked to predict on the other, once
    in each direction.
    """
    meta = 'df_OSU_LMS_METASTATIC.csv'
    osu = 'LMS_OSU_MUTATION.csv'
    tcga = 'LMS_TCGA_MUTATION.csv'

    metastatic = pd.read_csv(meta)
    netA = process(metastatic, rb1=True)
    netB = process(metastatic)

    # (training file, evaluation file): OSU -> TCGA first, then TCGA -> OSU.
    for train_csv, eval_csv in ((osu, tcga), (tcga, osu)):
        classifier = bae.BayesClassifier()
        classifier.fit(netA, netB, pd.read_csv(train_csv))
        print(classifier.attributes)
        # Each file is re-read from disk so every call gets a fresh frame.
        classifier.predict(netA, netB, pd.read_csv(eval_csv))
コード例 #2
0
def eval(sText):
    totalaccuracy_numer = 0
    totalaccuracy_denom = 0
    for test in range(0, 10):
        thisaccuracy_numer = 0
        thisaccuracy_denom = 0
        split(sText, "output")
        for doc in range(0, 5):
            print "i is: ", doc
            totaldic = defaultdict(lambda: 0)
            totalcorrectdic = defaultdict(lambda: 0)
            bc = BayesClassifier()
            bc.train("output.train{0}".format(doc % 5))
            bc.train("output.train{0}".format((doc + 1) % 5))
            bc.train("output.train{0}".format((doc + 2) % 5))
            bc.train("output.train{0}".format((doc + 3) % 5))
            reader = DataReader("output.train{0}".format((doc + 4) % 5))
            correct = 0
            total = 0
            hold = 0
            for label, tokens, company, date, price, risklength in reader:
                print label
                tokenstring = " "
                tokenstring = tokenstring.join(tokens)
                print date
                if risklength == 1:
                    print "invalid document; ignore"
                elif bc.classify(tokenstring, risklength, date) == "HOLD":
                    #elif bc.classify(tokenstring, risklength) == "HOLD":
                    hold += 1
                else:
                    totaldic[label] += 1
                    total += 1

                    if bc.classify(tokenstring, risklength, date) == label:
                        #if bc.classify(tokenstring, risklength) == label:
                        correct += 1
                        totalcorrectdic[label] += 1

            print "Holds: ", hold
            print "Accuracy:", correct / float(total)
            thisaccuracy_numer += correct / float(total)
            thisaccuracy_denom += 1
            for key in totaldic:
                print totalcorrectdic[key], totaldic[key]
                print key, " precision: ", totalcorrectdic[key] / float(
                    totaldic[key])

        print "This Round Accuracy: ", thisaccuracy_numer / thisaccuracy_denom
        totalaccuracy_numer += thisaccuracy_numer
        totalaccuracy_denom += thisaccuracy_denom
    print "Total Accuracy: ", totalaccuracy_numer / totalaccuracy_denom
コード例 #3
0
ファイル: Driver.py プロジェクト: uly26/BayesianClassifier
def main():
    data_path = str(sys.argv[1])
    output_path = "output/SimpleData"

    split(data_path, output_path)

    model = BayesClassifier()
    model.train(output_path+".train")

    if model.classify("I hate my AI class") > 0.5:
        print "positive"
    else:
        print "negative"
コード例 #4
0
def eval(sText):
    totalaccuracy_numer = 0;
    totalaccuracy_denom = 0;
    for test in range(0,10):
        thisaccuracy_numer = 0
        thisaccuracy_denom = 0;
        split(sText, "output")
        for doc in range (0,5):
            print "i is: ", doc
            totaldic = defaultdict(lambda: 0)
            totalcorrectdic = defaultdict(lambda: 0)
            bc = BayesClassifier()
            bc.train("output.train{0}".format(doc%5))
            bc.train("output.train{0}".format((doc+1)%5))
            bc.train("output.train{0}".format((doc+2)%5))
            bc.train("output.train{0}".format((doc+3)%5))
            reader = DataReader("output.train{0}".format((doc+4)%5))
            correct = 0
            total = 0
            hold = 0
            for label, tokens, company, date, price, risklength in reader:
                print label
                tokenstring = " "
                tokenstring = tokenstring.join(tokens) 
                print date
                if risklength == 1:
                    print "invalid document; ignore"
                elif bc.classify(tokenstring, risklength, date) == "HOLD":
                #elif bc.classify(tokenstring, risklength) == "HOLD":
                    hold += 1
                else:
                    totaldic[label] +=1
                    total += 1
                        
                    if bc.classify(tokenstring, risklength, date) == label:
                    #if bc.classify(tokenstring, risklength) == label:
                        correct += 1
                        totalcorrectdic[label] += 1
                    
            print "Holds: ", hold
            print "Accuracy:", correct/float(total)
            thisaccuracy_numer += correct/float(total)
            thisaccuracy_denom += 1
            for key in totaldic:
                    print totalcorrectdic[key], totaldic[key]
                    print key, " precision: ", totalcorrectdic[key]/float(totaldic[key])
        
        print "This Round Accuracy: ", thisaccuracy_numer/thisaccuracy_denom
        totalaccuracy_numer += thisaccuracy_numer
        totalaccuracy_denom += thisaccuracy_denom
    print "Total Accuracy: ", totalaccuracy_numer/totalaccuracy_denom
コード例 #5
0
def sendOk(request):
    """Save the e-mail posted in the form and render the sender's sent list.

    The message text is labelled with ``BayesClassifier.getClassify`` and the
    label is stored in the new ``Email`` row's ``flag`` field. The response
    renders ``send.htm`` with every e-mail whose sender is the session user.
    """
    sender = request.session['username']
    form = request.POST
    receiver = form.get('receiver')
    theme = form.get('theme')
    message = form.get('message')
    flag = BayesClassifier.getClassify(message)
    Email.objects.create(
        sender=sender,
        receiver=receiver,
        theme=theme,
        message=message,
        flag=flag,
    )
    context = {
        'username': sender,
        'emails': Email.objects.filter(sender=sender),
    }
    return render(request, 'send.htm', context)
コード例 #6
0
def run_bayes(data_path):
    training_path = os.path.join(data_path,"TRAINING")
    classifications = [x for x in os.listdir(training_path)
                       if os.path.isdir(os.path.join(training_path,x))]
    classifier = BayesClassifier.BayesClassifier()
    train_bayes(classifier, classifications, training_path)
    testing_path = os.path.join(data_path,"TESTING")
    print "Running on Training Data (asterisk means incorrect)..."
    test_bayes(classifier, classifications, training_path)
    print "Running on Testing Data (asterisk means incorrect)..."
    test_bayes(classifier, classifications, testing_path)
コード例 #7
0
def testClassifier(outputLabel):
    bc = BayesClassifier()
    bc.train(outputLabel + ".train")
    reader = DataReader(outputLabel + ".test")
    correctLabel = {}
    numberGuess = {}
    correct = 0.0
    total = 0.0
    for label, tokens in reader:
        if not label in correctLabel:
            correctLabel[label] = 0.0
        guess = bc.classify(" ".join(tokens))
        if not guess in numberGuess:
            numberGuess[guess] = 0.0
        if guess == label:
            correctLabel[guess] += 1
            correct += 1
        numberGuess[guess] += 1
        total += 1
    for label in correctLabel:
        print "Correct " + label, "-", correctLabel[label] / numberGuess[label]
    print "Total accuracy -", correct / total
コード例 #8
0
def sendOk(request):
    """Persist the submitted e-mail and show the user's sent messages.

    Reads ``receiver``, ``theme`` and ``message`` from the POST data, stores
    them as an ``Email`` row together with the label returned by
    ``BayesClassifier.getClassify`` for the message, and renders ``send.htm``
    with all e-mails sent by the session user.
    """
    username = request.session['username']
    # Collect the three form fields in the same order the original read them.
    fields = {}
    for field_name in ('receiver', 'theme', 'message'):
        fields[field_name] = request.POST.get(field_name)
    flag = BayesClassifier.getClassify(fields['message'])
    Email.objects.create(sender=username, flag=flag, **fields)
    sent = Email.objects.filter(sender=username)
    return render(request, 'send.htm', {'username': username, 'emails': sent})
コード例 #9
0
                C2_x.append(float(rowItem))
                firstItemAppended = True
            elif( firstItemAppended == True):
                C2_y.append(float(rowItem))
# Per-class feature lists in the order [x values, y values].
C1_features = [C1_x, C1_y]
C2_features = [C2_x, C2_y]

# x^2 + y^2 per sample; only referenced by the commented-out appends below.
C1_z = np.array(C1_x)**2 + np.array(C1_y)**2
C2_z = np.array(C2_x)**2 + np.array(C2_y)**2
#C1_features.append(C1_z)
#C2_features.append(C2_z)

classifier = BayesClassifier.GaussianBayesClassifier()

C1_distribution = classifier.getClassDistribution(C1_features)
C2_distribution = classifier.getClassDistribution(C2_features)

# Entry [1][i] of a distribution is squared to obtain the variance of feature
# i, and entry [0][i] is passed alongside it -- presumably [0] holds the means
# and [1] the standard deviations; confirm against getClassDistribution.
C1_x_variance = C1_distribution[1][0] * C1_distribution[1][0]
C1_y_variance = C1_distribution[1][1] * C1_distribution[1][1]
C1_cov_matrix = classifier.calc_2d_covariance_matrix(
    C1_x,
    C1_y,
    C1_distribution[0][0],
    C1_distribution[0][1],
    C1_x_variance,
    C1_y_variance,
)

C2_x_variance = C2_distribution[1][0] * C2_distribution[1][0]
C2_y_variance = C2_distribution[1][1] * C2_distribution[1][1]
C2_cov_matrix = classifier.calc_2d_covariance_matrix(
    C2_x,
    C2_y,
    C2_distribution[0][0],
    C2_distribution[0][1],
    C2_x_variance,
    C2_y_variance,
)

# Sample x positions from -6 (inclusive) to 6 (exclusive) in steps of 0.1.
x_est50 = list(np.arange(-6, 6, 0.1))
y_est50 = []
コード例 #10
0
# Report accuracy for the Gini predictions (Y_pred_Gini is defined above this
# excerpt).
dt.PrintAccuracy(Y_test, Y_pred_Gini)

print("\n\n")

# Prediction using Entropy
print("Results Using Entropy:")
Y_pred_Entropy = dt.Predict(X_test, DT_Entropy)
dt.PrintAccuracy(Y_test, Y_pred_Entropy)

print("\n\n")

# Bayes Classifier
print("\n\n")
print("Bayes Classifier on Iris dataset:")

# Building Phase
Dataset = bc.ImportDataset_Iris()
# NOTE: this rebinds X_test / Y_test (and the other split names) that the
# decision-tree section above was using.
X, Y, X_train, X_test, Y_train, Y_test = bc.SplitDataset(Dataset)
# Holds whatever bc.Train returns; it is passed back to bc.Predict below.
BayesClassifier = bc.Train(X_train, Y_train)

print("\n\n")

# Operational Phase
# Prediction using the trained Bayes classifier
print("Results Using Bayes Classifier:")
Y_pred = bc.Predict(BayesClassifier, X_test)
bc.PrintAccuracy(Y_test, Y_pred)

print("\n\n")

# - Part C - Predictive Analytics ------------------------------------------------------------------------------------------------
コード例 #11
0
ファイル: Test.py プロジェクト: willpots/BayesClassifier
# c = BayesClassifier()

# c.train("data/movies_test.train")

# print c.classify("coyote ugly has no plot . the storyline is as skimpy as the costumes the women at the bar wear . it's a poor attempt at a remake of flashdance and its major audience seems to be lonely lonely teenage boys with no access to p**n .")
# print c.classify("so stupid and juvenile this film is ! i can't believe that this was based on shakespeare ! this movie was too cheesy for me to withstand ! ( and i usually like bad b-grade movies too ! )")
# print c.classify("and nasty film by a director whose stock in trade is over the top unpleasantness e . g . the fury . de palma has to be one of the worst film makers of all time , right down there with tarantino and david lynch . if you want to see how gangster films should be done stick with coppola and scorsese .")
# print c.classify("this movie is smart , funny , and hits the spirit of corporate america right in the gonads . in the lines of catch-22 , this movie is brilliant , i loved it .")
# print c.classify("beautifully depicted the life of the french indian war . good battle scenes , with a romantic and heroic fame to it .")



# 3. Use these four lines to test the classifier on a large subset of the data. Output will be logged to outputFile.txt.
# This gives us our accuracy rate.
from BayesClassifier import *
# Build a classifier, train it on the "20" training split, then print the
# value returned by c.test for the matching test split (Python 2 print
# statement). The second argument is presumably the log file path -- confirm
# against BayesClassifier.test.
c = BayesClassifier()
c.train("data/20.train")
print c.test("data/20.test", "outputFile.txt")






# MISC TESTING LINES
# split("data/movies.data", "data/movies_test")
# c.train("data/simple.data")
# c.train("data/movies.data")
# c.load("data/movies.data.pickle")
# c.train("data/20news.data")
# c.load("data/20news.data.pickle")
コード例 #12
0
    def print_gambit_message(self, message, intent_index, message_type,
                             pre_feature_key):
        """Build the bot's reply for the current conversation state and record it.

        The method branches on ``message_type`` (two-letter state codes such
        as 'BG', 'BS', 'BF', 'BN', 'BR', 'BA', 'BJ', 'BE'), fills
        ``response_text``, picks the next state code, stores both through
        ``self.save_conversation_list`` and returns the reply.

        Args:
            message: the user's message; passed to the classifier, the
                feature finder and the rule matcher depending on the state.
            intent_index: not used anywhere in this method.
            message_type: state code selecting which branch runs; it is also
                printed for debugging.
            pre_feature_key: when non-empty in the 'BF' state, passed with
                ``message`` to ``self.find_feature``.

        Returns:
            ``response_text``: the list built here, or whatever
            ``self.find_matching_rule`` / ``self.response_quibble_message``
            returned in the rule-matching branches.

        Side effects:
            Prints ``message_type``; may set ``self.isFinish`` to True in the
            'BE' state; always calls ``self.save_conversation_list``.
        """
        # TODO: clean up the logic
        print(message_type)
        response_message_type = ''
        feature_key = ''
        response_text = []
        if message_type == 'BG':
            response_text.append(
                PresenTALKUtil.get_random_message('bot_starting'))
            # BS : Bot Start, BQ : Bot Question
            response_message_type = 'BS'

        elif message_type == 'BS':
            # The user's reply is positive
            if BayesClassifier.get_message_class(message) == 'POS':
                temp_dic = self.print_feature_message()
                feature_key = temp_dic['feature_key']
                response_text.append(temp_dic['feature_message'])
                response_message_type = 'BF'
            # The reply is not positive
            else:
                # Ask the user directly what they want
                response_text.append(
                    PresenTALKUtil.get_random_message('reject'))
                response_message_type = 'BJ'

        elif message_type == 'BF':
            # NOTE(review): is_find is assigned but never read in this method.
            is_find = False
            if pre_feature_key != '':
                # Extract the feature
                is_find = self.find_feature(message, pre_feature_key)

            temp_dic = self.print_feature_message()
            feature_key = temp_dic['feature_key']

            # All features have been extracted
            if feature_key == '':
                # rec = recommand_present.Recommandation()
                # response_text.append(rec.get_recommand_message(self.feature_dict))
                # who, age, price, why
                # NOTE(review): the third argument is the hard-coded string
                # '10'; going by the comment above it is presumably the
                # price -- confirm against recommend_in_DB.
                recommend_list = recommend_data2.recommend_in_DB(
                    self.feature_dict['object'], self.feature_dict['age'],
                    '10', self.feature_dict['purpose'])

                # Join the recommended presents into one ', '-separated string.
                present_list = ''
                for present in recommend_list:
                    if present_list == '':
                        present_list = present
                    else:
                        present_list = present_list + ', ' + present

                recommend_text = '선물 추천 결과입니다. ' + str(
                    present_list) + ' 어떠신가요?'
                response_text.append(recommend_text)

                response_message_type = 'BR'
            else:
                response_text.append(temp_dic['feature_message'])
                response_message_type = 'BF'

        # TODO: add later (increases the complexity of the logic)
        # elif message_type == 'BI':
        #     if BayesClassifier.get_message_class(message) == 'POS':
        #         pass
        #     else:
        #         pass

        elif message_type == 'BN':
            response_text = self.find_matching_rule(message)

            # NOTE(review): when a matching rule IS found,
            # response_message_type stays '' in this branch -- confirm that
            # this is intended.
            if len(response_text) == 0:
                response_message_type = 'BN'
                response_text = self.response_quibble_message(message)

            # TODO: add later (increases the complexity of the logic)
            # When no matching rule is found, talk about a specific topic
            # if len(response_text) == 0:
            #     user_info_text = self.get_user_info_message()
            #
            #     if user_info_text == '':
            #         response_message_type = 'BN'
            #         response_text = self.response_quibble_message(message)
            #     else:
            #         response_text.append('제가 공부한 분야로 이야기해 보시겠어요?')
            #         response_text.append(user_info_text)
            #         response_message_type = 'BI'

        elif message_type == 'BR':
            # TODO: the logic needs cleaning up
            if BayesClassifier.get_message_class(message) == 'POS':
                response_message_type = 'BE'
                response_text.append(
                    PresenTALKUtil.get_random_message('bot_ending'))
            else:
                # Ask the user directly what they want
                response_text.append(
                    PresenTALKUtil.get_random_message('reject'))
                response_message_type = 'BJ'

        # TODO: add the logic for what happens after answering
        elif message_type == 'BA':
            return_message = self.find_matching_rule(message)
            if len(return_message) != 0:
                response_text = return_message
            else:
                response_text = self.response_quibble_message(message)
            response_message_type = 'BN'

        elif message_type == 'BJ':
            if BayesClassifier.get_message_class(message) == 'POS':
                response_message_type = 'BE'
                response_text.append(
                    PresenTALKUtil.get_random_message('bot_ending'))
            else:
                response_message_type = 'BN'
                response_text.append(
                    PresenTALKUtil.get_random_message('bot_restart'))

        elif message_type == 'BE':
            # NOTE(review): for a non-positive reply nothing is appended and
            # response_message_type stays '', so an empty reply is saved and
            # returned.
            if BayesClassifier.get_message_class(message) == 'POS':
                self.isFinish = True
                response_text.append(
                    PresenTALKUtil.get_random_message('bot_bye'))

        # TODO: build out the scenario
        else:
            return_message = self.find_matching_rule(message)
            if len(return_message) != 0:
                response_text = return_message
            else:
                response_text = self.response_quibble_message(message)
            response_message_type = 'BN'

        self.save_conversation_list(response_text, response_message_type,
                                    feature_key)

        return response_text
コード例 #13
0
# Distance classifier: train with the 'euclidean' setting, then evaluate on
# both the training and the test data.
dc = DC.DistanceClassifier()
dc.train(trainInput, trainOutput, 'euclidean')

resultsDcTrain = dc.test(trainInput, trainOutput, 'euclidean')
resultsDcTest = dc.test(testInput, testOutput, 'euclidean')

# Accuracy = sum of the first elements / sum of the second elements over the
# pairs in results[0] (presumably per-class (correct, total) counts -- confirm
# against DistanceClassifier.test).
print("Distance classifier accuracy for train set : " + str(np.sum([x[0] for x in resultsDcTrain[0]])/np.sum([x[1] for x in resultsDcTrain[0]])))
print("Distance classifier accuracy for test set : " + str(np.sum([x[0] for x in resultsDcTest[0]])/np.sum([x[1] for x in resultsDcTest[0]])))

# results[1] is handed to the plotting helper and saved under the given file
# name (presumably a confusion matrix, going by the file names).
IO.PlotCM(resultsDcTrain[1], save = True, fileName = "distanceConfusionTrain")
IO.PlotCM(resultsDcTest[1], save = True, fileName = "distanceConfusionTest")

'''
Assignment 3:
'''
# Bayes classifier constructed with the arguments (7, 5); the printed labels
# below call this pair "5 & 7".
bc = BC.BayesClassifier(7,5)
bc.train(trainInput, trainOutput)

resultsBcTrain = bc.test(trainInput, trainOutput)
resultsBcTest = bc.test(testInput, testOutput)

print("Bayes classifier accuracy for train set for 5 & 7 : " + str(resultsBcTrain))
print("Bayes classifier accuracy for test set for 5 & 7: " + str(resultsBcTest))

# Same experiment with the arguments (1, 4); bc and the result names are
# rebound, so the (7, 5) results are no longer available after this point.
bc = BC.BayesClassifier(1,4)
bc.train(trainInput, trainOutput)

resultsBcTrain = bc.test(trainInput, trainOutput)
resultsBcTest = bc.test(testInput, testOutput)

print("Bayes classifier accuracy for train set for 1 & 4: " + str(resultsBcTrain))
コード例 #14
0
def _log_timestamp():
    """Return the current local time as the string appended to log lines."""
    return str(datetime.datetime.now().strftime('%F %T'))


def receive_server(socklink, address):
    """Serve one client connection: read a request, dispatch on its action, reply.

    Protocol as implemented here: the client first sends the length of its
    payload, then the payload itself, which evaluates to a dict with an
    ``'action'`` key. Supported actions are:

    * ``'request-result'`` -- classify ``data_dic['content']`` and reply with
      a list holding 1 for each ``'ham'`` result, -1 for each ``'spam'``
      result and 0 for anything else.
    * ``'request-info'`` / ``'post'`` / ``'delete'`` -- hand the whole request
      dict to ``search_owner`` / ``add_one_rule`` / ``delete_one_rule`` of a
      ``Filter_operation`` object and reply with its return value.

    Replies are sent as ``repr(...)`` of the response, encoded to bytes. All
    work runs under the module-level ``sem``; any exception is printed with a
    traceback, and the socket is always closed on exit.

    Args:
        socklink: connected client socket.
        address: client address, used only in log output.
    """
    try:
        with sem:
            # The client first sends the length of the payload that follows.
            # SECURITY(review): eval() on bytes received from the network
            # executes arbitrary expressions. It is left in place so the
            # accepted wire format does not change; int() here and
            # ast.literal_eval()/json for the payload below should replace
            # it once the client format is confirmed.
            length = socklink.recv(102400)
            length = eval(length.decode())

            totalData = []  # raw byte chunks received from the client
            current_length = 0
            while current_length < length:
                chunk = socklink.recv(102400)
                if not chunk:
                    # recv() returns b'' once the peer has closed the
                    # connection; without this check the loop never ends.
                    break
                current_length += len(chunk)
                totalData.append(chunk)

            # Reassemble the payload and decode it to text.
            data = b''.join(totalData).decode()

            if not data:  # nothing was received
                print('No data received from ' + str(address) +
                      "!  Request cancelled." + "    " + _log_timestamp())
                raise Exception("No data received.")
            if current_length < length:
                raise Exception(
                    "Connection closed before the full payload was received.")

            # By the agreed protocol the payload text evaluates to a dict.
            # SECURITY(review): see the note on eval() above.
            data_dic = eval(data)

            if data_dic['action'] == 'request-result':
                # Run the model and map each result to 1 (not spam) or
                # -1 (spam); 0 marks an unexpected result.
                bayes = Classifier.BayesClassifier()
                labels = bayes.classify(data_dic['content'])
                content = []
                for result_item in labels:
                    if result_item == 'ham':
                        content.append(1)
                    elif result_item == 'spam':
                        content.append(-1)
                    else:
                        content.append(0)

                # Reply to the client with the prediction results.
                # NOTE: 'response-reslut' is misspelled but is part of the
                # wire protocol, so it is kept exactly as clients receive it.
                response_data = {
                    'action': 'response-reslut',
                    'content': content
                }
                socklink.sendall(repr(response_data).encode())
                print('Request-result from ' + str(address) +
                      " handled successfully." + "    " + _log_timestamp())

            elif data_dic['action'] == 'request-info':
                # Look up the filter configuration for this request (per the
                # original comment, data['content'] carries the username,
                # i.e. the user's current e-mail address).
                DB_operation = filter_rule_DB_operation.Filter_operation()
                # Reply to the client with the matching configuration.
                response_data = DB_operation.search_owner(data_dic)
                socklink.sendall(repr(response_data).encode())
                print('Request-info from ' + str(address) +
                      " handled successfully." + "    " + _log_timestamp())

            elif data_dic['action'] == 'post':
                # Store the filter rule carried in the request (per the
                # original comment the reply is 1 on success, -1 on failure).
                DB_operation = filter_rule_DB_operation.Filter_operation()
                # Reply to the client with the outcome of the upload.
                response_data = DB_operation.add_one_rule(data_dic)
                socklink.sendall(repr(response_data).encode())
                print('post from ' + str(address) + " handled successfully." +
                      "    " + _log_timestamp())

            elif data_dic['action'] == 'delete':
                # Request to delete one filter rule.
                DB_operation = filter_rule_DB_operation.Filter_operation()
                # Reply with the outcome reported by the delete operation.
                response_data = DB_operation.delete_one_rule(data_dic)
                socklink.sendall(repr(response_data).encode())
                print('delete request from ' + str(address) +
                      " handled successfully." + "    " + _log_timestamp())

            else:
                # None of the known actions matched: log it and send nothing.
                print('Unknown request type from ' + str(address) +
                      '. Request cancelled' + "    " + _log_timestamp())

    except Exception as error:
        print('Error happened in processing thread. \nClient: ' +
              str(address) + "\nError: " + str(error) + "    " +
              _log_timestamp())
        traceback.print_exc()
    finally:
        socklink.close()
コード例 #15
0
def bayesClassifier(x, y):
	"""Cross-validate a new ``bc.BayesClassifier`` on ``x`` and ``y``.

	Returns whatever ``crossValidation`` returns for that classifier.
	"""
	return crossValidation(bc.BayesClassifier(), x, y)