コード例 #1
0
def main():
    """Cross-evaluate the Bayes classifier between the OSU and TCGA CSVs.

    The metastatic table is passed through ``process`` twice (once with
    ``rb1=True``, once with the defaults).  Those two results are then
    handed, together with a mutation table, to a classifier that is fitted
    on OSU and asked to predict TCGA; a second, fresh classifier is fitted
    on TCGA and asked to predict OSU.  The ``attributes`` of each fitted
    classifier are printed.
    """
    metastatic_csv = 'df_OSU_LMS_METASTATIC.csv'
    osu_csv = 'LMS_OSU_MUTATION.csv'
    tcga_csv = 'LMS_TCGA_MUTATION.csv'

    metastatic = pd.read_csv(metastatic_csv)
    net_a = process(metastatic, rb1=True)
    net_b = process(metastatic)

    def fit_then_predict(train_csv, test_csv):
        # Every table is read from disk again for each use, so neither pass
        # can observe in-place changes made to a frame by the other pass.
        train_frame = pd.read_csv(train_csv)
        classifier = bae.BayesClassifier()
        classifier.fit(net_a, net_b, train_frame)
        print(classifier.attributes)

        test_frame = pd.read_csv(test_csv)
        classifier.predict(net_a, net_b, test_frame)

    fit_then_predict(osu_csv, tcga_csv)
    fit_then_predict(tcga_csv, osu_csv)
コード例 #2
0
def run_bayes(data_path):
    """Train a Bayes classifier under ``data_path`` and test it on both splits.

    Every sub-directory of ``<data_path>/TRAINING`` is treated as one
    classification.  The classifier is trained with ``train_bayes`` and then
    run through ``test_bayes`` first on the training directory and then on
    ``<data_path>/TESTING``, with a banner printed before each run.
    """
    training_path = os.path.join(data_path, "TRAINING")
    testing_path = os.path.join(data_path, "TESTING")

    # Only directories count as classifications; plain files are ignored.
    classifications = []
    for entry in os.listdir(training_path):
        if os.path.isdir(os.path.join(training_path, entry)):
            classifications.append(entry)

    classifier = BayesClassifier.BayesClassifier()
    train_bayes(classifier, classifications, training_path)

    runs = (
        ("Running on Training Data (asterisk means incorrect)...",
         training_path),
        ("Running on Testing Data (asterisk means incorrect)...",
         testing_path),
    )
    for banner, path in runs:
        # A single parenthesised string prints the same on Python 2 and 3.
        print(banner)
        test_bayes(classifier, classifications, path)
コード例 #3
0
def eval(sText):
    """Run 10 rounds of 5-fold cross-validation and print the results.

    Each round calls ``split(sText, "output")`` and then, for each of the
    five folds, trains a fresh ``BayesClassifier`` on four of the
    ``output.train<k>`` files and scores it on the remaining one.

    For every document read from the held-out file:
      * ``risklength == 1`` marks the document as invalid; it is skipped.
      * a ``"HOLD"`` classification is only counted, not scored.
      * anything else is scored against the document's label.

    Per-fold, per-round and overall accuracy are printed.  A fold in which
    no document was scored is reported as "n/a" and left out of the
    averages instead of raising ZeroDivisionError.

    NOTE(review): this function shadows the builtin ``eval``; the name is
    kept because callers depend on it.
    """
    totalaccuracy_numer = 0
    totalaccuracy_denom = 0
    for _round in range(0, 10):
        thisaccuracy_numer = 0
        thisaccuracy_denom = 0
        split(sText, "output")
        for doc in range(0, 5):
            # "%"-formatted single-argument print calls reproduce the
            # spacing of the former Python 2 print statements and are also
            # valid Python 3.
            print("i is:  %s" % (doc,))
            totaldic = defaultdict(int)
            totalcorrectdic = defaultdict(int)
            bc = BayesClassifier()
            # Train on folds doc .. doc+3 (mod 5); fold doc+4 is held out.
            for offset in range(4):
                bc.train("output.train{0}".format((doc + offset) % 5))
            reader = DataReader("output.train{0}".format((doc + 4) % 5))
            correct = 0
            total = 0
            hold = 0
            for label, tokens, company, date, price, risklength in reader:
                print(label)
                tokenstring = " ".join(tokens)
                print(date)
                if risklength == 1:
                    print("invalid document; ignore")
                    continue

                # Classify once and reuse the answer for both checks.
                prediction = bc.classify(tokenstring, risklength, date)
                if prediction == "HOLD":
                    hold += 1
                    continue

                totaldic[label] += 1
                total += 1
                if prediction == label:
                    correct += 1
                    totalcorrectdic[label] += 1

            print("Holds:  %s" % (hold,))
            if total:
                fold_accuracy = correct / float(total)
                print("Accuracy: %s" % (fold_accuracy,))
                thisaccuracy_numer += fold_accuracy
                thisaccuracy_denom += 1
            else:
                # Every document was invalid or HOLD: accuracy is undefined.
                print("Accuracy: n/a (no documents were scored)")
            for key in totaldic:
                print("%s %s" % (totalcorrectdic[key], totaldic[key]))
                # NOTE(review): the ratio is correct / documents carrying
                # this label; the "precision" wording is kept so the output
                # text does not change.
                print("%s  precision:  %s" % (
                    key, totalcorrectdic[key] / float(totaldic[key])))

        if thisaccuracy_denom:
            print("This Round Accuracy:  %s" % (
                thisaccuracy_numer / thisaccuracy_denom,))
        else:
            print("This Round Accuracy:  n/a (no fold was scored)")
        totalaccuracy_numer += thisaccuracy_numer
        totalaccuracy_denom += thisaccuracy_denom

    if totalaccuracy_denom:
        print("Total Accuracy:  %s" % (
            totalaccuracy_numer / totalaccuracy_denom,))
    else:
        print("Total Accuracy:  n/a (no fold was scored)")
コード例 #4
0
def testClassifier(outputLabel):
    """Train on ``<outputLabel>.train`` and report results on ``.test``.

    A ``BayesClassifier`` is trained on the ``.train`` file, then every
    ``(label, tokens)`` pair read from the ``.test`` file is classified.
    For each label seen in the test data the function prints
    correct-guesses / times-that-label-was-guessed, followed by the overall
    accuracy.

    A label that occurs in the test data but was never guessed is reported
    as 0.0 (previously a KeyError), and an empty test set reports "n/a" for
    the total accuracy (previously a ZeroDivisionError).
    """
    bc = BayesClassifier()
    bc.train(outputLabel + ".train")
    reader = DataReader(outputLabel + ".test")
    correctLabel = {}   # label -> number of correct guesses for that label
    numberGuess = {}    # guess -> number of times it was predicted
    correct = 0.0
    total = 0.0
    for label, tokens in reader:
        # Register every true label so it is reported even with zero hits.
        correctLabel.setdefault(label, 0.0)
        guess = bc.classify(" ".join(tokens))
        numberGuess[guess] = numberGuess.get(guess, 0.0) + 1
        if guess == label:
            correctLabel[guess] += 1
            correct += 1
        total += 1

    for label in correctLabel:
        guessed = numberGuess.get(label, 0.0)
        # The classifier may never have predicted this label at all.
        ratio = correctLabel[label] / guessed if guessed else 0.0
        print("Correct %s - %s" % (label, ratio))

    if total:
        print("Total accuracy - %s" % (correct / total,))
    else:
        print("Total accuracy - n/a (no test documents)")
コード例 #5
0
# Distance classifier: train with the euclidean metric, then evaluate on
# both the training and the test split.
dc = DC.DistanceClassifier()
dc.train(trainInput, trainOutput, 'euclidean')

resultsDcTrain = dc.test(trainInput, trainOutput, 'euclidean')
resultsDcTest = dc.test(testInput, testOutput, 'euclidean')

# Accuracy = sum of the first entries / sum of the second entries of the
# pairs in results[0] (presumably (correct, total) pairs -- TODO confirm
# against DC.DistanceClassifier.test).
print("Distance classifier accuracy for train set : " + str(np.sum([x[0] for x in resultsDcTrain[0]])/np.sum([x[1] for x in resultsDcTrain[0]])))
print("Distance classifier accuracy for test set : " + str(np.sum([x[0] for x in resultsDcTest[0]])/np.sum([x[1] for x in resultsDcTest[0]])))

# results[1] is handed to the confusion-matrix plot and saved to disk.
IO.PlotCM(resultsDcTrain[1], save = True, fileName = "distanceConfusionTrain")
IO.PlotCM(resultsDcTest[1], save = True, fileName = "distanceConfusionTest")

# Assignment 3: Bayes classifier, first constructed with (7, 5).
bc = BC.BayesClassifier(7,5)
bc.train(trainInput, trainOutput)

resultsBcTrain = bc.test(trainInput, trainOutput)
resultsBcTest = bc.test(testInput, testOutput)

print("Bayes classifier accuracy for train set for 5 & 7 : " + str(resultsBcTrain))
print("Bayes classifier accuracy for test set for 5 & 7: " + str(resultsBcTest))

# Same experiment with the classifier constructed with (1, 4).
bc = BC.BayesClassifier(1,4)
bc.train(trainInput, trainOutput)

resultsBcTrain = bc.test(trainInput, trainOutput)
resultsBcTest = bc.test(testInput, testOutput)

print("Bayes classifier accuracy for train set for 1 & 4: " + str(resultsBcTrain))
# The test-set result for 1 & 4 was computed but never reported; print it
# so this run mirrors the 5 & 7 run above.
print("Bayes classifier accuracy for test set for 1 & 4: " + str(resultsBcTest))
コード例 #6
0
def receive_server(socklink, address):
    """Serve one client connection: read a request, dispatch it, reply.

    As implemented here, the client first sends the payload length and then
    the payload itself, which is the textual form of a dict with an
    ``'action'`` key.  Supported actions:

      * ``'request-result'`` -- classify ``data_dic['content']`` and reply
        with a list of 1 (ham), -1 (spam) or 0 (anything else).
      * ``'request-info'``   -- reply with ``search_owner(data_dic)``.
      * ``'post'``           -- reply with ``add_one_rule(data_dic)``.
      * ``'delete'``         -- reply with ``delete_one_rule(data_dic)``.

    Any other action is logged and ignored.  All work happens while holding
    the module-level ``sem``.  Every exception is logged with a traceback,
    and the socket is always closed on exit.

    Args:
        socklink: connected socket for this client.
        address: client address, used only in log messages.
    """

    def timestamp():
        # NOTE(review): '%F %T' is passed straight to the platform strftime
        # and is not supported everywhere (e.g. Windows) -- confirm targets.
        return str(datetime.datetime.now().strftime('%F %T'))

    try:
        with sem:
            # First message: the length of the payload that will follow.
            # (The original read from the undefined name `socketlink`.)
            # SECURITY: eval() on data received from the network executes
            # arbitrary expressions.  Left in place because the exact wire
            # format is not visible here; replace with int() /
            # ast.literal_eval() once the client format is confirmed.
            # NOTE(review): this assumes the length arrives as its own
            # recv() chunk -- TCP does not guarantee message boundaries.
            length = socklink.recv(102400)
            length = eval(length.decode())

            chunks = []  # raw byte chunks sent by the client
            current_length = 0
            while current_length < length:
                chunk = socklink.recv(102400)
                if not chunk:
                    # recv() returns b'' once the peer has closed; without
                    # this check the loop would never terminate.
                    break
                current_length += len(chunk)
                chunks.append(chunk)

            # Decode the payload; the decoded text is what gets parsed.
            data = b''.join(chunks).decode()

            if not data:  # nothing was received
                print('No data received from ' + str(address) +
                      "!  Request cancelled." + "    " + timestamp())
                raise Exception("No data received.")
            if current_length < length:
                raise Exception(
                    "Connection closed before the full request was received.")

            # Turn the text into a dict according to the agreed format.
            # SECURITY: same eval() concern as for the length above.
            data_dic = eval(data)

            if data_dic['action'] == 'request-result':
                # Run the model on the content and map each label to
                # 1 (not spam) or -1 (spam); 0 marks an unexpected label.
                bayes = Classifier.BayesClassifier()
                labels = bayes.classify(data_dic['content'])
                content = [
                    1 if item == 'ham' else -1 if item == 'spam' else 0
                    for item in labels
                ]

                # Reply to the client with the prediction results.
                # NOTE(review): 'response-reslut' looks like a typo of
                # 'response-result'; kept because clients may match on it.
                response_data = {
                    'action': 'response-reslut',
                    'content': content
                }
                socklink.sendall(repr(response_data).encode())
                print('Request-result from ' + str(address) +
                      " handled successfully." + "    " + timestamp())

            elif data_dic['action'] == 'request-info':
                # Look up the filter rules for the request via the database
                # helper and send them back.
                DB_operation = filter_rule_DB_operation.Filter_operation()
                response_data = DB_operation.search_owner(data_dic)
                socklink.sendall(repr(response_data).encode())
                print('Request-info from ' + str(address) +
                      " handled successfully." + "    " + timestamp())

            elif data_dic['action'] == 'post':
                # Store the submitted rule and reply with the helper's
                # result, which tells the client whether storing succeeded.
                DB_operation = filter_rule_DB_operation.Filter_operation()
                response_data = DB_operation.add_one_rule(data_dic)
                socklink.sendall(repr(response_data).encode())
                print('post from ' + str(address) + " handled successfully." +
                      "    " + timestamp())

            elif data_dic['action'] == 'delete':
                # Delete a filter rule and reply with the helper's result.
                DB_operation = filter_rule_DB_operation.Filter_operation()
                response_data = DB_operation.delete_one_rule(data_dic)
                socklink.sendall(repr(response_data).encode())
                print('delete request from ' + str(address) +
                      " handled successfully." + "    " + timestamp())

            else:
                # None of the known actions matched: log and drop it.
                print('Unknown request type from ' + str(address) +
                      '. Request cancelled' + "    " + timestamp())

    except Exception as error:
        print('Error happened in processing thread. \nClient: ' +
              str(address) + "\nError: " + str(error) + "    " +
              timestamp())
        traceback.print_exc()
    finally:
        socklink.close()
コード例 #7
0
def bayesClassifier(x,y):
	"""Cross-validate a freshly built ``bc.BayesClassifier`` on ``x`` and ``y``.

	Returns whatever ``crossValidation`` returns for that classifier.
	"""
	model = bc.BayesClassifier()
	outcome = crossValidation(model, x, y)
	return outcome