Example #1
import sys


def main():
    """
    Main calling function of the program
    :return: None
    """
    if sys.argv[1] == 'train':
        build_training_data()
        main_trainer()

    if sys.argv[1] == 'predict' and len(sys.argv) > 2:
        from predict import classification
        from AdaboostPredict import decision_stumps
        with open(sys.argv[2]) as input_file:
            data = input_file.readlines()
        print("Decision Tree prediction")
        for i in data:
            print(classification(i, i.strip().split()))

        print("\nAdaboost prediction")

        for i in data:
            print(decision_stumps(i, i.strip().split()))

    elif sys.argv[1] == 'predict':
        print('Wrong usage for prediction. Please supply a file after predict')
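Across Examples #1 through #3, classification from the predict module is called with the raw line and its whitespace-split tokens, and it returns a language label. Below is a minimal hypothetical stub consistent with that call pattern; the real function evaluates a trained decision tree, which is not shown on this page.

# Hypothetical stand-in for predict.classification, matching the call
# pattern classification(line, words) used in Examples #1-#3. The real
# function walks a trained decision tree; this stub only illustrates
# the assumed interface.
def classification(line, words):
    # e.g., decide from simple lexical features of the tokenized sentence
    if any(w.lower() in ("the", "and", "of") for w in words):
        return "ENGLISH"
    return "DUTCH"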
Example #2
def accuracyChecker(engtextDoc, dutextDoc, ittextDoc):
    print("Checking Accuracy")

    files = [engtextDoc, dutextDoc, ittextDoc]
    language = ["English", "Dutch", "Italian"]
    accuracyResult = []
    from predict import classification
    for index, file in enumerate(files):
        label1Count = 0
        label2Count = 0
        label3Count = 0
        with open(file, encoding="UTF-8") as f:
            sentences = f.readlines()
            for line in sentences:
                eachline = line.strip()
                line = line.strip().split()
                result = classification(eachline, line)
                if result == "ENGLISH":
                    label1Count += 1
                elif result == "DUTCH":
                    label2Count += 1
                else:
                    label3Count += 1
            count = [label1Count, label2Count, label3Count]
            total = label1Count + label2Count + label3Count
            # the file at position index should be classified as language[index]
            accCal = round(count[index] / total * 100, 5)
            accuracyResult.append(accCal)
    for i, l in zip(accuracyResult, language):
        print("Accuracy for " + l + " : " + str(i))
Example #3
def get_accuracy(english_file, dutch_file):
    """
    To check the accuracy of the test document
    :param english_file: English file
    :param dutch_file: Dutch File
    :return: Accuracy of the model for the languages
    """
    accuracy_result = []

    # To calculate the accuracy of our model
    from predict import classification

    # The list of test files for English and Dutch
    files = [english_file, dutch_file]

    for index, file in enumerate(files):
        count_1_label = 0
        count_2_label = 0
        with open(file, encoding="UTF-8") as f:
            sentences = f.readlines()
            for line in sentences:

                # To clean the data before sending the input
                eachline = line.strip()
                line = line.strip().split()

                # To send the line and the list of words for the decision trees
                result = classification(eachline, line)

                # To count the number of decisions for english and dutch
                if result == "ENGLISH":
                    count_1_label += 1
                else:
                    count_2_label += 1

            # Count classifications for this file; the file at position
            # index should be classified as its own language
            count = [count_1_label, count_2_label]
            total = count_1_label + count_2_label
            accuracy_result.append(round(count[index] / total * 100, 3))
    for accuracy, lang in zip(accuracy_result, ["English", "Dutch"]):
        print("Accuracy for " + lang + " : " + str(accuracy))
Example #4
import requests
import pandas as pd
from bs4 import BeautifulSoup


# get_news, predict, and model are assumed to be defined elsewhere in this module.
def crawler(maxpage, query, s_date, e_date):
    s_from = s_date.replace(".", "")
    e_to = e_date.replace(".", "")
    page = 1
    maxpage_t = (int(maxpage) - 1) * 10 + 1  # 11 = page 2, 21 = page 3, 31 = page 4, ..., 91 = page 10, 101 = page 11
    date = []
    title = []
    while page < maxpage_t:

        print(page)

        url = "https://search.naver.com/search.naver?where=news&query=" + query + "&sort=0&ds=" + s_date + "&de=" + e_date + "&nso=so%3Ar%2Cp%3Afrom" + s_from + "to" + e_to + "%2Ca%3A&start=" + str(
            page)

        req = requests.get(url)
        print(url)
        cont = req.content
        soup = BeautifulSoup(cont, 'html.parser')
        # print(soup)

        for urls in soup.select("._sp_each_url"):
            try:
                # print(urls["href"])
                if urls["href"].startswith("https://news.naver.com"):
                    # print(urls["href"])
                    news_detail = get_news(urls["href"])
                    date.append(news_detail[1])
                    title.append(news_detail[0])
                    print("[{0}] {1}".format(news_detail[1], news_detail[0]))
            except Exception as e:
                print(e)
                continue
        page += 10
    label = predict.classification(title, model)
    data = pd.DataFrame({
        'News': title,
        'Date': date,
        'label': label
    })
    data.to_csv('Data/NewsLabeled.csv', index=False, encoding='cp949')
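The crawler relies on a get_news(url) helper that is not shown; from its usage above, it must return a (title, date) pair for a news.naver.com article. A hypothetical sketch follows, with placeholder selectors that are assumptions rather than Naver's actual markup.

# Hypothetical sketch of the missing get_news helper. It must return
# (title, date) to match news_detail[0] / news_detail[1] above; the
# CSS selectors below are placeholders, not Naver's real markup.
def get_news(url):
    soup = BeautifulSoup(requests.get(url).content, 'html.parser')
    title = soup.select_one('h3.article-title').get_text(strip=True)
    date = soup.select_one('span.article-date').get_text(strip=True)
    return title, date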
Example #5
        if(score<0): #상반된예측한 경우
            accuracy = 52.3 - score*(random.randrange(1732, 1928)*0.001)
        else: #둘다 긍정적으로 예측한 경우
            accuracy = 52.3 + score*(random.randrange(1732, 1928)*0.001)

    else: #음봉일 경우
        if(score<0): #둘다 같은예측한 경우임
            accuracy = 52.3 + score*(random.randrange(1732, 1928)*0.001)
        else: #상반된예측한경우
            accuracy = 52.3 - score*(random.randrange(1732, 1928)*0.001)
    print(accuracy)
    return round(accuracy, 3)


if __name__ == '__main__':
    companies, codes, Prices, Volumes, DIVs, BPSs, PERs, EPSs, PBRs = get_input_data()
    DBController = DBHandler.MySqlController(host, ID, PW, DB_name)
    for i in range(100):
        x_input = np.array([Prices[i], Volumes[i], DIVs[i], BPSs[i], PERs[i], PBRs[i]])
        x_input = x_input.reshape((1, 6))
        model_result = stockmodel.predict(x_input)  # prediction from fundamental analysis
        model_result = model_result[0]
        labels = []
        Headlines = DBController.get_newses(companies[i])
        try:
            labels = predict.classification(Headlines, model)
        except Exception:
            pass  # fall back to an empty label list if classification fails
        score = get_score_labels(labels)  # compute the news score
        result = calculate_total_prediction(model_result, score)  # derive the final result
        print(companies[i], codes[i], Prices[i], Volumes[i], DIVs[i], BPSs[i],
              PERs[i], EPSs[i], PBRs[i], model_result, score, result)
        DBController.update_predict_result(str(Prices[i]), str(Volumes[i]), str(DIVs[i]),
                                           str(BPSs[i]), str(PERs[i]), str(EPSs[i]),
                                           str(PBRs[i]), str(model_result), str(score),
                                           str(result), codes[i])
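Neither get_score_labels nor calculate_total_prediction is shown. Judging from the comments, get_score_labels condenses the per-headline sentiment labels into one signed score; a hypothetical sketch follows, assuming binary labels where 1 marks a positive headline and 0 a negative one.

# Hypothetical sketch of get_score_labels: the net count of positive
# minus negative headlines. The author's actual label values and
# weighting are not shown in this example.
def get_score_labels(labels):
    positive = sum(1 for label in labels if label == 1)
    negative = sum(1 for label in labels if label == 0)
    return positive - negative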
Example #6
    # (fragment continues from an earlier except block that rebuilds the news driver)
    NewsDriver = Util.News_get_driver(Headless)
    print('An error occurred:', ex)
try:
    Util.Write_News(headlines, CompanyFromNews, nowDatehour)  # write the per-company news data
except Exception as ex:
    print("News Write Err")
    CompanyList = Util.GetCompanyList()  # reload the list of KOSPI-listed companies
try:
    Util.GetKospiGraph(KospiImageDriver, PriceInfo, Fluctuation)  # save the Kospi/Kosdaq graph images
    print("Get Kospi Graph")
except Exception as ex:
    KospiImageDriver.quit()
    KospiImageDriver = Util.Get_KospiGraphDriver(Headless)
    print("Graph Err")
    print('An error occurred:', ex)
try:
    label = predict.classification(headlines, model)
    print("Get labels")
    DBController.UpdateNews(CompanyFromNews, headlines, Text, NewsUrl, news_info, label)  # store the latest 20 articles in the DB
    DBController.InsertNewsHistory(CompanyFromNews, headlines, Text, NewsUrl, news_info, nowDatehour)
    print("DB Commit : News Updated, News History Inserted")
except Exception as ex:
    print("Label Err")
    MakeCompanyFile(MakeCompanyList)  # refresh the company list
    DBController = DBHandler.MySqlController(host, ID, PW, DB_name)
    print('An error occurred:', ex)
time.sleep(30)
NewsDriver.refresh()
PriceDriver.refresh()
KospiImageDriver.refresh()
print("DONE")
Example #7
import pickle as pkl
from predict import classification

# Load the pickled features and labels, then take the first 500 samples.
with open('ula.pkl', mode='rb') as f:
    x_data, y_data = pkl.load(f)
x = x_data[:500]
y = y_data[:500]

cl = classification(x, x, y, 5)
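Note that classification here takes four arguments (what appear to be test samples, training samples, training labels, and a hyperparameter such as k), so the predict module in this example is evidently a different implementation from the sentence-level classifier used in Examples #1 through #3.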