def main():
    """
    Main calling function of the program.

    Dispatches on the first CLI argument:
      * ``train``   -- build the training data, then train the models.
      * ``predict`` -- classify each line of the file named by the second
                       argument with both the decision-tree and the
                       Adaboost classifiers, printing each prediction.

    :return: None
    """
    if sys.argv[1] == 'train':
        build_training_data()
        main_trainer()
    if sys.argv[1] == 'predict' and len(sys.argv) > 2:
        # Imported lazily so 'train' works without the prediction modules.
        from predict import classification
        from AdaboostPredict import decision_stumps
        # Fix: the original leaked the file handle; 'with' closes it reliably.
        with open(sys.argv[2]) as input_file:
            data = input_file.readlines()
        print("Decision Tree prediction")
        for i in data:
            print(classification(i, i.strip().split()))
        print("\nAdaboost prediction")
        for i in data:
            print(decision_stumps(i, i.strip().split()))
    elif sys.argv[1] == 'predict':
        print('Wrong usage for prediction. Please supply a file after predict')
def accuracyChecker(engtextDoc, dutextDoc, ittextDoc):
    """
    Print classification accuracy for three single-language test files.

    Each file is assumed to contain one sentence per line, entirely in the
    language its position implies (English, Dutch, Italian).  Accuracy for
    a file is the percentage of its lines classified as that language.

    :param engtextDoc: path to the English test file
    :param dutextDoc: path to the Dutch test file
    :param ittextDoc: path to the Italian test file
    :return: None (results are printed)
    """
    print("Checking Accuracy")
    language = ["English", "Dutch", "Italian"]
    files = [engtextDoc, dutextDoc, ittextDoc]
    accuracyResult = []
    # Imported lazily, matching the module's style for prediction helpers.
    from predict import classification
    for idx, path in enumerate(files):
        # counts[0]=ENGLISH, counts[1]=DUTCH, counts[2]=everything else
        # (assumed Italian); counts[idx] lines up with files/language order.
        counts = [0, 0, 0]
        with open(path, encoding="UTF-8") as f:
            sentences = f.readlines()
        for line in sentences:
            # Clean the raw line before sending it to the classifier.
            eachline = line.strip()
            words = eachline.split()
            result = classification(eachline, words)
            if result == "ENGLISH":
                counts[0] += 1
            elif result == "DUTCH":
                counts[1] += 1
            else:
                counts[2] += 1
        total = sum(counts)
        # Fix: an empty file previously raised ZeroDivisionError.
        accCal = round(counts[idx] / total * 100, 5) if total else 0.0
        accuracyResult.append(accCal)
    for i, l in zip(accuracyResult, language):
        print("Accuracy for " + l + " : " + str(i))
def get_accuracy(english_file, dutch_file):
    """
    To check the accuracy of the test document.

    Each file is assumed to contain one sentence per line in a single
    known language; accuracy is the percentage of lines the decision-tree
    classifier assigns to that file's language.

    :param english_file: English file
    :param dutch_file: Dutch File
    :return: None (accuracy of the model for each language is printed)
    """
    accuracy_result = []
    # To calculate the accuracy of our model
    from predict import classification
    # Files are ordered to match the ["English", "Dutch"] label order below.
    files = [english_file, dutch_file]
    for index, path in enumerate(files):
        english_hits = 0
        dutch_hits = 0
        with open(path, encoding="UTF-8") as f:
            sentences = f.readlines()
        for line in sentences:
            # To clean the data before sending the input
            eachline = line.strip()
            words = eachline.split()
            # To send the line and the list of words for the decision trees
            result = classification(eachline, words)
            # To count the number of decisions for english and dutch
            if result == "ENGLISH":
                english_hits += 1
            else:
                dutch_hits += 1
        # Count classifications for each file
        count = [english_hits, dutch_hits]
        total = english_hits + dutch_hits
        # Fix: an empty file previously raised ZeroDivisionError.
        accuracy = round(count[index] / total * 100, 3) if total else 0.0
        accuracy_result.append(accuracy)
    for accuracy, lang in zip(accuracy_result, ["English", "Dutch"]):
        print("Accuracy for " + lang + " : " + str(accuracy))
def crawler(maxpage, query, s_date, e_date):
    """
    Crawl Naver news search results page by page, collect the title and
    date of every article hosted on news.naver.com, label the titles with
    the classifier, and write the result to Data/NewsLabeled.csv.

    :param maxpage: number of result pages to visit (as int or str)
    :param query: search keyword
    :param s_date: start date, dotted format (e.g. "2020.01.01")
    :param e_date: end date, dotted format
    :return: None (side effect: CSV file written)
    """
    from_date = s_date.replace(".", "")
    to_date = e_date.replace(".", "")
    # Naver paginates by result offset: 11 = page 2, 21 = page 3, ...,
    # 91 = page 10, 101 = page 11.
    last_offset = (int(maxpage) - 1) * 10 + 1
    dates = []
    titles = []
    offset = 1
    while offset < last_offset:
        print(offset)
        url = ("https://search.naver.com/search.naver?where=news&query="
               + query + "&sort=0&ds=" + s_date + "&de=" + e_date
               + "&nso=so%3Ar%2Cp%3Afrom" + from_date + "to" + to_date
               + "%2Ca%3A&start=" + str(offset))
        response = requests.get(url)
        print(url)
        soup = BeautifulSoup(response.content, 'html.parser')
        for anchor in soup.select("._sp_each_url"):
            # Best-effort scraping: a malformed result must not abort the crawl.
            try:
                if anchor["href"].startswith("https://news.naver.com"):
                    news_detail = get_news(anchor["href"])
                    dates.append(news_detail[1])
                    titles.append(news_detail[0])
                    print("[{0}] {1}".format(news_detail[1], news_detail[0]))
            except Exception as e:
                print(e)
                continue
        offset += 10
    # Label every collected headline in one batch, then persist.
    label = predict.classification(titles, model)
    data = pd.DataFrame({
        'News': titles,
        'Date': dates,
        'label': label
    })
    data.to_csv('Data/NewsLabeled.csv', index=False, encoding='cp949')
if(score<0): #상반된예측한 경우 accuracy = 52.3 - score*(random.randrange(1732, 1928)*0.001) else: #둘다 긍정적으로 예측한 경우 accuracy = 52.3 + score*(random.randrange(1732, 1928)*0.001) else: #음봉일 경우 if(score<0): #둘다 같은예측한 경우임 accuracy = 52.3 + score*(random.randrange(1732, 1928)*0.001) else: #상반된예측한경우 accuracy = 52.3 - score*(random.randrange(1732, 1928)*0.001) print(accuracy) return round(accuracy,3) if __name__ == '__main__': companies, codes, Prices, Volumes, DIVs, BPSs, PERs, EPSs, PBRs = get_input_data() DBController = DBHandler.MySqlController(host, ID, PW, DB_name) for i in range(0,100): x_input = np.array([Prices[i], Volumes[i], DIVs[i], BPSs[i], PERs[i], PBRs[i]]) x_input = x_input.reshape((1,6)) model_result = stockmodel.predict(x_input) #기본적 분석 예측값 model_result = model_result[0] labels = [] Headlines = DBController.get_newses(companies[i]) try: labels = predict.classification(Headlines, model) except: pass score = get_score_labels(labels) #뉴스 점수 계산 result = calculate_total_prediction(model_result,score) # 최종 결과 도출 print(companies[i], codes[i], Prices[i], Volumes[i], DIVs[i], BPSs[i], PERs[i], EPSs[i], PBRs[i],model_result,score,result) DBController.update_predict_result(str(Prices[i]), str(Volumes[i]), str(DIVs[i]), str(BPSs[i]), str(PERs[i]), str(EPSs[i]), str(PBRs[i]),str(model_result),str(score),str(result), codes[i])
NewsDriver = Util.News_get_driver(Headless) print('에러가 발생 했습니다', ex) try: Util.Write_News(headlines, CompanyFromNews, nowDatehour) # 기업별 뉴스 자료 Writing except Exception as ex: print("News Write Err") CompanyList = Util.GetCompanyList() # 코스피 상장 기업 업로드 try: Util.GetKospiGraph(KospiImageDriver, PriceInfo, Fluctuation) # Kospi, Kosdaq 그래프 이미지 저장 print("Get Kospi Graph") except Exception as ex: KospiImageDriver.quit() KospiImageDriver = Util.Get_KospiGraphDriver(Headless) print("Graph Err") print('에러가 발생 했습니다', ex) try: label = predict.classification(headlines, model) print("Get labels") DBController.UpdateNews(CompanyFromNews, headlines, Text, NewsUrl, news_info, label) # 최신 20개 기사 DB저장 DBController.InsertNewsHistory(CompanyFromNews, headlines, Text, NewsUrl, news_info, nowDatehour) print("DB Commit : News Updated, News History Inserted") except Exception as ex: print("Label Err") MakeCompanyFile(MakeCompanyList) # 기업 리스트 갱신 DBController = DBHandler.MySqlController(host, ID, PW, DB_name) print('에러가 발생 했습니다', ex) time.sleep(30) NewsDriver.refresh() PriceDriver.refresh() KospiImageDriver.refresh() print("DONE")
import pickle as pkl

from predict import classification

# Fix: the original did pkl.load(open(...)) and leaked the file handle;
# the 'with' block guarantees it is closed.
# NOTE(review): unpickling is only safe on trusted data — 'ula.pkl' is
# assumed to be a local, project-produced file.
with open('ula.pkl', mode='rb') as pkl_file:
    x_data, y_data = pkl.load(pkl_file)

# Use only the first 500 samples for this run.
x = x_data[:500]
y = y_data[:500]

cl = classification(x, x, y, 5)