def test_cnn_train(self):
    """Train a CNN model on DATA_DIR, save its artefacts and print one prediction.

    Labels are read from the file at ``DATA_DIR + '.labels'`` (one per line)
    and de-duplicated. The trained Keras model, the word2vec model and the
    scaler are saved under PROJECT_DIR. This method makes no assertions;
    it only prints progress markers and the final prediction.
    """
    print(PROJECT_DIR)
    print(DATA_DIR)

    # Distinct labels, one per line of the labels file.
    with io.open(DATA_DIR + '.labels', 'r') as label_file:
        labels = list({row.rstrip('\n') for row in label_file})

    # Build and train the model.
    model = Magpie()
    model.train_word2vec(DATA_DIR, vec_dim=300)
    print("done2")
    print("done3")
    model.init_word_vectors(DATA_DIR, vec_dim=300)
    model.train(
        DATA_DIR,
        labels,
        nn_model='cnn',
        test_ratio=0.2,
        epochs=30,
    )

    # Persist the embedding, the scaler and finally the Keras model.
    keras_path = PROJECT_DIR + '/here1.h5'
    embedding_path = PROJECT_DIR + '/embedinghere'
    scaler_path = PROJECT_DIR + '/scaler'
    model.save_word2vec_model(embedding_path)
    model.save_scaler(scaler_path, overwrite=True)
    model.save_model(keras_path)

    print("thuc hien test")

    # Run a single prediction on a sample sentence.
    sample = 'cho em hỏi về lịch khám của bác_sỹ đào việt_hằng và số điện_thoại'
    print(model.predict_from_text(sample))
def test_rnn_batch_train(self):
    """Batch-train an RNN for three epochs and sanity-check one prediction.

    Asserts that ``batch_train`` returns something other than ``None``, that
    the prediction holds one entry per label, and that every entry pairs a
    known label with a value in the closed interval [0, 1].
    """
    # Distinct labels, one per line of the labels file.
    with io.open(DATA_DIR + '.labels', 'r') as label_file:
        labels = set(row.rstrip('\n') for row in label_file)

    # Train the model.
    model = Magpie()
    model.init_word_vectors(DATA_DIR, vec_dim=100)
    history = model.batch_train(DATA_DIR, labels, nn_model='rnn', epochs=3)
    assert history is not None

    # Predict on a short sentence and validate the shape of the result.
    predictions = model.predict_from_text("Black holes are cool!")
    assert len(predictions) == len(labels)

    for label, score in predictions:
        assert label in labels
        assert 0 <= score <= 1
'焦虑': 0, '怀疑': 0 } client = pymongo.MongoClient(host='124.70.84.12', port=27017, username="******", password="******") db = client['weibo_keyword_epidemic'] date = '2019-12-08' with open('data/emotion_frequency.csv', 'a+', encoding='utf-8') as f: f.write('日期,满意,喜悦,乐观,愤怒,悲哀,恐惧,厌恶,焦虑,怀疑' + '\n') while datetime.datetime.strptime(date, '%Y-%m-%d') <= datetime.datetime.strptime('2020-01-08', '%Y-%m-%d'): print(date) collection = db[date] documents_obj = collection.find({}) for i in range(0, min(collection.count_documents({}), 3000)): # print(documents_obj[i]['text']) # 拿到每一条微博的情感分析结果 res = magpie.predict_from_text(documents_obj[i]['text']) # 如果最大的数字小于0.75表明没有明显的情绪,跳过 if res[0][1] < 0.75: continue # 第二大的数字比最大的数字小0.05以上则只保留第一个 if res[0][1] - res[1][1] > 0.05: emotion_dict[res[0][0]] = emotion_dict[res[0][0]] + 1 continue # 第三大的数字比第二大的数字小0.03以上则只保留前两个 if res[1][1] - res[2][1] > 0.03: emotion_dict[res[0][0]] = emotion_dict[res[0][0]] + 1 / 2 emotion_dict[res[1][0]] = emotion_dict[res[1][0]] + 1 / 2 continue # 保留前三个 emotion_dict[res[0][0]] = emotion_dict[res[0][0]] + 1 / 3 emotion_dict[res[1][0]] = emotion_dict[res[1][0]] + 1 / 3
class StockPrediction:
    """Turn Magpie predictions for a news text into per-stock 0/1 flags.

    On construction the labels are read from ``stockLabels2.labels`` and a
    Magpie model is built from the files found under
    ``savedMagpieModels/<find_latest(...)>``. ``find_latest`` and ``Magpie``
    come from elsewhere in the project.

    Attributes:
        labels: List of label strings. Index 0 is always the empty string,
            so the first label from the file sits at index 1.
        model: The loaded Magpie instance.
        THRESHOLD: Probability cut-off; only set once ``run`` has been called.
    """

    def __init__(self):
        # The leading "" entry is kept on purpose: the indices reported by
        # create_stocks_bool_json depend on this one-position offset.
        labels = [""]
        # Context manager so the file handle is always released.
        with open("stockLabels2.labels", "r") as label_file:
            labels.extend(line.replace('\n', '') for line in label_file)
        self.labels = labels
        self.model = self._build_latest_model()

    def _build_latest_model(self):
        """Construct a Magpie model from the directory named by ``find_latest``."""
        path = os.path.join('', 'savedMagpieModels')
        latest_path = 'savedMagpieModels/' + find_latest(path)
        return Magpie(
            keras_model=str(latest_path + '/model.h5'),
            word2vec_model=str(latest_path + '/embedding'),
            scaler=str(latest_path + '/scaler'),
            labels=self.labels,
        )

    def delete_model(self):
        """Drop the reference to the currently loaded model."""
        del self.model

    def load_model(self):
        """Reload the latest saved model.

        Returns:
            True when the model was loaded, False when loading raised.
        """
        print('loading model ...')
        try:
            self.model = self._build_latest_model()
        except Exception:
            # Best-effort reload: report and signal failure to the caller.
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            print('ERR in stockPrediction.loadModel()')
            return False
        print('2222')
        print('model loaded')
        return True

    def create_stocks_bool_json(self, magpie_result):
        """Convert ``(name, probability)`` pairs into a JSON-ready dict.

        Args:
            magpie_result: Iterable of ``(label, probability)`` pairs, as
                passed in by ``run`` from ``predict_from_text``.

        Returns:
            ``{"news_number": 100, "prediction": [...]}``. Each list item
            holds the stock ``name``, its position in ``self.labels`` as a
            string (``index``), and ``prediction``: 1 when the probability
            is strictly above ``self.THRESHOLD``, else 0.

        Raises:
            ValueError: If a predicted name is not in ``self.labels``.
            AttributeError: If ``self.THRESHOLD`` has not been set yet
                (``run`` sets it).
        """
        threshold = self.THRESHOLD
        data = []
        # Unpack each pair directly. Parsing str(tuple) is fragile, and this
        # keeps every name paired with its own probability.
        for name, probability in magpie_result:
            data.append({
                "name": name,
                "index": str(self.labels.index(name)),
                "prediction": 1 if probability > threshold else 0,
            })
        return {"news_number": 100, "prediction": data}

    def run(self, news, threshold):
        """Predict on ``news`` and return the thresholded per-stock dict.

        Args:
            news: Text passed to ``self.model.predict_from_text``.
            threshold: Cut-off stored on the instance as ``THRESHOLD``.
        """
        self.THRESHOLD = threshold
        output = self.model.predict_from_text(news)
        return self.create_stocks_bool_json(output)
from magpie import Magpie

# One label per line; trailing whitespace (including the newline) is stripped.
with open('categories.labels') as label_file:
    labels = [row.rstrip() for row in label_file]

# Load the previously saved model, embedding and scaler.
magpie = Magpie(
    keras_model='current_model/model.h5',
    word2vec_model='current_model/embedding.pkl',
    scaler='current_model/scaler.pkl',
    labels=labels,
)

# Predict on one sample sentence and show the first five entries.
predicted = magpie.predict_from_text(
    '“Ich denke, Du wirst die Scheibe irgendwo innerhalb dieses Kreises treffen”.'
)
print(predicted[:5])
from magpie import Magpie

# Load the saved model together with the full list of category codes it
# was given as labels.
magpie = Magpie(
    keras_model='save/model/best.h5',
    word2vec_model='save/embeddings/best',
    scaler='save/scaler/best',
    labels=[
        '1111', '1112', '1113', '1114', '1115', '1116', '1117', '1118',
        '1121', '1122', '1123', '1124',
        '1131', '1132', '1133', '1134', '1135',
        '1141', '1142', '1143', '1144',
        '1151', '1152', '1153', '1154',
        '1211', '1212', '1213', '1214', '1215', '1216', '1217', '1218', '1219',
        '1221', '1222', '1223',
        '1231', '1232', '1233', '1234', '1235',
        '1241', '1242', '1243',
        '1251',
        '1311', '1312', '1313', '1314',
        '1321', '1322', '1323',
        '1331', '1332', '1333', '1334',
        '1341', '1342', '1343', '1344', '1345',
        '1351',
        '1411', '1421', '1431', '1441',
        '15',
        '2111', '2112', '2113', '2114', '2115', '2116', '2117',
        '2121', '2122', '2123', '2124',
        '2131', '2132', '2133', '2134',
        '2141', '2142', '2143', '2144', '2145', '2146', '2147', '2148', '2149', '21410',
        '2151', '2152', '2153', '2154', '2155', '2156',
        '2161', '2162', '2163', '2164', '2165', '2166', '2167', '2168',
        '2171', '2172', '2173', '2174', '2175', '2176', '2177', '2178', '2179', '21710', '21711',
        '2181', '2182', '2183', '2184', '2185', '2186', '2187', '2188',
        '2191', '2192', '2193', '2194', '2195', '2196',
        '221', '222', '223', '224',
        '2311', '2312', '2313', '2314', '2315', '2316',
        '2321', '2322', '2323', '2324',
        '24',
        '31', '32', '33', '34',
        '41', '42', '43',
        '51', '52', '53', '54', '55', '56', '57', '58',
        '61',
        '7111', '7112', '7113', '7114', '7115', '7116', '7117', '7118', '7119', '71110', '71111',
        '7121', '7122', '7123', '7124', '7125', '7126', '7127', '7128', '7129',
        '7131', '7132', '7133', '7134', '7135', '7136', '7137', '7138', '7139', '71310', '71311', '71312',
        '7141', '7142',
        '7151',
        '721', '722', '723', '724',
        '7311', '7312', '7313', '7314', '7315', '7316',
        '7321', '7322', '7323', '7324', '7325', '7326',
        '7331', '7332', '7333', '7334', '7335', '7336',
        '734',
        '74',
    ],
)

# A single mock test sentence.
text1 = '我想买车票'
mag1 = magpie.predict_from_text(text1)
print(type(mag1))
print(mag1)

# The string statement below is a disabled example (never executed): test
# data can also be read from a txt file for batch testing.
'''
#也可以通过从txt文件中读取测试数据进行批量测试
mag2 = magpie.predict_from_file('data/hep-categories/1002413.txt')
print(mag2)
'''
from magpie import Magpie

# Categories the classifier is trained to predict.
labels = [
    "Astrophysics",
    "Experiment-HEP",
    "Gravitation and Cosmology",
    "Phenomenology-HEP",
    "Theory-HEP",
]

# Initialise word vectors on the corpus, then train with a 20% test split.
magpie = Magpie()
magpie.init_word_vectors('data/hep-categories', vec_dim=100)
magpie.train(
    'data/hep-categories',
    labels,
    test_ratio=0.2,
    epochs=30,
)

# Print the prediction for one sample sentence.
print(magpie.predict_from_text('Stephen Hawking studies black holes'))
import io
import os
import unittest

from magpie import Magpie

# The project root is three directory levels above this file.
PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
DATA_DIR = os.path.join(PROJECT_DIR, 'data', 'hep-categories')

# Distinct labels, one per line of the labels file.
with io.open(DATA_DIR + '.labels', 'r') as f:
    labels = list({row.rstrip('\n') for row in f})
print(len(labels))
print(labels)

# Locations of the saved Keras model, word2vec embedding and scaler.
path1 = PROJECT_DIR + '/here1.h5'
path2 = PROJECT_DIR + '/embedinghere'
path3 = PROJECT_DIR + '/scaler'

magpie = Magpie(
    keras_model=path1,
    word2vec_model=path2,
    scaler=path3,
    labels=labels,
)

# Predict on one sentence and print the first three entries. Explicit
# indexing is kept, so fewer than three entries raises IndexError.
predictions = magpie.predict_from_text(
    'toi bi dau bung kham benh het bao nhieu tien'
)
print(predictions[0], predictions[1], predictions[2])
def _remove_files_in(folder):
    """Delete every regular file directly inside *folder* (best effort).

    Sub-directories are left untouched. A failure to delete one file is
    printed and does not stop the remaining deletions.
    """
    for entry in os.listdir(folder):
        entry_path = os.path.join(folder, entry)
        try:
            if os.path.isfile(entry_path):
                os.unlink(entry_path)
        except OSError as exc:
            print(exc)


def Deep_learning(df, x_test, target):
    """Train a Magpie model on ``df`` and predict labels for ``x_test``.

    Steps:
      1. Clear the files in the working folder and its ``categories``
         sub-folder.
      2. For each row of ``df``, write the ASCII-only text of column
         ``target`` to ``<index>.txt`` and the row's labels to ``<index>.lab``.
      3. Write the distinct labels to ``categories.labels``, initialise word
         vectors and train for 20 epochs with ``test_ratio=0.0``.
      4. Predict on every row of ``x_test`` and keep labels scoring >= 0.25.

    Args:
        df: DataFrame with the text column ``target`` and a ``group_id``
            column. Each ``group_id`` cell is a string that evaluates to an
            iterable of labels.
        x_test: Test texts, wrapped into a one-column DataFrame named 'title'.
        target: Name of the text column. The test frame has only a 'title'
            column, so the test lookups succeed only when ``target`` is
            'title'.

    Returns:
        dict mapping each preprocessed test text to its list of predicted
        labels.
    """
    base_dir = '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/test_data/'
    corpus_dir = base_dir + 'categories'
    labels_path = base_dir + 'categories' + '.labels'

    # Start from a clean slate. This also removes a previous labels file,
    # which lives directly in base_dir.
    _remove_files_in(base_dir)
    _remove_files_in(corpus_dir + '/')

    lab_list = []
    for i, _row in df.iterrows():
        # Kept from the original: stop once an index label exceeds the
        # number of rows.
        if i > len(df):
            break
        file_name = corpus_dir + '/' + str(i) + '.txt'
        lab_name = corpus_dir + '/' + str(i) + '.lab'

        # Drop non-ASCII characters before writing the document.
        title_data = df.at[i, target].encode('ascii', 'ignore').decode('ascii')
        with open(file_name, 'w') as text_file:
            text_file.write(title_data)

        # SECURITY NOTE(review): eval() executes arbitrary code stored in
        # the 'group_id' cell. If the cells are plain literals, consider
        # ast.literal_eval. Left unchanged here to preserve behaviour.
        row_labels = list(eval(df.at[i, 'group_id']))
        lab_list.extend(row_labels)
        if row_labels:
            # Open the .lab file once per row, not once per label.
            with open(lab_name, 'a') as lab_file:
                lab_file.writelines(str(label) + '\n' for label in row_labels)

    lab_set = list(set(lab_list))
    with open(labels_path, 'a') as labels_file:
        labels_file.writelines(str(label) + '\n' for label in lab_set)

    magpie = Magpie()
    magpie.init_word_vectors(corpus_dir, vec_dim=100)

    # Read back the file written above. The original used the relative path
    # 'test_data/categories.labels', which matches only when the current
    # working directory happens to be the project root.
    with open(labels_path) as labels_file:
        labels = [line.strip() for line in labels_file]
    magpie.train(corpus_dir, labels, test_ratio=0.0, epochs=20)

    results_dl = {}
    df_test = pd.DataFrame(np.atleast_2d(x_test), columns=['title'])
    for i, _row in df_test.iterrows():
        title_data = df_test.at[i, target].encode('ascii', 'ignore').decode('ascii')
        title_data = preprocess(title_data)
        df_test.at[i, target] = title_data
        # Keep only labels whose score reaches the 0.25 cut-off.
        results_dl[title_data] = [
            label
            for label, score in magpie.predict_from_text(title_data)
            if score >= 0.25
        ]
    return results_dl
# Train on the hep-categories corpus and save every artefact.
# NOTE(review): `labels`, `getlabel` and `json` are expected to be defined
# earlier in the file, outside this excerpt.
magpie = Magpie()
magpie.init_word_vectors(
    '/home/ydm/ren/remote/multiLabel/data/hep-categories', vec_dim=100)
print(len(labels))
magpie.train('/home/ydm/ren/remote/multiLabel/data/hep-categories',
             labels,
             epochs=30,
             batch_size=128)
magpie.save_word2vec_model(
    '/home/ydm/ren/remote/multiLabel/data/word2vec_mode_place')
magpie.save_scaler('/home/ydm/ren/remote/multiLabel/data/scaler_place',
                   overwrite=True)
magpie.save_model('/home/ydm/ren/remote/multiLabel/data/model_place.h5')

alltest = getlabel(
    '/home/ydm/ren/remote/multiLabel/data/allsents_test.txt')

# Write one JSON object per test sentence, mapping each of the first 30
# predicted labels to its score. The context manager guarantees the file
# is flushed and closed even if a prediction raises.
with open('/home/ydm/ren/remote/multiLabel/data/result_place.txt',
          'w',
          encoding='utf-8') as writes:
    for sent in alltest:
        pre_result = magpie.predict_from_text(sent)[:30]
        # float() turns each score into a plain Python float for json.dumps.
        resultDict = {item[0]: float(item[1]) for item in pre_result}
        writes.write(json.dumps(resultDict) + '\n')