def __init__(self):
    """Load the label list and build the Magpie model.

    Reads ``stockLabels2.labels`` (one label per line) from the current
    working directory into ``self.labels`` and builds ``self.model`` from
    the directory name returned by ``find_latest('savedMagpieModels')``.

    Raises:
        IOError/OSError: if the label file cannot be opened.
        Any exception raised by ``find_latest`` or ``Magpie`` propagates.
    """
    # Index 0 is an empty label, exactly as before, so every label read
    # from the file keeps its original position in the list.
    # ``str.replace`` is used because ``string.replace`` exists only on
    # Python 2; the context manager closes the file handle.
    with open("stockLabels2.labels", "r") as label_file:
        self.labels = [""] + [line.replace('\n', '') for line in label_file]

    path = os.path.join('', 'savedMagpieModels')
    latest_path = 'savedMagpieModels/' + find_latest(path)
    self.model = Magpie(keras_model=str(latest_path + '/model.h5'),
                        word2vec_model=str(latest_path + '/embedding'),
                        scaler=str(latest_path + '/scaler'),
                        labels=self.labels)
def load_model(self):
    """(Re)load the Magpie model into ``self.model``.

    The model directory is ``savedMagpieModels/<find_latest(...)>``.

    Returns:
        bool: True when the model was loaded, False when anything went
        wrong. Failures are reported on stdout instead of being raised,
        so callers can keep using the boolean result as before.
    """
    print('loading model ...')
    path = os.path.join('', 'savedMagpieModels')
    try:
        latest_path = 'savedMagpieModels/' + find_latest(path)
        self.model = Magpie(keras_model=str(latest_path + '/model.h5'),
                            word2vec_model=str(latest_path + '/embedding'),
                            scaler=str(latest_path + '/scaler'),
                            labels=self.labels)
    except Exception as exc:
        # ``Exception`` (not a bare ``except``) so Ctrl-C / SystemExit still
        # propagate; the reason is printed so failures can be diagnosed.
        print('ERR in stockPrediction.loadModel()')
        print(repr(exc))
        return False
    print('model loaded')
    return True
def test_rnn_batch_train(self):
    """Batch-train an RNN for three epochs and sanity-check one prediction."""
    # Distinct labels, one per line of "<DATA_DIR>.labels".
    with io.open(DATA_DIR + '.labels', 'r') as label_file:
        labels = set(row.rstrip('\n') for row in label_file)

    # Build the word vectors, then train in batch mode.
    model = Magpie()
    model.init_word_vectors(DATA_DIR, vec_dim=100)
    history = model.batch_train(DATA_DIR, labels, nn_model='rnn', epochs=3)
    assert history is not None

    # One prediction entry is expected per known label.
    predictions = model.predict_from_text("Black holes are cool!")
    assert len(predictions) == len(labels)

    # Every entry must name a known label with a score in [0, 1].
    for predicted_label, score in predictions:
        assert predicted_label in labels
        assert 0 <= score <= 1
def reinitialize(
        data_dir='/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data'):
    """Rebuild a Magpie classifier from artefacts saved under *data_dir*.

    Args:
        data_dir: Directory holding ``categories.labels`` and the ``save/``
            tree (``save/model/here.h5``, ``save/embeddings/here``,
            ``save/scaler/here``). Defaults to the path that was previously
            hard-coded, so existing zero-argument calls behave as before.

    Returns:
        The ``Magpie`` instance built from the saved model, embeddings,
        scaler and the job labels read from ``categories.labels``.
    """
    root = data_dir.rstrip('/')

    # One label per line; every line is kept (only stripped) so the label
    # order stays exactly as it is in the file.
    with open(root + '/categories.labels') as f:
        labels = [x.strip() for x in f]

    magpie = Magpie(
        keras_model=root + '/save/model/here.h5',
        word2vec_model=root + '/save/embeddings/here',
        scaler=root + '/save/scaler/here',
        labels=labels)
    return magpie
from __future__ import print_function from magpie import Magpie import os dir_path = os.path.dirname(os.path.realpath(os.getcwd())) dir_path = os.path.join(dir_path, 'text-classification') labels = open('thread_labels.labels', 'r').read().splitlines() magpie = Magpie(keras_model='saved_data/model_main', word2vec_model='saved_data/word2_vec_model', scaler='saved_data/scaler', labels=labels) result = magpie.predict_from_file('test.txt')
def test_cnn_train(self):
    """Train a CNN end to end, save its artefacts and print one prediction."""
    print(PROJECT_DIR)
    print(DATA_DIR)

    # Distinct labels, one per line of "<DATA_DIR>.labels".
    with io.open(DATA_DIR + '.labels', 'r') as label_file:
        labels = list(set([row.rstrip('\n') for row in label_file]))

    # Word vectors first, then the classifier itself.
    model = Magpie()
    model.train_word2vec(DATA_DIR, vec_dim=300)
    print("done2")
    print("done3")
    model.init_word_vectors(DATA_DIR, vec_dim=300)
    model.train(DATA_DIR, labels, nn_model='cnn', test_ratio=0.2, epochs=30)

    # Save embeddings, scaler and Keras model next to the project root.
    keras_path = '{}/here1.h5'.format(PROJECT_DIR)
    embedding_path = '{}/embedinghere'.format(PROJECT_DIR)
    scaler_path = '{}/scaler'.format(PROJECT_DIR)
    model.save_word2vec_model(embedding_path)
    model.save_scaler(scaler_path, overwrite=True)
    model.save_model(keras_path)

    print("thuc hien test")
    # Smoke-test a single prediction.
    sample = 'cho em hỏi về lịch khám của bác_sỹ đào việt_hằng và số điện_thoại'
    print(model.predict_from_text(sample))
from magpie import Magpie
import time

# ``time.clock`` was removed in Python 3.8; prefer ``perf_counter`` and only
# fall back to ``clock`` on interpreters that predate it.
_timer = getattr(time, 'perf_counter', None) or time.clock

magpie = Magpie()

# Train and save a word2vec model for every embedding size 10, 20, ..., 500
# and record how long each run takes.
for count in range(10, 501, 10):
    start = _timer()
    magpie.train_word2vec('data/hep-categories', vec_dim=count)
    magpie.save_word2vec_model('save/embeddings/here' + str(count),
                               overwrite=True)
    runtime = _timer() - start

    print(str(count) + ',' + str(runtime))
    # Append "<size>,<seconds>" to the timing log; ``with`` guarantees the
    # handle is closed even if the write fails.
    with open('save/embeddings/here.txt', 'a') as log_file:
        log_file.write('\n' + str(count) + ',' + str(runtime))
def _clear_files(folder):
    """Delete every regular file directly inside *folder*; keep sub-directories."""
    for entry in os.listdir(folder):
        file_path = os.path.join(folder, entry)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
        except Exception as e:
            # Best effort: report the failure and carry on with the rest.
            print(e)


def Deep_learning(
        df, x_test, target,
        base_dir='/Users/sunxuan/Documents/PycharmProjects/ImpactPool/test_data/'):
    """Train a Magpie model on *df* and predict labels for *x_test*.

    Steps:
      1. Remove the files left in ``base_dir`` and ``base_dir/categories/``.
      2. For each row of *df*, write ``<index>.txt`` (the ASCII-only text of
         column *target*) and ``<index>.lab`` (one label per line, taken
         from column ``group_id``) into ``base_dir/categories/``.
      3. Write the distinct labels to ``base_dir/categories.labels``.
      4. Train Magpie (vec_dim=100, 20 epochs, no test split) on that corpus.
      5. Predict every text in *x_test*, keeping labels scored >= 0.25.

    Args:
        df: DataFrame with a text column named *target* and a ``group_id``
            column whose cells are string literals of label collections.
        x_test: Texts to classify; wrapped into a one-column frame named
            ``'title'`` via ``np.atleast_2d``.
            NOTE(review): the frame is then indexed with *target*, so this
            presumably only works when ``target == 'title'``; confirm.
        target: Name of the text column.
        base_dir: Working directory for the generated corpus. Defaults to
            the previously hard-coded location.

    Returns:
        dict mapping each preprocessed test text to its list of predicted
        labels.
    """
    base_dir = base_dir.rstrip('/') + '/'
    categories_dir = base_dir + 'categories/'
    corpus_dir = base_dir + 'categories'
    labels_path = base_dir + 'categories' + '.labels'

    # Start from a clean slate in both directories.
    _clear_files(base_dir)
    _clear_files(categories_dir)

    lab_list = []
    for i, row in df.iterrows():
        # NOTE(review): compares the index label with the row count; kept
        # as in the original. It never triggers for a default RangeIndex.
        if i > len(df):
            break
        file_name = categories_dir + str(i) + '.txt'
        lab_name = categories_dir + str(i) + '.lab'

        title_data = df.at[i, target].encode('ascii', 'ignore').decode('ascii')
        with open(file_name, 'w') as the_file:
            the_file.write(title_data)

        # SECURITY NOTE: ``eval`` executes arbitrary code. It is only safe
        # while ``group_id`` comes from a trusted source; consider
        # ``ast.literal_eval`` if the data can be externally supplied.
        row_labels = list(eval(df.at[i, 'group_id']))
        lab_list.extend(row_labels)
        # As before, no ``.lab`` file is created for a row without labels.
        if row_labels:
            with open(lab_name, 'a') as the_file:
                the_file.writelines(str(j) + '\n' for j in row_labels)

    lab_set = list(set(lab_list))
    # Written in one go ('w') so a stale file that survived the cleanup
    # cannot leave duplicate labels behind.
    with open(labels_path, 'w') as the_file:
        the_file.writelines(str(i) + '\n' for i in lab_set)

    magpie = Magpie()
    magpie.init_word_vectors(corpus_dir, vec_dim=100)
    # Read back the file that was just written. The original used the
    # cwd-relative 'test_data/categories.labels', which is the same file
    # only when the process runs from the project root.
    with open(labels_path) as f:
        labels = [x.strip() for x in f]
    magpie.train(corpus_dir, labels, test_ratio=0.0, epochs=20)

    results_dl = {}
    df_test = pd.DataFrame(np.atleast_2d(x_test), columns=['title'])
    for i, row in df_test.iterrows():
        title_data = df_test.at[i, target].encode('ascii',
                                                  'ignore').decode('ascii')
        title_data = preprocess(title_data)
        df_test.at[i, target] = title_data
        # Keep only the labels whose score reaches the 0.25 cut-off.
        pre_label = [
            s[0] for s in magpie.predict_from_text(title_data) if s[1] >= 0.25
        ]
        results_dl[title_data] = pre_label
    return results_dl
'54', '55', '56', '57', '58', '61', '7111', '7112', '7113', '7114', '7115', '7116', '7117', '7118', '7119', '71110', '71111', '7121', '7122', '7123', '7124', '7125', '7126', '7127', '7128', '7129', '7131', '7132', '7133', '7134', '7135', '7136', '7137', '7138', '7139', '71310', '71311', '71312', '7141', '7142', '7151', '721', '722', '723', '724', '7311', '7312', '7313', '7314', '7315', '7316', '7321', '7322', '7323', '7324', '7325', '7326', '7331', '7332', '7333', '7334', '7335', '7336', '734', '74' ] #train_dir = 'data/hep-categories' #2200条数据存放目录 train_dir = 'C:\\data\\Railway_Passenger_Transport' #2200条数据,以及规章文电存放目录 Success = 'Success:' error = 'error:' magpie = Magpie() lossHistory = LossHistory() for EMBEDDING_SIZE in [100, 200, 300, 400, 500]: try: for MIN_WORD_COUNT in [4, 5, 6, 7, 8]: for WORD2VEC_CONTEXT in [4, 5, 6, 7, 8]: if os.path.exists('log/' + train_dir[-3:] + '_' + str(EMBEDDING_SIZE) + '_' + str(MIN_WORD_COUNT) + '_' + str(WORD2VEC_CONTEXT) + '.txt'): continue magpie.train_word2vec(train_dir, vec_dim=EMBEDDING_SIZE, MWC=MIN_WORD_COUNT, w2vc=WORD2VEC_CONTEXT) magpie.fit_scaler('C:\\magpie-master\\data\\hep-categories')
# magpie = Magpie( # keras_model='save/keras_model_zh.h5', # word2vec_model='save/word2vec_model_zh', # scaler='save/scaler_zh', # labels=labels # ) # # print(magpie.predict_from_file('data/hep-categories/1002413.txt')) # print(magpie.predict_from_text('特朗普在联合国大会发表演讲谈到这届美国政府成绩时,称他已经取得了美国历史上几乎最大的成就。随后大会现场\ # 传出了嘲笑声,特朗普立即回应道:“这是真的。”此外,美军方也有专门的低轨甚至超低轨小型卫星星座计划,这些卫星不仅可用于通信和侦察,还可用于支援反高超音速导弹作战。')) # print(magpie.predict_from_text('此外,美军方也有专门的低轨甚至超低轨小型卫星星座计划,这些卫星不仅可用于通信和侦察,还可用于支援反高超\ # 音速导弹作战。特朗普在联合国大会发表演讲谈到这届美国政府成绩时,称他已经取得了美国历史上几乎最大的成就。随后大会现场传出了嘲笑声,特朗普立即回应道:“这是真的。”')) labels = ['满意', '喜悦', '乐观', '愤怒', '悲哀', '恐惧', '厌恶', '焦虑', '怀疑'] magpie = Magpie( keras_model='save/emotion_keras_model.h5', word2vec_model='save/emotion_word2vec_model', scaler='save/emotion_scaler', labels=labels ) # print(magpie.predict_from_text('害怕,恐怖如斯')) # print(magpie.predict_from_text('气死我了')) # print(magpie.predict_from_text('加油,很快就会好的')) # print(magpie.predict_from_text('希望早日康复')) # print(magpie.predict_from_text('英国航母战斗群已于1月达到初始作战能力,这标志着英国海军投射力量能力的一个阶段性变化。')) # print(magpie.predict_from_text('近年来伊朗、叙利亚、缅甸正逐渐成为朝鲜核技术和导弹技术出口的主要客户,其中伊朗所占的比重较高。')) emotion_dict = { '满意': 0, '喜悦': 0, '乐观': 0, '愤怒': 0, '悲哀': 0,
the_file.write(title_data) row_data = eval(df.at[i, 'group_id']) for j in row_data: lab_list.append(j) with open(lab_name, 'a') as the_file: the_file.write(str(j) + '\n') lab_set = list(set(lab_list)) file = '/Users/sunxuan/Documents/Impactpool/seniority analysis/googlecloud_magpie/data/' + 'categories' + '.labels' for i in lab_set: with open(file, 'a') as the_file: the_file.write(str(i) + '\n') """ train process """ magpie = Magpie() # magpie.train_word2vec('/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/categories', vec_dim=100) # magpie.fit_scaler('/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/categories') magpie.init_word_vectors( '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/categories', vec_dim=100) with open('data/categories.labels') as f: labels = f.readlines() labels = [x.strip() for x in labels] magpie.train( '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/categories', labels, test_ratio=0.0, epochs=30)
import io
import os
import unittest

from magpie import Magpie

# The project root is three directory levels above this file.
PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
DATA_DIR = os.path.join(PROJECT_DIR, 'data', 'hep-categories')

# Distinct labels, one per line of "<DATA_DIR>.labels".
with io.open(DATA_DIR + '.labels', 'r') as f:
    labels = list(set([line.rstrip('\n') for line in f]))
print(len(labels))
print(labels)

# Locations of the saved Keras model, embeddings and scaler.
path1 = '{}/here1.h5'.format(PROJECT_DIR)
path2 = '{}/embedinghere'.format(PROJECT_DIR)
path3 = '{}/scaler'.format(PROJECT_DIR)

magpie = Magpie(keras_model=path1,
                word2vec_model=path2,
                scaler=path3,
                labels=labels)

# Classify one sample sentence and show the first three entries.
predictions = magpie.predict_from_text(
    'toi bi dau bung kham benh het bao nhieu tien')
print(predictions[0], predictions[1], predictions[2])
labels4 = sys.argv[9]
labels = [labels1, labels2, labels3, labels4]

# Every artefact of a model lives in a directory named after the model.
dirName = 'D:\\xampp\\htdocs\\mtlbl\\webpage\\admin\\models\\' + model_name
os.mkdir(dirName)
model_path = '{}\\{}'.format(dirName, model_name)
scaler_path = '{}\\scaler_{}'.format(dirName, model_name)
keras_path = '{}\\keras_{}.h5'.format(dirName, model_name)

from magpie import Magpie

magpie = Magpie()
magpie.init_word_vectors(data, vec_dim=vec_num)
# More epochs give the network more passes over the training data.
magpie.train(data, labels, test_ratio=test_rat, epochs=ep)

# Save the embeddings, the scaler and the Keras model.
magpie.save_word2vec_model(model_path)
magpie.save_scaler(scaler_path, overwrite=True)
magpie.save_model(keras_path)
from magpie import Magpie

# One label per line; trailing whitespace (including the newline) is dropped.
with open('categories.labels') as f:
    labels = [line.rstrip() for line in f]

magpie = Magpie(keras_model='current_model/model.h5',
                word2vec_model='current_model/embedding.pkl',
                scaler='current_model/scaler.pkl',
                labels=labels)

# Classify one German sample sentence and show the first five entries.
sample_text = '“Ich denke, Du wirst die Scheibe irgendwo innerhalb dieses Kreises treffen”.'
predicted = magpie.predict_from_text(sample_text)
print(predicted[:5])
'7116', '7117', '7118', '7119', '71110', '71111', '7121', '7122', '7123', '7124', '7125', '7126', '7127', '7128', '7129', '7131', '7132', '7133', '7134', '7135', '7136', '7137', '7138', '7139', '71310', '71311', '71312', '7141', '7142', '7151', '721', '722', '723', '724', '7311', '7312', '7313', '7314', '7315', '7316', '7321', '7322', '7323', '7324', '7325', '7326', '7331', '7332', '7333', '7334', '7335', '7336', '734', '74' ] #train_dir = 'C:\\magpie-master\\data\\hep-categories' #train_dir = 'C:\\data\\Railway_Passenger_Transport' train_dir = 'C:\\data\\nlp_chinese_corpus' Success = 'Success:' error = 'error:' magpie = Magpie() lossHistory = LossHistory() for EMBEDDING_SIZE in [250, 500]: for MIN_WORD_COUNT in [5, 10]: for WORD2VEC_CONTEXT in [5, 10]: magpie.train_word2vec(train_dir, vec_dim=EMBEDDING_SIZE, MWC=MIN_WORD_COUNT, w2vc=WORD2VEC_CONTEXT) magpie.fit_scaler('C:\\magpie-master\\data\\hep-categories') magpie.train('C:\\magpie-master\\data\\hep-categories', labels, callbacks=[lossHistory], test_ratio=0.1, epochs=20) # 训练,20%数据作为测试数据,20轮 lossHistory.loss_plot(
from magpie import Magpie

# Load the saved "best" model. The order of ``labels`` is significant: it
# has to match the order used when the model was trained, so the list is
# kept exactly as it was (only wrapped, grouped by code prefix).
magpie = Magpie(
    keras_model = 'save/model/best.h5',
    word2vec_model = 'save/embeddings/best',
    scaler = 'save/scaler/best',
    labels = ['1111', '1112', '1113', '1114', '1115', '1116', '1117', '1118',
              '1121', '1122', '1123', '1124',
              '1131', '1132', '1133', '1134', '1135',
              '1141', '1142', '1143', '1144',
              '1151', '1152', '1153', '1154',
              '1211', '1212', '1213', '1214', '1215', '1216', '1217', '1218', '1219',
              '1221', '1222', '1223',
              '1231', '1232', '1233', '1234', '1235',
              '1241', '1242', '1243',
              '1251',
              '1311', '1312', '1313', '1314',
              '1321', '1322', '1323',
              '1331', '1332', '1333', '1334',
              '1341', '1342', '1343', '1344', '1345',
              '1351',
              '1411', '1421', '1431', '1441',
              '15',
              '2111', '2112', '2113', '2114', '2115', '2116', '2117',
              '2121', '2122', '2123', '2124',
              '2131', '2132', '2133', '2134',
              '2141', '2142', '2143', '2144', '2145', '2146', '2147', '2148', '2149', '21410',
              '2151', '2152', '2153', '2154', '2155', '2156',
              '2161', '2162', '2163', '2164', '2165', '2166', '2167', '2168',
              '2171', '2172', '2173', '2174', '2175', '2176', '2177', '2178', '2179', '21710', '21711',
              '2181', '2182', '2183', '2184', '2185', '2186', '2187', '2188',
              '2191', '2192', '2193', '2194', '2195', '2196',
              '221', '222', '223', '224',
              '2311', '2312', '2313', '2314', '2315', '2316',
              '2321', '2322', '2323', '2324',
              '24',
              '31', '32', '33', '34',
              '41', '42', '43',
              '51', '52', '53', '54', '55', '56', '57', '58',
              '61',
              '7111', '7112', '7113', '7114', '7115', '7116', '7117', '7118', '7119', '71110', '71111',
              '7121', '7122', '7123', '7124', '7125', '7126', '7127', '7128', '7129',
              '7131', '7132', '7133', '7134', '7135', '7136', '7137', '7138', '7139', '71310', '71311', '71312',
              '7141', '7142',
              '7151',
              '721', '722', '723', '724',
              '7311', '7312', '7313', '7314', '7315', '7316',
              '7321', '7322', '7323', '7324', '7325', '7326',
              '7331', '7332', '7333', '7334', '7335', '7336',
              '734', '74'])

# Single simulated test sample.
text1 = '我想买车票'
mag1 = magpie.predict_from_text(text1)
# Print the type of the prediction result, then the result itself.
print(type(mag1))
print(mag1)

# The string below is disabled code (a no-op expression statement): it shows
# how to predict from a text file instead of an inline string.
'''
#也可以通过从txt文件中读取测试数据进行批量测试
mag2 = magpie.predict_from_file('data/hep-categories/1002413.txt')
print(mag2)
'''
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@Author: njuselhx
@Time: 2021/1/21 7:01 PM
@File: train.py
@Software: PyCharm
"""
from magpie import Magpie

magpie = Magpie()

# Disabled code (a no-op string): earlier three-class training run.
'''
magpie.init_word_vectors('data/hep-categories-zh', vec_dim=100)
labels = ['军事', '旅游', '政治']
magpie.train('data/hep-categories-zh', labels, test_ratio=0.2, epochs=100)
magpie.save_model('save/keras_model_zh.h5')
magpie.save_word2vec_model('save/word2vec_model_zh', overwrite=True)
magpie.save_scaler('save/scaler_zh', overwrite=True)
print(magpie.predict_from_text('特朗普在联合国大会发表演讲谈到这届美国政府成绩时,称他已经取得了美国历史上几乎最大的成就。随后大会现场传出了嘲笑声,特朗普立即回应道:“这是真的。”'))
'''

# Emotion classifier: nine classes, 100-dimensional word vectors.
magpie.init_word_vectors('data/emotion-categories', vec_dim=100)
labels = ['满意', '喜悦', '乐观', '愤怒', '悲哀', '恐惧', '厌恶', '焦虑', '怀疑']
magpie.train('data/emotion-categories',
             labels,
             test_ratio=0.2,
             epochs=2333)

# Save the Keras model, the embeddings and the scaler.
magpie.save_model('save/emotion_keras_model.h5')
magpie.save_word2vec_model('save/emotion_word2vec_model', overwrite=True)
magpie.save_scaler('save/emotion_scaler', overwrite=True)
from magpie import Magpie

magpie = Magpie()
# Build 100-dimensional word vectors from the HEP corpus.
magpie.init_word_vectors('data/hep-categories', vec_dim=100)

# The five categories the classifier is trained on.
labels = [
    "Astrophysics",
    "Experiment-HEP",
    "Gravitation and Cosmology",
    "Phenomenology-HEP",
    "Theory-HEP",
]

# 30 epochs with 20% of the data held out for testing.
magpie.train('data/hep-categories', labels, test_ratio=0.2, epochs=30)

prediction = magpie.predict_from_text('Stephen Hawking studies black holes')
print(prediction)
import os
import sys

# Make both the working directory and its parent importable before magpie
# is imported.
sys.path.append(os.path.realpath(os.getcwd()))
sys.path.append("..")

from magpie import Magpie

# Load the artefacts saved in ../workspace.
magpie = Magpie(keras_model='../workspace/model.h5',
                word2vec_model='../workspace/embeddings',
                scaler='../workspace/scaler',
                labels=['旅游', '军事', '政治'])

# Single simulated test sample.
text = '特朗普在联合国大会发表演讲谈到这届美国政府成绩时,称他已经取得了美国历史上几乎最大的成就。随后大会现场传出了嘲笑声,特朗普立即回应道:“这是真的。”'
mag1 = magpie.predict_from_text(text)
print(mag1)

# Disabled code (a no-op string): predicting from a text file instead.
'''
#也可以通过从txt文件中读取测试数据进行批量测试
mag2 = magpie.predict_from_file('data/hep-categories/1002413.txt')
print(mag2)
'''
import os
import sys

# Make both the working directory and its parent importable before magpie
# is imported.
sys.path.append(os.path.realpath(os.getcwd()))
sys.path.append("..")

from magpie import Magpie

magpie = Magpie()

# Train word2vec, fit the scaler, then initialise the word vectors
# (3-dimensional embeddings).
magpie.train_word2vec('../data/hep-categories', vec_dim=3)
magpie.fit_scaler('../data/hep-categories')
magpie.init_word_vectors('../data/hep-categories', vec_dim=3)

# All categories the classifier knows about.
labels = ['军事', '旅游', '政治']

# Train for 20 epochs, holding out 20% of the data for testing.
magpie.train('../data/hep-categories',
             labels,
             test_ratio=0.2,
             epochs=20)

# Save the trained artefacts.
magpie.save_word2vec_model('../workspace/embeddings', overwrite=True)
magpie.save_scaler('../workspace/scaler', overwrite=True)
magpie.save_model('../workspace/model.h5')
from magpie import Magpie #train_dir = 'C:\\data\\Railway_Passenger_Transport' train_dir = 'data/hep-categories' magpie = Magpie() magpie.train_word2vec(train_dir, vec_dim=100, MWC=1, w2vc=5) magpie.fit_scaler('data/hep-categories') magpie.init_word_vectors('data/hep-categories') #定义所有类别 labels = [ '1111', '1112', '1113', '1114', '1115', '1116', '1117', '1118', '1121', '1122', '1123', '1124', '1131', '1132', '1133', '1134', '1135', '1141', '1142', '1143', '1144', '1151', '1152', '1153', '1154', '1211', '1212', '1213', '1214', '1215', '1216', '1217', '1218', '1219', '1221', '1222', '1223', '1231', '1232', '1233', '1234', '1235', '1241', '1242', '1243', '1251', '1311', '1312', '1313', '1314', '1321', '1322', '1323', '1331', '1332', '1333', '1334', '1341', '1342', '1343', '1344', '1345', '1351', '1411', '1421', '1431', '1441', '15', '2111', '2112', '2113', '2114', '2115', '2116', '2117', '2121', '2122', '2123', '2124', '2131', '2132', '2133', '2134', '2141', '2142', '2143', '2144', '2145', '2146', '2147', '2148', '2149', '21410', '2151', '2152', '2153', '2154', '2155', '2156', '2161', '2162', '2163', '2164', '2165', '2166', '2167', '2168', '2171', '2172', '2173', '2174', '2175', '2176', '2177', '2178', '2179', '21710', '21711', '2181', '2182', '2183', '2184', '2185', '2186', '2187', '2188', '2191', '2192', '2193', '2194', '2195', '2196', '221', '222', '223', '224', '2311', '2312', '2313', '2314', '2315', '2316', '2321', '2322', '2323', '2324', '24', '31', '32', '33', '34', '41', '42', '43', '51', '52', '53', '54', '55', '56', '57', '58', '61', '7111', '7112', '7113', '7114', '7115', '7116', '7117', '7118', '7119', '71110', '71111', '7121', '7122', '7123', '7124', '7125', '7126', '7127', '7128', '7129', '7131', '7132', '7133',
def train_magpie(labels):
    """Train a fresh Magpie model on ``WRITE_SK_CAT_PATH`` and return it.

    Word vectors use ``VEC_DIM`` dimensions; training runs for ``EPOCHS``
    epochs with 20% of the data held out for testing.
    """
    model = Magpie()
    model.init_word_vectors(WRITE_SK_CAT_PATH, vec_dim=VEC_DIM)
    model.train(WRITE_SK_CAT_PATH, labels, test_ratio=0.2, epochs=EPOCHS)
    return model
class StockPrediction:
    """Wrap a saved Magpie model and turn its output into a JSON-like dict."""

    def __init__(self):
        """Load the label list and build the Magpie model.

        Reads ``stockLabels2.labels`` (one label per line) from the current
        working directory. Any failure (missing file, missing model
        directory, ...) propagates to the caller.
        """
        # Index 0 is an empty label, exactly as before, so every label read
        # from the file keeps its original position. ``str.replace`` is used
        # because ``string.replace`` exists only on Python 2; the context
        # manager closes the file handle.
        with open("stockLabels2.labels", "r") as label_file:
            self.labels = [""] + [
                line.replace('\n', '') for line in label_file
            ]
        self.model = self._build_latest_model()

    def _build_latest_model(self):
        """Build a Magpie model from the directory chosen by ``find_latest``."""
        path = os.path.join('', 'savedMagpieModels')
        latest_path = 'savedMagpieModels/' + find_latest(path)
        return Magpie(keras_model=str(latest_path + '/model.h5'),
                      word2vec_model=str(latest_path + '/embedding'),
                      scaler=str(latest_path + '/scaler'),
                      labels=self.labels)

    def delete_model(self):
        """Drop the reference to the loaded model."""
        del self.model

    def load_model(self):
        """(Re)load the model into ``self.model``.

        Returns:
            bool: True on success, False when loading failed. Failures are
            reported on stdout rather than raised.
        """
        print('loading model ...')
        try:
            self.model = self._build_latest_model()
        except Exception as exc:
            # ``Exception`` (not a bare ``except``) so Ctrl-C / SystemExit
            # still propagate; the reason is printed for diagnosis.
            print('ERR in stockPrediction.loadModel()')
            print(repr(exc))
            return False
        print('model loaded')
        return True

    def create_stocks_bool_json(self, magpie_result):
        """Convert ``(label, probability)`` pairs into a prediction dict.

        Args:
            magpie_result: Iterable of ``(label, probability)`` pairs.

        Returns:
            dict: ``{"news_number": 100, "prediction": [...]}`` where each
            list item holds the label ``name``, its ``index`` in
            ``self.labels`` (as a string) and ``prediction`` = 1 when the
            probability is strictly greater than ``self.THRESHOLD``, else 0.

        Raises:
            ValueError: if a label is not present in ``self.labels``.
        """
        data = []
        # Unpack the pairs directly instead of parsing ``str(pair)``: the
        # old text parsing broke on labels containing commas and on
        # probability reprs that are not plain floats, and looked
        # probabilities up by the first matching name.
        for name, probability in magpie_result:
            data.append({
                "name": name,
                "index": str(self.labels.index(name)),
                "prediction": 1 if float(probability) > self.THRESHOLD else 0,
            })
        return {"news_number": 100, "prediction": data}

    def run(self, news, threshold):
        """Classify *news* and return the thresholded prediction dict."""
        self.THRESHOLD = threshold
        output = self.model.predict_from_text(news)
        return self.create_stocks_bool_json(output)
def load_magpie(labels):
    """Return a Magpie instance built from the saved model artefacts.

    The Keras model, word2vec model and scaler are read from the paths in
    ``SAVE_MAGPIE_MODEL_PATH``, ``SAVE_MAGPIE_WORD2VEC_PATH`` and
    ``SAVE_MAGPIE_SCALER_PATH``.
    """
    return Magpie(
        keras_model=SAVE_MAGPIE_MODEL_PATH,
        word2vec_model=SAVE_MAGPIE_WORD2VEC_PATH,
        scaler=SAVE_MAGPIE_SCALER_PATH,
        labels=labels,
    )
with open(address + '/' + Id + '.txt', "a") as file: file.write(Text) with open(address + '/' + Id + '.lab', "a") as file: file.write(label) print("Data generation finished.") address = "/home/ubuntu/toxic/magpie_data" #data_prep("/Users/wangergou/Downloads/kaggle/Toxic_Comment_Classification/Magpie/data/") data_prep(address) magpie = Magpie() print("Loading word vector... \n") magpie.train_word2vec(address, vec_dim=100) print("Initializing data... \n") magpie.init_word_vectors(address, vec_dim=100) labels = [ 'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate' ] print("Training starts... \n")
from magpie import Magpie

magpie = Magpie()

# Training corpus and word2vec hyper-parameters.
train_dir = 'C:\\data\\Railway_Passenger_Transport'
EMBEDDING_SIZE = 50
MIN_WORD_COUNT = 1
WORD2VEC_CONTEXT = 1

# "<last three characters of train_dir>_<size>_<min count>_<context>" is used
# both as the saved model's file name and in the success message.
run_tag = '_'.join([
    train_dir[-3:],
    str(EMBEDDING_SIZE),
    str(MIN_WORD_COUNT),
    str(WORD2VEC_CONTEXT),
])

magpie.train_word2vec(train_dir,
                      vec_dim=EMBEDDING_SIZE,
                      MWC=MIN_WORD_COUNT,
                      w2vc=WORD2VEC_CONTEXT)
magpie.save_word2vec_model('save/embeddings/' + run_tag, overwrite=True)
print(run_tag + ' Success!!!')
from magpie import Magpie
import os

folder = "magpie_data"

# The original built this path as ``folder + "askubuntu.labels"``, i.e.
# without the separator that every other artefact path below uses. Look
# inside the folder first and fall back to the legacy concatenated name so
# existing setups keep working.
labels_path = os.path.join(folder, "askubuntu.labels")
if not os.path.exists(labels_path):
    labels_path = folder + "askubuntu.labels"

# One label per line; entries of length <= 1 (blank lines, single
# characters) are dropped, as before. ``with`` closes the file handle.
with open(labels_path, 'r') as labf:
    labels = [entry for entry in labf.read().split('\n') if len(entry) > 1]

print("loading model")
magpie = Magpie(keras_model=folder + '/model.h5',
                word2vec_model=folder + '/wordvec',
                scaler=folder + '/scalervec',
                labels=labels)
def train_dl(save, vec_dim, epochs):
    """Train a Magpie model on the ImpactPool categories corpus.

    Args:
        save: When truthy, save the embeddings, scaler and Keras model
            under the project's ``data/save`` tree after training.
        vec_dim: Dimensionality of the word vectors.
        epochs: Number of training epochs.

    Returns:
        The trained ``Magpie`` instance.
    """
    project_data = '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data'
    corpus = project_data + '/categories'

    magpie = Magpie()
    magpie.init_word_vectors(corpus, vec_dim=vec_dim)

    # The label list is read relative to the current working directory.
    with open('data/categories.labels') as f:
        labels = [raw.strip() for raw in f]

    # No held-out test split: all data is used for training.
    magpie.train(corpus, labels, test_ratio=0.0, epochs=epochs)

    if not save:
        return magpie

    magpie.save_word2vec_model(project_data + '/save/embeddings/here')
    magpie.save_scaler(project_data + '/save/scaler/here', overwrite=True)
    magpie.save_model(project_data + '/save/model/here.h5')
    return magpie
# Every artefact of a model lives in a directory named after the model.
dirName = 'D:\\xampp\\htdocs\\mtlbl\\webpage\\models\\' + model_name
model_path = dirName + '\\' + model_name
scaler_path = dirName + '\\scaler_' + model_name
keras_path = dirName + '\\keras_' + model_name + '.h5'

from magpie import Magpie

# Build the classifier once from the saved artefacts. The original first
# created an empty ``Magpie()`` that was immediately overwritten.
magpie = Magpie(keras_model=keras_path,
                word2vec_model=model_path,
                scaler=scaler_path,
                labels=labelsa)

# File to classify: <classify dir>\<model name>\<val>.
path = 'D:\\xampp\\htdocs\\mtlbl\\webpage\\admin\\classify\\' + model_name + '\\' + val
print(magpie.predict_from_file(path))
alllabel.append(line) return alllabel #第四步 训练模型预测测试集的标签 if __name__ == '__main__': labels = getlabel('/home/ydm/ren/remote/multiLabel/data/labels.txt') # magpie = Magpie( # keras_model='/home/ydm/ren/remote/multiLabel/data/here.h5', # word2vec_model='/home/ydm/ren/remote/multiLabel/data/word2vec_mode', # scaler='/home/ydm/ren/remote/multiLabel/data/scaler', # labels=labels # ) magpie = Magpie() magpie.init_word_vectors( '/home/ydm/ren/remote/multiLabel/data/hep-categories', vec_dim=100) print(len(labels)) magpie.train('/home/ydm/ren/remote/multiLabel/data/hep-categories', labels, epochs=30, batch_size=128) magpie.save_word2vec_model( '/home/ydm/ren/remote/multiLabel/data/word2vec_mode_place') magpie.save_scaler('/home/ydm/ren/remote/multiLabel/data/scaler_place', overwrite=True) magpie.save_model('/home/ydm/ren/remote/multiLabel/data/model_place.h5') alltest = getlabel(