def __init__(self):
     """Read the label file and load the most recently saved Magpie model.

     Sets ``self.labels`` (list of str) and ``self.model``.
     """
     # Index 0 holds an empty string, so labels read from the file start at
     # index 1.
     labels = [""]
     # str.replace is used because string.replace() no longer exists in
     # Python 3; the with-block guarantees the file is closed.
     with open("stockLabels2.labels", "r") as label_file:
         for line in label_file:
             labels.append(line.replace('\n', ''))
     self.labels = labels

     path = os.path.join('', 'savedMagpieModels')
     latest_path = 'savedMagpieModels/' + find_latest(path)
     self.model = Magpie(keras_model=str(latest_path + '/model.h5'),
                         word2vec_model=str(latest_path + '/embedding'),
                         scaler=str(latest_path + '/scaler'),
                         labels=self.labels)
 def load_model(self):
     """Reload the most recently saved Magpie model into ``self.model``.

     Returns:
         bool: True when the model was loaded, False when loading raised.
     """
     print('loading model ...')
     result = False
     path = os.path.join('', 'savedMagpieModels')
     try:
         latest_path = 'savedMagpieModels/' + find_latest(path)
         self.model = Magpie(keras_model=str(latest_path + '/model.h5'),
                             word2vec_model=str(latest_path + '/embedding'),
                             scaler=str(latest_path + '/scaler'),
                             labels=self.labels)
         print('2222')
         result = True
         print('model loaded')
     except Exception as exc:
         # Catch Exception rather than using a bare except so that
         # KeyboardInterrupt / SystemExit still propagate, and report the
         # cause instead of hiding it.
         print('ERR in stockPrediction.loadModel()')
         print(exc)
     return result
Example #3
0
	def test_rnn_batch_train(self):
		"""Batch-train an RNN on DATA_DIR for 3 epochs and sanity-check one prediction."""
		# Collect the label names: one per line of the .labels file
		labels = set()
		with io.open(DATA_DIR + '.labels', 'r') as label_file:
			for raw_line in label_file:
				labels.add(raw_line.rstrip('\n'))

		# Prepare the word vectors, then train in batches
		model = Magpie()
		model.init_word_vectors(DATA_DIR, vec_dim=100)
		history = model.batch_train(DATA_DIR, labels, nn_model='rnn', epochs=3)
		assert history is not None

		# One prediction entry is expected per label
		predictions = model.predict_from_text("Black holes are cool!")
		assert len(predictions) == len(labels)

		# Each entry must name a known label and carry a score within [0, 1]
		for label, score in predictions:
			assert label in labels
			assert 0 <= score <= 1
Example #4
0
def reinitialize():
    """
    Build a Magpie instance from the saved model, embeddings and scaler files,
    using the labels listed (one per line) in categories.labels.
    """
    labels_file = '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/categories.labels'
    with open(labels_file) as handle:  # job labels
        raw_lines = handle.readlines()
    labels = [entry.strip() for entry in raw_lines]

    return Magpie(
        keras_model='/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/save/model/here.h5',
        word2vec_model='/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/save/embeddings/here',
        scaler='/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/save/scaler/here',
        labels=labels)
Example #5
0
    def test_rnn_batch_train(self):
        """Batch-train an RNN on DATA_DIR for 3 epochs and sanity-check one prediction."""
        # Collect the label names: one per line of the .labels file
        labels = set()
        with io.open(DATA_DIR + '.labels', 'r') as label_file:
            for raw_line in label_file:
                labels.add(raw_line.rstrip('\n'))

        # Prepare the word vectors, then train in batches
        model = Magpie()
        model.init_word_vectors(DATA_DIR, vec_dim=100)
        history = model.batch_train(DATA_DIR, labels, nn_model='rnn', epochs=3)
        assert history is not None

        # One prediction entry is expected per label
        predictions = model.predict_from_text("Black holes are cool!")
        assert len(predictions) == len(labels)

        # Each entry must name a known label and carry a score within [0, 1]
        for label, score in predictions:
            assert label in labels
            assert 0 <= score <= 1
Example #6
0
from __future__ import print_function
from magpie import Magpie
import os

dir_path = os.path.dirname(os.path.realpath(os.getcwd()))
dir_path = os.path.join(dir_path, 'text-classification')

# Read one label per line; the with-block closes the file once it is read.
with open('thread_labels.labels', 'r') as labels_file:
    labels = labels_file.read().splitlines()

# Load the saved Keras model, word2vec model and scaler.
magpie = Magpie(keras_model='saved_data/model_main',
                word2vec_model='saved_data/word2_vec_model',
                scaler='saved_data/scaler',
                labels=labels)

result = magpie.predict_from_file('test.txt')
Example #7
0
    def test_cnn_train(self):
        """Train a CNN Magpie model on DATA_DIR, save it and print one prediction.

        The word2vec model, scaler and Keras model are written under
        PROJECT_DIR.
        """
        print(PROJECT_DIR)
        print(DATA_DIR)
        # De-duplicate AND sort the labels. The iteration order of a set of
        # strings changes between interpreter runs (hash randomisation), so
        # list(set(...)) would give a different label order on every run;
        # sorting makes the order reproducible for any later process that
        # rebuilds the label list for the saved model.
        with io.open(DATA_DIR + '.labels', 'r') as f:
            labels = sorted({line.rstrip('\n') for line in f})

        # Run the model

        model = Magpie()
        model.train_word2vec(DATA_DIR, vec_dim=300)
        print("done2")

        print("done3")
        model.init_word_vectors(DATA_DIR, vec_dim=300)
        model.train(DATA_DIR,
                    labels,
                    nn_model='cnn',
                    test_ratio=0.2,
                    epochs=30)
        path1 = PROJECT_DIR + '/here1.h5'
        path2 = PROJECT_DIR + '/embedinghere'
        path3 = PROJECT_DIR + '/scaler'
        model.save_word2vec_model(path2)
        model.save_scaler(path3, overwrite=True)
        model.save_model(path1)
        print("thuc hien test")

        # Do a simple prediction

        print(
            model.predict_from_text(
                'cho em hỏi về lịch khám của bác_sỹ đào việt_hằng và số điện_thoại'
            ))
Example #8
0
from magpie import Magpie
import time

count = 10
magpie = Magpie()
# Time word2vec training for vector sizes 10, 20, ..., 500 and append a
# "size,seconds" line to the log file for each run.
while count <= 500:
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed intervals.
    start = time.perf_counter()
    magpie.train_word2vec('data/hep-categories', vec_dim=count)
    magpie.save_word2vec_model('save/embeddings/here' + str(count),
                               overwrite=True)
    end = time.perf_counter()
    runtime = end - start
    print(str(count) + ',' + str(runtime))
    # The with-block closes the log even if the write fails.
    with open('save/embeddings/here.txt', 'a') as file:
        file.write('\n' + str(count) + ',' + str(runtime))
    count = count + 10
def _remove_files_in(folder):
    """Delete every regular file directly inside *folder*; errors are printed, not raised."""
    for entry in os.listdir(folder):
        file_path = os.path.join(folder, entry)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
        except Exception as e:
            print(e)


def Deep_learning(df, x_test, target):
    """Rebuild the Magpie corpus from *df*, train a model and label *x_test*.

    Args:
        df: DataFrame whose ``target`` column holds the text of each sample
            and whose ``group_id`` column holds a string that evaluates to an
            iterable of labels.
        x_test: Test text(s); wrapped with ``np.atleast_2d`` into a DataFrame
            that has the single column ``'title'``.
        target: Name of the text column read from *df* and from the test
            frame.

    Returns:
        dict: preprocessed test text -> list of labels whose predicted score
        is at least 0.25.
    """
    data_root = '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/test_data/'
    corpus_dir = data_root + 'categories'
    labels_path = corpus_dir + '.labels'

    # Start from an empty corpus: the files below are opened in append mode.
    _remove_files_in(data_root)
    _remove_files_in(corpus_dir + '/')

    lab_list = []
    for i, _row in df.iterrows():
        # NOTE(review): this compares the index label with the row count; it
        # never triggers for a default RangeIndex - confirm the intent.
        if i > len(df):
            break

        sample_stem = corpus_dir + '/' + str(i)

        # Drop every non-ASCII character from the text.
        title_data = df.at[i, target].encode('ascii',
                                             'ignore').decode('ascii')
        with open(sample_stem + '.txt', 'w') as text_file:
            text_file.write(title_data)

        # NOTE(security): eval() executes arbitrary code found in the
        # 'group_id' column. If the column only ever holds literals,
        # ast.literal_eval would be the safe replacement - confirm first.
        row_labels = list(eval(df.at[i, 'group_id']))
        lab_list.extend(row_labels)
        # Open the .lab file once per sample; skip it entirely when there
        # are no labels so that no empty file is created.
        if row_labels:
            with open(sample_stem + '.lab', 'a') as lab_file:
                lab_file.writelines(str(label) + '\n' for label in row_labels)

    lab_set = list(set(lab_list))
    if lab_set:
        with open(labels_path, 'a') as labels_file:
            labels_file.writelines(str(label) + '\n' for label in lab_set)

    magpie = Magpie()
    # magpie.train_word2vec('/Users/sunxuan/Documents/PycharmProjects/ImpactPool/test_data/categories', vec_dim=100)
    # magpie.fit_scaler('/Users/sunxuan/Documents/PycharmProjects/ImpactPool/test_data/categories')

    magpie.init_word_vectors(corpus_dir, vec_dim=100)

    # Read the labels back from the absolute path written above rather than
    # from a cwd-relative path, so the function works from any working
    # directory.
    with open(labels_path) as f:
        labels = [x.strip() for x in f]
    magpie.train(corpus_dir, labels, test_ratio=0.0, epochs=20)

    results_dl = {}

    df_test = pd.DataFrame(np.atleast_2d(x_test), columns=['title'])

    for i, _row in df_test.iterrows():
        title_data = df_test.at[i, target].encode('ascii',
                                                  'ignore').decode('ascii')
        title_data = preprocess(title_data)
        df_test.at[i, target] = title_data

        # Keep only the labels whose score reaches the 0.25 cut-off.
        results_dl[title_data] = [
            s[0] for s in magpie.predict_from_text(title_data) if s[1] >= 0.25
        ]
    return results_dl
Example #10
0
    '54', '55', '56', '57', '58', '61', '7111', '7112', '7113', '7114', '7115',
    '7116', '7117', '7118', '7119', '71110', '71111', '7121', '7122', '7123',
    '7124', '7125', '7126', '7127', '7128', '7129', '7131', '7132', '7133',
    '7134', '7135', '7136', '7137', '7138', '7139', '71310', '71311', '71312',
    '7141', '7142', '7151', '721', '722', '723', '724', '7311', '7312', '7313',
    '7314', '7315', '7316', '7321', '7322', '7323', '7324', '7325', '7326',
    '7331', '7332', '7333', '7334', '7335', '7336', '734', '74'
]

#train_dir = 'data/hep-categories'    # directory holding the 2200 data records
train_dir = 'C:\\data\\Railway_Passenger_Transport'  # directory holding the 2200 data records plus the regulation documents and telegrams

# Message prefixes.
Success = 'Success:'
error = 'error:'

# Shared Magpie instance and LossHistory object (LossHistory is defined
# outside this excerpt).
magpie = Magpie()
lossHistory = LossHistory()
for EMBEDDING_SIZE in [100, 200, 300, 400, 500]:
    try:
        for MIN_WORD_COUNT in [4, 5, 6, 7, 8]:
            for WORD2VEC_CONTEXT in [4, 5, 6, 7, 8]:
                if os.path.exists('log/' + train_dir[-3:] + '_' +
                                  str(EMBEDDING_SIZE) + '_' +
                                  str(MIN_WORD_COUNT) + '_' +
                                  str(WORD2VEC_CONTEXT) + '.txt'):
                    continue
                magpie.train_word2vec(train_dir,
                                      vec_dim=EMBEDDING_SIZE,
                                      MWC=MIN_WORD_COUNT,
                                      w2vc=WORD2VEC_CONTEXT)
                magpie.fit_scaler('C:\\magpie-master\\data\\hep-categories')
Example #11
0
# magpie = Magpie(
#     keras_model='save/keras_model_zh.h5',
#     word2vec_model='save/word2vec_model_zh',
#     scaler='save/scaler_zh',
#     labels=labels
# )
# # print(magpie.predict_from_file('data/hep-categories/1002413.txt'))
# print(magpie.predict_from_text('特朗普在联合国大会发表演讲谈到这届美国政府成绩时,称他已经取得了美国历史上几乎最大的成就。随后大会现场\
# 传出了嘲笑声,特朗普立即回应道:“这是真的。”此外,美军方也有专门的低轨甚至超低轨小型卫星星座计划,这些卫星不仅可用于通信和侦察,还可用于支援反高超音速导弹作战。'))
# print(magpie.predict_from_text('此外,美军方也有专门的低轨甚至超低轨小型卫星星座计划,这些卫星不仅可用于通信和侦察,还可用于支援反高超\
# 音速导弹作战。特朗普在联合国大会发表演讲谈到这届美国政府成绩时,称他已经取得了美国历史上几乎最大的成就。随后大会现场传出了嘲笑声,特朗普立即回应道:“这是真的。”'))

# Label list handed to Magpie together with the saved emotion model files.
labels = ['满意', '喜悦', '乐观', '愤怒', '悲哀', '恐惧', '厌恶', '焦虑', '怀疑']
# Load the saved Keras model, word2vec model and scaler from the save/ directory.
magpie = Magpie(
    keras_model='save/emotion_keras_model.h5',
    word2vec_model='save/emotion_word2vec_model',
    scaler='save/emotion_scaler',
    labels=labels
)
# print(magpie.predict_from_text('害怕,恐怖如斯'))
# print(magpie.predict_from_text('气死我了'))
# print(magpie.predict_from_text('加油,很快就会好的'))
# print(magpie.predict_from_text('希望早日康复'))
# print(magpie.predict_from_text('英国航母战斗群已于1月达到初始作战能力,这标志着英国海军投射力量能力的一个阶段性变化。'))
# print(magpie.predict_from_text('近年来伊朗、叙利亚、缅甸正逐渐成为朝鲜核技术和导弹技术出口的主要客户,其中伊朗所占的比重较高。'))

emotion_dict = {
    '满意': 0,
    '喜悦': 0,
    '乐观': 0,
    '愤怒': 0,
    '悲哀': 0,
            the_file.write(title_data)

        row_data = eval(df.at[i, 'group_id'])
        for j in row_data:
            lab_list.append(j)
            with open(lab_name, 'a') as the_file:
                the_file.write(str(j) + '\n')
lab_set = list(set(lab_list))
file = '/Users/sunxuan/Documents/Impactpool/seniority analysis/googlecloud_magpie/data/' + 'categories' + '.labels'
# Append all labels with a single open/close instead of reopening the file
# for every label; nothing is opened when there are no labels, so no empty
# file is created.
if lab_set:
    with open(file, 'a') as the_file:
        the_file.writelines(str(i) + '\n' for i in lab_set)

# ---- train process ----
magpie = Magpie()
# magpie.train_word2vec('/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/categories', vec_dim=100)
# magpie.fit_scaler('/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/categories')

magpie.init_word_vectors(
    '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/categories',
    vec_dim=100)

# NOTE(review): the labels are read from a cwd-relative path that differs
# from the absolute path written above - confirm both point at the intended
# file.
with open('data/categories.labels') as f:
    labels = f.readlines()
labels = [x.strip() for x in labels]
magpie.train(
    '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/categories',
    labels,
    test_ratio=0.0,
    epochs=30)
Example #13
0
import io
import os
import unittest

from magpie import Magpie
PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
DATA_DIR = os.path.join(PROJECT_DIR, 'data', 'hep-categories')

# De-duplicate AND sort the labels. The iteration order of a set of strings
# changes between interpreter runs (hash randomisation), so list(set(...))
# would hand Magpie a differently ordered label list on every run.
with io.open(DATA_DIR + '.labels', 'r') as f:
    labels = sorted({line.rstrip('\n') for line in f})
print(len(labels))
print(labels)
path1 = PROJECT_DIR + '/here1.h5'
path2 = PROJECT_DIR + '/embedinghere'
path3 = PROJECT_DIR + '/scaler'

# Load the saved Keras model, word2vec model and scaler.
magpie = Magpie(keras_model=path1,
                word2vec_model=path2,
                scaler=path3,
                labels=labels)

predictions = magpie.predict_from_text(
    'toi bi dau bung kham benh het bao nhieu tien')
# Print the first three entries on one line; slicing avoids an IndexError
# when fewer than three predictions are returned.
print(' '.join(str(entry) for entry in predictions[:3]))
# The fourth label comes from the 10th command-line token (sys.argv[9]);
# labels1..labels3 are defined outside this excerpt.
labels4 = sys.argv[9]
labels = [  labels1, labels2, labels3, labels4 ]

#print (labels)
# Per-model output directory (model_name is defined outside this excerpt).
dirName = 'D:\\xampp\\htdocs\\mtlbl\\webpage\\admin\\models\\' + model_name

# os.mkdir raises if the directory already exists.
os.mkdir(dirName)

# Destination paths for the word2vec model, the scaler and the Keras model.
model_path = dirName + '\\' + model_name
scaler_path = dirName + '\\scaler_' + model_name
keras_path =  dirName + '\\keras_'+  model_name + '.h5'
#print (model_path)
#print (keras_path)

from magpie import Magpie

magpie = Magpie()

# data, vec_num, test_rat and ep are defined outside this excerpt.
magpie.init_word_vectors(data, vec_dim=vec_num)


magpie.train(data, labels, test_ratio= test_rat, epochs = ep)
# Author's note: more epochs mean the vectors are learned better and the loss gets lower.

#magpie.predict_from_text('ECB to reveal bad loan hurdles for euro zone bank test') #test

# Persist the three artefacts to the paths built above.
magpie.save_word2vec_model(model_path)
magpie.save_scaler(scaler_path, overwrite=True)
magpie.save_model(keras_path)

from magpie import Magpie

# Labels: one per line of categories.labels, trailing whitespace removed.
with open('categories.labels') as f:
    labels = [entry.rstrip() for entry in f]

# Load the saved model, embedding and scaler from current_model/.
magpie = Magpie(
    keras_model='current_model/model.h5',
    word2vec_model='current_model/embedding.pkl',
    scaler='current_model/scaler.pkl',
    labels=labels,
)

# Predict for one sample sentence and show the first five entries.
sample_text = '“Ich denke, Du wirst die Scheibe irgendwo innerhalb dieses Kreises treffen”.'
predicted = magpie.predict_from_text(sample_text)
print(predicted[:5])
Example #16
0
    '7116', '7117', '7118', '7119', '71110', '71111', '7121', '7122', '7123',
    '7124', '7125', '7126', '7127', '7128', '7129', '7131', '7132', '7133',
    '7134', '7135', '7136', '7137', '7138', '7139', '71310', '71311', '71312',
    '7141', '7142', '7151', '721', '722', '723', '724', '7311', '7312', '7313',
    '7314', '7315', '7316', '7321', '7322', '7323', '7324', '7325', '7326',
    '7331', '7332', '7333', '7334', '7335', '7336', '734', '74'
]

# Corpus directory handed to train_word2vec in the loop below; the
# alternatives are kept commented out.
#train_dir = 'C:\\magpie-master\\data\\hep-categories'
#train_dir = 'C:\\data\\Railway_Passenger_Transport'
train_dir = 'C:\\data\\nlp_chinese_corpus'

# Message prefixes.
Success = 'Success:'
error = 'error:'

# Shared Magpie instance; lossHistory is passed to magpie.train as a callback
# in the loop below.
magpie = Magpie()
lossHistory = LossHistory()
for EMBEDDING_SIZE in [250, 500]:
    for MIN_WORD_COUNT in [5, 10]:
        for WORD2VEC_CONTEXT in [5, 10]:
            magpie.train_word2vec(train_dir,
                                  vec_dim=EMBEDDING_SIZE,
                                  MWC=MIN_WORD_COUNT,
                                  w2vc=WORD2VEC_CONTEXT)
            magpie.fit_scaler('C:\\magpie-master\\data\\hep-categories')
            magpie.train('C:\\magpie-master\\data\\hep-categories',
                         labels,
                         callbacks=[lossHistory],
                         test_ratio=0.1,
                         epochs=20)  # 训练,20%数据作为测试数据,20轮
            lossHistory.loss_plot(
Example #17
0
from magpie import Magpie


# Load the saved Keras model, word2vec embeddings and scaler, together with
# the full list of label codes.
magpie = Magpie(
keras_model =  'save/model/best.h5',
word2vec_model =  'save/embeddings/best',
scaler = 'save/scaler/best',
labels = ['1111', '1112', '1113', '1114', '1115', '1116', '1117', '1118', '1121', '1122', '1123', '1124', '1131', '1132', '1133', '1134', '1135', '1141', '1142', '1143', '1144', '1151', '1152', '1153', '1154', '1211', '1212', '1213', '1214', '1215', '1216', '1217', '1218', '1219', '1221', '1222', '1223', '1231', '1232', '1233', '1234', '1235', '1241', '1242', '1243', '1251', '1311', '1312', '1313', '1314', '1321', '1322', '1323', '1331', '1332', '1333', '1334', '1341', '1342', '1343', '1344', '1345', '1351', '1411', '1421', '1431', '1441', '15', '2111', '2112', '2113', '2114', '2115', '2116', '2117', '2121', '2122', '2123', '2124', '2131', '2132', '2133', '2134', '2141', '2142', '2143', '2144', '2145', '2146', '2147', '2148', '2149', '21410', '2151', '2152', '2153', '2154', '2155', '2156', '2161', '2162', '2163', '2164', '2165', '2166', '2167', '2168', '2171', '2172', '2173', '2174', '2175', '2176', '2177', '2178', '2179', '21710', '21711', '2181', '2182', '2183', '2184', '2185', '2186', '2187', '2188', '2191', '2192', '2193', '2194', '2195', '2196', '221', '222', '223', '224', '2311', '2312', '2313', '2314', '2315', '2316', '2321', '2322', '2323', '2324', '24', '31', '32', '33', '34', '41', '42', '43', '51', '52', '53', '54', '55', '56', '57', '58', '61', '7111', '7112', '7113', '7114', '7115', '7116', '7117', '7118', '7119', '71110', '71111', '7121', '7122', '7123', '7124', '7125', '7126', '7127', '7128', '7129', '7131', '7132', '7133', '7134', '7135', '7136', '7137', '7138', '7139', '71310', '71311', '71312', '7141', '7142', '7151', '721', '722', '723', '724', '7311', '7312', '7313', '7314', '7315', '7316', '7321', '7322', '7323', '7324', '7325', '7326', '7331', '7332', '7333', '7334', '7335', '7336', '734', '74'])
# A single simulated test sample

text1 = '我想买车票'
mag1 = magpie.predict_from_text(text1)
# Show the type of the returned value, then the value itself.
print(type(mag1))
print(mag1)




'''
#也可以通过从txt文件中读取测试数据进行批量测试
 mag2 = magpie.predict_from_file('data/hep-categories/1002413.txt')
 print(mag2)
'''

#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@Author: njuselhx
@Time: 2021/1/21 7:01 PM
@File: train.py
@Software: PyCharm
"""
from magpie import Magpie
magpie = Magpie()
# The triple-quoted string below is a bare string expression with no effect
# at runtime; it holds a disabled variant of the script that uses the
# 'data/hep-categories-zh' corpus.
'''
magpie.init_word_vectors('data/hep-categories-zh', vec_dim=100)
labels = ['军事', '旅游', '政治']
magpie.train('data/hep-categories-zh', labels, test_ratio=0.2, epochs=100)
magpie.save_model('save/keras_model_zh.h5')
magpie.save_word2vec_model('save/word2vec_model_zh', overwrite=True)
magpie.save_scaler('save/scaler_zh', overwrite=True)
print(magpie.predict_from_text('特朗普在联合国大会发表演讲谈到这届美国政府成绩时,称他已经取得了美国历史上几乎最大的成就。随后大会现场传出了嘲笑声,特朗普立即回应道:“这是真的。”'))
'''

# Active run: initialise word vectors on the emotion corpus, train with 20%
# of the data held out for testing, then save the Keras model, word2vec model
# and scaler under save/.
magpie.init_word_vectors('data/emotion-categories', vec_dim=100)
labels = ['满意', '喜悦', '乐观', '愤怒', '悲哀', '恐惧', '厌恶', '焦虑', '怀疑']
magpie.train('data/emotion-categories', labels, test_ratio=0.2, epochs=2333)
magpie.save_model('save/emotion_keras_model.h5')
magpie.save_word2vec_model('save/emotion_word2vec_model', overwrite=True)
magpie.save_scaler('save/emotion_scaler', overwrite=True)
Example #19
0
from magpie import Magpie

# Categories the model is trained to assign.
labels = [
    "Astrophysics",
    "Experiment-HEP",
    "Gravitation and Cosmology",
    "Phenomenology-HEP",
    "Theory-HEP",
]

# Initialise word vectors on the corpus, then train with 20% of the data held
# out for testing.
magpie = Magpie()
magpie.init_word_vectors('data/hep-categories', vec_dim=100)
magpie.train('data/hep-categories', labels, test_ratio=0.2, epochs=30)

# Print the prediction for one sample sentence.
prediction = magpie.predict_from_text('Stephen Hawking studies black holes')
print(prediction)
Example #20
0
import os
import sys
sys.path.append(os.path.realpath(os.getcwd()))
sys.path.append("..")

from magpie import Magpie

# Load the model, embeddings and scaler saved under ../workspace.
magpie = Magpie(
    keras_model='../workspace/model.h5',
    word2vec_model='../workspace/embeddings',
    scaler='../workspace/scaler',
    labels=['旅游', '军事', '政治']
)

# A single simulated test sample
text = '特朗普在联合国大会发表演讲谈到这届美国政府成绩时,称他已经取得了美国历史上几乎最大的成就。随后大会现场传出了嘲笑声,特朗普立即回应道:“这是真的。”'
mag1 = magpie.predict_from_text(text)
print(mag1)

'''
#也可以通过从txt文件中读取测试数据进行批量测试
mag2 = magpie.predict_from_file('data/hep-categories/1002413.txt')
print(mag2)
'''
Example #21
0
import os
import sys

sys.path.append(os.path.realpath(os.getcwd()))
sys.path.append("..")

from magpie import Magpie

magpie = Magpie()
magpie.train_word2vec('../data/hep-categories', vec_dim=3)  # train a word2vec model
magpie.fit_scaler('../data/hep-categories')  # fit the scaler
magpie.init_word_vectors('../data/hep-categories', vec_dim=3)  # initialise the word vectors
labels = ['军事', '旅游', '政治']  # define all categories
magpie.train('../data/hep-categories', labels, test_ratio=0.2,
             epochs=20)  # train: 20% of the data is held out for testing, 20 epochs

# Save the trained model files
magpie.save_word2vec_model('../workspace/embeddings', overwrite=True)
magpie.save_scaler('../workspace/scaler', overwrite=True)
magpie.save_model('../workspace/model.h5')
Example #22
0
from magpie import Magpie

# Corpus directory used for word2vec training; the alternative is kept
# commented out.
#train_dir = 'C:\\data\\Railway_Passenger_Transport'
train_dir = 'data/hep-categories'
magpie = Magpie()
magpie.train_word2vec(train_dir, vec_dim=100, MWC=1, w2vc=5)
# NOTE(review): the next two calls hard-code 'data/hep-categories' instead of
# using train_dir; they diverge if train_dir is switched - confirm that this
# is intended.
magpie.fit_scaler('data/hep-categories')
magpie.init_word_vectors('data/hep-categories')

# Define all categories
labels = [
    '1111', '1112', '1113', '1114', '1115', '1116', '1117', '1118', '1121',
    '1122', '1123', '1124', '1131', '1132', '1133', '1134', '1135', '1141',
    '1142', '1143', '1144', '1151', '1152', '1153', '1154', '1211', '1212',
    '1213', '1214', '1215', '1216', '1217', '1218', '1219', '1221', '1222',
    '1223', '1231', '1232', '1233', '1234', '1235', '1241', '1242', '1243',
    '1251', '1311', '1312', '1313', '1314', '1321', '1322', '1323', '1331',
    '1332', '1333', '1334', '1341', '1342', '1343', '1344', '1345', '1351',
    '1411', '1421', '1431', '1441', '15', '2111', '2112', '2113', '2114',
    '2115', '2116', '2117', '2121', '2122', '2123', '2124', '2131', '2132',
    '2133', '2134', '2141', '2142', '2143', '2144', '2145', '2146', '2147',
    '2148', '2149', '21410', '2151', '2152', '2153', '2154', '2155', '2156',
    '2161', '2162', '2163', '2164', '2165', '2166', '2167', '2168', '2171',
    '2172', '2173', '2174', '2175', '2176', '2177', '2178', '2179', '21710',
    '21711', '2181', '2182', '2183', '2184', '2185', '2186', '2187', '2188',
    '2191', '2192', '2193', '2194', '2195', '2196', '221', '222', '223', '224',
    '2311', '2312', '2313', '2314', '2315', '2316', '2321', '2322', '2323',
    '2324', '24', '31', '32', '33', '34', '41', '42', '43', '51', '52', '53',
    '54', '55', '56', '57', '58', '61', '7111', '7112', '7113', '7114', '7115',
    '7116', '7117', '7118', '7119', '71110', '71111', '7121', '7122', '7123',
    '7124', '7125', '7126', '7127', '7128', '7129', '7131', '7132', '7133',
Example #23
0
def train_magpie(labels):
    """Train a new Magpie model on WRITE_SK_CAT_PATH and return it.

    Word vectors are initialised with VEC_DIM dimensions; training holds out
    20% of the data for testing and runs for EPOCHS epochs.
    """
    model = Magpie()
    corpus = WRITE_SK_CAT_PATH
    model.init_word_vectors(corpus, vec_dim=VEC_DIM)
    model.train(corpus, labels, test_ratio=0.2, epochs=EPOCHS)
    return model
class StockPrediction:
    """Turn Magpie predictions for a news text into a per-stock 0/1 result.

    Instance attributes are set at runtime: ``labels`` and ``model`` by
    ``__init__`` / ``load_model``, ``THRESHOLD`` by each call to ``run``.
    """

    def __init__(self):
        """Read the label file and load the most recently saved Magpie model."""
        # Index 0 holds an empty string, so labels read from the file start
        # at index 1; create_stocks_bool_json reports these list indices.
        labels = [""]
        # str.replace is used because string.replace() no longer exists in
        # Python 3; the with-block guarantees the file is closed.
        with open("stockLabels2.labels", "r") as label_file:
            for line in label_file:
                labels.append(line.replace('\n', ''))
        self.labels = labels

        path = os.path.join('', 'savedMagpieModels')
        latest_path = 'savedMagpieModels/' + find_latest(path)
        self.model = Magpie(keras_model=str(latest_path + '/model.h5'),
                            word2vec_model=str(latest_path + '/embedding'),
                            scaler=str(latest_path + '/scaler'),
                            labels=self.labels)

    def delete_model(self):
        """Drop the reference to the currently loaded model."""
        del self.model

    def load_model(self):
        """Reload the most recently saved Magpie model into ``self.model``.

        Returns:
            bool: True when the model was loaded, False when loading raised.
        """
        print('loading model ...')
        result = False
        path = os.path.join('', 'savedMagpieModels')
        try:
            latest_path = 'savedMagpieModels/' + find_latest(path)
            self.model = Magpie(keras_model=str(latest_path + '/model.h5'),
                                word2vec_model=str(latest_path + '/embedding'),
                                scaler=str(latest_path + '/scaler'),
                                labels=self.labels)
            print('2222')
            result = True
            print('model loaded')
        except Exception as exc:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt / SystemExit still propagate, and report the
            # cause instead of hiding it.
            print('ERR in stockPrediction.loadModel()')
            print(exc)
        return result

    def create_stocks_bool_json(self, magpie_result):
        """Convert (label, probability) pairs into a JSON-style dict.

        Args:
            magpie_result: iterable of ``(name, probability)`` pairs.

        Returns:
            dict: ``{"news_number": 100, "prediction": [...]}`` where each
            entry holds the stock ``name``, its ``index`` in ``self.labels``
            (as a string) and ``prediction`` (1 when the probability is
            strictly above ``self.THRESHOLD``, else 0).

        Raises:
            ValueError: if a name is not present in ``self.labels``.
        """
        data = []
        for stock in magpie_result:
            # Read the pair directly instead of parsing str(stock): the text
            # form breaks on names containing commas and on numeric types
            # whose repr is not a bare number. Using the pair's own
            # probability also stays correct if a name occurs twice.
            name, probability = stock[0], float(stock[1])
            data.append({
                "name": name,
                "index": str(self.labels.index(name)),
                "prediction": 1 if probability > self.THRESHOLD else 0,
            })
        return {"news_number": 100, "prediction": data}

    def run(self, news, threshold):
        """Predict for *news* and return the result dict for *threshold*."""
        self.THRESHOLD = threshold
        output = self.model.predict_from_text(news)
        return self.create_stocks_bool_json(output)
Example #25
0
def load_magpie(labels):
    """Return a Magpie instance built from the saved model, word2vec and scaler paths."""
    return Magpie(
        keras_model=SAVE_MAGPIE_MODEL_PATH,
        word2vec_model=SAVE_MAGPIE_WORD2VEC_PATH,
        scaler=SAVE_MAGPIE_SCALER_PATH,
        labels=labels,
    )
Example #26
0
        with open(address + '/' + Id + '.txt', "a") as file:
            file.write(Text)

        with open(address + '/' + Id + '.lab', "a") as file:
            file.write(label)

        print("Data generation finished.")


# Directory passed to data_prep and to the Magpie calls below.
address = "/home/ubuntu/toxic/magpie_data"

#data_prep("/Users/wangergou/Downloads/kaggle/Toxic_Comment_Classification/Magpie/data/")

data_prep(address)

magpie = Magpie()

# NOTE(review): the message says "Loading", but the call below trains a
# word2vec model on the data directory.
print("Loading word vector... \n")

magpie.train_word2vec(address, vec_dim=100)

print("Initializing data... \n")

magpie.init_word_vectors(address, vec_dim=100)

# Label names used for training.
labels = [
    'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate'
]

print("Training starts... \n")
Example #27
0
from magpie import Magpie

magpie = Magpie()

# Corpus location (alternative kept for reference: 'data/hep-categories')
train_dir = 'C:\\data\\Railway_Passenger_Transport'

# word2vec hyper-parameters
EMBEDDING_SIZE = 50
MIN_WORD_COUNT = 1
WORD2VEC_CONTEXT = 1

magpie.train_word2vec(
    train_dir,
    vec_dim=EMBEDDING_SIZE,
    MWC=MIN_WORD_COUNT,
    w2vc=WORD2VEC_CONTEXT,
)

# "<last 3 chars of train_dir>_<size>_<min count>_<context>" names the saved
# embedding file and is echoed in the success message.
run_tag = '_'.join([
    train_dir[-3:],
    str(EMBEDDING_SIZE),
    str(MIN_WORD_COUNT),
    str(WORD2VEC_CONTEXT),
])
magpie.save_word2vec_model('save/embeddings/' + run_tag, overwrite=True)
print(run_tag + '   Success!!!')
from magpie import Magpie
import os

folder = "magpie_data"
# Join with a path separator: plain concatenation produced
# "magpie_dataaskubuntu.labels", unlike the other paths below, which all
# live inside the folder. The with-block closes the file after reading.
with open(os.path.join(folder, "askubuntu.labels"), 'r') as labf:
    labels = labf.read()
labels = labels.split('\n')
# Keep only entries longer than one character (this also drops empty lines).
labels = [l for l in labels if len(l) > 1]

print("loading model")
magpie = Magpie(keras_model=folder + '/model.h5',
                word2vec_model=folder + '/wordvec',
                scaler=folder + '/scalervec',
                labels=labels)
#print(labels)
Example #29
0
def train_dl(save, vec_dim, epochs):
    """
    Train a Magpie model on the categories corpus and return it.

    save: when truthy, write the embeddings, scaler and model to disk
    vec_dim: word-vector dimension passed to init_word_vectors
    epochs: number of training epochs
    """
    model = Magpie()

    # model.train_word2vec('/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/categories', vec_dim=100)
    # model.fit_scaler('/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/categories')
    model.init_word_vectors(
        '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/categories',
        vec_dim=vec_dim)

    # One label per line, surrounding whitespace removed
    with open('data/categories.labels') as label_file:
        raw_lines = label_file.readlines()
    labels = [entry.strip() for entry in raw_lines]

    model.train(
        '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/categories',
        labels,
        test_ratio=0.0,
        epochs=epochs)

    if not save:
        return model

    # Save model: embeddings, scaler (overwriting an existing one) and network
    model.save_word2vec_model(
        '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/save/embeddings/here'
    )
    model.save_scaler(
        '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/save/scaler/here',
        overwrite=True)
    model.save_model(
        '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/save/model/here.h5'
    )
    return model
#print (labels)
# Directory of the saved model (model_name is defined outside this excerpt).
dirName = 'D:\\xampp\\htdocs\\mtlbl\\webpage\\models\\' + model_name

#os.mkdir(dirName)

# Paths of the saved word2vec model, scaler and Keras model.
model_path = dirName + '\\' + model_name
scaler_path = dirName + '\\scaler_' + model_name
keras_path = dirName + '\\keras_' + model_name + '.h5'

#print (model_path)
#print (keras_path)
#print (scaler_path)

from magpie import Magpie

# Build the instance once, directly from the saved artefacts; an empty
# Magpie() created first would be discarded immediately.
magpie = Magpie(
    keras_model=keras_path,
    word2vec_model=model_path,
    scaler=scaler_path,
    labels=labelsa,
)
#filePath = 'D:\\xampp\\htdocs\\mtlbl\\webpage\\admin\\classify' + model_name + '\\' + '.txt'
# File to classify (val is defined outside this excerpt).
path = 'D:\\xampp\\htdocs\\mtlbl\\webpage\\admin\\classify\\' + model_name + '\\' + val
#print(path)
print(magpie.predict_from_file(path))  # test

#magpie.predict_from_text('Manchester United vs Chelsea')

Example #31
0
        alllabel.append(line)
    return alllabel


# Step 4: train the model and predict the labels of the test set
if __name__ == '__main__':

    labels = getlabel('/home/ydm/ren/remote/multiLabel/data/labels.txt')
    # magpie = Magpie(
    #     keras_model='/home/ydm/ren/remote/multiLabel/data/here.h5',
    #     word2vec_model='/home/ydm/ren/remote/multiLabel/data/word2vec_mode',
    #     scaler='/home/ydm/ren/remote/multiLabel/data/scaler',
    #     labels=labels
    # )

    magpie = Magpie()
    magpie.init_word_vectors(
        '/home/ydm/ren/remote/multiLabel/data/hep-categories', vec_dim=100)

    print(len(labels))
    magpie.train('/home/ydm/ren/remote/multiLabel/data/hep-categories',
                 labels,
                 epochs=30,
                 batch_size=128)
    magpie.save_word2vec_model(
        '/home/ydm/ren/remote/multiLabel/data/word2vec_mode_place')
    magpie.save_scaler('/home/ydm/ren/remote/multiLabel/data/scaler_place',
                       overwrite=True)
    magpie.save_model('/home/ydm/ren/remote/multiLabel/data/model_place.h5')

    alltest = getlabel(