def __init__(self):
     """Load the label list and the most recently saved Magpie model.

     Labels are read from ``stockLabels2.labels`` (one per line, newline
     removed).  The model files are taken from the directory that
     ``find_latest`` selects under ``savedMagpieModels``.
     """
     # Index 0 stays an empty placeholder, exactly as before
     # (presumably so real labels start at index 1 -- confirm with callers).
     labels = [""]
     # ``with`` closes the file; ``str.replace`` works on Python 2 and 3,
     # unlike the removed ``string.replace`` helper.
     with open("stockLabels2.labels", "r") as label_file:
         for line in label_file:
             labels.append(line.replace('\n', ''))
     self.labels = labels

     path = os.path.join('', 'savedMagpieModels')
     latest_path = 'savedMagpieModels/' + find_latest(path)
     self.model = Magpie(keras_model=str(latest_path + '/model.h5'),
                         word2vec_model=str(latest_path + '/embedding'),
                         scaler=str(latest_path + '/scaler'),
                         labels=self.labels)
 def load_model(self):
     """Rebuild ``self.model`` from the latest saved model directory.

     Returns:
         bool: True when the model was constructed, False when any error
         occurred (the error is printed, not raised).
     """
     print('loading model ...')
     result = False
     path = os.path.join('', 'savedMagpieModels')
     try:
         latest_path = 'savedMagpieModels/' + find_latest(path)
         self.model = Magpie(keras_model=str(latest_path + '/model.h5'),
                             word2vec_model=str(latest_path + '/embedding'),
                             scaler=str(latest_path + '/scaler'),
                             labels=self.labels)
         print('2222')
         result = True
         print('model loaded')
     except Exception as exc:
         # Catch Exception (not a bare except) so Ctrl-C / SystemExit still
         # propagate, and show the cause instead of hiding it.
         print('ERR in stockPrediction.loadModel()')
         print(repr(exc))
     return result
예제 #3
0
파일: test_api.py 프로젝트: netrasys/magpie
	def test_rnn_batch_train(self):
		"""Batch-train an RNN model on the sample data and sanity-check one prediction."""
		# Distinct labels, one per line of the .labels file
		with io.open(DATA_DIR + '.labels', 'r') as label_file:
			known_labels = set(raw.rstrip('\n') for raw in label_file)

		# Initialise the word vectors, then train for a few epochs
		classifier = Magpie()
		classifier.init_word_vectors(DATA_DIR, vec_dim=100)
		train_history = classifier.batch_train(
			DATA_DIR, known_labels, nn_model='rnn', epochs=3)
		assert train_history is not None

		# The prediction must contain one entry per known label
		scored = classifier.predict_from_text("Black holes are cool!")
		assert len(scored) == len(known_labels)

		# Each entry pairs a known label with a value inside [0, 1]
		for name, score in scored:
			assert name in known_labels
			assert 0 <= score <= 1
예제 #4
0
def reinitialize():
    """
    Rebuild a Magpie instance from the saved model, embeddings and scaler.

    The label list comes from the categories.labels file: one label per
    line, with surrounding whitespace stripped.
    """
    labels_path = '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/categories.labels'
    with open(labels_path) as label_file:  # job labels
        labels = [entry.strip() for entry in label_file]

    return Magpie(
        keras_model=
        '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/save/model/here.h5',
        word2vec_model=
        '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/save/embeddings/here',
        scaler=
        '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/save/scaler/here',
        labels=labels)
예제 #5
0
    def test_rnn_batch_train(self):
        """Batch-train an RNN model on the sample data and sanity-check one prediction."""
        # Distinct labels, one per line of the .labels file
        with io.open(DATA_DIR + '.labels', 'r') as label_file:
            known_labels = set(raw.rstrip('\n') for raw in label_file)

        # Initialise the word vectors, then train for a few epochs
        classifier = Magpie()
        classifier.init_word_vectors(DATA_DIR, vec_dim=100)
        train_history = classifier.batch_train(
            DATA_DIR, known_labels, nn_model='rnn', epochs=3)
        assert train_history is not None

        # The prediction must contain one entry per known label
        scored = classifier.predict_from_text("Black holes are cool!")
        assert len(scored) == len(known_labels)

        # Each entry pairs a known label with a value inside [0, 1]
        for name, score in scored:
            assert name in known_labels
            assert 0 <= score <= 1
예제 #6
0
파일: model.py 프로젝트: nhannt315/MfCareGR
from __future__ import print_function
from magpie import Magpie
import os

# NOTE(review): dir_path is computed here but not used by the lines below.
dir_path = os.path.dirname(os.path.realpath(os.getcwd()))
dir_path = os.path.join(dir_path, 'text-classification')

# Read one label per line; the context manager closes the file, which the
# previous bare open(...).read() never did.
with open('thread_labels.labels', 'r') as labels_file:
    labels = labels_file.read().splitlines()

# Restore the trained model from the saved artefacts.
magpie = Magpie(keras_model='saved_data/model_main',
                word2vec_model='saved_data/word2_vec_model',
                scaler='saved_data/scaler',
                labels=labels)

result = magpie.predict_from_file('test.txt')
예제 #7
0
    def test_cnn_train(self):
        """Train a CNN model on DATA_DIR, save its artefacts and print one prediction."""
        print(PROJECT_DIR)
        print(DATA_DIR)

        # De-duplicated labels, one per line of the .labels file
        with io.open(DATA_DIR + '.labels', 'r') as label_file:
            labels = list(set([raw.rstrip('\n') for raw in label_file]))

        # Train the embeddings, then the classifier itself
        classifier = Magpie()
        classifier.train_word2vec(DATA_DIR, vec_dim=300)
        print("done2")

        print("done3")
        classifier.init_word_vectors(DATA_DIR, vec_dim=300)
        classifier.train(
            DATA_DIR, labels, nn_model='cnn', test_ratio=0.2, epochs=30)

        # Persist embeddings, scaler and network under the project root
        keras_file = PROJECT_DIR + '/here1.h5'
        embedding_file = PROJECT_DIR + '/embedinghere'
        scaler_file = PROJECT_DIR + '/scaler'
        classifier.save_word2vec_model(embedding_file)
        classifier.save_scaler(scaler_file, overwrite=True)
        classifier.save_model(keras_file)
        print("thuc hien test")

        # Do a simple prediction
        sample_prediction = classifier.predict_from_text(
            'cho em hỏi về lịch khám của bác_sỹ đào việt_hằng và số điện_thoại')
        print(sample_prediction)
예제 #8
0
from magpie import Magpie
import time

# Time word2vec training for embedding sizes 10, 20, ..., 500 and append
# "<size>,<seconds>" to save/embeddings/here.txt after each run.
try:
    timer = time.perf_counter  # time.clock() was removed in Python 3.8
except AttributeError:
    timer = time.clock  # interpreters that predate perf_counter

count = 10
magpie = Magpie()
while count <= 500:
    start = timer()
    magpie.train_word2vec('data/hep-categories', vec_dim=count)
    magpie.save_word2vec_model('save/embeddings/here' + str(count),
                               overwrite=True)
    end = timer()
    runtime = end - start
    print(str(count) + ',' + str(runtime))
    # The context manager closes the log even if the write fails.
    with open('save/embeddings/here.txt', 'a') as log_file:
        log_file.write('\n' + str(count) + ',' + str(runtime))
    count = count + 10
def _remove_files_in(folder):
    """Delete every regular file directly inside ``folder``; sub-directories are kept."""
    for entry in os.listdir(folder):
        file_path = os.path.join(folder, entry)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
        except Exception as e:
            # Best effort: report the failure and carry on with the next file.
            print(e)


def Deep_learning(df, x_test, target):
    """Train a Magpie model on ``df`` and predict labels for ``x_test``.

    Each row of ``df`` is written to ``<index>.txt`` (the ``target`` text,
    non-ASCII characters dropped) and ``<index>.lab`` (one label per line,
    taken from the row's ``group_id`` value) inside the hard-coded
    ``test_data/categories/`` directory.  The set of all labels goes into
    ``test_data/categories.labels``.

    Args:
        df: DataFrame with a ``target`` text column and a ``group_id`` column
            whose values are Python-literal strings of label collections.
        x_test: Test titles; wrapped with ``np.atleast_2d`` into a frame with
            a single ``'title'`` column.
        target: Column name of the text to train on and predict from.

    Returns:
        dict: preprocessed title -> list of the first elements of the
        ``predict_from_text`` entries whose second element is >= 0.25.
    """
    base_dir = '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/test_data/'
    categories_dir = base_dir + 'categories/'
    corpus_path = base_dir + 'categories'

    # Start from a clean slate: drop files left over from a previous run.
    _remove_files_in(base_dir)
    _remove_files_in(categories_dir)

    lab_list = []
    for i, _row in df.iterrows():
        # Kept from the original: stop once the index exceeds the frame length.
        if i > len(df):
            break

        file_name = categories_dir + str(i) + '.txt'
        lab_name = categories_dir + str(i) + '.lab'

        title_data = df.at[i, target].encode('ascii',
                                             'ignore').decode('ascii')
        with open(file_name, 'w') as text_file:
            text_file.write(title_data)

        # SECURITY NOTE(review): eval() executes whatever is stored in
        # 'group_id'.  If this data can come from an untrusted source, switch
        # to ast.literal_eval.
        row_labels = list(eval(df.at[i, 'group_id']))
        lab_list.extend(row_labels)
        if row_labels:
            # One open per row instead of one per label.
            with open(lab_name, 'a') as lab_file:
                lab_file.writelines(str(j) + '\n' for j in row_labels)

    lab_set = list(set(lab_list))
    file = base_dir + 'categories' + '.labels'
    if lab_set:
        with open(file, 'a') as labels_file:
            labels_file.writelines(str(i) + '\n' for i in lab_set)

    magpie = Magpie()
    magpie.init_word_vectors(corpus_path, vec_dim=100)

    # NOTE(review): this path is relative to the current working directory,
    # while the labels above were written to an absolute path -- they match
    # only when the script is run from the ImpactPool project root.
    with open('test_data/categories.labels') as f:
        labels = [x.strip() for x in f]
    magpie.train(corpus_path, labels, test_ratio=0.0, epochs=20)

    results_dl = {}

    # NOTE(review): the frame has a single 'title' column but is indexed with
    # ``target`` below, so this presumably requires target == 'title'.
    df_test = pd.DataFrame(np.atleast_2d(x_test), columns=['title'])

    for i, _row in df_test.iterrows():
        title_data = df_test.at[i, target].encode('ascii',
                                                  'ignore').decode('ascii')
        title_data = preprocess(title_data)
        df_test.at[i, target] = title_data

        # Keep only the entries whose second element reaches 0.25.
        pre_label = [
            s[0] for s in magpie.predict_from_text(title_data) if s[1] >= 0.25
        ]
        results_dl[title_data] = pre_label
    return results_dl
예제 #10
0
    '54', '55', '56', '57', '58', '61', '7111', '7112', '7113', '7114', '7115',
    '7116', '7117', '7118', '7119', '71110', '71111', '7121', '7122', '7123',
    '7124', '7125', '7126', '7127', '7128', '7129', '7131', '7132', '7133',
    '7134', '7135', '7136', '7137', '7138', '7139', '71310', '71311', '71312',
    '7141', '7142', '7151', '721', '722', '723', '724', '7311', '7312', '7313',
    '7314', '7315', '7316', '7321', '7322', '7323', '7324', '7325', '7326',
    '7331', '7332', '7333', '7334', '7335', '7336', '734', '74'
]

#train_dir = 'data/hep-categories'    #2200条数据存放目录
train_dir = 'C:\\data\\Railway_Passenger_Transport'  #2200条数据,以及规章文电存放目录

Success = 'Success:'
error = 'error:'

magpie = Magpie()
lossHistory = LossHistory()
for EMBEDDING_SIZE in [100, 200, 300, 400, 500]:
    try:
        for MIN_WORD_COUNT in [4, 5, 6, 7, 8]:
            for WORD2VEC_CONTEXT in [4, 5, 6, 7, 8]:
                if os.path.exists('log/' + train_dir[-3:] + '_' +
                                  str(EMBEDDING_SIZE) + '_' +
                                  str(MIN_WORD_COUNT) + '_' +
                                  str(WORD2VEC_CONTEXT) + '.txt'):
                    continue
                magpie.train_word2vec(train_dir,
                                      vec_dim=EMBEDDING_SIZE,
                                      MWC=MIN_WORD_COUNT,
                                      w2vc=WORD2VEC_CONTEXT)
                magpie.fit_scaler('C:\\magpie-master\\data\\hep-categories')
예제 #11
0
# magpie = Magpie(
#     keras_model='save/keras_model_zh.h5',
#     word2vec_model='save/word2vec_model_zh',
#     scaler='save/scaler_zh',
#     labels=labels
# )
# # print(magpie.predict_from_file('data/hep-categories/1002413.txt'))
# print(magpie.predict_from_text('特朗普在联合国大会发表演讲谈到这届美国政府成绩时,称他已经取得了美国历史上几乎最大的成就。随后大会现场\
# 传出了嘲笑声,特朗普立即回应道:“这是真的。”此外,美军方也有专门的低轨甚至超低轨小型卫星星座计划,这些卫星不仅可用于通信和侦察,还可用于支援反高超音速导弹作战。'))
# print(magpie.predict_from_text('此外,美军方也有专门的低轨甚至超低轨小型卫星星座计划,这些卫星不仅可用于通信和侦察,还可用于支援反高超\
# 音速导弹作战。特朗普在联合国大会发表演讲谈到这届美国政府成绩时,称他已经取得了美国历史上几乎最大的成就。随后大会现场传出了嘲笑声,特朗普立即回应道:“这是真的。”'))

labels = ['满意', '喜悦', '乐观', '愤怒', '悲哀', '恐惧', '厌恶', '焦虑', '怀疑']
magpie = Magpie(
    keras_model='save/emotion_keras_model.h5',
    word2vec_model='save/emotion_word2vec_model',
    scaler='save/emotion_scaler',
    labels=labels
)
# print(magpie.predict_from_text('害怕,恐怖如斯'))
# print(magpie.predict_from_text('气死我了'))
# print(magpie.predict_from_text('加油,很快就会好的'))
# print(magpie.predict_from_text('希望早日康复'))
# print(magpie.predict_from_text('英国航母战斗群已于1月达到初始作战能力,这标志着英国海军投射力量能力的一个阶段性变化。'))
# print(magpie.predict_from_text('近年来伊朗、叙利亚、缅甸正逐渐成为朝鲜核技术和导弹技术出口的主要客户,其中伊朗所占的比重较高。'))

emotion_dict = {
    '满意': 0,
    '喜悦': 0,
    '乐观': 0,
    '愤怒': 0,
    '悲哀': 0,
            the_file.write(title_data)

        row_data = eval(df.at[i, 'group_id'])
        for j in row_data:
            lab_list.append(j)
            with open(lab_name, 'a') as the_file:
                the_file.write(str(j) + '\n')
lab_set = list(set(lab_list))
file = '/Users/sunxuan/Documents/Impactpool/seniority analysis/googlecloud_magpie/data/' + 'categories' + '.labels'
for i in lab_set:
    with open(file, 'a') as the_file:
        the_file.write(str(i) + '\n')
"""
train process
"""
magpie = Magpie()
# magpie.train_word2vec('/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/categories', vec_dim=100)
# magpie.fit_scaler('/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/categories')

magpie.init_word_vectors(
    '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/categories',
    vec_dim=100)

with open('data/categories.labels') as f:
    labels = f.readlines()
labels = [x.strip() for x in labels]
magpie.train(
    '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/categories',
    labels,
    test_ratio=0.0,
    epochs=30)
예제 #13
0
import io
import os
import unittest

from magpie import Magpie
# Project root is three directory levels above this file.
PROJECT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
DATA_DIR = os.path.join(PROJECT_DIR, 'data', 'hep-categories')

# De-duplicated labels, one per line of the .labels file
with io.open(DATA_DIR + '.labels', 'r') as f:
    labels = list(set([line.rstrip('\n') for line in f]))
print(len(labels))
print(labels)

# Locations of the saved network, embeddings and scaler
path1 = PROJECT_DIR + '/here1.h5'
path2 = PROJECT_DIR + '/embedinghere'
path3 = PROJECT_DIR + '/scaler'

magpie = Magpie(
    keras_model=path1,
    word2vec_model=path2,
    scaler=path3,
    labels=labels,
)

# Show the first three prediction entries
predictions = magpie.predict_from_text(
    'toi bi dau bung kham benh het bao nhieu tien')
print(predictions[0], predictions[1], predictions[2])
labels4 = sys.argv[9]
labels = [  labels1, labels2, labels3, labels4 ]

#print (labels)
dirName = 'D:\\xampp\\htdocs\\mtlbl\\webpage\\admin\\models\\' + model_name

os.mkdir(dirName)

model_path = dirName + '\\' + model_name
scaler_path = dirName + '\\scaler_' + model_name
keras_path =  dirName + '\\keras_'+  model_name + '.h5'
#print (model_path)
#print (keras_path)

from magpie import Magpie

magpie = Magpie()

magpie.init_word_vectors(data, vec_dim=vec_num)


magpie.train(data, labels, test_ratio= test_rat, epochs = ep)
#more epoch = more understanding of vector and lower lose rate

#magpie.predict_from_text('ECB to reveal bad loan hurdles for euro zone bank test') #test

magpie.save_word2vec_model(model_path)
magpie.save_scaler(scaler_path, overwrite=True)
magpie.save_model(keras_path)

예제 #15
0
from magpie import Magpie

# One label per line, trailing whitespace removed
with open('categories.labels') as f:
    labels = [line.rstrip() for line in f]

# Restore the trained model from the current_model directory
magpie = Magpie(
    keras_model='current_model/model.h5',
    word2vec_model='current_model/embedding.pkl',
    scaler='current_model/scaler.pkl',
    labels=labels,
)

predicted = magpie.predict_from_text(
    '“Ich denke, Du wirst die Scheibe irgendwo innerhalb dieses Kreises treffen”.')
# Only the first five prediction entries are shown
print(predicted[:5])
예제 #16
0
파일: test1.py 프로젝트: zhang45258/magpie
    '7116', '7117', '7118', '7119', '71110', '71111', '7121', '7122', '7123',
    '7124', '7125', '7126', '7127', '7128', '7129', '7131', '7132', '7133',
    '7134', '7135', '7136', '7137', '7138', '7139', '71310', '71311', '71312',
    '7141', '7142', '7151', '721', '722', '723', '724', '7311', '7312', '7313',
    '7314', '7315', '7316', '7321', '7322', '7323', '7324', '7325', '7326',
    '7331', '7332', '7333', '7334', '7335', '7336', '734', '74'
]

#train_dir = 'C:\\magpie-master\\data\\hep-categories'
#train_dir = 'C:\\data\\Railway_Passenger_Transport'
train_dir = 'C:\\data\\nlp_chinese_corpus'

Success = 'Success:'
error = 'error:'

magpie = Magpie()
lossHistory = LossHistory()
for EMBEDDING_SIZE in [250, 500]:
    for MIN_WORD_COUNT in [5, 10]:
        for WORD2VEC_CONTEXT in [5, 10]:
            magpie.train_word2vec(train_dir,
                                  vec_dim=EMBEDDING_SIZE,
                                  MWC=MIN_WORD_COUNT,
                                  w2vc=WORD2VEC_CONTEXT)
            magpie.fit_scaler('C:\\magpie-master\\data\\hep-categories')
            magpie.train('C:\\magpie-master\\data\\hep-categories',
                         labels,
                         callbacks=[lossHistory],
                         test_ratio=0.1,
                         epochs=20)  # 训练,20%数据作为测试数据,20轮
            lossHistory.loss_plot(
예제 #17
0
from magpie import Magpie


magpie = Magpie(
keras_model =  'save/model/best.h5',
word2vec_model =  'save/embeddings/best',
scaler = 'save/scaler/best',
labels = ['1111', '1112', '1113', '1114', '1115', '1116', '1117', '1118', '1121', '1122', '1123', '1124', '1131', '1132', '1133', '1134', '1135', '1141', '1142', '1143', '1144', '1151', '1152', '1153', '1154', '1211', '1212', '1213', '1214', '1215', '1216', '1217', '1218', '1219', '1221', '1222', '1223', '1231', '1232', '1233', '1234', '1235', '1241', '1242', '1243', '1251', '1311', '1312', '1313', '1314', '1321', '1322', '1323', '1331', '1332', '1333', '1334', '1341', '1342', '1343', '1344', '1345', '1351', '1411', '1421', '1431', '1441', '15', '2111', '2112', '2113', '2114', '2115', '2116', '2117', '2121', '2122', '2123', '2124', '2131', '2132', '2133', '2134', '2141', '2142', '2143', '2144', '2145', '2146', '2147', '2148', '2149', '21410', '2151', '2152', '2153', '2154', '2155', '2156', '2161', '2162', '2163', '2164', '2165', '2166', '2167', '2168', '2171', '2172', '2173', '2174', '2175', '2176', '2177', '2178', '2179', '21710', '21711', '2181', '2182', '2183', '2184', '2185', '2186', '2187', '2188', '2191', '2192', '2193', '2194', '2195', '2196', '221', '222', '223', '224', '2311', '2312', '2313', '2314', '2315', '2316', '2321', '2322', '2323', '2324', '24', '31', '32', '33', '34', '41', '42', '43', '51', '52', '53', '54', '55', '56', '57', '58', '61', '7111', '7112', '7113', '7114', '7115', '7116', '7117', '7118', '7119', '71110', '71111', '7121', '7122', '7123', '7124', '7125', '7126', '7127', '7128', '7129', '7131', '7132', '7133', '7134', '7135', '7136', '7137', '7138', '7139', '71310', '71311', '71312', '7141', '7142', '7151', '721', '722', '723', '724', '7311', '7312', '7313', '7314', '7315', '7316', '7321', '7322', '7323', '7324', '7325', '7326', '7331', '7332', '7333', '7334', '7335', '7336', '734', '74'])
# Single simulated test sample

# Run one prediction on a short text and print the result's type and value.
text1 = '我想买车票'
mag1 = magpie.predict_from_text(text1)
print(type(mag1))
print(mag1)




'''
#也可以通过从txt文件中读取测试数据进行批量测试
 mag2 = magpie.predict_from_file('data/hep-categories/1002413.txt')
 print(mag2)
'''

예제 #18
0
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@Author: njuselhx
@Time: 2021/1/21 下午7:01
@File: train.py
@Software: PyCharm
"""
from magpie import Magpie
magpie = Magpie()
'''
magpie.init_word_vectors('data/hep-categories-zh', vec_dim=100)
labels = ['军事', '旅游', '政治']
magpie.train('data/hep-categories-zh', labels, test_ratio=0.2, epochs=100)
magpie.save_model('save/keras_model_zh.h5')
magpie.save_word2vec_model('save/word2vec_model_zh', overwrite=True)
magpie.save_scaler('save/scaler_zh', overwrite=True)
print(magpie.predict_from_text('特朗普在联合国大会发表演讲谈到这届美国政府成绩时,称他已经取得了美国历史上几乎最大的成就。随后大会现场传出了嘲笑声,特朗普立即回应道:“这是真的。”'))
'''

# Train the emotion classifier on data/emotion-categories and save all three
# artefacts (network, word2vec model, scaler) under save/.
magpie.init_word_vectors('data/emotion-categories', vec_dim=100)
# The nine emotion labels the model is trained on.
labels = ['满意', '喜悦', '乐观', '愤怒', '悲哀', '恐惧', '厌恶', '焦虑', '怀疑']
# NOTE(review): epochs=2333 is unusually high -- confirm it is intentional.
magpie.train('data/emotion-categories', labels, test_ratio=0.2, epochs=2333)
magpie.save_model('save/emotion_keras_model.h5')
magpie.save_word2vec_model('save/emotion_word2vec_model', overwrite=True)
magpie.save_scaler('save/emotion_scaler', overwrite=True)
예제 #19
0
파일: run.py 프로젝트: wenwei-dev/magpie
from magpie import Magpie

# Minimal end-to-end run: initialise word vectors, train, print one prediction.
magpie = Magpie()
magpie.init_word_vectors('data/hep-categories', vec_dim=100)
# The labels passed to training.
labels = [
    "Astrophysics",
    "Experiment-HEP",
    "Gravitation and Cosmology",
    "Phenomenology-HEP",
    "Theory-HEP",
]
# test_ratio=0.2 presumably holds out 20% of the data for testing -- confirm
# against the Magpie documentation.
magpie.train('data/hep-categories', labels, test_ratio=0.2, epochs=30)
print(magpie.predict_from_text('Stephen Hawking studies black holes'))
예제 #20
0
import os
import sys
sys.path.append(os.path.realpath(os.getcwd()))
sys.path.append("..")

from magpie import Magpie

# Restore the model saved under ../workspace with its three labels.
magpie = Magpie(
    keras_model='../workspace/model.h5',
    word2vec_model='../workspace/embeddings',
    scaler='../workspace/scaler',
    labels=['旅游', '军事', '政治']
)

# Single simulated test sample
text = '特朗普在联合国大会发表演讲谈到这届美国政府成绩时,称他已经取得了美国历史上几乎最大的成就。随后大会现场传出了嘲笑声,特朗普立即回应道:“这是真的。”'
mag1 = magpie.predict_from_text(text)
print(mag1)

'''
#也可以通过从txt文件中读取测试数据进行批量测试
mag2 = magpie.predict_from_file('data/hep-categories/1002413.txt')
print(mag2)
'''
예제 #21
0
파일: train-1.py 프로젝트: SINeWang/magpie
import os
import sys

sys.path.append(os.path.realpath(os.getcwd()))
sys.path.append("..")

from magpie import Magpie

magpie = Magpie()
magpie.train_word2vec('../data/hep-categories', vec_dim=3)  # train a word2vec model
magpie.fit_scaler('../data/hep-categories')  # fit the scaler
magpie.init_word_vectors('../data/hep-categories', vec_dim=3)  # initialise the word vectors
labels = ['军事', '旅游', '政治']  # all categories
magpie.train('../data/hep-categories', labels, test_ratio=0.2,
             epochs=20)  # train for 20 epochs with test_ratio=0.2 (the old comment said 5 epochs)

# Save the trained model files
magpie.save_word2vec_model('../workspace/embeddings', overwrite=True)
magpie.save_scaler('../workspace/scaler', overwrite=True)
magpie.save_model('../workspace/model.h5')
예제 #22
0
from magpie import Magpie

#train_dir = 'C:\\data\\Railway_Passenger_Transport'
train_dir = 'data/hep-categories'
magpie = Magpie()
magpie.train_word2vec(train_dir, vec_dim=100, MWC=1, w2vc=5)
magpie.fit_scaler('data/hep-categories')
magpie.init_word_vectors('data/hep-categories')

#定义所有类别
labels = [
    '1111', '1112', '1113', '1114', '1115', '1116', '1117', '1118', '1121',
    '1122', '1123', '1124', '1131', '1132', '1133', '1134', '1135', '1141',
    '1142', '1143', '1144', '1151', '1152', '1153', '1154', '1211', '1212',
    '1213', '1214', '1215', '1216', '1217', '1218', '1219', '1221', '1222',
    '1223', '1231', '1232', '1233', '1234', '1235', '1241', '1242', '1243',
    '1251', '1311', '1312', '1313', '1314', '1321', '1322', '1323', '1331',
    '1332', '1333', '1334', '1341', '1342', '1343', '1344', '1345', '1351',
    '1411', '1421', '1431', '1441', '15', '2111', '2112', '2113', '2114',
    '2115', '2116', '2117', '2121', '2122', '2123', '2124', '2131', '2132',
    '2133', '2134', '2141', '2142', '2143', '2144', '2145', '2146', '2147',
    '2148', '2149', '21410', '2151', '2152', '2153', '2154', '2155', '2156',
    '2161', '2162', '2163', '2164', '2165', '2166', '2167', '2168', '2171',
    '2172', '2173', '2174', '2175', '2176', '2177', '2178', '2179', '21710',
    '21711', '2181', '2182', '2183', '2184', '2185', '2186', '2187', '2188',
    '2191', '2192', '2193', '2194', '2195', '2196', '221', '222', '223', '224',
    '2311', '2312', '2313', '2314', '2315', '2316', '2321', '2322', '2323',
    '2324', '24', '31', '32', '33', '34', '41', '42', '43', '51', '52', '53',
    '54', '55', '56', '57', '58', '61', '7111', '7112', '7113', '7114', '7115',
    '7116', '7117', '7118', '7119', '71110', '71111', '7121', '7122', '7123',
    '7124', '7125', '7126', '7127', '7128', '7129', '7131', '7132', '7133',
예제 #23
0
def train_magpie(labels):
    """Create a Magpie model, initialise its word vectors and train it on ``labels``.

    Uses WRITE_SK_CAT_PATH as the data location, VEC_DIM as the vector size
    and EPOCHS as the epoch count; returns the trained model.
    """
    model = Magpie()
    model.init_word_vectors(WRITE_SK_CAT_PATH, vec_dim=VEC_DIM)
    model.train(
        WRITE_SK_CAT_PATH,
        labels,
        test_ratio=0.2,
        epochs=EPOCHS,
    )
    return model
class StockPrediction:
    """Wrap a saved Magpie model and turn its predictions into a JSON-style dict."""

    def __init__(self):
        """Load the label list and the most recently saved Magpie model.

        Labels are read from ``stockLabels2.labels`` (one per line, newline
        removed).  The model files are taken from the directory that
        ``find_latest`` selects under ``savedMagpieModels``.
        """
        # Index 0 stays an empty placeholder, as before, so the label
        # indices reported by create_stocks_bool_json do not shift.
        labels = [""]
        # ``with`` closes the file; ``str.replace`` works on Python 2 and 3,
        # unlike the removed ``string.replace`` helper.
        with open("stockLabels2.labels", "r") as label_file:
            for line in label_file:
                labels.append(line.replace('\n', ''))
        self.labels = labels

        path = os.path.join('', 'savedMagpieModels')
        latest_path = 'savedMagpieModels/' + find_latest(path)
        self.model = Magpie(keras_model=str(latest_path + '/model.h5'),
                            word2vec_model=str(latest_path + '/embedding'),
                            scaler=str(latest_path + '/scaler'),
                            labels=self.labels)

    def delete_model(self):
        """Drop the reference to the loaded model."""
        del self.model

    def load_model(self):
        """Rebuild ``self.model`` from the latest saved model directory.

        Returns:
            bool: True when the model was constructed, False when any error
            occurred (the error is printed, not raised).
        """
        print('loading model ...')
        result = False
        path = os.path.join('', 'savedMagpieModels')
        try:
            latest_path = 'savedMagpieModels/' + find_latest(path)
            self.model = Magpie(keras_model=str(latest_path + '/model.h5'),
                                word2vec_model=str(latest_path + '/embedding'),
                                scaler=str(latest_path + '/scaler'),
                                labels=self.labels)
            print('2222')
            result = True
            print('model loaded')
        except Exception as exc:
            # Catch Exception (not a bare except) so Ctrl-C / SystemExit
            # still propagate, and show the cause instead of hiding it.
            print('ERR in stockPrediction.loadModel()')
            print(repr(exc))
        return result

    def create_stocks_bool_json(self, magpie_result):
        """Convert ``(label, probability)`` pairs into a thresholded dict.

        Args:
            magpie_result: iterable of ``(label, probability)`` pairs, as
                passed in by ``run``.

        Returns:
            dict: ``{"news_number": 100, "prediction": [...]}`` where each
            entry holds the label ``name``, its ``index`` in ``self.labels``
            (as a string) and ``prediction`` (1 when the probability is
            strictly above ``self.THRESHOLD``, else 0).

        Raises:
            ValueError: if a label is not present in ``self.labels``.
        """
        data = []
        # Unpack each pair directly; the previous str()-and-slice parsing
        # broke on labels containing commas and on non-plain float reprs.
        for name, probability in magpie_result:
            is_hit = float(probability) > self.THRESHOLD
            data.append({
                "name": name,
                "index": str(self.labels.index(name)),
                "prediction": 1 if is_hit else 0,
            })
        return {"news_number": 100, "prediction": data}

    def run(self, news, threshold):
        """Predict labels for ``news`` and threshold them at ``threshold``."""
        self.THRESHOLD = threshold
        output = self.model.predict_from_text(news)
        return self.create_stocks_bool_json(output)
예제 #25
0
def load_magpie(labels):
    """Return a Magpie instance restored from the saved model, word2vec and scaler paths."""
    saved_artifacts = dict(
        keras_model=SAVE_MAGPIE_MODEL_PATH,
        word2vec_model=SAVE_MAGPIE_WORD2VEC_PATH,
        scaler=SAVE_MAGPIE_SCALER_PATH,
    )
    return Magpie(labels=labels, **saved_artifacts)
예제 #26
0
        with open(address + '/' + Id + '.txt', "a") as file:
            file.write(Text)

        with open(address + '/' + Id + '.lab', "a") as file:
            file.write(label)

        print("Data generation finished.")


address = "/home/ubuntu/toxic/magpie_data"

#data_prep("/Users/wangergou/Downloads/kaggle/Toxic_Comment_Classification/Magpie/data/")

data_prep(address)

magpie = Magpie()

print("Loading word vector... \n")

magpie.train_word2vec(address, vec_dim=100)

print("Initializing data... \n")

magpie.init_word_vectors(address, vec_dim=100)

labels = [
    'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate'
]

print("Training starts... \n")
예제 #27
0
파일: p.py 프로젝트: zhang45258/magpie
from magpie import Magpie

magpie = Magpie()
# Alternative corpus: 'data/hep-categories'
train_dir = 'C:\\data\\Railway_Passenger_Transport'
EMBEDDING_SIZE = 50
MIN_WORD_COUNT = 1
WORD2VEC_CONTEXT = 1

# Train the embeddings with the settings above
magpie.train_word2vec(
    train_dir,
    vec_dim=EMBEDDING_SIZE,
    MWC=MIN_WORD_COUNT,
    w2vc=WORD2VEC_CONTEXT,
)

# Tag = last three characters of the corpus path plus the three settings
run_tag = '_'.join([
    train_dir[-3:],
    str(EMBEDDING_SIZE),
    str(MIN_WORD_COUNT),
    str(WORD2VEC_CONTEXT),
])
magpie.save_word2vec_model('save/embeddings/' + run_tag, overwrite=True)
print(run_tag + '   Success!!!')
from magpie import Magpie
import os

folder = "magpie_data"

# NOTE(review): there is no path separator between folder and the file name
# here, unlike the model paths below -- confirm the labels file really is
# "magpie_dataaskubuntu.labels".
# The context manager closes the file, which was previously left open.
with open(folder + "askubuntu.labels", 'r') as labf:
    labels = labf.read()
labels = labels.split('\n')
# Entries of length 0 or 1 are dropped (this also removes blank lines).
labels = [l for l in labels if len(l) > 1]

print("loading model")
magpie = Magpie(keras_model=folder + '/model.h5',
                word2vec_model=folder + '/wordvec',
                scaler=folder + '/scalervec',
                labels=labels)
#print(labels)
예제 #29
0
def train_dl(save, vec_dim, epochs):
    """
    Train a Magpie model on the categories corpus and optionally save it.

    ``vec_dim`` is passed to ``init_word_vectors`` and ``epochs`` to
    ``train``.  When ``save`` is truthy the word2vec model, scaler and
    network are written to the hard-coded save paths.  The trained model is
    returned either way.
    """
    corpus_dir = '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/categories'

    model = Magpie()
    model.init_word_vectors(corpus_dir, vec_dim=vec_dim)

    # One label per line, surrounding whitespace stripped
    with open('data/categories.labels') as label_file:
        labels = [entry.strip() for entry in label_file]
    model.train(corpus_dir, labels, test_ratio=0.0, epochs=epochs)

    if not save:
        return model

    # Save model
    model.save_word2vec_model(
        '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/save/embeddings/here'
    )
    model.save_scaler(
        '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/save/scaler/here',
        overwrite=True)
    model.save_model(
        '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/save/model/here.h5'
    )
    return model
#print (labels)
dirName = 'D:\\xampp\\htdocs\\mtlbl\\webpage\\models\\' + model_name

#os.mkdir(dirName)

model_path = dirName + '\\' + model_name
scaler_path = dirName + '\\scaler_' + model_name
keras_path =  dirName + '\\keras_'+  model_name + '.h5'

#print (model_path)
#print (keras_path)
#print (scaler_path)

from magpie import Magpie

magpie = Magpie()

magpie = Magpie(
   keras_model=keras_path,
  word2vec_model=model_path,
 scaler= scaler_path,
labels = labelsa
)
#filePath = 'D:\\xampp\\htdocs\\mtlbl\\webpage\\admin\\classify' + model_name + '\\' + '.txt'
path= 'D:\\xampp\\htdocs\\mtlbl\\webpage\\admin\\classify\\' + model_name + '\\' + val
#print(path)
print (magpie.predict_from_file(path)) #test

#magpie.predict_from_text('Manchester United vs Chelsea')

예제 #31
0
        alllabel.append(line)
    return alllabel


#第四步 训练模型预测测试集的标签
if __name__ == '__main__':

    labels = getlabel('/home/ydm/ren/remote/multiLabel/data/labels.txt')
    # magpie = Magpie(
    #     keras_model='/home/ydm/ren/remote/multiLabel/data/here.h5',
    #     word2vec_model='/home/ydm/ren/remote/multiLabel/data/word2vec_mode',
    #     scaler='/home/ydm/ren/remote/multiLabel/data/scaler',
    #     labels=labels
    # )

    magpie = Magpie()
    magpie.init_word_vectors(
        '/home/ydm/ren/remote/multiLabel/data/hep-categories', vec_dim=100)

    print(len(labels))
    magpie.train('/home/ydm/ren/remote/multiLabel/data/hep-categories',
                 labels,
                 epochs=30,
                 batch_size=128)
    magpie.save_word2vec_model(
        '/home/ydm/ren/remote/multiLabel/data/word2vec_mode_place')
    magpie.save_scaler('/home/ydm/ren/remote/multiLabel/data/scaler_place',
                       overwrite=True)
    magpie.save_model('/home/ydm/ren/remote/multiLabel/data/model_place.h5')

    alltest = getlabel(