Esempi in Python per Magpie.train, esempi in Python per magpie.Magpie.train

Esempio n. 1

0

Mostra file

File: test_api.py Progetto: quangtm199/Project

    def test_cnn_train(self):
        """Train a CNN Magpie model, save its artefacts and run one prediction.

        The label vocabulary is read from ``DATA_DIR + '.labels'`` (one label
        per line, duplicates collapsed). The word2vec model, the scaler and the
        Keras model are written under ``PROJECT_DIR``.
        """
        print(PROJECT_DIR)
        print(DATA_DIR)

        # Get them labels!
        with io.open(DATA_DIR + '.labels', 'r') as f:
            labels = list({line.rstrip('\n') for line in f})

        # Run the model
        model = Magpie()
        model.train_word2vec(DATA_DIR, vec_dim=300)
        print("done2")

        print("done3")
        # Only the scaler is still missing here. In upstream Magpie,
        # init_word_vectors() runs train_word2vec() and then fit_scaler(), so
        # calling it at this point would train the embeddings a second time.
        model.fit_scaler(DATA_DIR)
        model.train(DATA_DIR,
                    labels,
                    nn_model='cnn',
                    test_ratio=0.2,
                    epochs=30)

        path1 = PROJECT_DIR + '/here1.h5'
        path2 = PROJECT_DIR + '/embedinghere'
        path3 = PROJECT_DIR + '/scaler'
        # overwrite=True on both pickled artefacts so that the test can be
        # re-run without failing on files left behind by a previous run.
        model.save_word2vec_model(path2, overwrite=True)
        model.save_scaler(path3, overwrite=True)
        model.save_model(path1)
        print("thuc hien test")

        # Do a simple prediction
        print(
            model.predict_from_text(
                'cho em hỏi về lịch khám của bác_sỹ đào việt_hằng và số điện_thoại'
            ))

Esempio n. 2

0

Mostra file

def train_dl(save, vec_dim, epochs):
    """
    Train a Magpie model on the hard-coded categories directory.

    Word vectors are initialised with ``vec_dim`` dimensions, the labels are
    read from ``data/categories.labels`` (one per line, whitespace stripped)
    and the model is trained for ``epochs`` epochs with ``test_ratio=0.0``.
    When ``save`` is truthy the embeddings, the scaler and the Keras model are
    written to the hard-coded save paths. Returns the ``Magpie`` instance.
    """
    corpus_dir = '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/categories'

    model = Magpie()
    # init_word_vectors is used here instead of separate
    # train_word2vec(..., vec_dim=100) / fit_scaler(...) calls.
    model.init_word_vectors(corpus_dir, vec_dim=vec_dim)

    with open('data/categories.labels') as label_file:
        labels = [entry.strip() for entry in label_file]

    model.train(corpus_dir, labels, test_ratio=0.0, epochs=epochs)

    if not save:
        return model

    # Save model
    model.save_word2vec_model(
        '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/save/embeddings/here'
    )
    model.save_scaler(
        '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/save/scaler/here',
        overwrite=True)
    model.save_model(
        '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/save/model/here.h5'
    )
    return model

Esempio n. 3

0

Mostra file

File: test_api.py Progetto: netrasys/magpie

	def test_cnn_train(self):
		"""Train a small CNN on DATA_DIR and check the shape of its predictions."""
		# Collect the label vocabulary, one label per line.
		label_set = set()
		with io.open(DATA_DIR + '.labels', 'r') as label_file:
			for raw in label_file:
				label_set.add(raw.rstrip('\n'))

		# Fit the model; train() must hand something back.
		magpie = Magpie()
		magpie.init_word_vectors(DATA_DIR, vec_dim=100)
		fit_result = magpie.train(
			DATA_DIR,
			label_set,
			nn_model='cnn',
			test_ratio=0.3,
			epochs=3,
		)
		assert fit_result is not None

		# One prediction entry is expected per known label.
		scored = magpie.predict_from_text("Black holes are cool!")
		assert len(scored) == len(label_set)

		# Every entry is a (known label, score within [0, 1]) pair.
		for name, score in scored:
			assert name in label_set
			assert 0 <= score <= 1

Esempio n. 4

0

Mostra file

File: test_api.py Progetto: otmidhuna/Prognosis

    def test_cnn_train(self):
        """Fit a CNN for three epochs and sanity-check one prediction.

        Checks that ``train`` returns a non-None value and that
        ``predict_from_text`` yields one (label, value) pair per known label,
        with each value inside [0, 1].
        """
        # Label vocabulary: one label per line of the '.labels' file.
        with io.open(DATA_DIR + '.labels', 'r') as handle:
            known = set(row.rstrip('\n') for row in handle)

        classifier = Magpie()
        classifier.init_word_vectors(DATA_DIR, vec_dim=100)
        train_options = dict(nn_model='cnn', test_ratio=0.3, epochs=3)
        outcome = classifier.train(DATA_DIR, known, **train_options)
        assert outcome is not None

        ranked = classifier.predict_from_text("Black holes are cool!")
        assert len(ranked) == len(known)

        for pair in ranked:
            lab, val = pair
            assert lab in known
            assert 0 <= val <= 1

Esempio n. 5

0

Mostra file

    '1142', '1143', '1144', '1151', '1152', '1153', '1154', '1211', '1212',
    '1213', '1214', '1215', '1216', '1217', '1218', '1219', '1221', '1222',
    '1223', '1231', '1232', '1233', '1234', '1235', '1241', '1242', '1243',
    '1251', '1311', '1312', '1313', '1314', '1321', '1322', '1323', '1331',
    '1332', '1333', '1334', '1341', '1342', '1343', '1344', '1345', '1351',
    '1411', '1421', '1431', '1441', '15', '2111', '2112', '2113', '2114',
    '2115', '2116', '2117', '2121', '2122', '2123', '2124', '2131', '2132',
    '2133', '2134', '2141', '2142', '2143', '2144', '2145', '2146', '2147',
    '2148', '2149', '21410', '2151', '2152', '2153', '2154', '2155', '2156',
    '2161', '2162', '2163', '2164', '2165', '2166', '2167', '2168', '2171',
    '2172', '2173', '2174', '2175', '2176', '2177', '2178', '2179', '21710',
    '21711', '2181', '2182', '2183', '2184', '2185', '2186', '2187', '2188',
    '2191', '2192', '2193', '2194', '2195', '2196', '221', '222', '223', '224',
    '2311', '2312', '2313', '2314', '2315', '2316', '2321', '2322', '2323',
    '2324', '24', '31', '32', '33', '34', '41', '42', '43', '51', '52', '53',
    '54', '55', '56', '57', '58', '61', '7111', '7112', '7113', '7114', '7115',
    '7116', '7117', '7118', '7119', '71110', '71111', '7121', '7122', '7123',
    '7124', '7125', '7126', '7127', '7128', '7129', '7131', '7132', '7133',
    '7134', '7135', '7136', '7137', '7138', '7139', '71310', '71311', '71312',
    '7141', '7142', '7151', '721', '722', '723', '724', '7311', '7312', '7313',
    '7314', '7315', '7316', '7321', '7322', '7323', '7324', '7325', '7326',
    '7331', '7332', '7333', '7334', '7335', '7336', '734', '74'
]
magpie.train('data/hep-categories', labels, test_ratio=0.2,
             epochs=20)  # train the model on the given data: 20% of the data used as test data, 20 epochs
'''
#保存训练后的模型文件
magpie.save_word2vec_model('save/embeddings/best', overwrite=True)
magpie.save_scaler('save/scaler/best', overwrite=True)
magpie.save_model('save/model/best.h5')
'''

Esempio n. 6

0

Mostra file

File: Parameter_w2v.py Progetto: zhang45258/magpie

 # Try each WORD2VEC_CONTEXT value in turn; a combination whose log file
 # already exists is skipped.
 # NOTE(review): the existence check uses the relative 'log/' directory while
 # train() is given an absolute 'C:\\magpie-master\\log\\' path - confirm both
 # resolve to the same directory.
 for WORD2VEC_CONTEXT in [4, 5, 6, 7, 8]:
     if os.path.exists('log/' + train_dir[-3:] + '_' +
                       str(EMBEDDING_SIZE) + '_' +
                       str(MIN_WORD_COUNT) + '_' +
                       str(WORD2VEC_CONTEXT) + '.txt'):
         continue
     # NOTE(review): word2vec is trained on train_dir, whereas the scaler and
     # the model below use a hard-coded data path - confirm this is intended.
     magpie.train_word2vec(train_dir,
                           vec_dim=EMBEDDING_SIZE,
                           MWC=MIN_WORD_COUNT,
                           w2vc=WORD2VEC_CONTEXT)
     magpie.fit_scaler('C:\\magpie-master\\data\\hep-categories')
     magpie.train(
         'C:\\magpie-master\\data\\hep-categories',
         labels,
         callbacks=[lossHistory],
         test_ratio=0.1,
         epochs=20,
         logdir='C:\\magpie-master\\log\\' + train_dir[-3:] + '_' +
         str(EMBEDDING_SIZE) + '_' + str(MIN_WORD_COUNT) + '_' +
         str(WORD2VEC_CONTEXT) + '.txt')  # train: 10% of the data used as test data, 20 epochs
     # Plot the recorded loss to a .jpg named after the parameter combination.
     lossHistory.loss_plot(
         'epoch', 'C:\\magpie-master\\pic\\' + train_dir[-3:] +
         '_' + str(EMBEDDING_SIZE) + '_' + str(MIN_WORD_COUNT) +
         '_' + str(WORD2VEC_CONTEXT) + '.jpg')
     '''
     magpie.save_word2vec_model(
         'C:\\magpie-master\\save\\embeddings\\' + train_dir[-3:] + '_' + str(EMBEDDING_SIZE) + '_' + str(
             MIN_WORD_COUNT) + '_' + str(WORD2VEC_CONTEXT))
     magpie.save_scaler(
         'C:\\magpie-master\\save\\scaler\\' + train_dir[-3:] + '_' + str(EMBEDDING_SIZE) + '_' + str(
             MIN_WORD_COUNT) + '_' + str(WORD2VEC_CONTEXT))

Esempio n. 7

0

Mostra file

            file.write(label)

        print("Data generation finished.")


# Directory holding the generated Magpie training data.
address = "/home/ubuntu/toxic/magpie_data"

data_prep(address)

magpie = Magpie()

print("Loading word vector... \n")

magpie.train_word2vec(address, vec_dim=100)

print("Initializing data... \n")

# Only the scaler is still missing here. In upstream Magpie,
# init_word_vectors() runs train_word2vec() and then fit_scaler(), so calling
# it at this point would train the embeddings a second time.
magpie.fit_scaler(address)

labels = [
    'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate'
]

print("Training starts... \n")

magpie.train(address, labels, test_ratio=0.2, epochs=30)

# NOTE(review): only the Keras model is saved; the word2vec model and the
# scaler are not persisted here - confirm whether they are needed to reload
# this model later.
magpie.save_model('/home/ubuntu/toxic/magpie_model.h5')

Esempio n. 8

0

Mostra file

def train_magpie(labels):
    """Create a Magpie model, train it for ``labels`` and return it.

    Word vectors are initialised from ``WRITE_SK_CAT_PATH`` with ``VEC_DIM``
    dimensions; training then runs on the same directory for ``EPOCHS``
    epochs with ``test_ratio=0.2``.
    """
    corpus = WRITE_SK_CAT_PATH
    model = Magpie()
    model.init_word_vectors(corpus, vec_dim=VEC_DIM)
    model.train(corpus, labels, epochs=EPOCHS, test_ratio=0.2)
    return model

Esempio n. 9

0

Mostra file

File: magpieTrainVect.py Progetto: poemyaya/NEARM

    labels = getlabel('/home/ydm/ren/remote/multiLabel/data/labels.txt')
    # magpie = Magpie(
    #     keras_model='/home/ydm/ren/remote/multiLabel/data/here.h5',
    #     word2vec_model='/home/ydm/ren/remote/multiLabel/data/word2vec_mode',
    #     scaler='/home/ydm/ren/remote/multiLabel/data/scaler',
    #     labels=labels
    # )

    magpie = Magpie()
    magpie.init_word_vectors(
        '/home/ydm/ren/remote/multiLabel/data/hep-categories', vec_dim=100)

    print(len(labels))
    magpie.train('/home/ydm/ren/remote/multiLabel/data/hep-categories',
                 labels,
                 epochs=30,
                 batch_size=128)
    magpie.save_word2vec_model(
        '/home/ydm/ren/remote/multiLabel/data/word2vec_mode_place')
    magpie.save_scaler('/home/ydm/ren/remote/multiLabel/data/scaler_place',
                       overwrite=True)
    magpie.save_model('/home/ydm/ren/remote/multiLabel/data/model_place.h5')

    alltest = getlabel(
        '/home/ydm/ren/remote/multiLabel/data/allsents_test.txt')
    # alltest = [alltest]
    writes = open('/home/ydm/ren/remote/multiLabel/data/result_place.txt',
                  'w',
                  encoding='utf-8')

    for sent in alltest:

Esempio n. 10

0

Mostra file

File: train.py Progetto: FertileFragrance/data_not_scientific

#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@Author: njuselhx
@Time: 2021/1/21 下午7:01
@File: train.py
@Software: PyCharm
"""
from magpie import Magpie
magpie = Magpie()
# Earlier experiment, disabled by keeping it inside an inert string literal.
'''
magpie.init_word_vectors('data/hep-categories-zh', vec_dim=100)
labels = ['军事', '旅游', '政治']
magpie.train('data/hep-categories-zh', labels, test_ratio=0.2, epochs=100)
magpie.save_model('save/keras_model_zh.h5')
magpie.save_word2vec_model('save/word2vec_model_zh', overwrite=True)
magpie.save_scaler('save/scaler_zh', overwrite=True)
print(magpie.predict_from_text('特朗普在联合国大会发表演讲谈到这届美国政府成绩时，称他已经取得了美国历史上几乎最大的成就。随后大会现场传出了嘲笑声，特朗普立即回应道：“这是真的。”'))
'''

# Label vocabulary for the training run below.
labels = [
    '满意', '喜悦', '乐观',
    '愤怒', '悲哀', '恐惧',
    '厌恶', '焦虑', '怀疑',
]
emotion_data_dir = 'data/emotion-categories'

# Initialise the word vectors, train, then persist all three artefacts.
magpie.init_word_vectors(emotion_data_dir, vec_dim=100)
magpie.train(emotion_data_dir, labels, test_ratio=0.2, epochs=2333)
magpie.save_model('save/emotion_keras_model.h5')
magpie.save_word2vec_model('save/emotion_word2vec_model', overwrite=True)
magpie.save_scaler('save/emotion_scaler', overwrite=True)

Esempio n. 11

0

Mostra file

File: run.py Progetto: wenwei-dev/magpie

from magpie import Magpie

# Label vocabulary passed to train().
labels = [
    "Astrophysics",
    "Experiment-HEP",
    "Gravitation and Cosmology",
    "Phenomenology-HEP",
    "Theory-HEP",
]
hep_data_dir = 'data/hep-categories'

# Initialise the word vectors and train on the same directory.
magpie = Magpie()
magpie.init_word_vectors(hep_data_dir, vec_dim=100)
magpie.train(hep_data_dir, labels, test_ratio=0.2, epochs=30)

# Print the prediction for one sample sentence.
sample_prediction = magpie.predict_from_text(
    'Stephen Hawking studies black holes')
print(sample_prediction)

Esempio n. 12

0

Mostra file

File: train-1.py Progetto: SINeWang/magpie

import os
import sys

sys.path.append(os.path.realpath(os.getcwd()))
sys.path.append("..")

from magpie import Magpie

magpie = Magpie()
magpie.train_word2vec('../data/hep-categories', vec_dim=3)  # train a word2vec model
magpie.fit_scaler('../data/hep-categories')  # fit the scaler
# init_word_vectors() is deliberately not called: in upstream Magpie it runs
# train_word2vec() followed by fit_scaler(), so calling it after the two lines
# above would repeat both steps.
labels = ['军事', '旅游', '政治']  # all categories
magpie.train('../data/hep-categories', labels, test_ratio=0.2,
             epochs=20)  # train: 20% of the data used as test data, 20 epochs

# Save the trained model files
magpie.save_word2vec_model('../workspace/embeddings', overwrite=True)
magpie.save_scaler('../workspace/scaler', overwrite=True)
magpie.save_model('../workspace/model.h5')

Esempio n. 13

0

Mostra file

File: magpie_occup.py Progetto: stuartnankai/Multi_lable_classificaion

def _clear_files(folder):
    """Delete every regular file directly inside ``folder``; sub-directories are kept."""
    for entry in os.listdir(folder):
        file_path = os.path.join(folder, entry)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
        except OSError as e:
            # Best effort: report the failure and continue with the next file.
            print(e)


def Deep_learning(df, x_test, target, threshold=0.25):
    """Train Magpie on ``df`` and predict labels for the texts in ``x_test``.

    The function rebuilds the on-disk training set under the hard-coded
    ``test_data`` directory (one ``<index>.txt`` / ``<index>.lab`` pair per
    row plus ``categories.labels``), trains a fresh Magpie model on it for
    20 epochs and predicts labels for every test text.

    Args:
        df: DataFrame with a text column named ``target`` and a ``group_id``
            column whose cells are strings that evaluate to an iterable of
            labels.
        x_test: Test texts; passed through ``np.atleast_2d`` and loaded into
            a one-column DataFrame.
            NOTE(review): a flat sequence of several texts becomes a single
            row with several columns - confirm the shape callers pass.
        target: Name of the text column in ``df`` (also used for the test
            frame).
        threshold: Minimum prediction value for a label to be kept.

    Returns:
        dict mapping each preprocessed test text to the list of predicted
        labels whose value is at least ``threshold``.
    """
    base_dir = '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/test_data/'
    corpus_dir = base_dir + 'categories'
    labels_path = corpus_dir + '.labels'

    # Start from a clean slate: remove the old labels file and training pairs.
    _clear_files(base_dir)
    _clear_files(corpus_dir + '/')

    lab_list = []
    for i in df.index:
        # NOTE(review): this guard only triggers when an index value exceeds
        # the row count; kept as in the original - confirm it is needed.
        if i > len(df):
            break

        file_name = corpus_dir + '/' + str(i) + '.txt'
        lab_name = corpus_dir + '/' + str(i) + '.lab'

        # Drop every non-ASCII character from the text.
        title_data = df.at[i, target].encode('ascii', 'ignore').decode('ascii')
        with open(file_name, 'w') as text_file:
            text_file.write(title_data)

        # NOTE(review): eval() executes whatever is stored in 'group_id'. If
        # that column can hold untrusted data, switch to ast.literal_eval.
        row_labels = list(eval(df.at[i, 'group_id']))
        lab_list.extend(row_labels)
        if row_labels:
            # One open per row instead of one per label; 'w' so that a stale
            # file that could not be deleted above is not appended to.
            with open(lab_name, 'w') as lab_file:
                lab_file.writelines(str(j) + '\n' for j in row_labels)

    lab_set = list(set(lab_list))
    with open(labels_path, 'w') as labels_file:
        labels_file.writelines(str(j) + '\n' for j in lab_set)

    magpie = Magpie()
    magpie.init_word_vectors(corpus_dir, vec_dim=100)

    # Use the labels just collected rather than re-reading them through a
    # path relative to the current working directory.
    labels = [str(j).strip() for j in lab_set]
    magpie.train(corpus_dir, labels, test_ratio=0.0, epochs=20)

    results_dl = {}

    # The column is named after ``target`` because it is looked up by that
    # name below; a fixed 'title' column only worked when target == 'title'.
    df_test = pd.DataFrame(np.atleast_2d(x_test), columns=[target])

    for i in df_test.index:
        title_data = df_test.at[i, target].encode('ascii',
                                                  'ignore').decode('ascii')
        title_data = preprocess(title_data)
        results_dl[title_data] = [
            s[0] for s in magpie.predict_from_text(title_data)
            if s[1] >= threshold
        ]
    return results_dl

Esempio n. 14

0

Mostra file

File: magpie_occup.py Progetto: stuartnankai/Multi_lable_classificaion

train process
"""
categories_dir = '/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/categories'

magpie = Magpie()
# init_word_vectors is used here instead of separate
# train_word2vec(..., vec_dim=100) / fit_scaler(...) calls.
magpie.init_word_vectors(categories_dir, vec_dim=100)

# One label per line; surrounding whitespace is stripped.
with open('data/categories.labels') as f:
    labels = [entry.strip() for entry in f]

magpie.train(categories_dir, labels, test_ratio=0.0, epochs=30)

# """
# Save model
# """
#
# magpie.save_word2vec_model('/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/save/embeddings/here')
# magpie.save_scaler('/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/save/scaler/here', overwrite=True)
# magpie.save_model('/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/save/model/here.h5')
"""
Reinitialize
"""

# with open('/Users/sunxuan/Documents/PycharmProjects/ImpactPool/data/categories.labels') as f:

Esempio n. 15

0

Mostra file

File: test1.py Progetto: zhang45258/magpie

Success = 'Success:'
error = 'error:'

# Grid search over EMBEDDING_SIZE, MIN_WORD_COUNT and WORD2VEC_CONTEXT.
# For every combination: train word2vec, fit the scaler, train the model,
# plot the loss, then save the embeddings and the scaler under a name built
# from train_dir[-3:] and the three parameter values.
# NOTE(review): one Magpie instance and one LossHistory instance are reused
# for every combination - confirm that neither carries state across runs.
magpie = Magpie()
lossHistory = LossHistory()
for EMBEDDING_SIZE in [250, 500]:
    for MIN_WORD_COUNT in [5, 10]:
        for WORD2VEC_CONTEXT in [5, 10]:
            magpie.train_word2vec(train_dir,
                                  vec_dim=EMBEDDING_SIZE,
                                  MWC=MIN_WORD_COUNT,
                                  w2vc=WORD2VEC_CONTEXT)
            magpie.fit_scaler('C:\\magpie-master\\data\\hep-categories')
            magpie.train('C:\\magpie-master\\data\\hep-categories',
                         labels,
                         callbacks=[lossHistory],
                         test_ratio=0.1,
                         epochs=20)  # train for 20 epochs with test_ratio=0.1
            lossHistory.loss_plot(
                'epoch', 'C:\\magpie-master\\' + train_dir[-3:] + '_' +
                str(EMBEDDING_SIZE) + '_' + str(MIN_WORD_COUNT) + '_' +
                str(WORD2VEC_CONTEXT) + '.jpg')
            # NOTE(review): no overwrite=True on the two saves below - confirm
            # whether re-running the sweep over existing files is expected to work.
            magpie.save_word2vec_model(
                'C:\\magpie-master\\save\\embeddings\\' + train_dir[-3:] +
                '_' + str(EMBEDDING_SIZE) + '_' + str(MIN_WORD_COUNT) + '_' +
                str(WORD2VEC_CONTEXT))
            magpie.save_scaler('C:\\magpie-master\\save\\scaler\\' +
                               train_dir[-3:] + '_' + str(EMBEDDING_SIZE) +
                               '_' + str(MIN_WORD_COUNT) + '_' +
                               str(WORD2VEC_CONTEXT))
            magpie.save_model('C:\\magpie-master\\save\\model\\' +

Esempio n. 16

0

Mostra file

File: train.py Progetto: cka-14/ceng-407-408-2018-2019-Multi-class-Classification-of-News-Text-1-

# The fourth label comes from the tenth command-line argument.
labels4 = sys.argv[9]
labels = [  labels1, labels2, labels3, labels4 ]

#print (labels)
# Every model gets its own directory named after the model.
dirName = 'D:\\xampp\\htdocs\\mtlbl\\webpage\\admin\\models\\' + model_name

# os.mkdir raises if the directory already exists, so a model name cannot be
# reused without removing the old directory first.
os.mkdir(dirName)

# Output files for the word2vec model, the scaler and the Keras model.
model_path = dirName + '\\' + model_name
scaler_path = dirName + '\\scaler_' + model_name
keras_path =  dirName + '\\keras_'+  model_name + '.h5'
#print (model_path)
#print (keras_path)

from magpie import Magpie

magpie = Magpie()

magpie.init_word_vectors(data, vec_dim=vec_num)


magpie.train(data, labels, test_ratio= test_rat, epochs = ep)
# more epochs = better understanding of the vectors and a lower loss

#magpie.predict_from_text('ECB to reveal bad loan hurdles for euro zone bank test') #test

# Persist the three artefacts into the model directory.
magpie.save_word2vec_model(model_path)
magpie.save_scaler(scaler_path, overwrite=True)
magpie.save_model(keras_path)

Esempio n. 17

0

Mostra file

File: Paramter_train.py Progetto: zhang45258/magpie

                                      min_lr=0)
'''
#调参
for optimizer in ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']:
    for BATCH_SIZE in [16, 32, 64, 128, 256]:
        print(optimizer+str(BATCH_SIZE))
        magpie.train('data/hep-categories',
                     labels,
                     batch_size=BATCH_SIZE,
                     callbacks=[checkpoint, reduceLROnPlateau],
                     test_ratio=0.1,
                     epochs=60,
                     verbose=1,
                     optimizer=optimizer,
                     logdir='C:\\magpie-master\\trainlog\\' + optimizer + '_' + str(BATCH_SIZE) + '.txt'
                    )
'''
# Build the final model: train() is called with test_ratio=0.0, batch size 16
# and the 'Adam' optimizer for 60 epochs.
magpie.train(
    'data/hep-categories',
    labels,
    batch_size=16,
    callbacks=[checkpoint, reduceLROnPlateau],
    test_ratio=0.0,
    epochs=60,
    verbose=1,
    optimizer='Adam',
)
# Save the embeddings, the scaler and the model under the 'best' name.
magpie.save_word2vec_model('save/embeddings/best', overwrite=True)
magpie.save_scaler('save/scaler/best', overwrite=True)
magpie.save_model('save/model/best.h5')