Esempio n. 1
0
    def test_cnn_train(self):
        """End-to-end smoke test: train word2vec embeddings and a CNN Magpie
        model on DATA_DIR, persist all artifacts under PROJECT_DIR, then run
        a single prediction.

        Labels are read one-per-line from ``DATA_DIR + '.labels'`` and
        de-duplicated before training.
        """
        print(PROJECT_DIR)
        print(DATA_DIR)

        # Collect the unique label set (one label per line in the file).
        # builtin open() is the same object as io.open() on Python 3.
        with open(DATA_DIR + '.labels', 'r') as f:
            labels = list({line.rstrip('\n') for line in f})

        # Train embeddings, initialize word vectors, then train the CNN.
        # (train_word2vec's return value was previously bound to an unused
        # local; it is intentionally discarded here.)
        model = Magpie()
        model.train_word2vec(DATA_DIR, vec_dim=300)
        print("done2")

        print("done3")
        model.init_word_vectors(DATA_DIR, vec_dim=300)
        model.train(DATA_DIR,
                    labels,
                    nn_model='cnn',
                    test_ratio=0.2,
                    epochs=30)

        # Persist every artifact needed to reload the model later.
        model_path = PROJECT_DIR + '/here1.h5'
        embeddings_path = PROJECT_DIR + '/embedinghere'
        scaler_path = PROJECT_DIR + '/scaler'
        model.save_word2vec_model(embeddings_path)
        model.save_scaler(scaler_path, overwrite=True)
        model.save_model(model_path)
        print("thuc hien test")

        # Do a simple prediction on a Vietnamese query.
        print(
            model.predict_from_text(
                'cho em hỏi về lịch khám của bác_sỹ đào việt_hằng và số điện_thoại'
            ))
Esempio n. 2
0
]

#train_dir = 'C:\\magpie-master\\data\\hep-categories'
#train_dir = 'C:\\data\\Railway_Passenger_Transport'
train_dir = 'C:\\data\\nlp_chinese_corpus'

Success = 'Success:'
error = 'error:'

magpie = Magpie()
lossHistory = LossHistory()

# Grid-search over the word2vec hyper-parameters; for every combination
# retrain the embeddings and the classifier, then save a loss curve and
# the embedding model, both tagged with the parameter values.
for emb_dim in [250, 500]:
    for min_count in [5, 10]:
        for context_window in [5, 10]:
            magpie.train_word2vec(train_dir,
                                  vec_dim=emb_dim,
                                  MWC=min_count,
                                  w2vc=context_window)
            magpie.fit_scaler('C:\\magpie-master\\data\\hep-categories')
            # Train with 10% of the data held out for testing, 20 epochs.
            magpie.train('C:\\magpie-master\\data\\hep-categories',
                         labels,
                         callbacks=[lossHistory],
                         test_ratio=0.1,
                         epochs=20)
            # Output files are tagged: <corpus suffix>_<dim>_<count>_<window>.
            tag = (train_dir[-3:] + '_' + str(emb_dim) + '_' +
                   str(min_count) + '_' + str(context_window))
            lossHistory.loss_plot('epoch',
                                  'C:\\magpie-master\\' + tag + '.jpg')
            magpie.save_word2vec_model(
                'C:\\magpie-master\\save\\embeddings\\' + tag)
Esempio n. 3
0
from magpie import Magpie

#train_dir = 'C:\\data\\Railway_Passenger_Transport'
train_dir = 'data/hep-categories'
magpie = Magpie()
# Train word2vec embeddings (100-dim; MWC = min word count, w2vc = context
# window — presumably forwarded to gensim Word2Vec; confirm in Magpie docs),
# fit the feature scaler, then load the word vectors into the model.
magpie.train_word2vec(train_dir, vec_dim=100, MWC=1, w2vc=5)
magpie.fit_scaler('data/hep-categories')
magpie.init_word_vectors('data/hep-categories')

#定义所有类别
labels = [
    '1111', '1112', '1113', '1114', '1115', '1116', '1117', '1118', '1121',
    '1122', '1123', '1124', '1131', '1132', '1133', '1134', '1135', '1141',
    '1142', '1143', '1144', '1151', '1152', '1153', '1154', '1211', '1212',
    '1213', '1214', '1215', '1216', '1217', '1218', '1219', '1221', '1222',
    '1223', '1231', '1232', '1233', '1234', '1235', '1241', '1242', '1243',
    '1251', '1311', '1312', '1313', '1314', '1321', '1322', '1323', '1331',
    '1332', '1333', '1334', '1341', '1342', '1343', '1344', '1345', '1351',
    '1411', '1421', '1431', '1441', '15', '2111', '2112', '2113', '2114',
    '2115', '2116', '2117', '2121', '2122', '2123', '2124', '2131', '2132',
    '2133', '2134', '2141', '2142', '2143', '2144', '2145', '2146', '2147',
    '2148', '2149', '21410', '2151', '2152', '2153', '2154', '2155', '2156',
    '2161', '2162', '2163', '2164', '2165', '2166', '2167', '2168', '2171',
    '2172', '2173', '2174', '2175', '2176', '2177', '2178', '2179', '21710',
    '21711', '2181', '2182', '2183', '2184', '2185', '2186', '2187', '2188',
    '2191', '2192', '2193', '2194', '2195', '2196', '221', '222', '223', '224',
    '2311', '2312', '2313', '2314', '2315', '2316', '2321', '2322', '2323',
    '2324', '24', '31', '32', '33', '34', '41', '42', '43', '51', '52', '53',
    '54', '55', '56', '57', '58', '61', '7111', '7112', '7113', '7114', '7115',
    '7116', '7117', '7118', '7119', '71110', '71111', '7121', '7122', '7123',
    '7124', '7125', '7126', '7127', '7128', '7129', '7131', '7132', '7133',
Esempio n. 4
0
from magpie import Magpie
import time

# Benchmark word2vec training time as the embedding dimension grows from
# 10 to 500 in steps of 10; print and append "dim,seconds" to a log file.
count = 10
magpie = Magpie()
while count <= 500:
    # time.clock() was deprecated in 3.3 and removed in Python 3.8;
    # time.perf_counter() is the documented replacement for interval timing.
    start = time.perf_counter()
    magpie.train_word2vec('data/hep-categories', vec_dim=count)
    magpie.save_word2vec_model('save/embeddings/here' + str(count),
                               overwrite=True)
    runtime = time.perf_counter() - start
    print(str(count) + ',' + str(runtime))
    # Append to the log; the context manager closes the handle even if
    # the write raises (the original leaked it on exception).
    with open('save/embeddings/here.txt', 'a') as log_file:
        log_file.write('\n' + str(count) + ',' + str(runtime))
    count = count + 10
Esempio n. 5
0
            file.write(label)

        print("Data generation finished.")


address = "/home/ubuntu/toxic/magpie_data"

#data_prep("/Users/wangergou/Downloads/kaggle/Toxic_Comment_Classification/Magpie/data/")

# Regenerate the training corpus, then train a Magpie model on it.
data_prep(address)

magpie = Magpie()

print("Loading word vector... \n")
magpie.train_word2vec(address, vec_dim=100)

print("Initializing data... \n")
magpie.init_word_vectors(address, vec_dim=100)

# The six Jigsaw toxic-comment categories.
labels = [
    'toxic',
    'severe_toxic',
    'obscene',
    'threat',
    'insult',
    'identity_hate',
]

print("Training starts... \n")
magpie.train(address, labels, test_ratio=0.2, epochs=30)

magpie.save_model('/home/ubuntu/toxic/magpie_model.h5')
Esempio n. 6
0
import os
import sys

sys.path.append(os.path.realpath(os.getcwd()))
sys.path.append("..")

from magpie import Magpie

magpie = Magpie()

# Train a tiny (3-dim) word2vec model, fit the scaler, and load the
# resulting word vectors.
magpie.train_word2vec('../data/hep-categories', vec_dim=3)
magpie.fit_scaler('../data/hep-categories')
magpie.init_word_vectors('../data/hep-categories', vec_dim=3)

# All document categories: military / travel / politics.
labels = ['军事', '旅游', '政治']

# Train; 20% of the data is held out as a test set, 20 epochs.
magpie.train('../data/hep-categories', labels, test_ratio=0.2, epochs=20)

# Persist the trained artifacts (embeddings, scaler, model weights).
magpie.save_word2vec_model('../workspace/embeddings', overwrite=True)
magpie.save_scaler('../workspace/scaler', overwrite=True)
magpie.save_model('../workspace/model.h5')
Esempio n. 7
0
    '2161', '2162', '2163', '2164', '2165', '2166', '2167', '2168', '2171',
    '2172', '2173', '2174', '2175', '2176', '2177', '2178', '2179', '21710',
    '21711', '2181', '2182', '2183', '2184', '2185', '2186', '2187', '2188',
    '2191', '2192', '2193', '2194', '2195', '2196', '221', '222', '223', '224',
    '2311', '2312', '2313', '2314', '2315', '2316', '2321', '2322', '2323',
    '2324', '24', '31', '32', '33', '34', '41', '42', '43', '51', '52', '53',
    '54', '55', '56', '57', '58', '61', '7111', '7112', '7113', '7114', '7115',
    '7116', '7117', '7118', '7119', '71110', '71111', '7121', '7122', '7123',
    '7124', '7125', '7126', '7127', '7128', '7129', '7131', '7132', '7133',
    '7134', '7135', '7136', '7137', '7138', '7139', '71310', '71311', '71312',
    '7141', '7142', '7151', '721', '722', '723', '724', '7311', '7312', '7313',
    '7314', '7315', '7316', '7321', '7322', '7323', '7324', '7325', '7326',
    '7331', '7332', '7333', '7334', '7335', '7336', '734', '74'
]
# Train embeddings on the railway corpus, then fit the scaler and load
# word vectors from the hep-categories data set.
magpie.train_word2vec('C:\\data\\Railway_Passenger_Transport',
                      vec_dim=300, MWC=8, w2vc=6)
magpie.fit_scaler('data/hep-categories')
magpie.init_word_vectors('data/hep-categories')
'''
Save the model that performs best on the validation set.
filename: string, path where the model is saved
monitor: the quantity to monitor
verbose: verbosity mode, 0 or 1
save_best_only: when set to True, only the model that performs best on the validation set is saved
mode: one of 'auto', 'min', 'max'; when save_best_only=True this decides the criterion for the best model.
For example, when the monitored value is val_acc the mode should be max;
when it is val_loss the mode should be min. In 'auto' mode the criterion is inferred from the name of the monitored quantity.
save_weights_only: if True, only the model weights are saved; otherwise the whole model is saved (including architecture, configuration, etc.)
period: number of epochs between checkpoints
https://keras.io/zh/callbacks/#history