# Imports assumed to follow the emnlp2017-bilstm-cnn-crf code layout; adjust the module
# paths if your copy of the code differs:
from neuralnets.BiLSTM import BiLSTM
from util.preprocessing import prepareDataset, loadDatasetPickle

datasets = {
        'unidep_pos':                           # Name of the dataset
        {'columns': {1: 'tokens', 3: 'POS'},    # Column 1 contains tokens, column 3 contains POS information
         'label': 'POS',                        # Which column we want to predict
         'evaluate': True,                      # Always set to True for single-task setups
         'commentSymbol': None}                 # Lines in the input data starting with this string will be skipped
}
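# The integer keys in 'columns' are zero-based indices into the tab-separated columns of the
# dataset files; for CoNLL-U formatted Universal Dependencies data, column 1 is the word form
# and column 3 the universal POS tag, which is presumably why 1 maps to 'tokens' and 3 to 'POS'.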


# :: Path on your computer to the word embeddings. Embeddings by Komninos et al. will be downloaded automatically if missing ::
embeddingsPath = 'komninos_english_embeddings.gz'

# :: Prepares the dataset to be used with the LSTM-network. Creates and stores cPickle files in the pkl/ folder ::
pickleFile = prepareDataset(embeddingsPath, datasets)


############################################################################################################
#
# 2. Network training
#
############################################################################################################
# :: Load the embeddings and the dataset ::
embeddings, mappings, data = loadDatasetPickle(pickleFile)
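# Hyperparameters for the BiLSTM network (an interpretation of the keys, not stated in this snippet):
# 'classifier' selects the output layer (here a CRF rather than a softmax), 'LSTM-Size' lists the
# number of recurrent units per stacked BiLSTM layer, and the 'dropout' tuple presumably enables
# variational dropout on the output and recurrent connections.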
params = {'classifier': ['CRF'], 'LSTM-Size': [100], 'dropout': (0.25, 0.25)}

print("***** Train the model with 1 Epoch and store to disk")
model = BiLSTM(params)
model.setMappings(mappings, embeddings)
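# A minimal sketch of the training step announced by the print above, assuming the
# setDataset/modelSavePath/fit API of the emnlp2017-bilstm-cnn-crf BiLSTM class
# (the save-path template is an assumption):
model.setDataset(datasets, data)
model.modelSavePath = "models/[ModelName]_[Epoch].h5"  # placeholders are presumably expanded when the model is saved
model.fit(epochs=1)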

############################################################################################################
#
# Example #2
#
############################################################################################################
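# Note: this example is excerpted from a larger script; 'logfile', 'datasetName', 'dataColumns',
# 'embeddingsPath', 'params', 'n' and the language-model variables (lm_f_embeddings,
# lm_f_word2Idx, lm_b_word2Idx) are assumed to be defined earlier and are not shown here.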
import logging

loggingLevel = logging.INFO
logger = logging.getLogger()
logger.setLevel(loggingLevel)

ch = logging.FileHandler(logfile)
ch.setLevel(loggingLevel)
formatter = logging.Formatter('%(message)s')
ch.setFormatter(formatter)
logger.addHandler(ch)

# Data preprocessing
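# Each tuple pairs a dataset name with its column description (presumably the same
# {column_index: column_name} mapping used in the 'datasets' config of the first example above).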
datasetFiles = [
    (datasetName, dataColumns),
]
pickleFile = prepareDataset(embeddingsPath, datasetFiles,
                            prosody=params['prosody'],
                            prosody_feats=params['prosody_feats'])

# Load the embeddings and the dataset
embeddings, word2Idx, datasets = loadDatasetPickle(pickleFile)
data = datasets[datasetName]

print('Training, experiment %d' % n)
model = BiLSTM(params)
model.setMappings(embeddings, data['mappings'])
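# Optional language-model mappings: the forward direction uses lm_f_embeddings with
# lm_f_word2Idx, while the backward direction reuses the word embeddings with lm_b_word2Idx.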
if params['lm']:
    model.setLMMappings(lm_f_embeddings, lm_f_word2Idx)
    model.setLMMappings(embeddings, lm_b_word2Idx, forward=False)
if params['prosody']:
    model.prosody = True
    # Optionally extend the general feature list with the prosody features (disabled here):
    # params['feats_to_include'].extend(params['prosody_feats'])