# Dataset definition: maps CoNLL-style column indices to field names for the tagger.
datasets = {
    'unidep_pos': {                          # name of the dataset
        'columns': {1: 'tokens', 3: 'POS'},  # column 1 holds tokens, column 3 holds POS tags
        'label': 'POS',                      # the column we want to predict
        'evaluate': True,                    # set True always for single-task setups
        'commentSymbol': None,               # input lines starting with this prefix are skipped
    }
}

# :: Path on your computer to the word embeddings. Embeddings by Komninos et al. will be downloaded automatically ::
embeddingsPath = 'komninos_english_embeddings.gz'

# :: Prepares the dataset to be used with the LSTM-network. Creates and stores cPickle files in the pkl/ folder ::
pickleFile = prepareDataset(embeddingsPath, datasets)

############################################################################################################
#
# 2.Network training
#
############################################################################################################

# :: Load the embeddings and the dataset ::
embeddings, mappings, data = loadDatasetPickle(pickleFile)

# Network hyperparameters: CRF output layer, one BiLSTM layer of 100 units,
# (input, recurrent) dropout of 0.25 each.
params = {
    'classifier': ['CRF'],
    'LSTM-Size': [100],
    'dropout': (0.25, 0.25),
}

print("***** Train the model with 1 Epoch and store to disk")
model = BiLSTM(params)
model.setMappings(mappings, embeddings)
# --- Logging setup: route every record to `logfile` via the root logger ---
loggingLevel = logging.INFO
logger = logging.getLogger()          # root logger
logger.setLevel(loggingLevel)

# NOTE(review): this handler is named `ch` (the usual name for a console
# handler) but is actually a logging.FileHandler writing to `logfile`.
# Name kept because `ch` may be referenced elsewhere in the file.
ch = logging.FileHandler(logfile)
ch.setLevel(loggingLevel)
formatter = logging.Formatter('%(message)s')
ch.setFormatter(formatter)
# NOTE(review): if this block runs once per experiment iteration (the
# `print('Training, experiment %d' % n)` below suggests a loop), handlers
# accumulate on the root logger and lines get duplicated — confirm and
# guard with `if not logger.handlers:` if so.
logger.addHandler(ch)

# --- Data preprocessing: build the cPickle cache for this dataset ---
datasetFiles = [
    (datasetName, dataColumns),
]
pickleFile = prepareDataset(embeddingsPath, datasetFiles,
                            prosody=params['prosody'],
                            prosody_feats=params['prosody_feats'])

# Load the embeddings and the dataset from the cache.
embeddings, word2Idx, datasets = loadDatasetPickle(pickleFile)
data = datasets[datasetName]

print('Training, experiment %d' % n)
model = BiLSTM(params)
model.setMappings(embeddings, data['mappings'])

if params['lm']:
    # Language-model embeddings: forward direction, then backward.
    model.setLMMappings(lm_f_embeddings, lm_f_word2Idx)
    model.setLMMappings(embeddings, lm_b_word2Idx, forward=False)

if params['prosody']:
    model.prosody = True
    # NOTE(review): removed a dead statement that was here — a bare
    # `params['feats_to_include']` expression whose
    # `.extend(params['prosody_feats'])` call had been commented out.
    # The bare lookup had no effect (it could only raise a KeyError).
    # If prosody features should be appended to the feature list,
    # re-enable the extend deliberately:
    #     params['feats_to_include'].extend(params['prosody_feats'])