def train_pos(args):
    ######################################################
    #
    # Data preprocessing
    #
    ######################################################
    datasets = {
        args.datasetName:  # name of the dataset
        {
            'columns': {
                0: 'tokens',
                1: 'POS',
                2: 'chunk_BIO'
            },  # CoNLL format of the input data: column 0 holds tokens, column 1 POS tags, column 2 BIO chunk tags
            'label': 'POS',  # which column we want to predict
            'evaluate': True,  # evaluate on this task? Always set to True for single-task setups
            'commentSymbol': None  # input lines starting with this string are skipped; None disables comment skipping
        }
    }
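
    # For reference, an illustrative CoNLL-style input matching the column
    # mapping above (token, POS tag, BIO chunk tag per line; sentences are
    # separated by blank lines; values invented):
    #
    #   The     DT    B-NP
    #   dog     NN    I-NP
    #   barks   VBZ   B-VP
    #   .       .     O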

    # :: Path on your computer to the word embeddings. Embeddings by Komninos et al. will be downloaded automatically ::
    embeddingsPath = args.embeddings  # e.g. 'komninos_english_embeddings.gz'

    # :: Prepares the dataset to be used with the LSTM-network. Creates and stores cPickle files in the pkl/ folder ::
    pickleFile = perpareDataset(embeddingsPath, datasets)

    ######################################################
    #
    # The training of the network starts here
    #
    ######################################################

    #Load the embeddings and the dataset
    embeddings, mappings, data = loadDatasetPickle(pickleFile)

    # Some network hyperparameters
    params = {
        'classifier': ['CRF'],
        'LSTM-Size': [100],
        'dropout': (0.25, 0.25)
    }
    model = BiLSTM(params)
    model.setMappings(mappings, embeddings)
    model.setDataset(datasets, data)

    model.modelSavePath = args.model_save + '/[ModelName]_[Epoch].h5'
    model.fit(epochs=25)

    fpath = args.model_save + '/' + args.datasetName + '_1.h5'
    save_dir, model_init = os.path.split(fpath)
    print(save_dir)
    print(model_init)
    remove_except_last_model(save_dir, model_init)
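
remove_except_last_model is called above but not defined in this snippet; a minimal sketch of what it plausibly does (an assumption, not the project's actual helper):

import os

def remove_except_last_model(save_dir, model_init):
    # Hypothetical implementation: delete every .h5 checkpoint in save_dir
    # except the one named model_init, keeping only the final model on disk.
    for fname in os.listdir(save_dir):
        if fname.endswith('.h5') and fname != model_init:
            os.remove(os.path.join(save_dir, fname))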
Example 2
def run_experiment(dataset_id, dataset_dict, task, embeddings, mappings, data):
    # set network hyperparameters and mappings/datasets
    model = BiLSTM(network_params)
    model.setMappings(mappings, embeddings)
    model.setDataset(dataset_dict, data)

    # path to store the trained model and model results
    experiment_name = f'{dataset_id}_{task.lower()}'
    model.modelSavePath = models_dir / f'{experiment_name}.h5'
    model.storeResults(results_dir / f'{experiment_name}.csv')

    # build and train the model
    model.buildModel()
    model.fit(epochs=500)  # high epoch cap: training is meant to end via early stopping, not the epoch count (see sketch below)
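
The snippet relies on early stopping rather than the epoch cap. A hedged sketch, assuming the version of this library used here honors an 'earlyStopping' entry (patience in epochs) in the params dict:

network_params = {
    'classifier': ['CRF'],
    'LSTM-Size': [100],
    'dropout': (0.25, 0.25),
    'earlyStopping': 5,  # assumed option: stop after 5 epochs without dev-score improvement
}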
Example 3
def run_experiment(datasets_dict, lang, task, embeddings, mappings, data):
    # set network hyperparameters and mappings/datasets
    model = BiLSTM(network_params)
    model.setMappings(mappings, embeddings)
    model.setDataset(datasets_dict, data)

    # define the experiment name
    lang_prefix = f'{lang.lower()}_' if lang is not None else ''
    task_suffix = f'_{task.lower()}' if task is not None else ''
    experiment_name = lang_prefix + 'datasets' + task_suffix

    # path to store the trained model and model results
    model.modelSavePath = models_dir / f'{experiment_name}.h5'
    model.storeResults(results_dir / f'{experiment_name}.csv')

    # build and train the model
    model.buildModel()
    model.fit(epochs=500)  # high epoch cap: training is meant to end via early stopping, not the epoch count
Example 4
# TODO: replace customClassifier with main task + auxiliary tasks
custom_classifier = {}
custom_classifier[target_task] = [('LSTM', 100), 'CRF']
for task in aux_task:
    custom_classifier[task] = ['CRF']

params = {
    'classifier': ['CRF'],
    'LSTM-Size': [100],
    'dropout': (0.25, 0.25),
    'charEmbeddings': 'CNN',
    'customClassifier': custom_classifier
}

model = BiLSTM(params)

model.setMappings(mappings, embeddings)
model.setDataset(datasets, data)
model.storeResults("/".join(
    [args.root_dir_result, args.directory_name,
     "performance.out"]))  # Path to store performance scores for dev / test
model.predictionSavePath = "/".join([
    args.root_dir_result, args.directory_name, "predictions",
    "[ModelName]_[Data].conll"
])  # Path to store predictions
model.modelSavePath = "/".join(
    [args.root_dir_result, args.directory_name,
     "models/[ModelName].h5"])  # Path to store models
model.fit(epochs=args.nb_epoch)
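
To make the customClassifier structure above concrete, here is what the loop at the top of this snippet produces for hypothetical task names (illustrative only, not part of the original script):

# illustrative only: hypothetical task names
target_task = 'POS'
aux_task = ['chunk_BIO']
custom_classifier = {target_task: [('LSTM', 100), 'CRF']}
for task in aux_task:
    custom_classifier[task] = ['CRF']
assert custom_classifier == {
    'POS': [('LSTM', 100), 'CRF'],  # main task: task-specific LSTM layer, then a CRF
    'chunk_BIO': ['CRF'],           # auxiliary task: CRF directly on the shared layers
}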
Example 5
datasetFiles = [
    (datasetName, dataColumns),
]

# :: Prepares the dataset to be used with the LSTM-network. Creates and stores cPickle files in the pkl/ folder ::
pickleFile = perpareDataset(embeddingsPath, datasetFiles)

######################################################
#
# The training of the network starts here
#
######################################################

#Load the embeddings and the dataset
embeddings, word2Idx, datasets = loadDatasetPickle(pickleFile)
data = datasets[datasetName]

print("Dataset:", datasetName)
print(data['mappings'].keys())
print("Label key: ", labelKey)
print("Train Sentences:", len(data['trainMatrix']))
print("Dev Sentences:", len(data['devMatrix']))
print("Test Sentences:", len(data['testMatrix']))

model = BiLSTM(params)
model.setMappings(embeddings, data['mappings'])
model.setTrainDataset(data, labelKey)
model.verboseBuild = True
model.modelSavePath = "models/%s/%s/[DevScore]_[TestScore]_[Epoch].h5" % (
    datasetName, labelKey)  #Enable this line to save the model to the disk
model.evaluate(50)  # in this older version of the library, evaluate() trains and evaluates for the given number of epochs
Example 6
# :: Path on your computer to the word embeddings. Embeddings by Reimers et al. will be downloaded automatically ::
embeddingsPath = 'final.txt'

# :: Prepares the dataset to be used with the LSTM-network. Creates and stores cPickle files in the pkl/ folder ::
pickleFile = perpareDataset(embeddingsPath, datasets)

######################################################
#
# The training of the network starts here
#
######################################################

#Load the embeddings and the dataset
embeddings, mappings, data = loadDatasetPickle(pickleFile)

# Some network hyperparameters
params = {
    'classifier': ['CRF'],
    'LSTM-Size': [100, 100],
    'dropout': (0.25, 0.25),
    'charEmbeddings': 'LSTM',
    'maxCharLength': 30
}

model = BiLSTM(params)
model.setMappings(mappings, embeddings)
model.setDataset(datasets, data)
model.modelSavePath = "models/[ModelName]_[DevScore]_[TestScore]_[Epoch].h5"
model.fit(epochs=25)
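
The bracketed tokens in modelSavePath are templates the library fills in when it writes a checkpoint. A small illustration of the naming convention (epoch and scores invented):

# illustrative: how the template maps to a concrete filename
template = "models/[ModelName]_[DevScore]_[TestScore]_[Epoch].h5"
filled = (template.replace("[ModelName]", "POS")
                  .replace("[DevScore]", "0.9312")
                  .replace("[TestScore]", "0.9288")
                  .replace("[Epoch]", "7"))
print(filled)  # models/POS_0.9312_0.9288_7.h5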
Example 7
# :: Prepares the dataset to be used with the LSTM-network. Creates and stores cPickle files in the pkl/ folder ::
pickleFile = prepareDataset(embeddingsPath, datasets)


############################################################################################################
#
# 2. Network training
#
############################################################################################################
# :: Load the embeddings and the dataset ::
embeddings, mappings, data = loadDatasetPickle(pickleFile)
params = {'classifier': ['CRF'], 'LSTM-Size': [100], 'dropout': (0.25, 0.25)}

print("***** Train the model with 1 Epoch and store to disk")
model = BiLSTM(params)
model.setMappings(mappings, embeddings)
model.setDataset(datasets, data)
model.modelSavePath = "models/my_model_[Epoch].h5"
model.fit(epochs=1)

print("\n\n\n\n------------------------")
print("***** Load the model and continue training")
newModel = BiLSTM.loadModel('models/my_model_1.h5')
newModel.setDataset(datasets, data)
newModel.modelSavePath = "models/my_reloaded_model_[Epoch].h5"
newModel.fit(epochs=1)
print("***** retrained model store at "+newModel.modelSavePath)
Example 8
# print('=========================')

# Some network hyperparameters
params = {
    'classifier': ['CRF'],
    'LSTM-Size': [100, 100],
    'dropout': (0.25, 0.25),
    'charEmbeddings': 'CNN',
    'maxCharLength': 50
}

print('#######################' + args.mod + ' #######################')
model = BiLSTM(params)
model.setMappings(mappings, embeddings)
model.setDataset(datasets, data)
model.modelSavePath = "/datastore/liu121/nosqldb2/emnlp_ukplab/models/[ModelName]_bbn.h5"
eval_result = model.fit(epochs=100)


def report(eval_result, filePath):
    with open(filePath, 'w+') as f:
        for key in eval_result:
            info = eval_result[key]
            f.write('====================' + key + '====================\n')
            # metrics may be numeric, so convert to str before writing
            f.write(str(info['epoch']) + '\n')
            f.write(str(info['per_f1']) + '\n')
            f.write(str(info['per_pre']) + '\n')
            f.write(str(info['per_recall']) + '\n')
            f.write(str(info['micro_f1']) + '\n')
            f.write(str(info['micro_pre']) + '\n')
            f.write(str(info['micro_recall']) + '\n')
elif model_name == "blstm-crf":
    params = {
        'classifier': ['CRF'],
        'LSTM-Size': [100, 100],
        'dropout': (0.25, 0.25),
        'charEmbeddings': 'LSTM',
        'maxCharLength': 50
    }

elif model_name == "cnn-crf":
    params = {
        'classifier': ['CRF'],
        'LSTM-Size': [100, 100],
        'dropout': (0.25, 0.25),
        'charEmbeddings': 'CNN',
        'maxCharLength': 50
    }

else:
    print("available model names are (1) crf, (2) blstm-crf, (3) cnn-crf")
    exit()

model = BiLSTM(params)
model.setMappings(mappings, embeddings)
model.setDataset(datasets, data)

#model.storeResults('ler-results.csv') #Path to store performance scores for dev / test

# save the trained model as a Keras .h5 checkpoint
model.modelSavePath = 'models/blstm-' + model_name + '.h5'
model.fit(epochs=100)
Example 10
######################################################

#Load the embeddings and the dataset
embeddings, mappings, data = loadDatasetPickle(pickleFile)

# Some network hyperparameters
params = {
    'classifier': ['CRF'],
    'LSTM-Size': [100],
    'dropout': (0.25, 0.25),
    'charEmbeddings': 'CNN'
}

model = BiLSTM(params)
model.setMappings(mappings, embeddings)
model.setDataset(datasets, data,
                 mainModelName='MIT_Restaurant')  # multi-task only: marks the main task (see the sketch after this snippet)

model.storeResults("/".join(
    ["results", args.directory_name,
     "performance.out"]))  #Path to store performance scores for dev / test
model.predictionSavePath = "/".join([
    "results", args.directory_name, "predictions",
    "[ModelName]_[Epoch]_[Data].conll"
])  #Path to store predictions
model.modelSavePath = "/".join([
    "results", args.directory_name,
    "models/model_[DevScore]_[TestScore]_[Epoch].h5"
])  #Path to store models
model.fit(epochs=50)
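
mainModelName only has an effect when datasets contains several corpora trained jointly; a hedged sketch of such a multi-task setup (dataset names and column layouts invented for illustration):

datasets = {
    'MIT_Restaurant': {
        'columns': {0: 'tokens', 1: 'NER_BIO'},
        'label': 'NER_BIO', 'evaluate': True, 'commentSymbol': None
    },
    'conll2000_chunking': {
        'columns': {0: 'tokens', 2: 'chunk_BIO'},
        'label': 'chunk_BIO', 'evaluate': True, 'commentSymbol': None
    },
}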
        model = BiLSTM(params)
        if args.batch_range is not None:
            model.setBatchRangeLength(args.batch_range)
        model.setMappings(mappings, embeddings)
        model.setDataset(datasets, data,
                         mainModelName=args.target_task)  # multi-task only: marks the main task

        model.storeResults("/".join([
            args.root_dir_result, args.directory_name, "performance.out"
        ]))  #Path to store performance scores for dev / test
        model.predictionSavePath = "/".join([
            args.root_dir_result, args.directory_name, "predictions",
            "[ModelName]_[Data].conll"
        ])  #Path to store predictions
        model.modelSavePath = "/".join([
            args.root_dir_result, args.directory_name, "models/[ModelName].h5"
        ])  #Path to store models

        model.fit(epochs=args.nb_epoch)
        model.saveParams("/".join(
            [args.root_dir_result, args.directory_name, "param"]))
    else:

        for current_run in range(1, args.nb_run + 1):
            model = BiLSTM(params)
            if args.batch_range is not None:
                model.setBatchRangeLength(args.batch_range)
            model.setMappings(mappings, embeddings)
            model.setDataset(
                datasets, data,
                mainModelName=args.target_task)  # multi-task only: marks the main task