Example #1
def main(config):
    # Initialise the model type and arguments
    model, args = init_trainer(config)
    logger.info(args)
    # Read training data
    dataset = Dataset(args['data_path'], args)
    corpus = dataset.get_corpus()
    # Train model
    model.train(corpus)
    # Save model
    if args['save_model']:
        # Save run
        logger.info('Saving Model')
        model.save(args['model_dir'])
        dataset.save(args['data_path'])
    # Perform validation
    valid = Validation()
    x = model.get_vectors()
    df = dataset.get_df()
    # valid.plot_pca(x, df['variety_region'])
    results = valid.cluster_similarities(x, df)
    logger.info(results)
    if args['save_validation']:
        logger.info('Saving Validation')
        config['output'] = results['similarity']
        with open(args['validation_dir'] + '{}.pkl'.format(datetime.now()),
                  "wb") as pickle_file:
            pickle.dump(config, pickle_file)
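The snippet above never shows how config is built. As a rough, hypothetical sketch (the key names are the ones read inside main(); the paths are placeholders, and the real project may load them from a config file or command-line parser instead), it could be invoked like this:

# Hypothetical invocation of main(); keys mirror those used in the function
# body above, values are placeholder paths for illustration only.
config = {
    'data_path': 'data/corpus.csv',        # placeholder input path
    'model_dir': 'models/',                # placeholder model output directory
    'validation_dir': 'validation/',       # placeholder validation output directory
    'save_model': True,
    'save_validation': True,
}

if __name__ == '__main__':
    main(config)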
Example #2
def main(args):
    # Read training data
    dataset = Dataset(args['data_path'])
    corpus = dataset.get_corpus()

    # Train TF-IDF model
    model = TfidfTrainer()
    model.train(corpus)

    # Save run
    model.save(args['model_dir'])
    dataset.save(args['model_dir'])

    # Validate
    valid = Validation()
    x = model.get_vectors()
    df = dataset.get_df()
    valid.plot_pca(x, df['variety_region'])
    print(valid.cluster_similarities(x, df))
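Example #2 relies on a TfidfTrainer class with train, get_vectors, and save methods, but its implementation is not shown here. As a minimal sketch under that assumption (built on scikit-learn's TfidfVectorizer, not the project's actual class), it might look like this:

# Hypothetical minimal TfidfTrainer matching the interface used above.
# This is an illustrative sketch, not the project's real implementation.
import os
import pickle

from sklearn.feature_extraction.text import TfidfVectorizer


class TfidfTrainer:
    def __init__(self):
        self.vectorizer = TfidfVectorizer()
        self.vectors = None

    def train(self, corpus):
        # corpus is assumed to be an iterable of raw text documents
        self.vectors = self.vectorizer.fit_transform(corpus)

    def get_vectors(self):
        # Return a dense array so it can be fed to PCA / similarity routines
        return self.vectors.toarray()

    def save(self, model_dir):
        # Persist the fitted vectorizer to the given directory
        os.makedirs(model_dir, exist_ok=True)
        with open(os.path.join(model_dir, 'tfidf.pkl'), 'wb') as f:
            pickle.dump(self.vectorizer, f)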