Code example #1
def run(task_name, data_dir, pipeline_name, print_predictions, error_analysis,
        remove_stopwords):
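    """Load the task data, train the requested pipeline, and evaluate it.

    Optionally prints per-instance predictions, runs an error analysis,
    and switches the pipeline to its stop-word-filtered variant.
    """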
    logger.info('>> Running {} experiment'.format(task_name))
    tsk = task(task_name)
    logger.info('>> Loading data...')
    tsk.load(data_dir)
    logger.info('>> retrieving train/test instances...')
    train_X, train_y, test_X, test_y = utils.get_instances(
        tsk, split_train_dev=False)

    logger.info('>> Descriptive statistics dataset:')
    utils.descriptive_statistics(train_X, train_y, test_X, test_y)
    test_X_ref = test_X

    if remove_stopwords:
        if pipeline_name.startswith('cnn'):
            pipeline_name = pipeline_name.split('_')[0]
        pipeline_name = pipeline_name + '_stopwords'

    if pipeline_name.startswith('cnn'):
        pipe = cnn(pipeline_name)
        train_X, train_y, test_X, test_y = pipe.encode(train_X, train_y,
                                                       test_X, test_y)
        logger.info('>> testing CNN...')

    else:
        pipe = pipeline(pipeline_name)

    logger.info('>> training pipeline ' + pipeline_name)

    pipe.fit(train_X, train_y)
    if pipeline_name == 'naive_bayes_counts_lex':
        logger.info("   -- Found {} tokens in lexicon".format(
            pipe.tokens_from_lexicon))

    logger.info('>> testing...')
    sys_y = pipe.predict(test_X)
    # logger.info(utils.print_prediction(test_X, test_y, sys_y))

    if print_predictions:
        logger.info('>> predictions')
        utils.print_all_predictions(test_X_ref, test_y, sys_y, logger)

    if error_analysis:
        # Used for error evaluation
        logger.info(utils.print_error_analysis(test_X, test_y, sys_y))
        # logger.info(utils.print_confusion_matrix(test_y, sys_y)) # Prints the confusion matrix

    utils.eval(test_y, sys_y, pipeline_name, data_dir)
    if pipeline_name.startswith('naive_bayes'):
        utils.important_features_per_class(pipe.named_steps.frm,
                                           pipe.named_steps.clf,
                                           n=10)
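
The run() entry point above is typically driven from the command line. The sketch below shows one way such a wrapper might look; the module name experiments and the flag names are assumptions for illustration, not part of the original project.

# Hypothetical CLI wrapper for the run() shown above (the import path and
# flag names are assumptions, not taken from the original code).
import argparse
from experiments import run  # assumed import path

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run a text-classification experiment')
    parser.add_argument('task_name')
    parser.add_argument('data_dir')
    parser.add_argument('pipeline_name')
    parser.add_argument('--print-predictions', action='store_true')
    parser.add_argument('--error-analysis', action='store_true')
    parser.add_argument('--remove-stopwords', action='store_true')
    args = parser.parse_args()
    run(args.task_name, args.data_dir, args.pipeline_name,
        args.print_predictions, args.error_analysis, args.remove_stopwords)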
Code example #2
def run(task_name, data_dir, pipeline_name, print_predictions):
    logger.info('>> Running {} experiment'.format(task_name))
    tsk = task(task_name)
    logger.info('>> Loading data...')
    tsk.load(data_dir)
    logger.info('>> retrieving train/test instances...')
    train_X, train_y, test_X, test_y = utils.get_instances(
        tsk, split_train_dev=False)
    test_X_ref = test_X

    if pipeline_name.startswith('cnn'):
        pipe = cnn(pipeline_name)
        train_X, train_y, test_X, test_y = pipe.encode(train_X, train_y,
                                                       test_X, test_y)
        logger.info('>> testing...')
    else:
        pipe = pipeline(pipeline_name)

    logger.info('>> training pipeline ' + pipeline_name)
    pipe.fit(train_X, train_y)
    if pipeline_name == 'naive_bayes_counts_lex':
        logger.info("   -- Found {} tokens in lexicon".format(
            pipe.tokens_from_lexicon))

    logger.info('>> testing...')
    sys_y = pipe.predict(test_X)

    logger.info('>> evaluation...')
    logger.info(utils.eval(test_y, sys_y))

    if print_predictions:
        logger.info('>> predictions')
        utils.print_all_predictions(test_X_ref, test_y, sys_y, logger)
Code example #3
def run(task_name, data_dir, pipeline_name):
    logger.info('>> Running {} experiment'.format(task_name))
    tsk = task(task_name)
    logger.info('>> Loading data...')
    tsk.load(data_dir)
    logger.info('>> retrieving train/test instances...')
    train_X, train_y, test_X, test_y = utils.get_instances(
        tsk, split_train_dev=False)

    if pipeline_name.startswith('cnn'):
        pipe = cnn(pipeline_name)
        train_X, train_y, test_X, test_y = pipe.encode(train_X, train_y,
                                                       test_X, test_y)
        logger.info('>> testing...')
    else:
        pipe = pipeline(pipeline_name)

    logger.info('>> training pipeline ' + pipeline_name)
    pipe.fit(train_X, train_y)

    logger.info('>> testing...')
    sys_y = pipe.predict(test_X)
    logger.info(utils.print_prediction(test_X, test_y, sys_y))
    logger.info('>> evaluation...')
    logger.info(utils.eval(test_y, sys_y))
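
Every example above delegates scoring to utils.eval, whose implementation is not shown here. A minimal stand-in, assuming it simply reports standard classification metrics via scikit-learn (an assumption, not the project's actual code), might look like this:

# Minimal stand-in for utils.eval (assumed behaviour: return standard
# classification metrics as a string; the real implementation is not shown).
from sklearn.metrics import accuracy_score, classification_report

def eval(test_y, sys_y):
    report = classification_report(test_y, sys_y, digits=3)
    return 'accuracy: {:.3f}\n{}'.format(accuracy_score(test_y, sys_y), report)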
Code example #4
def evaluate(model, test_X, test_y):
    # Keras removed Sequential.predict_classes(); for a single sigmoid output,
    # threshold the predicted probabilities at 0.5 to get binary class labels.
    sys_y = (model.predict(test_X) > 0.5).astype('int32')
    print(utils.eval(test_y, sys_y))
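
The thresholding above covers a single sigmoid output. For a multi-class softmax output, the usual replacement for predict_classes is an argmax over the predicted probabilities. The helper below is an illustrative sketch, not part of the original project:

# Illustrative helper covering both output shapes (not from the original code).
import numpy as np

def predict_labels(model, test_X, batch_size=128):
    probs = model.predict(test_X, batch_size=batch_size)
    if probs.shape[-1] == 1:                       # binary, sigmoid output
        return (probs > 0.5).astype('int32').ravel()
    return np.argmax(probs, axis=-1)               # multi-class, softmax output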
Code example #5
def evaluate(model, test_X, test_y):
    # Relies on Sequential.predict_classes(), which is deprecated and removed
    # in newer Keras releases; see code example #4 for the replacement.
    sys_y = model.predict_classes(test_X, batch_size=128)
    print(utils.eval(test_y, sys_y))