def main():
    """Train on the FILIMDB train part, classify every part, save predictions.

    Loads the dataset with ``load_dataset_fast``, fits the classifier with
    ``train`` on the ``'train'`` part, then runs ``classify`` over each loaded
    part. Parts that carry labels are passed to ``score``; parts without
    labels are only reported. All (id, prediction) pairs are written with
    ``save_preds`` and the saved file is re-checked with ``score_preds``.
    """
    dataset = load_dataset_fast('FILIMDB')
    _, train_texts, train_labels = dataset['train']

    print('\nTraining classifier on %d examples from train set ...' %
          len(train_texts))
    started = time()
    params = train(train_texts, train_labels)
    print('Classifier trained in %.2fs' % (time() - started))

    collected = []
    for part, split in dataset.items():
        ids, texts, labels = split
        print('\nClassifying %s set with %d examples ...' % (part, len(texts)))
        started = time()
        preds = classify(texts, params)
        print('%s set classified in %.2fs' % (part, time() - started))
        # Keep every prediction paired with its example id for saving later.
        collected += zip(ids, preds)

        if labels is not None:
            score(preds, labels)
        else:
            print('no labels for %s set' % part)

    save_preds(collected, preds_fname=PREDS_FNAME)
    print('\nChecking saved predictions ...')
    score_preds(preds_fname=PREDS_FNAME)
Example #2
0
def main(transductive: bool = False):
    """Optionally pretrain, then train, classify and save FILIMDB predictions.

    When ``classifier.pretrain`` cannot be imported, the classifier is trained
    directly on the ``'train'`` part of the ``SCORED_PARTS`` dataset.
    Otherwise the ``'train_unlabeled'`` part is loaded as well, ``pretrain``
    is called on a list of text collections, and its result is passed to
    ``train`` as initial parameters. The ``'train_unlabeled'`` part is removed
    before classification so that only the remaining parts are classified.

    Args:
        transductive: If true, pretraining receives the texts of every loaded
            part; otherwise only the train and train_unlabeled texts.
    """
    try:
        from classifier import pretrain
    except ImportError:
        # No pretraining hook is available: plain supervised training.
        dataset = load_dataset_fast('FILIMDB', parts=SCORED_PARTS)
        _, train_texts, train_labels = dataset['train']
        print('\nTraining classifier on %d examples from train set ...' %
              len(train_texts))
        started = time()
        params = train(train_texts, train_labels)
        print('Classifier trained in %.2fs' % (time() - started))
    else:
        dataset = load_dataset_fast(
            'FILIMDB', parts=SCORED_PARTS + ('train_unlabeled', ))
        _, train_texts, train_labels = dataset['train']
        _, unlabeled_texts, _ = dataset['train_unlabeled']

        # The pretraining timer deliberately includes building the corpora.
        started = time()
        corpora = ([texts for _, texts, _ in dataset.values()]
                   if transductive
                   else [train_texts, unlabeled_texts])

        print('\nPretraining classifier on %d examples' %
              sum(map(len, corpora)))
        params = pretrain(corpora)
        print('Classifier pretrained in %.2fs' % (time() - started))

        print('\nTraining classifier on %d examples from train set ...' %
              len(train_texts))
        started = time()
        params = train(train_texts, train_labels, params)
        print('Classifier trained in %.2fs' % (time() - started))
        # The unlabeled part must not be classified or saved below.
        del dataset["train_unlabeled"]

    collected = []
    for part, split in dataset.items():
        ids, texts, labels = split
        print('\nClassifying %s set with %d examples ...' % (part, len(texts)))
        started = time()
        preds = classify(texts, params)
        print('%s set classified in %.2fs' % (part, time() - started))
        collected += zip(ids, preds)

        if labels is not None:
            score(preds, labels)
        else:
            print('no labels for %s set' % part)

    save_preds(collected, preds_fname=PREDS_FNAME)
    print('\nChecking saved predictions ...')
    score_preds(preds_fname=PREDS_FNAME, data_dir='FILIMDB')
Example #3
0
def main():
    """Optionally pretrain, then train, classify and save FILIMDB predictions.

    When ``classifier.pretrain`` cannot be imported, the classifier is trained
    directly on the ``'train'`` part of the default dataset. Otherwise the
    train, dev, test and train_unlabeled parts are loaded, ``pretrain`` is
    called on the train texts concatenated with the unlabeled texts, and its
    result is passed to ``train`` as initial parameters. The
    ``'train_unlabeled'`` part is removed before classification.
    """
    try:
        from classifier import pretrain
    except ImportError:
        # No pretraining hook is available: plain supervised training.
        dataset = load_dataset_fast('FILIMDB')
        _, train_texts, train_labels = dataset['train']
        print('\nTraining classifier on %d examples from train set ...' %
              len(train_texts))
        started = time()
        params = train(train_texts, train_labels)
        print('Classifier trained in %.2fs' % (time() - started))
    else:
        wanted_parts = ('train', 'dev', 'test', 'train_unlabeled')
        dataset = load_dataset_fast('FILIMDB', parts=wanted_parts)
        _, train_texts, train_labels = dataset['train']
        _, unlabeled_texts, _ = dataset['train_unlabeled']
        pretrain_texts = train_texts + unlabeled_texts

        print('\nPretraining classifier on %d examples' % len(pretrain_texts))
        started = time()
        params = pretrain(pretrain_texts)
        print('Classifier pretrained in %.2fs' % (time() - started))

        print('\nTraining classifier on %d examples from train set ...' %
              len(train_texts))
        started = time()
        params = train(train_texts, train_labels, params)
        print('Classifier trained in %.2fs' % (time() - started))
        # The unlabeled part must not be classified or saved below.
        del dataset["train_unlabeled"]

    collected = []
    for part, split in dataset.items():
        ids, texts, labels = split
        print('\nClassifying %s set with %d examples ...' % (part, len(texts)))
        started = time()
        preds = classify(texts, params)
        print('%s set classified in %.2fs' % (part, time() - started))
        collected += zip(ids, preds)

        if labels is not None:
            score(preds, labels)
        else:
            print('no labels for %s set' % part)

    save_preds(collected, preds_fname=PREDS_FNAME)
    print('\nChecking saved predictions ...')
    score_preds(preds_fname=PREDS_FNAME, data_dir='FILIMDB')
Example #4
0
def save_preds(allpreds):
    """Write predictions to ``PREDS_FNAME`` and re-check the saved file.

    Delegates to ``score.save_preds`` for writing and to
    ``score.score_preds`` (with ``data_dir='FILIMDB'``) for the check.

    Args:
        allpreds: Predictions to save, forwarded unchanged to
            ``score.save_preds``.
    """
    target = PREDS_FNAME
    score.save_preds(allpreds, preds_fname=target)
    print('\nChecking saved predictions ...')
    score.score_preds(preds_fname=target, data_dir='FILIMDB')