Example #1
0
def main(params=None):
    """Train an RNN document classifier with early stopping on validation F1.

    For each dev fold this loads the (word- or character-level) data, pads
    the sequences and expands them with context windows, builds an RNN
    (basic/GRU/LSTM, optionally bidirectional), and trains it in minibatches,
    decaying the learning rate when validation F1 stops improving.  The best
    model per fold may be saved along with its test-set predictions.

    :param params: experiment configuration dict; when None, a default
        character-level configuration for the DRLD dataset is used.
    :return: hyperopt-style result dict: 'loss' is the negative median
        validation F1 across folds, plus per-fold F1 lists and a 'status'
        flag (STATUS_OK).
    """

    if params is None:
        # default configuration: character-level embeddings on the DRLD data
        params = {
            'dataset': 'DRLD',
            'exp_name': 'char_test',
            'test_fold': 0,
            'n_dev_folds': 1,
            'min_doc_thresh': 1,
            'initialize_word_vectors': True,
            'vectors': 'chars_word2vec_25',  # default_word2vec_300, anes_word2vec_300, chars_word2vec_25, eye_1 ...
            'init_scale': 0.2,
            'add_OOV_dim': True,
            'win': 1,                   # size of context window
            'add_DRLD': True,
            'rnn_type': 'basic',        # basic, GRU, or LSTM
            'n_hidden': 50,             # size of hidden units
            'pooling_method': 'max',    # max, mean, or attention1/2
            'bidirectional': True,
            'bi_combine': 'concat',        # concat, max, or mean
            'train_embeddings': True,
            'lr': 0.1,                  # learning rate
            'lr_emb_fac': 1,            # factor to modify learning rate for embeddings
            'decay_delay': 10,           # number of epochs with no improvement before decreasing learning rate
            'decay_factor': 0.5,        # factor by which to multiply learning rate in case of delay
            'n_epochs': 300,
            'add_OOV_noise': True,
            'OOV_noise_prob': 0.01,
            'minibatch_size': 16,
            'classify_minibatch_size': 64,
            'ensemble': False,
            'save_model': True,
            'seed': 42,
            'verbose': 1,
            'reuse': False,
            'orig_T': 0.04,
            'tau': 0.01,
            'clip_gradients': False
        }

    #params = fh.read_json('/Users/dcard/Projects/CMU/ARK/guac/experiments/best_mod.json')
    #params['exp_name'] += '_best'
    #params['n_hidden'] = int(params['n_hidden'])

    # echo the full configuration, sorted by key
    keys = params.keys()
    keys.sort()
    for key in keys:
        print key, ':', params[key]

    # seed the random number generators
    np.random.seed(params['seed'])
    random.seed(params['seed'])

    # e.g. 'chars_word2vec_25' -> vector type 'chars', embedding dim 25
    vector_type = params['vectors'].split('_')[0]
    params['word2vec_dim'] = int(params['vectors'].split('_')[-1])


    # optionally mask validation scores through a reusable holdout
    reuser = None
    if params['reuse']:
        reuser = reusable_holdout.ReuseableHoldout(T=params['orig_T'], tau=params['tau'])

    # expand a dataset name into its component subsets
    if params['dataset'] == 'DRLD':
        datasets = ['Democrat-Likes', 'Democrat-Dislikes', 'Republican-Likes', 'Republican-Dislikes']
    elif params['dataset'] == 'MIP':
        datasets = ['MIP-Personal-1', 'MIP-Personal-2', 'MIP-Political-1', 'MIP-Political-2']
    elif params['dataset'] == 'MOLD':
        datasets = ['McCain-Likes', 'McCain-Dislikes', 'Obama-Likes', 'Obama-Dislikes']
    elif params['dataset'] == 'Primary':
        datasets = ['Obama-Primary', 'Clinton-Primary']
    elif params['dataset'] == 'General':
        datasets = ['Obama-General', 'McCain-General']
    else:
        datasets = [params['dataset']]

    # re-seed so fold processing starts from a known RNG state
    np.random.seed(params['seed'])
    random.seed(params['seed'])

    # per-fold records of the best scores seen
    best_valid_f1s = []
    best_true_valid_f1s = []
    best_test_f1s = []
    best_train_f1s = []

    test_prediction_arrays = []

    # write the configuration to the experiment directory
    # (fh and defines are project helpers for file handling / paths)
    output_dir = fh.makedirs(defines.exp_dir, 'rnn', params['exp_name'])
    output_filename = fh.make_filename(output_dir, 'params', 'txt')
    fh.write_to_json(params, output_filename)

    for dev_fold in range(params['n_dev_folds']):
        print "dev fold =", dev_fold

        output_dir = fh.makedirs(defines.exp_dir, 'rnn', params['exp_name'], 'fold' + str(dev_fold))

        # load either character-level or word-level data for this fold
        if vector_type == 'chars':
            all_data, words2idx, items, all_labels = common.load_char_data(datasets, params['test_fold'], dev_fold)
        else:
            all_data, words2idx, items, all_labels = common.load_data(datasets, params['test_fold'], dev_fold,
                                                                      params['min_doc_thresh'])
        train_xy, valid_xy, test_xy = all_data
        train_lex, train_y = train_xy
        valid_lex, valid_y = valid_xy
        test_lex, test_y = test_xy


        #if params['minibatch_size'] > 1 or params['classify_minibatch_size'] > 1:
        # pad all sequences to a common length and get the matching masks
        print "padding input with zeros"
        all_data, all_masks = common.prepare_data(train_lex, valid_lex, test_lex)
        train_lex, valid_lex, test_lex = all_data
        train_masks, valid_masks, test_masks = all_masks
        #else:
        #    train_masks = [np.ones(len(x)).astype('int32') for x in train_lex]
        #    valid_masks = [np.ones(len(x)).astype('int32') for x in valid_lex]
        #    test_masks = [np.ones(len(x)).astype('int32') for x in test_lex]

        print "expanding x with context win dows"
        # Rejigger to convert x to contex win in advance
        train_x_win = expand_x_with_context_win(train_lex, params['win'])
        valid_x_win = expand_x_with_context_win(valid_lex, params['win'])
        test_x_win = expand_x_with_context_win(test_lex, params['win'])
        order = range(len(train_lex))   # training index order; shuffled each epoch
        print "done"

        train_items, dev_items, test_items = items
        vocsize = len(words2idx.keys())
        # invert the vocabulary: index -> word
        idx2words = dict((k, v) for v, k in words2idx.iteritems())
        best_test_predictions = None

        n_sentences = len(train_lex)
        print "vocsize = ", vocsize, 'n_train', n_sentences

        codes = all_labels.columns
        n_items, n_codes = all_labels.shape

        # get the words in the sentences for the test and validation sets
        words_valid = [map(lambda x: idx2words[x], w) for w in valid_lex]
        groundtruth_test = test_y[:]
        words_test = [map(lambda x: idx2words[x], w) for w in test_lex]

        #if vector_type == 'eye':
        #    initial_embeddings = np.eye(vocsize)
        #    emb_dim = initial_embeddings.shape[1]
        if params['initialize_word_vectors']:
            initial_embeddings = common.load_embeddings(params, words2idx)
            emb_dim = initial_embeddings.shape[1]
        else:
            # no pre-trained vectors; the RNN initializes its own embeddings
            initial_embeddings = None
            emb_dim = params['word2vec_dim']
        print "embedding dim =", emb_dim


        # save the index -> word mapping alongside the model
        temp_output = fh.make_filename(output_dir, 'embedding_labels', 'json')
        fh.write_to_json(idx2words, temp_output)


        # extra input dimensions for the Likes/Democrat indicator features below
        extra_input_dims = 0
        if params['add_DRLD']:
            extra_input_dims = 2

        print "Building RNN"
        rnn = RNN(nh=params['n_hidden'],
                  nc=n_codes,
                  ne=vocsize,
                  de=emb_dim,
                  cs=params['win'],
                  extra_input_dims=extra_input_dims,
                  initial_embeddings=initial_embeddings,
                  init_scale=params['init_scale'],
                  rnn_type=params['rnn_type'],
                  train_embeddings=params['train_embeddings'],
                  pooling_method=params['pooling_method'],
                  bidirectional=params['bidirectional'],
                  bi_combine=params['bi_combine'],
                  clip_gradients=params['clip_gradients']
                  )

        # snapshot the embeddings before any training
        temp_filename = fh.make_filename(output_dir, 'initial_embeddings', 'npy')
        rnn.save_embeddings(temp_filename)

        # binary indicator features per item: response polarity and party,
        # derived from the item names
        train_likes = [1 if re.search('Likes', i) else 0 for i in train_items]
        dev_likes = [1 if re.search('Likes', i) else 0 for i in dev_items]
        test_likes = [1 if re.search('Likes', i) else 0 for i in test_items]

        train_dem = [1 if re.search('Democrat', i) else 0 for i in train_items]
        dev_dem = [1 if re.search('Democrat', i) else 0 for i in dev_items]
        test_dem = [1 if re.search('Democrat', i) else 0 for i in test_items]

        train_extra = [[train_likes[i], train_dem[i]] for i, t in enumerate(train_items)]
        dev_extra = [[dev_likes[i], dev_dem[i]] for i, t in enumerate(dev_items)]
        test_extra = [[test_likes[i], test_dem[i]] for i, t in enumerate(test_items)]



        ### LOAD
        #rnn.load(output_dir)

        # train with early stopping on validation set
        best_f1 = -np.inf
        params['clr'] = params['lr']    # 'clr' is the current (decayed) learning rate
        for e in xrange(params['n_epochs']):
            # shuffle
            #shuffle([train_lex, train_y, train_extra, train_masks], params['seed'])   # shuffle the input data
            shuffle([order, train_lex, train_y, train_extra, train_masks], params['seed'])   # shuffle the input data
            params['ce'] = e                # store the current epoch
            tic = timeit.default_timer()

            ms = params['minibatch_size']
            n_train = len(train_lex)
            nll = 0     # running negative log-likelihood over this epoch

            #for i, orig_x in enumerate(train_lex):
            for iteration, i in enumerate(range(0, n_train, ms)):
                #orig_x = train_lex[i]
                #n_words = len(orig_x)
                #if params['add_OOV_noise']:
                #    draws = np.random.rand(n_words)
                #    x = [OOV_index if draws[i] < params['OOV_noise_prob'] else orig_x[i] for i in range(n_words)]
                #else:
                #    x = orig_x
                #y = train_y[i]
                # NOTE(review): 'extra' appears unused below; the training call
                # passes minibatch_extra instead
                extra = train_extra[i]
                #mask = train_masks[i]

                # slice out the next minibatch (optionally injecting OOV noise)
                minibatch_x, minibatch_mask,\
                minibatch_extra, minibatch_y= select_minibatch(train_x_win, train_masks, train_extra, train_y,
                                                               params['win'], i, ms, order,
                                                               params['add_OOV_noise'], params['OOV_noise_prob'])

                #if i == 0:
                #    print '\n'.join([' '.join([idx2words[idx] for idx in minibatch_x[:, k, 0].tolist()]) for
                #           k in range(ms)])

                nll_i, a_sum = rnn.train(minibatch_x, minibatch_mask, minibatch_y, params['win'],
                                params['clr'],
                                params['lr_emb_fac'], extra_input_dims, minibatch_extra)
                nll += nll_i
                #rnn.train(x, mask, y, params['win'], params['clr'], params['lr_emb_fac'],
                #          extra_input_dims, extra)
                # progress line, overwritten in place via the trailing \r
                print '[learning] epoch %i >> %2.2f%%' % (
                    e, (i + 1) * 100. / float(n_sentences)),
                print 'completed in %.2f (sec), nll = %.2f, a_sum = %.1f <<\r' % (timeit.default_timer() - tic,
                                                                                  nll, np.max(a_sum)),
                sys.stdout.flush()

                # bail out if training diverged; if nothing was ever learned,
                # return a worst-case result to the hyperopt caller
                if np.isnan(nll) or np.isinf(nll):
                    if best_f1 > 0:
                        break
                    else:
                        return {'loss': 1.0,
                                'final_test_f1': 0,
                                'valid_f1s': 0,
                                'true_valid_f1s': 0,
                                'train_f1s': 0,
                                'test_f1s': 0,
                                'status': STATUS_OK
                                }

            # evaluation // back into the real world : idx -> words
            print ""

            #print "true y", train_y[-1]
            #y_pred = rnn.classify(np.array(train_x_win[-1]).reshape((1, len(train_x_win[-1]))),
            #                      train_masks[-1], params['win'], extra_input_dims, train_extra[-1])[0]
            #print "pred y", y_pred

            #if params['pooling_method'] == 'attention1' or params['pooling_method'] == 'attention2':
            #    if extra_input_dims == 0:
            #        r = np.random.randint(0, len(train_lex))
            #        print r, rnn.a_sum_check(np.asarray(contextwin(train_lex[r], params['win'])).astype('int32'))

            # classify all three splits with the current model
            predictions_train = predict(n_train, params['classify_minibatch_size'], train_x_win, train_masks,
                                         train_y, params['win'], extra_input_dims, train_extra, rnn, order)
            n_valid = len(valid_lex)
            n_test = len(test_lex)
            predictions_valid = predict(n_valid, params['classify_minibatch_size'], valid_x_win, valid_masks,
                                        valid_y, params['win'], extra_input_dims, dev_extra, rnn)
            predictions_test = predict(n_test, params['classify_minibatch_size'], test_x_win, test_masks,
                                        test_y, params['win'], extra_input_dims, test_extra, rnn)

            """
            predictions_train = [rnn.classify(x, train_masks[i], params['win'],
                                              extra_input_dims, train_extra[i])[0] for i, x in enumerate(train_lex)]
            predictions_valid = [rnn.classify(x, valid_masks[i], params['win'],
                                              extra_input_dims, dev_extra[i])[0] for i, x in enumerate(valid_lex)]
            predictions_test = [rnn.classify(x, test_masks[i], params['win'],
                                             extra_input_dims, test_extra[i])[0] for i, x in enumerate(test_lex)]
            """

            train_f1 = common.calc_mean_f1(predictions_train, train_y)
            test_f1 = common.calc_mean_f1(predictions_test, test_y)
            valid_f1 = common.calc_mean_f1(predictions_valid, valid_y)

            question_f1s = []
            question_pps = []

            print "train_f1 =", train_f1, "valid_f1 =", valid_f1, "test_f1 =", test_f1

            # keep a deep copy of the best model (by validation F1) so we can
            # revert to it on learning-rate decay and save it at the end
            if valid_f1 > best_f1:
                best_rnn = copy.deepcopy(rnn)
                best_f1 = valid_f1
                best_test_predictions = predictions_test

                if params['verbose']:
                    print('NEW BEST: epoch', e,
                          'valid f1', valid_f1,
                          'best test f1', test_f1)

                params['tr_f1'] = train_f1
                params['te_f1'] = test_f1
                params['v_f1'] = valid_f1
                params['be'] = e            # store the current epoch as a new best

            # learning rate decay if no improvement in a given number of epochs
            if abs(params['be']-params['ce']) >= params['decay_delay']:
                params['clr'] *= params['decay_factor']
                params['be'] = params['ce']
                print "Reverting to current best; new learning rate = ", params['clr']
                # also reset to the previous best
                rnn = best_rnn

            # stop once the learning rate has decayed to (near) zero
            if params['clr'] < 1e-5:
                break

            # stop early on a perfect validation score
            if best_f1 == 1.0:
                break

            # give up if nothing has been learned after several epochs
            if best_f1 == 0 and e > 7:
                break

        if params['save_model']:
            # re-classify the test set with the best model, then save the
            # model weights and its test-set predictions
            predictions_test = predict(len(test_y), params['classify_minibatch_size'], test_x_win, test_masks,
                                       test_y, params['win'], extra_input_dims, test_extra, best_rnn)
            best_rnn.save(output_dir)
            common.write_predictions(datasets, params['test_fold'], dev_fold, predictions_test, test_items, output_dir)

        print('BEST RESULT: epoch', params['be'],
              'train F1 ', params['tr_f1'],
              'valid F1', params['v_f1'],
              'best test F1', params['te_f1'],
              'with the model', output_dir)


        # record this fold's best scores; the reported validation F1 is
        # optionally masked by the reusable holdout
        best_true_valid_f1s.append(params['v_f1'])
        best_test_f1s.append(params['te_f1'])
        best_train_f1s.append(params['tr_f1'])
        if reuser is not None:
            best_valid_f1 = reuser.mask_value(params['v_f1'], params['tr_f1'])
        else:
            best_valid_f1 = params['v_f1']
        best_valid_f1s.append(best_valid_f1)


        test_prediction_arrays.append(np.array(best_test_predictions, dtype=int))

    # NOTE(review): this override makes the ensemble branch below unreachable,
    # regardless of the 'ensemble' setting passed in -- confirm intent
    params['ensemble'] = False
    if params['ensemble']:
        # majority vote over the per-fold test predictions
        test_predictions_stack = np.dstack(test_prediction_arrays)
        final_predictions = stats.mode(test_predictions_stack, axis=2)[0][:, :, 0]
        predicted_df = pd.DataFrame(final_predictions, index=test_items, columns=codes)
        true_df = pd.DataFrame(np.array(test_y), index=test_items, columns=codes)
        final_test_f1, final_test_pp = evaluation.calc_macro_mean_f1_pp(true_df, predicted_df)
    else:
        final_test_f1 = np.median(best_test_f1s)

    # hyperopt-style result: loss is negative so better F1 == lower loss
    return {'loss': -np.median(best_valid_f1s),
            'final_test_f1': final_test_f1,
            'valid_f1s': best_valid_f1s,
            'train_f1s': best_train_f1s,
            'true_valid_f1s': best_true_valid_f1s,
            'test_f1s': best_test_f1s,
            'status': STATUS_OK
            }
Example #2
0
def main(params=None):
    """Load a trained RNN and dump per-item, per-timestep responses to disk.

    Reloads the parameters and weights of a previously trained model (the
    params path is hard-coded below; the weights are read from the fold's
    output directory), classifies the train/valid/test splits, and then for
    every item steps the network through its input one timestep at a time,
    writing the sigmoid output trajectory to a CSV file and the LSTM gate
    activations to .npy files via output_npy_files.

    :param params: experiment configuration dict; when None a default is
        built -- but note it is unconditionally overwritten below.
    :return: None (results are written to disk and printed).
    """

    if params is None:
        # default configuration (overridden below by the hard-coded file)
        params = {
            'dataset': 'DRLD',
            'exp_name': 'best_minibatch_mod',
            'test_fold': 0,
            'n_dev_folds': 1,
            'min_doc_thresh': 1,
            'initialize_word_vectors': False,
            'vectors': 'anes_word2vec_300',  # default_word2vec_300, anes_word2vec_300, chars_word2vec_25, eye_1 ...
            'init_scale': 0.2,
            'add_OOV_dim': False,
            'win': 1,                   # size of context window
            'add_DRLD': False,
            'rnn_type': 'LSTM',        # basic, GRU, or LSTM
            'n_hidden': 50,             # size of hidden units
            'pooling_method': 'last',    # max, mean, or attention1/2
            'bidirectional': False,
            'bi_combine': 'concat',        # concat, max, or mean
            'train_embeddings': False,
            'lr': 0.025,                  # learning rate
            'lr_emb_fac': 0.2,            # factor to modify learning rate for embeddings
            'decay_delay': 5,           # number of epochs with no improvement before decreasing learning rate
            'decay_factor': 0.5,        # factor by which to multiply learning rate in case of delay
            'n_epochs': 100,
            'add_OOV_noise': False,
            'OOV_noise_prob': 0.01,
            'minibatch_size': 1,
            'classify_minibatch_size': 1,
            'ensemble': False,
            'save_model': True,
            'seed': 42,
            'verbose': 1,
            'reuse': False,
            'orig_T': 0.04,
            'tau': 0.01,
            'xavier_init': True
        }

    # NOTE(review): unconditionally replaces any passed-in params with a
    # hard-coded absolute path to a previous experiment -- confirm intent
    params = fh.read_json('/Users/dcard/Projects/CMU/ARK/guac/experiments/rnn/bayes_opt_rnn_LSTM_reuse_mod_34_rerun/params.txt')
    params['n_hidden'] = int(params['n_hidden'])

    # echo the full configuration, sorted by key
    keys = params.keys()
    keys.sort()
    for key in keys:
        print key, ':', params[key]

    # seed the random number generators
    np.random.seed(params['seed'])
    random.seed(params['seed'])

    # e.g. 'anes_word2vec_300' -> vector type 'anes', embedding dim 300
    vector_type = params['vectors'].split('_')[0]
    params['word2vec_dim'] = int(params['vectors'].split('_')[-1])


    # optionally mask validation scores through a reusable holdout
    reuser = None
    if params['reuse']:
        reuser = reusable_holdout.ReuseableHoldout(T=params['orig_T'], tau=params['tau'])

    # expand a dataset name into its component subsets
    if params['dataset'] == 'DRLD':
        datasets = ['Democrat-Likes', 'Democrat-Dislikes', 'Republican-Likes', 'Republican-Dislikes']
    elif params['dataset'] == 'MIP':
        datasets = ['MIP-Personal-1', 'MIP-Personal-2', 'MIP-Political-1', 'MIP-Political-2']
    elif params['dataset'] == 'MOLD':
        datasets = ['McCain-Likes', 'McCain-Dislikes', 'Obama-Likes', 'Obama-Dislikes']
    elif params['dataset'] == 'Primary':
        datasets = ['Obama-Primary', 'Clinton-Primary']
    elif params['dataset'] == 'General':
        datasets = ['Obama-General', 'McCain-General']
    else:
        datasets = [params['dataset']]

    # re-seed so fold processing starts from a known RNG state
    np.random.seed(params['seed'])
    random.seed(params['seed'])

    # per-fold records of the best scores seen
    best_valid_f1s = []
    best_true_valid_f1s = []
    best_test_f1s = []
    best_train_f1s = []

    test_prediction_arrays = []

    # write the configuration to the experiment directory
    # (fh and defines are project helpers for file handling / paths)
    output_dir = fh.makedirs(defines.exp_dir, 'rnn', params['exp_name'])
    output_filename = fh.make_filename(output_dir, 'params', 'txt')
    fh.write_to_json(params, output_filename)

    for dev_fold in range(params['n_dev_folds']):
        print "dev fold =", dev_fold

        output_dir = fh.makedirs(defines.exp_dir, 'rnn', params['exp_name'], 'fold' + str(dev_fold))

        all_data, words2idx, items, all_labels = common.load_data(datasets, params['test_fold'], dev_fold,
                                                                  params['min_doc_thresh'])
        train_xy, valid_xy, test_xy = all_data
        train_lex, train_y = train_xy
        valid_lex, valid_y = valid_xy
        test_lex, test_y = test_xy

        # NOTE(review): length_order is computed here but never used below
        train_lengths = [len(x) for x in train_lex]
        length_order = np.argsort(train_lengths)

        #if params['minibatch_size'] > 1 or params['classify_minibatch_size'] > 1:
        # pad all sequences to a common length and get the matching masks
        print "padding input with zeros"
        #all_data, all_masks = common.prepare_data(train_lex, valid_lex, test_lex, preset_max=100)
        all_data, all_masks = common.prepare_data(train_lex, valid_lex, test_lex)
        train_lex, valid_lex, test_lex = all_data
        train_masks, valid_masks, test_masks = all_masks
        #else:
        #    train_masks = [np.ones(len(x)).astype('int32') for x in train_lex]
        #    valid_masks = [np.ones(len(x)).astype('int32') for x in valid_lex]
        #    test_masks = [np.ones(len(x)).astype('int32') for x in test_lex]

        print "expanding x with context win dows"
        # Rejigger to convert x to contex win in advance
        train_x_win = expand_x_with_context_win(train_lex, params['win'])
        valid_x_win = expand_x_with_context_win(valid_lex, params['win'])
        test_x_win = expand_x_with_context_win(test_lex, params['win'])
        order = range(len(train_lex))   # training index order (not shuffled here)
        print "done"

        train_items, dev_items, test_items = items
        vocsize = len(words2idx.keys())
        # invert the vocabulary: index -> word
        idx2words = dict((k, v) for v, k in words2idx.iteritems())
        best_test_predictions = None

        n_sentences = len(train_lex)
        print "vocsize = ", vocsize, 'n_train', n_sentences

        codes = all_labels.columns
        n_items, n_codes = all_labels.shape

        # get the words in the sentences for the test and validation sets
        words_valid = [map(lambda x: idx2words[x], w) for w in valid_lex]
        groundtruth_test = test_y[:]
        words_test = [map(lambda x: idx2words[x], w) for w in test_lex]

        #if vector_type == 'eye':
        #    initial_embeddings = np.eye(vocsize)
        #    emb_dim = initial_embeddings.shape[1]
        if params['initialize_word_vectors']:
            initial_embeddings = common.load_embeddings(params, words2idx)
            emb_dim = initial_embeddings.shape[1]
        else:
            # no pre-trained vectors; the RNN initializes its own embeddings
            initial_embeddings = None
            emb_dim = params['word2vec_dim']
        print "embedding dim =", emb_dim

        # extra input dimensions for the Likes/Democrat indicator features below
        extra_input_dims = 0
        if params['add_DRLD']:
            #extra_input_dims = 4
            extra_input_dims = 2

        print "Building RNN"
        rnn = RNN(nh=params['n_hidden'],
                  nc=n_codes,
                  ne=vocsize,
                  de=emb_dim,
                  cs=params['win'],
                  extra_input_dims=extra_input_dims,
                  initial_embeddings=initial_embeddings,
                  init_scale=params['init_scale'],
                  rnn_type=params['rnn_type'],
                  train_embeddings=params['train_embeddings'],
                  pooling_method=params['pooling_method'],
                  bidirectional=params['bidirectional'],
                  bi_combine=params['bi_combine'],
                  xavier_init=params['xavier_init']
                  )

        # add extra dimensions to differentiate between paired datasets
        train_likes = [1 if re.search('Likes', i) else 0 for i in train_items]
        dev_likes = [1 if re.search('Likes', i) else 0 for i in dev_items]
        test_likes = [1 if re.search('Likes', i) else 0 for i in test_items]

        train_dem = [1 if re.search('Democrat', i) else 0 for i in train_items]
        dev_dem = [1 if re.search('Democrat', i) else 0 for i in dev_items]
        test_dem = [1 if re.search('Democrat', i) else 0 for i in test_items]

        """
        train_obama = [1 if re.search('Obama', i) else 0 for i in train_items]
        dev_obama = [1 if re.search('Obama', i) else 0 for i in dev_items]
        test_obama = [1 if re.search('Obama', i) else 0 for i in test_items]

        train_personal = [1 if re.search('Personal', i) else 0 for i in train_items]
        dev_personal = [1 if re.search('Personal', i) else 0 for i in dev_items]
        test_personal = [1 if re.search('Personal', i) else 0 for i in test_items]

        train_extra = [[train_likes[i], train_dem[i], train_obama[i], train_personal[i]] for i, t in enumerate(train_items)]
        dev_extra = [[dev_likes[i], dev_dem[i], dev_obama[i], dev_personal[i]] for i, t in enumerate(dev_items)]
        test_extra = [[test_likes[i], test_dem[i], test_obama[i], test_personal[i]] for i, t in enumerate(test_items)]
        """

        train_extra = [[train_likes[i], train_dem[i]] for i, t in enumerate(train_items)]
        dev_extra = [[dev_likes[i], dev_dem[i]] for i, t in enumerate(dev_items)]
        test_extra = [[test_likes[i], test_dem[i]] for i, t in enumerate(test_items)]


        ### LOAD
        # restore previously trained weights for this fold
        rnn.load(output_dir)

        # train with early stopping on validation set
        best_f1 = -np.inf
        params['clr'] = params['lr']
        n_train = len(order)



        # classify all three splits with the loaded model
        predictions_train = predict(n_train, params['classify_minibatch_size'], train_x_win, train_masks,
                                     train_y, params['win'], extra_input_dims, train_extra, rnn, order)
        n_valid = len(valid_lex)
        n_test = len(test_lex)
        predictions_valid = predict(n_valid, params['classify_minibatch_size'], valid_x_win, valid_masks,
                                    valid_y, params['win'], extra_input_dims, dev_extra, rnn)
        predictions_test = predict(n_test, params['classify_minibatch_size'], test_x_win, test_masks,
                                    test_y, params['win'], extra_input_dims, test_extra, rnn)

        """
        predictions_train = [rnn.classify(x, train_masks[i], params['win'],
                                          extra_input_dims, train_extra[i])[0] for i, x in enumerate(train_lex)]
        predictions_valid = [rnn.classify(x, valid_masks[i], params['win'],
                                          extra_input_dims, dev_extra[i])[0] for i, x in enumerate(valid_lex)]
        predictions_test = [rnn.classify(x, test_masks[i], params['win'],
                                         extra_input_dims, test_extra[i])[0] for i, x in enumerate(test_lex)]
        """

        train_f1 = common.calc_mean_f1(predictions_train, train_y)
        test_f1 = common.calc_mean_f1(predictions_test, test_y)
        valid_f1 = common.calc_mean_f1(predictions_valid, valid_y)

        # per-item response files go in a 'responses' subdirectory
        output_dir = fh.makedirs(output_dir, 'responses')

        # step through one item at a time
        ms = 1

        for i in range(n_train):
            mb_x, mb_masks, mb_extra, mb_y = select_minibatch(train_x_win, train_masks, train_extra, train_y,
                                                              params['win'], i, ms, order=range(len(train_y)))

            # step_through returns hidden states, output weights/bias, and the
            # forward/reverse input, forget, and output gate activations
            h, W, b, p_y, s, i_f, i_r, \
                f_f, f_r, o_f, o_r, c = rnn.step_through(mb_x, mb_masks, params['win'], extra_input_dims, mb_extra)

            # recompute the per-timestep sigmoid response from h, W, and b
            temp = np.dot(h, W) + b
            s = 1.0/(1.0 + np.exp(-temp))
            output_filename = fh.make_filename(output_dir, train_items[i], 'csv')
            np.savetxt(output_filename, s[:, 0, :], delimiter=',')
            output_npy_files(output_dir, train_items[i], i_f, i_r, f_f, f_r, o_f, o_r, h, c)

        for i in range(n_valid):
            mb_x, mb_masks, mb_extra, mb_y = select_minibatch(valid_x_win, valid_masks, dev_extra, valid_y,
                                                              params['win'], i, ms, order=range(len(valid_y)))

            h, W, b, p_y, s, i_f, i_r, \
                f_f, f_r, o_f, o_r, c = rnn.step_through(mb_x, mb_masks, params['win'], extra_input_dims, mb_extra)

            # recompute the per-timestep sigmoid response from h, W, and b
            temp = np.dot(h, W) + b
            s = 1.0/(1.0 + np.exp(-temp))
            output_filename = fh.make_filename(output_dir, dev_items[i], 'csv')
            np.savetxt(output_filename, s[:, 0, :], delimiter=',')
            output_npy_files(output_dir, dev_items[i], i_f, i_r, f_f, f_r, o_f, o_r, h, c)

        for i in range(n_test):
            mb_x, mb_masks, mb_extra, mb_y = select_minibatch(test_x_win, test_masks, test_extra, test_y,
                                                              params['win'], i, ms, order=range(len(test_y)))

            h, W, b, p_y, s, i_f, i_r,\
                f_f, f_r, o_f, o_r, c = rnn.step_through(mb_x, mb_masks, params['win'], extra_input_dims, mb_extra)

            # recompute the per-timestep sigmoid response from h, W, and b
            temp = np.dot(h, W) + b
            s = 1.0/(1.0 + np.exp(-temp))
            output_filename = fh.make_filename(output_dir, test_items[i], 'csv')
            np.savetxt(output_filename, s[:, 0, :], delimiter=',')
            output_npy_files(output_dir, test_items[i], i_f, i_r, f_f, f_r, o_f, o_r, h, c)

        print "train_f1 =", train_f1, "valid_f1 =", valid_f1, "test_f1 =", test_f1
Example #3
0
def main(params=None):
    """Train a multi-label RNN text classifier with early stopping on validation F1.

    For each dev fold: loads the data, builds an RNN, trains it one sentence at
    a time with SGD (optionally injecting OOV noise into the inputs), decays the
    learning rate after `decay_delay` epochs without improvement (reverting to
    the best model so far), keeps a deep copy of the best model by validation
    F1, optionally saves it, and writes per-epoch results to disk.

    :param params: dict of experiment hyperparameters; a default set is used
        when None.  NOTE(review): see below -- params is subsequently
        overwritten unconditionally from a saved experiment file.
    :return: hyperopt-style dict with 'loss' (negative median validation F1
        across folds), 'final_test_f1', 'valid_f1s', 'test_f1s', and 'status'.
    """

    if params is None:
        params = {
            'exp_name': 'minibatch_test',
            'test_fold': 0,
            'n_dev_folds': 1,
            'min_doc_thresh': 1,
            'initialize_word_vectors': True,
            'vectors': 'anes_word2vec',  # default_word2vec, anes_word2vec ...
            'word2vec_dim': 300,
            'init_scale': 0.2,
            'add_OOV': True,
            'win': 3,                   # size of context window
            'add_DRLD': False,
            'rnn_type': 'basic',        # basic, GRU, or LSTM
            'n_hidden': 3,             # size of hidden units
            'pooling_method': 'max',    # max, mean, or attention1/2
            'bidirectional': False,
            'bi_combine': 'mean',        # concat, max, or mean
            'train_embeddings': True,
            'lr': 0.1,                  # learning rate
            'lr_emb_fac': 0.2,            # factor to modify learning rate for embeddings
            'decay_delay': 5,           # number of epochs with no improvement before decreasing learning rate
            'decay_factor': 0.5,        # factor by which to multiply learning rate in case of delay
            'n_epochs': 10,
            'add_OOV_noise': False,
            'OOV_noise_prob': 0.01,
            'minibatch_size': 1,
            'ensemble': False,
            'save_model': True,
            'seed': 42,
            'verbose': 1,
            'reuse': False,
            'orig_T': 0.04,
            'tau': 0.01
        }

    # load params from a previous experiment
    # NOTE(review): this unconditionally replaces the params argument (and the
    # defaults above) with the saved config -- confirm this is intentional and
    # not a debugging leftover.
    params = fh.read_json('/Users/dcard/Projects/CMU/ARK/guac/experiments/best_mod.json')
    params['exp_name'] += '_minibatch_16'
    params['n_hidden'] = int(params['n_hidden'])
    params['orig_T'] = 0.02
    params['tau'] = 0.005


    # optional reusable-holdout masking of the validation metric
    reuser = None
    if params['reuse']:
        reuser = reusable_holdout.ReuseableHoldout(T=params['orig_T'], tau=params['tau'])

    # log the full configuration in sorted key order
    keys = params.keys()
    keys.sort()
    for key in keys:
        print key, ':', params[key]

    # seed the random number generators
    np.random.seed(params['seed'])
    random.seed(params['seed'])

    datasets = ['Democrat-Likes', 'Democrat-Dislikes', 'Republican-Likes', 'Republican-Dislikes']

    # NOTE(review): the RNGs were already seeded just above; this re-seed is redundant.
    np.random.seed(params['seed'])
    random.seed(params['seed'])

    best_valid_f1s = []
    best_test_f1s = []

    test_prediction_arrays = []

    output_dir = fh.makedirs(defines.exp_dir, 'rnn', params['exp_name'])
    output_filename = fh.make_filename(output_dir, 'params', 'json')
    fh.write_to_json(params, output_filename)

    for dev_fold in range(params['n_dev_folds']):
        print "dev fold =", dev_fold

        # per-fold output directory and per-epoch (train, valid, test) F1 log
        output_dir = fh.makedirs(defines.exp_dir, 'rnn', params['exp_name'], 'fold' + str(dev_fold))
        results = []

        all_data, words2idx, items, all_labels = common.load_data(datasets, params['test_fold'], dev_fold,
                                                                  params['min_doc_thresh'])
        train_xy, valid_xy, test_xy = all_data
        train_lex, train_y = train_xy
        valid_lex, valid_y = valid_xy
        test_lex, test_y = test_xy
        train_items, dev_items, test_items = items
        vocsize = len(words2idx.keys())
        # invert the vocabulary: index -> word
        idx2words = dict((k, v) for v, k in words2idx.iteritems())
        best_test_predictions = None

        n_sentences = len(train_lex)
        print "vocsize = ", vocsize, 'n_train', n_sentences

        codes = all_labels.columns
        n_items, n_codes = all_labels.shape

        # get the words in the sentences for the test and validation sets
        words_valid = [map(lambda x: idx2words[x], w) for w in valid_lex]
        groundtruth_test = test_y[:]
        words_test = [map(lambda x: idx2words[x], w) for w in test_lex]

        initial_embeddings = common.load_embeddings(params, words2idx)
        OOV_index = words2idx['__OOV__']
        emb_dim = initial_embeddings.shape[1]
        print 'emb_dim =', emb_dim

        # two extra binary input dims encode Likes/Dislikes and Democrat/Republican
        extra_input_dims = 0
        if params['add_DRLD']:
            extra_input_dims = 2

        print "Building RNN"
        rnn = RNN(nh=params['n_hidden'],
                  nc=n_codes,
                  ne=vocsize,
                  de=emb_dim,
                  cs=params['win'],
                  extra_input_dims=extra_input_dims,
                  initial_embeddings=initial_embeddings,
                  init_scale=params['init_scale'],
                  rnn_type=params['rnn_type'],
                  train_embeddings=params['train_embeddings'],
                  pooling_method=params['pooling_method'],
                  bidirectional=params['bidirectional'],
                  bi_combine=params['bi_combine']
                  )

        # derive the binary side-inputs from the item-name conventions
        train_likes = [1 if re.search('Likes', i) else 0 for i in train_items]
        dev_likes = [1 if re.search('Likes', i) else 0 for i in dev_items]
        test_likes = [1 if re.search('Likes', i) else 0 for i in test_items]

        train_dem = [1 if re.search('Democrat', i) else 0 for i in train_items]
        dev_dem = [1 if re.search('Democrat', i) else 0 for i in dev_items]
        test_dem = [1 if re.search('Democrat', i) else 0 for i in test_items]

        train_extra = [[train_likes[i], train_dem[i]] for i, t in enumerate(train_items)]
        dev_extra = [[dev_likes[i], dev_dem[i]] for i, t in enumerate(dev_items)]
        test_extra = [[test_likes[i], test_dem[i]] for i, t in enumerate(test_items)]

        # train with early stopping on validation set


        best_f1 = -np.inf
        params['clr'] = params['lr']
        for e in xrange(params['n_epochs']):
            # shuffle
            shuffle([train_lex, train_y, train_extra], params['seed'])   # shuffle the input data
            params['ce'] = e                # store the current epoch
            tic = timeit.default_timer()

            #for i, (x, y) in enumerate(zip(train_lex, train_y)):
            for i, orig_x in enumerate(train_lex):
                n_words = len(orig_x)
                # optionally replace each word with the OOV token with small probability
                if params['add_OOV_noise']:
                    draws = np.random.rand(n_words)
                    x = [OOV_index if draws[idx] < params['OOV_noise_prob'] else orig_x[idx] for idx in range(n_words)]
                else:
                    x = orig_x
                y = train_y[i]
                extra = train_extra[i]

                if i == 0:
                    print ' '.join([idx2words[w] for w in train_lex[i]])

                if i == 0:
                    print x
                    print y

                nll = rnn.train(x, y, params['win'], params['clr'], params['lr_emb_fac'],
                          extra_input_dims, extra)
                # print the loss every 100 sentences (condition holds when i is a multiple of 100)
                if float(i/100.0) == float(i//100):
                    print nll
                print '[learning] epoch %i >> %2.2f%%' % (
                    e, (i + 1) * 100. / float(n_sentences)),
                print 'completed in %.2f (sec) <<\r' % (timeit.default_timer() - tic),
                sys.stdout.flush()
                #if i == 0:
                #    print ' '.join([idx2words[idx] for idx in orig_x])
                #    print rnn.classify(orig_x, params['win'], extra_input_dims, extra)

                # abort immediately if training diverges
                if np.isnan(nll) or np.isinf(nll):
                    return {'loss': nll,
                            'final_test_f1': 0,
                            'valid_f1s': [0],
                            'test_f1s': [0],
                            'status': STATUS_OK
                            }

            # evaluation // back into the real world : idx -> words
            print ""

            #print rnn.classify((np.asarray(contextwin(train_lex[0], params['win'])).astype('int32')), train_likes[0], params['win'])
            #print rnn.classify(train_lex[0], params['win'], extra_input_dims, train_extra[0])
            #print rnn.get_element_weights(np.asarray(contextwin(train_lex[0], params['win'])).astype('int32'))
            #if params['pooling_method'] == 'attention1' or params['pooling_method'] == 'attention2':
            #    if extra_input_dims == 0:
            #        r = np.random.randint(0, len(train_lex))
            #        print r, rnn.a_sum_check(np.asarray(contextwin(train_lex[r], params['win'])).astype('int32'))

            """
            predictions_train = [np.max(rnn.classify(np.asarray(contextwin(x, params['win'])).astype('int32')), axis=0)
                                 for x in train_lex]
            predictions_test = [np.max(rnn.classify(np.asarray(contextwin(x, params['win'])).astype('int32')), axis=0)
                                for x in test_lex]
            predictions_valid = [np.max(rnn.classify(np.asarray(contextwin(x, params['win'])).astype('int32')), axis=0)
                                 for x in valid_lex]
            """

            #predictions_train = [rnn.classify(np.asarray(contextwin(x, params['win'])).astype('int32'), likes) for x in train_lex]
            #predictions_test = [rnn.classify(np.asarray(contextwin(x, params['win'])).astype('int32'), likes) for x in test_lex]
            #predictions_valid = [rnn.classify(np.asarray(contextwin(x, params['win'])).astype('int32'), likes) for x in valid_lex]

            # classify every split with the current model
            predictions_train = [rnn.classify(x, params['win'],
                                              extra_input_dims, train_extra[i]) for i, x in enumerate(train_lex)]
            predictions_test = [rnn.classify(x, params['win'],
                                             extra_input_dims, test_extra[i]) for i, x in enumerate(test_lex)]
            predictions_valid = [rnn.classify(x, params['win'],
                                              extra_input_dims, dev_extra[i]) for i, x in enumerate(valid_lex)]

            train_f1 = common.calc_mean_f1(predictions_train, train_y)
            test_f1 = common.calc_mean_f1(predictions_test, test_y)
            valid_f1 = common.calc_mean_f1(predictions_valid, valid_y)

            # mask the validation score through the reusable holdout, if enabled
            if reuser is not None:
                valid_f1 = reuser.mask_value(valid_f1, train_f1)

            question_f1s = []
            question_pps = []

            print "train_f1 =", train_f1, "valid_f1 =", valid_f1, "test_f1 =", test_f1
            results.append((train_f1, valid_f1, test_f1))

            # checkpoint: keep a deep copy of the model whenever validation F1 improves
            if valid_f1 > best_f1:
                best_rnn = copy.deepcopy(rnn)
                best_f1 = valid_f1
                best_test_predictions = predictions_test

                if params['verbose']:
                    # NOTE: under Python 2 this print statement outputs the tuple repr
                    print('NEW BEST: epoch', e,
                          'valid f1', valid_f1,
                          'best test f1', test_f1)

                params['tr_f1'] = train_f1
                params['te_f1'] = test_f1
                params['v_f1'] = valid_f1
                params['be'] = e            # store the current epoch as a new best

            # learning rate decay if no improvement in a given number of epochs
            if abs(params['be']-params['ce']) >= params['decay_delay']:
                params['clr'] *= params['decay_factor']
                params['be'] = params['ce']
                print "Reverting to current best; new learning rate = ", params['clr']
                # also reset to the previous best
                rnn = best_rnn

            # stop if the learning rate has decayed to (effectively) zero
            if params['clr'] < 1e-5:
                break

            # stop early on a perfect validation score
            if best_f1 == 1.0:
                break

            # give up if there has been no signal at all after 10 epochs
            if best_f1 == 0 and e > 10:
                break

        if params['save_model']:
            # NOTE(review): these predictions are computed with the final `rnn`,
            # while the model saved below is `best_rnn` -- confirm this mismatch
            # is intended.
            predictions_valid = [rnn.classify(x, params['win'],
                                              extra_input_dims, dev_extra[i]) for i, x in enumerate(valid_lex)]

            #predictions_valid = [best_rnn.classify(np.asarray(contextwin(x, params['win'])).astype('int32')) for x in valid_lex]
            best_rnn.save(output_dir)
            common.write_predictions(datasets, params['test_fold'], dev_fold, predictions_valid, dev_items, output_dir)

        # NOTE: under Python 2 this print statement outputs the tuple repr
        print('BEST RESULT: epoch', params['be'],
              'train F1 ', params['tr_f1'],
              'valid F1', params['v_f1'],
              'best test F1', params['te_f1'],
              'with the model', output_dir)

        best_valid_f1s.append(params['v_f1'])
        best_test_f1s.append(params['te_f1'])

        test_prediction_arrays.append(np.array(best_test_predictions, dtype=int))

        # append this fold's per-epoch results to a text log
        output_filename = fh.make_filename(output_dir, 'results', 'txt')
        with codecs.open(output_filename, 'w') as output_file:
            for e, result in enumerate(results):
                output_file.write('epoch=' + str(e) + '; train_f1=' + str(result[0]) +
                                  '; valid_f1=' + str(result[1]) + '; test_f1=' + str(result[2]) + '\n')

    if params['ensemble']:
        # majority vote over each fold's best test predictions
        test_predictions_stack = np.dstack(test_prediction_arrays)
        final_predictions = stats.mode(test_predictions_stack, axis=2)[0][:, :, 0]
        predicted_df = pd.DataFrame(final_predictions, index=test_items, columns=codes)
        true_df = pd.DataFrame(np.array(test_y), index=test_items, columns=codes)
        final_test_f1, final_test_pp = evaluation.calc_macro_mean_f1_pp(true_df, predicted_df)
    else:
        final_test_f1 = np.median(best_test_f1s)

    # hyperopt-style result: loss is negated so minimizing maximizes valid F1
    return {'loss': -np.median(best_valid_f1s),
            'final_test_f1': final_test_f1,
            'valid_f1s': best_valid_f1s,
            'test_f1s': best_test_f1s,
            'status': STATUS_OK
            }
# Example #4
# 0
def main(params=None):
    """Train a multi-label RNN text classifier with early stopping on validation F1.

    For each dev fold: loads the data, builds an RNN, trains it one sentence at
    a time with SGD (optionally injecting OOV noise into the inputs), decays the
    learning rate after `decay_delay` epochs without improvement (reverting to
    the best model so far), keeps a deep copy of the best model by validation
    F1, optionally saves it, and writes per-epoch results to disk.

    :param params: dict of experiment hyperparameters; a default set is used
        when None.  NOTE(review): see below -- params is subsequently
        overwritten unconditionally from a saved experiment file.
    :return: hyperopt-style dict with 'loss' (negative median validation F1
        across folds), 'final_test_f1', 'valid_f1s', 'test_f1s', and 'status'.
    """

    if params is None:
        params = {
            "exp_name": "minibatch_test",
            "test_fold": 0,
            "n_dev_folds": 1,
            "min_doc_thresh": 1,
            "initialize_word_vectors": True,
            "vectors": "anes_word2vec",  # default_word2vec, anes_word2vec ...
            "word2vec_dim": 300,
            "init_scale": 0.2,
            "add_OOV": True,
            "win": 3,  # size of context window
            "add_DRLD": False,
            "rnn_type": "basic",  # basic, GRU, or LSTM
            "n_hidden": 3,  # size of hidden units
            "pooling_method": "max",  # max, mean, or attention1/2
            "bidirectional": False,
            "bi_combine": "mean",  # concat, max, or mean
            "train_embeddings": True,
            "lr": 0.1,  # learning rate
            "lr_emb_fac": 0.2,  # factor to modify learning rate for embeddings
            "decay_delay": 5,  # number of epochs with no improvement before decreasing learning rate
            "decay_factor": 0.5,  # factor by which to multiply learning rate in case of delay
            "n_epochs": 10,
            "add_OOV_noise": False,
            "OOV_noise_prob": 0.01,
            "minibatch_size": 1,
            "ensemble": False,
            "save_model": True,
            "seed": 42,
            "verbose": 1,
            "reuse": False,
            "orig_T": 0.04,
            "tau": 0.01,
        }

    # load params from a previous experiment
    # NOTE(review): this unconditionally replaces the params argument (and the
    # defaults above) with the saved config -- confirm this is intentional and
    # not a debugging leftover.
    params = fh.read_json("/Users/dcard/Projects/CMU/ARK/guac/experiments/best_mod.json")
    params["exp_name"] += "_minibatch_16"
    params["n_hidden"] = int(params["n_hidden"])
    params["orig_T"] = 0.02
    params["tau"] = 0.005

    # optional reusable-holdout masking of the validation metric
    reuser = None
    if params["reuse"]:
        reuser = reusable_holdout.ReuseableHoldout(T=params["orig_T"], tau=params["tau"])

    # log the full configuration in sorted key order
    keys = params.keys()
    keys.sort()
    for key in keys:
        print key, ":", params[key]

    # seed the random number generators
    np.random.seed(params["seed"])
    random.seed(params["seed"])

    datasets = ["Democrat-Likes", "Democrat-Dislikes", "Republican-Likes", "Republican-Dislikes"]

    # NOTE(review): the RNGs were already seeded just above; this re-seed is redundant.
    np.random.seed(params["seed"])
    random.seed(params["seed"])

    best_valid_f1s = []
    best_test_f1s = []

    test_prediction_arrays = []

    output_dir = fh.makedirs(defines.exp_dir, "rnn", params["exp_name"])
    output_filename = fh.make_filename(output_dir, "params", "json")
    fh.write_to_json(params, output_filename)

    for dev_fold in range(params["n_dev_folds"]):
        print "dev fold =", dev_fold

        # per-fold output directory and per-epoch (train, valid, test) F1 log
        output_dir = fh.makedirs(defines.exp_dir, "rnn", params["exp_name"], "fold" + str(dev_fold))
        results = []

        all_data, words2idx, items, all_labels = common.load_data(
            datasets, params["test_fold"], dev_fold, params["min_doc_thresh"]
        )
        train_xy, valid_xy, test_xy = all_data
        train_lex, train_y = train_xy
        valid_lex, valid_y = valid_xy
        test_lex, test_y = test_xy
        train_items, dev_items, test_items = items
        vocsize = len(words2idx.keys())
        # invert the vocabulary: index -> word
        idx2words = dict((k, v) for v, k in words2idx.iteritems())
        best_test_predictions = None

        n_sentences = len(train_lex)
        print "vocsize = ", vocsize, "n_train", n_sentences

        codes = all_labels.columns
        n_items, n_codes = all_labels.shape

        # get the words in the sentences for the test and validation sets
        words_valid = [map(lambda x: idx2words[x], w) for w in valid_lex]
        groundtruth_test = test_y[:]
        words_test = [map(lambda x: idx2words[x], w) for w in test_lex]

        initial_embeddings = common.load_embeddings(params, words2idx)
        OOV_index = words2idx["__OOV__"]
        emb_dim = initial_embeddings.shape[1]
        print "emb_dim =", emb_dim

        # two extra binary input dims encode Likes/Dislikes and Democrat/Republican
        extra_input_dims = 0
        if params["add_DRLD"]:
            extra_input_dims = 2

        print "Building RNN"
        rnn = RNN(
            nh=params["n_hidden"],
            nc=n_codes,
            ne=vocsize,
            de=emb_dim,
            cs=params["win"],
            extra_input_dims=extra_input_dims,
            initial_embeddings=initial_embeddings,
            init_scale=params["init_scale"],
            rnn_type=params["rnn_type"],
            train_embeddings=params["train_embeddings"],
            pooling_method=params["pooling_method"],
            bidirectional=params["bidirectional"],
            bi_combine=params["bi_combine"],
        )

        # derive the binary side-inputs from the item-name conventions
        train_likes = [1 if re.search("Likes", i) else 0 for i in train_items]
        dev_likes = [1 if re.search("Likes", i) else 0 for i in dev_items]
        test_likes = [1 if re.search("Likes", i) else 0 for i in test_items]

        train_dem = [1 if re.search("Democrat", i) else 0 for i in train_items]
        dev_dem = [1 if re.search("Democrat", i) else 0 for i in dev_items]
        test_dem = [1 if re.search("Democrat", i) else 0 for i in test_items]

        train_extra = [[train_likes[i], train_dem[i]] for i, t in enumerate(train_items)]
        dev_extra = [[dev_likes[i], dev_dem[i]] for i, t in enumerate(dev_items)]
        test_extra = [[test_likes[i], test_dem[i]] for i, t in enumerate(test_items)]

        # train with early stopping on validation set

        best_f1 = -np.inf
        params["clr"] = params["lr"]
        for e in xrange(params["n_epochs"]):
            # shuffle
            shuffle([train_lex, train_y, train_extra], params["seed"])  # shuffle the input data
            params["ce"] = e  # store the current epoch
            tic = timeit.default_timer()

            # for i, (x, y) in enumerate(zip(train_lex, train_y)):
            for i, orig_x in enumerate(train_lex):
                n_words = len(orig_x)
                # optionally replace each word with the OOV token with small probability
                if params["add_OOV_noise"]:
                    draws = np.random.rand(n_words)
                    x = [OOV_index if draws[idx] < params["OOV_noise_prob"] else orig_x[idx] for idx in range(n_words)]
                else:
                    x = orig_x
                y = train_y[i]
                extra = train_extra[i]

                if i == 0:
                    print " ".join([idx2words[w] for w in train_lex[i]])

                if i == 0:
                    print x
                    print y

                nll = rnn.train(x, y, params["win"], params["clr"], params["lr_emb_fac"], extra_input_dims, extra)
                # print the loss every 100 sentences (condition holds when i is a multiple of 100)
                if float(i / 100.0) == float(i // 100):
                    print nll
                print "[learning] epoch %i >> %2.2f%%" % (e, (i + 1) * 100.0 / float(n_sentences)),
                print "completed in %.2f (sec) <<\r" % (timeit.default_timer() - tic),
                sys.stdout.flush()
                # if i == 0:
                #    print ' '.join([idx2words[idx] for idx in orig_x])
                #    print rnn.classify(orig_x, params['win'], extra_input_dims, extra)

                # abort immediately if training diverges
                if np.isnan(nll) or np.isinf(nll):
                    return {"loss": nll, "final_test_f1": 0, "valid_f1s": [0], "test_f1s": [0], "status": STATUS_OK}

            # evaluation // back into the real world : idx -> words
            print ""

            # print rnn.classify((np.asarray(contextwin(train_lex[0], params['win'])).astype('int32')), train_likes[0], params['win'])
            # print rnn.classify(train_lex[0], params['win'], extra_input_dims, train_extra[0])
            # print rnn.get_element_weights(np.asarray(contextwin(train_lex[0], params['win'])).astype('int32'))
            # if params['pooling_method'] == 'attention1' or params['pooling_method'] == 'attention2':
            #    if extra_input_dims == 0:
            #        r = np.random.randint(0, len(train_lex))
            #        print r, rnn.a_sum_check(np.asarray(contextwin(train_lex[r], params['win'])).astype('int32'))

            """
            predictions_train = [np.max(rnn.classify(np.asarray(contextwin(x, params['win'])).astype('int32')), axis=0)
                                 for x in train_lex]
            predictions_test = [np.max(rnn.classify(np.asarray(contextwin(x, params['win'])).astype('int32')), axis=0)
                                for x in test_lex]
            predictions_valid = [np.max(rnn.classify(np.asarray(contextwin(x, params['win'])).astype('int32')), axis=0)
                                 for x in valid_lex]
            """

            # predictions_train = [rnn.classify(np.asarray(contextwin(x, params['win'])).astype('int32'), likes) for x in train_lex]
            # predictions_test = [rnn.classify(np.asarray(contextwin(x, params['win'])).astype('int32'), likes) for x in test_lex]
            # predictions_valid = [rnn.classify(np.asarray(contextwin(x, params['win'])).astype('int32'), likes) for x in valid_lex]

            # classify every split with the current model
            predictions_train = [
                rnn.classify(x, params["win"], extra_input_dims, train_extra[i]) for i, x in enumerate(train_lex)
            ]
            predictions_test = [
                rnn.classify(x, params["win"], extra_input_dims, test_extra[i]) for i, x in enumerate(test_lex)
            ]
            predictions_valid = [
                rnn.classify(x, params["win"], extra_input_dims, dev_extra[i]) for i, x in enumerate(valid_lex)
            ]

            train_f1 = common.calc_mean_f1(predictions_train, train_y)
            test_f1 = common.calc_mean_f1(predictions_test, test_y)
            valid_f1 = common.calc_mean_f1(predictions_valid, valid_y)

            # mask the validation score through the reusable holdout, if enabled
            if reuser is not None:
                valid_f1 = reuser.mask_value(valid_f1, train_f1)

            question_f1s = []
            question_pps = []

            print "train_f1 =", train_f1, "valid_f1 =", valid_f1, "test_f1 =", test_f1
            results.append((train_f1, valid_f1, test_f1))

            # checkpoint: keep a deep copy of the model whenever validation F1 improves
            if valid_f1 > best_f1:
                best_rnn = copy.deepcopy(rnn)
                best_f1 = valid_f1
                best_test_predictions = predictions_test

                if params["verbose"]:
                    # NOTE: under Python 2 this print statement outputs the tuple repr
                    print ("NEW BEST: epoch", e, "valid f1", valid_f1, "best test f1", test_f1)

                params["tr_f1"] = train_f1
                params["te_f1"] = test_f1
                params["v_f1"] = valid_f1
                params["be"] = e  # store the current epoch as a new best

            # learning rate decay if no improvement in a given number of epochs
            if abs(params["be"] - params["ce"]) >= params["decay_delay"]:
                params["clr"] *= params["decay_factor"]
                params["be"] = params["ce"]
                print "Reverting to current best; new learning rate = ", params["clr"]
                # also reset to the previous best
                rnn = best_rnn

            # stop if the learning rate has decayed to (effectively) zero
            if params["clr"] < 1e-5:
                break

            # stop early on a perfect validation score
            if best_f1 == 1.0:
                break

            # give up if there has been no signal at all after 10 epochs
            if best_f1 == 0 and e > 10:
                break

        if params["save_model"]:
            # NOTE(review): these predictions are computed with the final `rnn`,
            # while the model saved below is `best_rnn` -- confirm this mismatch
            # is intended.
            predictions_valid = [
                rnn.classify(x, params["win"], extra_input_dims, dev_extra[i]) for i, x in enumerate(valid_lex)
            ]

            # predictions_valid = [best_rnn.classify(np.asarray(contextwin(x, params['win'])).astype('int32')) for x in valid_lex]
            best_rnn.save(output_dir)
            common.write_predictions(datasets, params["test_fold"], dev_fold, predictions_valid, dev_items, output_dir)

        # NOTE: under Python 2 this print statement outputs the tuple repr
        print (
            "BEST RESULT: epoch",
            params["be"],
            "train F1 ",
            params["tr_f1"],
            "valid F1",
            params["v_f1"],
            "best test F1",
            params["te_f1"],
            "with the model",
            output_dir,
        )

        best_valid_f1s.append(params["v_f1"])
        best_test_f1s.append(params["te_f1"])

        test_prediction_arrays.append(np.array(best_test_predictions, dtype=int))

        # append this fold's per-epoch results to a text log
        output_filename = fh.make_filename(output_dir, "results", "txt")
        with codecs.open(output_filename, "w") as output_file:
            for e, result in enumerate(results):
                output_file.write(
                    "epoch="
                    + str(e)
                    + "; train_f1="
                    + str(result[0])
                    + "; valid_f1="
                    + str(result[1])
                    + "; test_f1="
                    + str(result[2])
                    + "\n"
                )

    if params["ensemble"]:
        # majority vote over each fold's best test predictions
        test_predictions_stack = np.dstack(test_prediction_arrays)
        final_predictions = stats.mode(test_predictions_stack, axis=2)[0][:, :, 0]
        predicted_df = pd.DataFrame(final_predictions, index=test_items, columns=codes)
        true_df = pd.DataFrame(np.array(test_y), index=test_items, columns=codes)
        final_test_f1, final_test_pp = evaluation.calc_macro_mean_f1_pp(true_df, predicted_df)
    else:
        final_test_f1 = np.median(best_test_f1s)

    # hyperopt-style result: loss is negated so minimizing maximizes valid F1
    return {
        "loss": -np.median(best_valid_f1s),
        "final_test_f1": final_test_f1,
        "valid_f1s": best_valid_f1s,
        "test_f1s": best_test_f1s,
        "status": STATUS_OK,
    }