Beispiel #1
0
def main(n_epochs=1, n_train=8000, n_test=1500, embedding_dimension=300, dropout_parameter=0.2, bidirectional=True,
         rnn_type='GRU', rnn_units=100, model_name='model', maxPooling=True, averagePooling=False, save=True):
    """Build, train and evaluate a slot-filling model for ``n_epochs`` epochs.

    Registers the final-epoch scores in the module-level ``models`` dict and
    on the model's ``summary``, optionally saves the model, and returns it.
    """
    print("Working on Model: " + model_name)
    model = Model(embedding_dimension=embedding_dimension,
                  dropout_parameter=dropout_parameter,
                  bidirectional=bidirectional,
                  rnn_type=rnn_type,
                  rnn_units=rnn_units,
                  name=model_name,
                  maxPooling=maxPooling,
                  averagePooling=averagePooling)
    model.build_model()

    for epoch in range(n_epochs):
        print("Training epoch {}".format(epoch + 1))
        model.train_model(n_train=n_train)
        (words, results, predictions, misclassified_examples, predicted_labels,
         true_labels, predicted_slots, true_slots) = model.test_model(n_test=n_test)

        # Reshape sentences into the token-list format the conlleval script
        # expects, dropping the first and last token of every sentence.
        words = [sentence.split() for sentence in words]
        for token_seq in words:
            token_seq.pop(0)
            token_seq.pop()

        con_dict = conlleval(predicted_slots, true_slots, words, 'measure.txt')

        print('Precision = {}, Recall = {}, F1 = {}'.format(
            con_dict['p'], con_dict['r'], con_dict['f1']))

        accuracy = model.get_accuracy(predicted_labels, true_labels)
        print('Accuracy = ' + str(accuracy))

    # Metrics from the final epoch are the ones recorded on the model.
    model.precision = con_dict['p']
    model.recall = con_dict['r']
    model.f1 = con_dict['f1']

    accuracy = model.get_accuracy(predicted_labels, true_labels)

    model.save_results(predictions, results, misclassified_examples)

    models[model_name] = {'accuracy': accuracy,
                          'precision': con_dict['p'],
                          'recall': con_dict['r'],
                          'f1': con_dict['f1']}
    model.summary['accuracy'] = accuracy
    print(model.summary['accuracy'])
    model.summary['precision'] = con_dict['p']
    model.summary['recall'] = con_dict['r']
    model.summary['f1'] = con_dict['f1']

    if save:
        model.save_model()

    return model
Beispiel #2
0
def bioEalve(id_sents,
             id_tagss,
             id_test_tagss,
             flag=0,
             word_dict=None,
             tag_dict=None):
    """Decode id matrices back to symbols and score them with conlleval.

    ``id_sents``, ``id_tagss`` and ``id_test_tagss`` are same-shaped matrices
    of word ids, gold tag ids and predicted tag ids.  Positions whose word id
    equals ``flag`` (padding) are skipped.  Any symbol equal to
    '__UNKNOWN__TAG__' (after upper-casing) is folded to 'O'.

    Returns the dict produced by ``conlleval`` for the decoded sequences.
    """
    assert id_sents.shape == id_tagss.shape
    assert id_sents.shape == id_test_tagss.shape

    def _decode(mapping, idx):
        # Map an id to its upper-cased symbol, folding the unknown tag to 'O'.
        # (Computes the lookup/upper-casing once instead of twice per token.)
        symbol = mapping[idx].upper()
        return 'O' if symbol == '__UNKNOWN__TAG__' else symbol

    predictions_test = []
    groundtruth_test = []
    words_test = []
    for i in range(len(id_sents)):
        words = []
        groundtruth = []
        predictions = []
        for j in range(len(id_sents[i])):
            if id_sents[i][j] != flag:
                words.append(_decode(word_dict, id_sents[i][j]))
                groundtruth.append(_decode(tag_dict, id_tagss[i][j]))
                predictions.append(_decode(tag_dict, id_test_tagss[i][j]))
        words_test.append(words)
        groundtruth_test.append(groundtruth)
        predictions_test.append(predictions)

    # evaluation // compute the accuracy using conlleval.pl
    res_test = conlleval(predictions_test, groundtruth_test, words_test)
    return res_test
Beispiel #3
0
        sent = sent[np.newaxis, :]

        if sent.shape[1] > 1:
            loss = model.train_on_batch(sent, label)
            avgLoss += loss

        pred = model.predict_on_batch(sent)
        pred = np.argmax(pred, -1)[0]
        train_pred_label.append(pred)

    avgLoss = avgLoss / n_batch

    predword_train = [
        list(map(lambda x: idx2la[x], y)) for y in train_pred_label
    ]
    con_dict = conlleval(predword_train, groundtruth_train, words_train,
                         'r.txt')
    train_f_scores.append(con_dict['f1'])
    print('Loss = {}, Precision = {}, Recall = {}, F1 = {}'.format(
        avgLoss, con_dict['r'], con_dict['p'], con_dict['f1']))

    print("Validating =>")
    val_pred_label = []
    avgLoss = 0

    bar = progressbar.ProgressBar(max_value=len(val_x))
    for n_batch, sent in bar(enumerate(val_x)):
        label = val_label[n_batch]
        label = np.eye(n_classes)[label][np.newaxis, :]
        sent = sent[np.newaxis, :]

        if sent.shape[1] > 1:
Beispiel #4
0
        rnn.load_model_parameters(model_file)
        rnn.build_model()

        predictions_test = [
            map(lambda x: idx2label[x], rnn.predict(x)) for x in test_x
        ]
        groundtruth_test = [map(lambda x: idx2label[x], y) for y in test_Y]
        words_test = [map(lambda x: idx2word[x], w) for w in test_X]

        predictions_valid = [
            map(lambda x: idx2label[x], rnn.predict(x)) for x in valid_x
        ]
        groundtruth_valid = [map(lambda x: idx2label[x], y) for y in valid_Y]
        words_valid = [map(lambda x: idx2word[x], w) for w in valid_X]
        # evaluation // compute the accuracy using conlleval.pl
        res_test = conlleval(predictions_test, groundtruth_test, words_test,
                             folder + '/current.test.txt')
        res_valid = conlleval(predictions_valid, groundtruth_valid,
                              words_valid, folder + '/current.valid.txt')

        print res_test, res_valid

        test_file.write(
            'Valid: F1:\t%f Precision:\t%f Recall:\t%f, File: %s' %
            (res_valid['f1'], res_valid['p'], res_valid['r'], file))
        if res_valid['f1'] > valid_F1['f1']:
            valid_F1 = res_valid
            print 'Best valid F1 updated', res_valid
            test_file.write('\tBest valid F1 updated')
        test_file.write('\n')
        test_file.write(
            'Test: F1:\t%f Precision:\t%f Recall:\t%f, File: %s\n' %
            
        # evaluation // back into the real world : idx -> words
        predictions_test = [ map(lambda x: idx2label[x], \
                             rnn.classify(numpy.asarray(contextwin(x, s['win'])).astype('int32')))\
                             for x in test_lex ]
        groundtruth_test = [ map(lambda x: idx2label[x], y) for y in test_y ]
        words_test = [ map(lambda x: idx2word[x], w) for w in test_lex]

        predictions_valid = [ map(lambda x: idx2label[x], \
                             rnn.classify(numpy.asarray(contextwin(x, s['win'])).astype('int32')))\
                             for x in valid_lex ]
        groundtruth_valid = [ map(lambda x: idx2label[x], y) for y in valid_y ]
        words_valid = [ map(lambda x: idx2word[x], w) for w in valid_lex]

        # evaluation // compute the accuracy using conlleval.pl
        res_test  = conlleval(predictions_test, groundtruth_test, words_test, folder + '/current.test.txt')
        res_valid = conlleval(predictions_valid, groundtruth_valid, words_valid, folder + '/current.valid.txt')

        if res_valid['f1'] > best_f1:
            rnn.save(folder)
            best_f1 = res_valid['f1']
            if s['verbose']:
                print 'NEW BEST: epoch', e, 'valid F1', res_valid['f1'], 'best test F1', res_test['f1'], ' '*20
            s['vf1'], s['vp'], s['vr'] = res_valid['f1'], res_valid['p'], res_valid['r']
            s['tf1'], s['tp'], s['tr'] = res_test['f1'],  res_test['p'],  res_test['r']
            s['be'] = e
            subprocess.call(['mv', folder + '/current.test.txt', folder + '/best.test.txt'])
            subprocess.call(['mv', folder + '/current.valid.txt', folder + '/best.valid.txt'])
        else:
            print ''
        
def main(args):
    """Evaluate a saved tagging network on the ATIS test split.

    Loads the pickled ATIS dataset, rebuilds the network architecture,
    restores parameters from ``args.params``, scores the test set with
    conlleval, and prints 20 randomly chosen example predictions.

    NOTE(review): Python 2 code (print statements, dict.iteritems()).
    """
    np.random.seed(0xC0FFEE)

    # train/test are tuples of index sequences; dicts maps symbol -> index.
    train, test, dicts = pkl.load(open('datas/atis.pkl', 'r'))
    # Invert the vocabularies so indices can be mapped back to symbols.
    index2words = {value: key for key, value in dicts['words2idx'].iteritems()}
    index2tables = {
        value: key
        for key, value in dicts['tables2idx'].iteritems()
    }
    index2labels = {
        value: key
        for key, value in dicts['labels2idx'].iteritems()
    }

    datas = [
        {
            'name': 'train',
            'x': train[0],
            'y': train[2],
            'size': len(train[0])
        },
        {
            'name': 'test',
            'x': test[0],
            'y': test[2],
            'size': len(test[0])
        },
    ]

    vocsize = len(dicts['words2idx']) + 1  # +1 reserves one extra index (presumably padding — confirm)
    nclasses = len(dicts['labels2idx'])
    context_window_size = args.window_size

    n = Network()
    # word embedding layer
    n.layers.append(Fullconnect(vocsize, 256, Tanh.function, Tanh.derivative))
    # recurrent layer
    n.layers.append(
        Recurrent(n.layers[-1].output_size, 256, ReLU.function,
                  ReLU.derivative))
    n.layers.append(
        Dropout(n.layers[-1].output_size, 256, 0.5, ReLU.function,
                ReLU.derivative))
    n.layers.append(Fullconnect(n.layers[-1].output_size, nclasses))
    n.activation = Softmax(is_zero_pad=True)

    if not os.path.isfile(args.params):
        logging.error('not exist params: %s' % args.params)
        return

    fname = args.params
    n.load_params(pkl.load(open(fname, 'rb')))
    logging.info('load parameters at %s' % (fname))

    # prediction setup for evaluation: put Dropout layers into test mode
    for l, layer in enumerate(n.layers):
        if 'Dropout' == type(layer).__name__:
            n.layers[l].is_testing = True

    data = datas[1]  # evaluate on the test split only
    max_iteration = data['size']
    results = {'p': [], 'g': [], 'w': []}  # predictions / gold / words
    for i in range(max_iteration):
        idx = i
        x = data['x'][idx]
        labels = data['y'][idx]

        cwords = contextwin(datas[1]['x'][idx], context_window_size)
        words = onehotvector(cwords, vocsize)[0]
        _ = n.predict(words)

        y = [np.argmax(prediction) for prediction in _]

        # NOTE(review): predictions/gold are mapped through index2tables here,
        # while the Beispiel #7 variant uses index2labels — confirm intended.
        results['p'].append([index2tables[_] for _ in y])
        results['g'].append([index2tables[_] for _ in labels])
        results['w'].append([index2words[_] for _ in x])

    rv = conlleval(results['p'], results['g'], results['w'],
                   'atis_test_file.tmp')
    logging.info('evaluation result: %s' % (str(rv)))

    # Show 20 random test sentences with gold and predicted symbols.
    for i in range(20):
        idx = random.randint(0, datas[1]['size'] - 1)
        x = datas[1]['x'][idx]
        labels = datas[1]['y'][idx]

        cwords = contextwin(datas[1]['x'][idx], context_window_size)
        words = onehotvector(cwords, vocsize)[0]
        _ = n.predict(words)

        y = [np.argmax(prediction) for prediction in _]

        print 'word:   ', ' '.join([index2words[_] for _ in x])
        print 'table:  ', ' '.join([index2tables[_] for _ in labels])
        print 'label:  ', ' '.join([index2labels[_] for _ in labels])
        print 'predict:', ' '.join([index2labels[_] for _ in y])
Beispiel #7
0
def main(args):
    """Train the tagging network on ATIS and evaluate it with conlleval.

    Builds the model from an exec'd source string, trains for ``args.epoch``
    epochs with minibatches, checkpoints the parameters with the lowest test
    error rate, then scores the test split and dumps the final parameters.

    NOTE(review): Python 2 code (xrange, iteritems, integer division in
    ``data['size'] / minibatch``).
    """
    np.random.seed(0xC0FFEE)

    # train/test are tuples of index sequences; dicts maps symbol -> index.
    train, test, dicts = pkl.load(open('datas/atis.pkl', 'r'))
    # Invert the vocabularies so indices can be mapped back to symbols.
    index2words = {value: key for key, value in dicts['words2idx'].iteritems()}
    index2tables = {
        value: key
        for key, value in dicts['tables2idx'].iteritems()
    }
    index2labels = {
        value: key
        for key, value in dicts['labels2idx'].iteritems()
    }

    datas = [
        {
            'name': 'train',
            'x': train[0],
            'y': train[2],
            'size': len(train[0])
        },
        {
            'name': 'test',
            'x': test[0],
            'y': test[2],
            'size': len(test[0])
        },
    ]

    vocsize = len(dicts['words2idx']) + 1  # +1 reserves one extra index (presumably padding — confirm)
    nclasses = len(dicts['labels2idx'])
    context_window_size = args.window_size
    learning_rate = args.learning_rate
    minibatch = args.minibatch
    logging.info(
        'vocsize:%d, nclasses:%d, window-size:%d, minibatch:%d, learning-rate:%.5f'
        % (vocsize, nclasses, context_window_size, minibatch, learning_rate))

    # HACK: model architecture defined as source text and exec'd below.
    # Layer sizes 573/127 are hard-coded here rather than using vocsize/nclasses.
    # SECURITY NOTE: exec of constructed source — fine for a constant literal,
    # but never feed untrusted text through this path.
    model_script = '''
def model(learning_rate=0.0001):
    n = Network()
    # word embedding layer
    n.layers.append( Fullconnect(573, 256,                       Tanh.function, Tanh.derivative,
        updater=GradientDescent(learning_rate)) )
    # recurrent layer
    n.layers.append( Recurrent(n.layers[-1].output_size, 256,    ReLU.function, ReLU.derivative,
        updater=GradientDescent(learning_rate)) )
    n.layers.append( Dropout(n.layers[-1].output_size, 256, 0.5, ReLU.function, ReLU.derivative,
        updater=GradientDescent(learning_rate)) )
    n.layers.append( Fullconnect(n.layers[-1].output_size, 127,
        updater=GradientDescent(learning_rate)) )
    n.activation = Softmax(is_zero_pad=True)
    return n
    '''
    exec(model_script)
    n = model(learning_rate)

    minimum_validation_error_rate = 1.0
    for epoch in xrange(args.epoch):
        for data in datas:
            # Dropout active while training, disabled on the test split.
            for l, layer in enumerate(n.layers):
                if 'Dropout' == type(layer).__name__:
                    n.layers[l].is_testing = data['name'] == 'test'

            epoch_loss = 0
            epoch_error_rate = 0
            max_iteration = data['size'] / minibatch
            for i in xrange(max_iteration):
                # Random minibatch sampling for train, sequential for test.
                if data['name'] == 'train':
                    idxs = [
                        random.randint(0, data['size'] - 1)
                        for _ in range(minibatch)
                    ]
                else:
                    idxs = [i * minibatch + k for k in range(minibatch)]
                cwords = [
                    contextwin(data['x'][idx], context_window_size)
                    for idx in idxs
                ]
                words_labels = [
                    onehotvector(cword, vocsize, data['y'][idx], nclasses)
                    for idx, cword in zip(idxs, cwords)
                ]

                words = [word for word, label in words_labels]
                labels = [label for word, label in words_labels]

                # zero padding for minibatch: pad every sequence to the
                # longest one so they stack into a rectangular array
                max_size_of_sequence = max([_.shape[0] for _ in words])
                for k, (word, label) in enumerate(zip(words, labels)):
                    size_of_sequence = word.shape[0]
                    words[k] = np.pad(
                        word,
                        ((0, max_size_of_sequence - size_of_sequence), (0, 0)),
                        mode='constant')
                    labels[k] = np.pad(
                        label,
                        ((0, max_size_of_sequence - size_of_sequence), (0, 0)),
                        mode='constant')

                # batch-major -> time-major
                words = np.swapaxes(np.array(words), 0, 1)
                labels = np.swapaxes(np.array(labels), 0, 1)

                if data['name'] == 'train':
                    loss = n.train(words,
                                   labels) / (max_size_of_sequence * minibatch)
                    predictions = n.y
                else:
                    predictions = n.predict(words)
                    loss = n.activation.loss(predictions, labels) / (
                        max_size_of_sequence * minibatch)
                error_rate = n.activation.error(
                    predictions, labels) / (max_size_of_sequence * minibatch)

                epoch_loss += loss
                epoch_error_rate += error_rate
                # Periodic progress log (every ~1000 samples) during training.
                if i % (1000 /
                        minibatch) == 0 and i != 0 and data['name'] == 'train':
                    logging.info(
                        'epoch:%04d iter:%04d loss:%.5f error-rate:%.5f' %
                        (epoch, i, epoch_loss / (i + 1), epoch_error_rate /
                         (i + 1)))

            logging.info('[%5s] epoch:%04d loss:%.5f error-rate:%.5f' %
                         (data['name'], epoch, epoch_loss / max_iteration,
                          epoch_error_rate / max_iteration))
            # Checkpoint whenever the test-split error rate improves.
            if args.params and data[
                    'name'] == 'test' and minimum_validation_error_rate > epoch_error_rate / max_iteration:
                minimum_validation_error_rate = epoch_error_rate / max_iteration
                fname = args.params + '_min_error.pkl'
                pkl.dump(n.dump_params(), open(fname, 'wb'))
                logging.info('dump parameters at %s' % (fname))

    # prediction setup for evaluation
    # NOTE(review): relies on `data` still pointing at the 'test' entry after
    # the training loops — confirm this is intended rather than `= True`.
    for l, layer in enumerate(n.layers):
        if 'Dropout' == type(layer).__name__:
            n.layers[l].is_testing = data['name'] == 'test'

    data = datas[1]  # final evaluation on the test split
    max_iteration = data['size']
    results = {'p': [], 'g': [], 'w': []}  # predictions / gold / words
    for i in range(max_iteration):
        idx = i
        x = data['x'][idx]
        labels = data['y'][idx]

        cwords = contextwin(datas[1]['x'][idx], context_window_size)
        words = onehotvector(cwords, vocsize)[0]
        _ = n.predict(words)

        y = [np.argmax(prediction) for prediction in _]

        results['p'].append([index2labels[_] for _ in y])
        results['g'].append([index2labels[_] for _ in labels])
        results['w'].append([index2words[_] for _ in x])

    rv = conlleval(results['p'], results['g'], results['w'],
                   'atis_test_file.tmp')
    logging.info('evaluation result: %s' % (str(rv)))

    # Always dump the final (last-epoch) parameters alongside the best ones.
    if args.params:
        fname = args.params + '_last.pkl'
        pkl.dump(n.dump_params(), open(fname, 'wb'))
        logging.info('dump parameters at %s' % (fname))
    '''
def main(args):
    np.random.seed(0xC0FFEE)

    train, test, dicts = pkl.load( open('datas/atis.pkl', 'r') )
    index2words = {value:key for key, value in dicts['words2idx'].iteritems()}
    index2tables = {value:key for key, value in dicts['tables2idx'].iteritems()}
    index2labels = {value:key for key, value in dicts['labels2idx'].iteritems()}

    datas = [
            {'name':'train', 'x':train[0], 'y':train[2], 'size':len(train[0])},
            {'name':'test',  'x':test[0],  'y':test[2], 'size':len(test[0])},
            ]

    vocsize = len(dicts['words2idx']) + 1
    nclasses = len(dicts['labels2idx'])
    context_window_size = args.window_size

    n = Network()
    # word embedding layer
    n.layers.append( Fullconnect(vocsize, 256,                   Tanh.function, Tanh.derivative) )
    # recurrent layer
    n.layers.append( Recurrent(n.layers[-1].output_size, 256,    ReLU.function, ReLU.derivative) )
    n.layers.append( Dropout(n.layers[-1].output_size, 256, 0.5,  ReLU.function, ReLU.derivative) )
    n.layers.append( Fullconnect(n.layers[-1].output_size, nclasses) )
    n.activation = Softmax(is_zero_pad=True)

    if not os.path.isfile( args.params ):
        logging.error('not exist params: %s'%args.params)
        return

    fname = args.params
    n.load_params( pkl.load( open(fname, 'rb') ) )
    logging.info('load parameters at %s'%(fname))


    # prediction setup for evaluation
    for l, layer in enumerate(n.layers):
        if 'Dropout' == type( layer ).__name__:
            n.layers[l].is_testing = True

    data = datas[1]
    max_iteration = data['size']
    results = {'p':[], 'g':[], 'w':[]}
    for i in range(max_iteration):
        idx = i
        x = data['x'][idx]
        labels = data['y'][idx]

        cwords = contextwin(datas[1]['x'][idx], context_window_size)
        words = onehotvector(cwords, vocsize)[0]
        _ = n.predict(words)

        y = [np.argmax(prediction) for prediction in _]

        results['p'].append( [index2tables[_] for _ in y] )
        results['g'].append( [index2tables[_] for _ in labels] )
        results['w'].append( [index2words[_] for _ in x] )

    rv = conlleval(results['p'], results['g'], results['w'], 'atis_test_file.tmp')
    logging.info('evaluation result: %s'%(str(rv)))

    for i in range(20):
        idx = random.randint(0, datas[1]['size']-1)
        x = datas[1]['x'][idx]
        labels = datas[1]['y'][idx]

        cwords = contextwin(datas[1]['x'][idx], context_window_size)
        words = onehotvector(cwords, vocsize)[0]
        _ = n.predict(words)

        y = [np.argmax(prediction) for prediction in _]

        print 'word:   ', ' '.join([index2words[_] for _ in x])
        print 'table:  ', ' '.join([index2tables[_] for _ in labels])
        print 'label:  ', ' '.join([index2labels[_] for _ in labels])
        print 'predict:', ' '.join([index2labels[_] for _ in y])
def main(param=None):
    """Train a GRU tagger on the SemEval pickle with early stopping.

    Trains for up to ``param['nepochs']`` epochs, keeps the model with the
    best validation F1 (per conlleval), halves the learning rate after 10
    epochs without improvement, and stops once it drops below 1e-5.

    NOTE(review): Python 2 code — `print param` statements, xrange,
    iteritems; the parenthesized multi-argument print(...) calls below print
    tuples under Python 2, which appears intentional here.
    """
    if not param:
        param = {
            'lr': 0.0970806646812754,
            'verbose': 1,
            'decay': True,
            # decay on the learning rate if improvement stops
            'win': 7,
            # number of words in the context window
            'nhidden': 200,
            # number of hidden units
            'seed': 345,
            'emb_dimension': 50,
            # dimension of word embedding
            'nepochs': 100,
            # 60 is recommended
            'savemodel': False
        }
    print param

    folder = "RelationExtraction"
    if not os.path.exists(folder):
        os.mkdir(folder)
    #load dataset
    pickle_file = 'semeval.pkl'
    with open(pickle_file, 'rb') as f:
        save = pickle.load(f)
        train_dataset = save['train_dataset']
        train_labels = save['train_labels']
        test_dataset = save['test_dataset']
        test_labels = save['test_labels']
        dic = save['dicts']
        del save  # hint to help gc free up memory
        print('Training set', train_dataset.shape, train_labels.shape)
        print('Test set', test_dataset.shape, test_labels.shape)

    # In[5]:
    # Coerce every sequence to int32 index arrays.
    train_dataset = [np.array(x, dtype=np.int32) for x in train_dataset]
    train_labels = [np.array(x, dtype=np.int32) for x in train_labels]
    x_test = [np.array(x, dtype=np.int32) for x in test_dataset]
    y_test = [np.array(x, dtype=np.int32) for x in test_labels]

    # Fixed train/validation split.
    # NOTE(review): index 7200 is skipped by the 7201 start — off-by-one? confirm.
    x_train = train_dataset[0:7200]
    y_train = train_labels[0:7200]
    x_valid = train_dataset[7201:8000]
    y_valid = train_labels[7201:8000]

    # In[6]:

    #Raw input encoding -''' visualize a few sentences '''
    w2idx, labels2idx = dic['words2idx'], dic['labels2idx']
    idx2w = dict((v, k) for k, v in w2idx.iteritems())
    idx2la = dict((v, k) for k, v in labels2idx.iteritems())

    # In[10]:

    vocsize = len(idx2w)
    nclasses = len(idx2la)
    nsentences = len(x_train)

    # Decode the fixed gold/word sequences once, up front.
    groundtruth_valid = [map(lambda x: idx2la[x], y) for y in y_valid]
    words_valid = [map(lambda x: idx2w[x], w) for w in x_valid]
    groundtruth_test = [map(lambda x: idx2la[x], y) for y in y_test]
    words_test = [map(lambda x: idx2w[x], w) for w in x_test]

    # instanciate the model
    np.random.seed(param['seed'])
    random.seed(param['seed'])

    rnn = GRUTheano(word_dim=param['emb_dimension'],
                    window_context_size=param['win'],
                    vocab_size=vocsize,
                    num_labels=nclasses,
                    hidden_dim=param['nhidden'])
    #rnn = RNNSLU_LSTM(hidden_dim=param['nhidden'], num_labels=nclasses, vocab_size=vocsize, word_dim=param['emb_dimension'], window_context_size=param['win'])

    # train with early stopping on validation set
    best_f1 = -np.inf
    param['clr'] = param['lr']  # current (decayed) learning rate
    for e in xrange(param['nepochs']):

        # shuffle
        shuffle([x_train, y_train], param['seed'])

        param['ce'] = e  # current epoch
        tic = timeit.default_timer()

        # One pass over the training sentences with a progress line.
        for i, (x, y) in enumerate(zip(x_train, y_train)):
            rnn.train(x, y, param['win'], param['clr'])
            print '[learning] epoch %i >> %2.2f%%' % (e, (i + 1) * 100. /
                                                      nsentences),
            print 'completed in %.2f (sec) <<\r' % (timeit.default_timer() -
                                                    tic),
            sys.stdout.flush()

        # evaluation // back into the real world : idx -> words
        predictions_test = [
            map(
                lambda x: idx2la[x],
                rnn.classify(
                    np.asarray(contextwin(x, param['win'])).astype('int32')))
            for x in x_test
        ]
        predictions_valid = [
            map(
                lambda x: idx2la[x],
                rnn.classify(
                    np.asarray(contextwin(x, param['win'])).astype('int32')))
            for x in x_valid
        ]

        # evaluation // compute the accuracy using conlleval.pl
        res_test = conlleval(predictions_test, groundtruth_test, words_test,
                             folder + '/current.test.txt', folder)
        res_valid = conlleval(predictions_valid, groundtruth_valid,
                              words_valid, folder + '/current.valid.txt',
                              folder)

        if res_valid['f1'] > best_f1:

            if param['savemodel']:
                rnn.save(folder)

            # Keep a deep copy so decay can roll back to the best model.
            best_rnn = copy.deepcopy(rnn)
            best_f1 = res_valid['f1']

            if param['verbose']:
                print('NEW BEST: epoch', e, 'valid F1', res_valid['f1'],
                      'best test F1', res_test['f1'])

            param['vf1'], param['tf1'] = res_valid['f1'], res_test['f1']
            param['vp'], param['tp'] = res_valid['p'], res_test['p']
            param['vr'], param['tr'] = res_valid['r'], res_test['r']
            param['be'] = e  # best epoch

            # Promote the current conlleval output files to "best".
            subprocess.call([
                'mv', folder + '/current.test.txt', folder + '/best.test.txt'
            ])
            subprocess.call([
                'mv', folder + '/current.valid.txt', folder + '/best.valid.txt'
            ])
        else:
            if param['verbose']:
                print ''

        # learning rate decay if no improvement in 10 epochs
        if param['decay'] and abs(param['be'] - param['ce']) >= 10:
            param['clr'] *= 0.5
            rnn = best_rnn

        if param['clr'] < 1e-5:
            break

    print('BEST RESULT: epoch', param['be'], 'valid F1', param['vf1'],
          'best test F1', param['tf1'], 'with the model', folder)
def main(param=None):
	if not param:
		param = {'lr': 0.0970806646812754,
		    'verbose': 1,
		    'decay': True,
		    # decay on the learning rate if improvement stops
		    'win': 7,
		    # number of words in the context window
		    'nhidden': 200,
		    # number of hidden units
		    'seed': 345,
		    'emb_dimension': 50,
		    # dimension of word embedding
		    'nepochs': 100,
		    # 60 is recommended
		    'savemodel': False}
	print param

	folder = "RelationExtraction"
	if not os.path.exists(folder):
		os.mkdir(folder)
	#load dataset
	pickle_file = 'semeval.pkl'
	with open(pickle_file, 'rb') as f:
	    save = pickle.load(f)
	    train_dataset = save['train_dataset']
	    train_labels = save['train_labels']
	    test_dataset = save['test_dataset']
	    test_labels = save['test_labels']
	    dic=save['dicts']
	    del save  # hint to help gc free up memory  
	    print('Training set', train_dataset.shape, train_labels.shape)
	    print('Test set', test_dataset.shape, test_labels.shape)


	# In[5]:
	train_dataset=[np.array(x,dtype=np.int32) for x in train_dataset]
	train_labels=[np.array(x,dtype=np.int32) for x in train_labels]
	x_test=[np.array(x,dtype=np.int32) for x in test_dataset]
	y_test=[np.array(x,dtype=np.int32) for x in test_labels]

	x_train=train_dataset[0:7200]
	y_train=train_labels[0:7200]
	x_valid=train_dataset[7201:8000]
	y_valid=train_labels[7201:8000]
	
	

	# In[6]:

	#Raw input encoding -''' visualize a few sentences '''
	w2idx,labels2idx = dic['words2idx'], dic['labels2idx']
	idx2w  = dict((v,k) for k,v in w2idx.iteritems())
	idx2la = dict((v,k) for k,v in labels2idx.iteritems())  

	# In[10]:

	vocsize = len(idx2w)
	nclasses = len(idx2la)
	nsentences = len(x_train)

	groundtruth_valid = [map(lambda x: idx2la[x], y) for y in y_valid]
	words_valid = [map(lambda x: idx2w[x], w) for w in x_valid]
	groundtruth_test = [map(lambda x: idx2la[x], y) for y in y_test]
	words_test = [map(lambda x: idx2w[x], w) for w in x_test]

	# instanciate the model
	np.random.seed(param['seed'])
	random.seed(param['seed'])
 
	rnn = GRUTheano(word_dim=param['emb_dimension'], window_context_size=param['win'], vocab_size=vocsize, num_labels=nclasses, hidden_dim=param['nhidden'])
	#rnn = RNNSLU_LSTM(hidden_dim=param['nhidden'], num_labels=nclasses, vocab_size=vocsize, word_dim=param['emb_dimension'], window_context_size=param['win'])

	# train with early stopping on validation set
	best_f1 = -np.inf
    	param['clr'] = param['lr']
    	for e in xrange(param['nepochs']):

		# shuffle
		shuffle([x_train, y_train], param['seed'])

		param['ce'] = e
		tic = timeit.default_timer()
		
		for i, (x, y) in enumerate(zip(x_train, y_train)):
		    rnn.train(x, y, param['win'], param['clr'])
		    print '[learning] epoch %i >> %2.2f%%' % (
		        e, (i + 1) * 100. / nsentences),
		    print 'completed in %.2f (sec) <<\r' % (timeit.default_timer() - tic),
		    sys.stdout.flush()

		# evaluation // back into the real world : idx -> words
		predictions_test = [map(lambda x: idx2la[x],
		                    rnn.classify(np.asarray(
		                    contextwin(x, param['win'])).astype('int32')))
		                    for x in x_test]
		predictions_valid = [map(lambda x: idx2la[x],
		                     rnn.classify(np.asarray(
		                     contextwin(x, param['win'])).astype('int32')))
		                     for x in x_valid]

		# evaluation // compute the accuracy using conlleval.pl
		res_test = conlleval(predictions_test,
		                     groundtruth_test,
		                     words_test,
		                     folder + '/current.test.txt',
		                     folder)
		res_valid = conlleval(predictions_valid,
		                      groundtruth_valid,
		                      words_valid,
		                      folder + '/current.valid.txt',
		                      folder)

		if res_valid['f1'] > best_f1:

		    if param['savemodel']:
		        rnn.save(folder)

		    best_rnn = copy.deepcopy(rnn)
		    best_f1 = res_valid['f1']

		    if param['verbose']:
		        print('NEW BEST: epoch', e,
		              'valid F1', res_valid['f1'],
		              'best test F1', res_test['f1'])

		    param['vf1'], param['tf1'] = res_valid['f1'], res_test['f1']
		    param['vp'], param['tp'] = res_valid['p'], res_test['p']
		    param['vr'], param['tr'] = res_valid['r'], res_test['r']
		    param['be'] = e

		    subprocess.call(['mv', folder + '/current.test.txt',
		                    folder + '/best.test.txt'])
		    subprocess.call(['mv', folder + '/current.valid.txt',
		                    folder + '/best.valid.txt'])
		else:
		    if param['verbose']:
		        print ''

		# learning rate decay if no improvement in 10 epochs
		if param['decay'] and abs(param['be']-param['ce']) >= 10:
		    param['clr'] *= 0.5
		    rnn = best_rnn

		if param['clr'] < 1e-5:
		    break

	print('BEST RESULT: epoch', param['be'],
		  'valid F1', param['vf1'],
		  'best test F1', param['tf1'],
		  'with the model', folder)
                else:
                    train_yp[i][j][k] = 1

    np.random.seed(s['seed'])
    random.seed(s['seed'])

    rnn = model(hnum = s['hnum'], ynum = s['ynum'], wnum = s['wnum'], dnum = s['dnum'], me = s['me'], md = s['md'], mx = s['mx'], L2 = s['L2'],
                Wlnum = s['Wlnum'], Wrnum = s['Wrnum'], kalpha=s['kalpha'])

    s['cur_lr'] = s['lr']

   
    test_pred = []
    for ei, di, li, si, tli, tri, tai in zip(test_e, test_d, test_l, test_s, test_tl, test_tr, test_ta ):
        test_pred += [rnn.classify(ei, di, li, si, tli, tri, tai)]                 
    res_test = conlleval(test_pred, test_y)
    print ""
    for (d,x) in res_test.items():
        print d + ": " + str(x)
    print "start train"
    for e in xrange(s['epoch']):
        #shuffle
        shuffle([train_e, train_d, train_l, train_s, train_tl, train_tr, train_ta, train_y, train_yp], s['seed'])
        s['cur_epoch'] = e
        tic = time.time()
        for i in xrange(nsentences):
            ei = train_e[i]
            di = train_d[i]
            li = train_l[i]
            si = train_s[i]
            tli = train_tl[i]