Beispiel #1
0
def check_random_example(dataset, model, idx2w):
    """
    Pick a random sentence from ``dataset`` and decode its reconstruction.

    Args:
        dataset: AutoencoderDataset; one item is drawn at a uniformly random
            position in ``[0, len(dataset))``.
        model: Autoencoder model; it is called on the drawn sentence with a
            leading batch dimension of size 1.
        idx2w (dict): A reversed dictionary matching ids to tokens

    Returns (tuple): the drawn input sentence and its reconstruction, both
        converted to text with ``idx2text``.

    """
    random_idx = np.random.randint(len(dataset))
    # Named ``sentence`` rather than ``input`` so the builtin is not shadowed.
    sentence = variable(dataset[random_idx])

    # Remove only the batch dimension that was added for the forward pass.
    # A bare ``squeeze()`` would also collapse any other size-1 dimension
    # (e.g. a one-token sentence) and break the per-token argmax below.
    # NOTE(review): assumes the model output is (batch, seq_len, vocab) -
    # confirm against the model definition.
    output = model(sentence.unsqueeze(0)).squeeze(0)

    # Softmax is monotonic, so the argmax over the raw scores selects the
    # same token as the argmax over the probabilities.
    _, words = torch.max(output, dim=-1)
    input_text = idx2text(list(sentence.cpu().numpy()), idx2w)
    predicted_text = idx2text(list(words.cpu().numpy()), idx2w)
    return input_text, predicted_text
Beispiel #2
0
def train():

    if prm.optimizer.lower() == 'adam':
        optimizer=adam
    elif prm.optimizer.lower() == 'sgd':
        optimizer=sgd
    elif prm.optimizer.lower() == 'rmsprop':
        optimizer=rmsprop
    elif prm.optimizer.lower() == 'adadelta':
        optimizer=adadelta

    options = locals().copy()

    print 'parameters:', str(options)
    prm_k = vars(prm).keys()
    prm_d = vars(prm)
    prm_k.sort()
    for x in prm_k:
        if not x.startswith('__'):
            print x,'=', prm_d[x]

    print 'loading Vocabulary...'
    vocab = utils.load_vocab(prm.vocab_path, prm.n_words)
    options['vocab'] = vocab

    options['vocabinv'] = {}
    for k,v in vocab.items():
        options['vocabinv'][v] = k

    print 'Loading Environment...'
    if prm.engine.lower() == 'lucene':
        import lucene_search
        options['engine'] = lucene_search.LuceneSearch()
    elif prm.engine.lower() == 'elastic':
        import elastic_search
        options['engine'] = elastic_search.ElasticSearch()

    print 'Loading Dataset...'
    dh5 = dataset_hdf5.DatasetHDF5(prm.dataset_path)
    qi_train = dh5.get_queries(dset='train')
    dt_train = dh5.get_doc_ids(dset='train')
    qi_valid = dh5.get_queries(dset='valid')
    dt_valid = dh5.get_doc_ids(dset='valid')
    qi_test = dh5.get_queries(dset='test')
    dt_test = dh5.get_doc_ids(dset='test')
    
    if prm.train_size == -1:
        train_size = len(qi_train)
    else:
        train_size = min(prm.train_size, len(qi_train))

    if prm.valid_size == -1:
        valid_size = len(qi_valid)
    else:
        valid_size = min(prm.valid_size, len(qi_valid))

    if prm.test_size == -1:
        test_size = len(qi_test)
    else:
        test_size = min(prm.test_size, len(qi_test))

    print '%d train examples' % len(qi_train)
    print '%d valid examples' % len(qi_valid)
    print '%d test examples' % len(qi_test)

    # This create the initial parameters as np ndarrays.
    # Dict name (string) -> np ndarray
    params, exclude_params = init_params(options)

    if prm.wordemb_path:
        print 'loading pre-trained word embeddings'
        params = load_wemb(params, vocab)
        options['W'] = params['W']

    if prm.reload_model:
        load_params(prm.reload_model, params)

    print 'Building model'
    # This create Theano Shared Variable from the parameters.
    # Dict name (string) -> Theano Tensor Shared Variable
    # params and tparams have different copy of the weights.
    tparams = init_tparams(params)
    for kk, value in tparams.iteritems():
        tparams[kk] = theano.shared(value, name=kk)

    iin, out, updates, f_pred, consider_constant \
            = build_model(tparams, options)

    #get only parameters that are not in the exclude_params list
    tparams_ = OrderedDict([(kk, vv) for kk, vv in tparams.iteritems() if kk not in exclude_params])

    grads = tensor.grad(out[0], wrt=itemlist(tparams_), consider_constant=consider_constant)

    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = optimizer(lr, tparams_, grads, iin, out, updates)

    history_errs = []
    best_p = None

    if prm.validFreq == -1:
        validFreq = len(qi_train) / prm.batch_size_train
    else:
        validFreq = prm.validFreq

    if prm.saveFreq == -1:
        saveFreq = len(qi_train) / prm.batch_size_train
    else:
        saveFreq = prm.saveFreq

    uidx = 0  # the number of update done
    estop = False  # early stop
    start_time = time.time()

    print 'Optimization'
    
    try:
        for eidx in xrange(prm.max_epochs):
            n_samples = 0

            # Get new shuffled index for the training set.
            kf = get_minibatches_idx(len(qi_train), prm.batch_size_train, shuffle=True)

            for _, train_index in kf:
                st = time.time()

                uidx += 1
                qi, qi_i, qi_lst, D_gt_id, D_gt_url = get_samples(qi_train, dt_train, train_index, options)

                # share the current queries with the search engine.
                options['current_queries'] = qi_lst

                n_samples += len(qi)

                is_train = 1.

                out = f_grad_shared(qi_i, D_gt_id, is_train)

                cost = out.pop(0)
                cost_ent = out.pop(0)

                lr_t = f_update(prm.lrate)

                if np.isnan(cost) or np.isinf(cost):
                    print 'NaN detected'
                    return 1., 1., 1.
    
                if np.mod(uidx, prm.dispFreq) == 0:

                    print '\n================================================================================'
                    print 'Epoch', eidx, 'Update', uidx, 'Cost', cost, 'LR_t', lr_t
                    print 'Time Minibatch Update: ' + str(time.time() - st)
                    print 'Input Query:       ', qi[0].replace('\n','\\n')
                    print
                    print 'Target Docs:       ', str(D_gt_url[0])
                    print
                    print 'Input Query Vocab: ', utils.idx2text(qi_i[0], options['vocabinv'])
                    for ii in range(prm.n_iterations):
                        prob = out.pop(0)
                        ans = out.pop(0)
                        metrics = out.pop(0)
                        bl = out.pop(0)
                        cost_bl = out.pop(0)
                        D_id = out.pop(0)
                        print 
                        print 'Iteration', ii
                        print 'Baseline Value', bl.mean(), 'Cost', cost_bl
                        print '  '.join(prm.metrics_map.keys())
                        print metrics.mean(0)
                        print
                        print 'Retrieved Docs:    ', str([options['engine'].id_title_map[d_id] for d_id in D_id[0]])
                        print
                        print 'Reformulated Query:', options['reformulated_queries'][ii][0]
                        print
                        print 'Query ANS:         ',
                        for kk, word in enumerate(options['current_queries'][0][:ans.shape[1]]):                         
                            if word not in options['vocab'] and word != '':
                                word += '<unk>'
                            if ans[0,kk] == 1:
                                word = word.upper()
                            print str(word), 
                        print
                        print
                        print 'prob[:,:,0].max(1).mean(), prob[:,:,0].mean(), prob[:,:,0].min(1).mean()', prob[:,:,0].max(1).mean(), prob[:,:,0].mean(), prob[:,:,0].min(1).mean()
                        print 'prob[:,:,1].max(1).mean(), prob[:,:,1].mean(), prob[:,:,1].min(1).mean()', prob[:,:,1].max(1).mean(), prob[:,:,1].mean(), prob[:,:,1].min(1).mean()
                    print '==================================================================================\n'


                if np.mod(uidx, validFreq) == 0 or uidx == 1:
             
                    kf_train = get_minibatches_idx(len(qi_train), prm.batch_size_pred, shuffle=True, max_samples=train_size)
                    kf_valid = get_minibatches_idx(len(qi_valid), prm.batch_size_pred, shuffle=True, max_samples=valid_size)
                    kf_test = get_minibatches_idx(len(qi_test), prm.batch_size_pred, shuffle=True, max_samples=test_size)

                    print '\nEvaluating - Training Set'
                    train_metrics = pred_error(f_pred, qi_train, dt_train, options, kf_train)

                    print '\nEvaluating - Validation Set'
                    valid_metrics = pred_error(f_pred, qi_valid, dt_valid, options, kf_valid)

                    print '\nEvaluating - Test Set'
                    test_metrics = pred_error(f_pred, qi_test, dt_test, options, kf_test)


                    his = [train_metrics, valid_metrics, test_metrics]
                    history_errs.append(his)
                    metric_idx = prm.metrics_map[prm.reward.upper()]
                    if (uidx == 0 or
                        valid_metrics[-1, metric_idx] >= np.array(history_errs)[:,1,-1,metric_idx].max()):

                        best_p = unzip(tparams)
                        bad_counter = 0


                    print '====================================================================================================='
                    print '  '.join(prm.metrics_map.keys())
                    print
                    print 'Train:'
                    print train_metrics
                    print
                    print 'Valid:'
                    print valid_metrics
                    print
                    print 'Test:'
                    print test_metrics
                    print
                    print '====================================================================================================='
                    if (len(history_errs) > prm.patience and
                        valid_metrics[-1, metric_idx] <= np.array(history_errs)[:-prm.patience,
                                                               1,-1,metric_idx].max()):
                        bad_counter += 1
                        if bad_counter > prm.patience:
                            print 'Early Stop!'
                            estop = True
                            break

                if prm.saveto and np.mod(uidx, saveFreq) == 0:
                    print 'Saving...',

                    if best_p is not None:
                        params = best_p
                    else:
                        params = unzip(tparams)
                    np.savez(prm.saveto, history_errs=history_errs, **params)

                    print 'Done'

            print 'Seen %d samples' % n_samples

            if estop:
                break

    except KeyboardInterrupt:
        print "Training interupted"
    return
Beispiel #3
0
def pred_error(f_pred, queries, actions, options, iterator, verbose=False):
    """
    Run the prediction function over mini-batches and aggregate its scores.

    Args:
        f_pred: Theano function computing the prediction; it is called as
            ``f_pred(q_i, q_m, root_pages, acts_p, uidx)``.
        queries: indexable collection of query strings.
        actions: indexable collection aligned with ``queries`` from which the
            root pages and the correct page-actions are derived.
        options (dict): must provide the keys 'vocab', 'vocabinv' and 'wiki'.
        iterator: iterable of ``(_, indices)`` pairs; ``indices`` selects the
            samples of one mini-batch.
        verbose: accepted for compatibility; not used by this function.

    Returns (tuple):
        valid_err: ``1 - accuracy``; entry ``i`` covers the samples whose
            correct path has ``i`` hops, the last entry covers all samples.
        valid_R: mean reward, same layout as ``valid_err``.
        acts_pc: fraction of page actions that match the correct ones
            (NaN when no action was counted).
        visited_pages: one list per row of the axis-swapped, flattened
            ``pages_idx`` returned by ``f_pred``, holding its entries that
            differ from -1.

    Side effects: appends a short report about the last sample of every
        mini-batch to the file ``prm.outpath``.

    Note: slots of ``valid_err`` / ``valid_R`` that received no samples are
        divided by zero and therefore come out as NaN.
    """

    valid_acc = np.zeros((prm.max_hops_train + 2), dtype=np.float32)
    valid_R = np.zeros((prm.max_hops_train + 2), dtype=np.float32)
    n = np.zeros((prm.max_hops_train + 2), dtype=np.float32)
    acts_pc = 0.
    acts_pt = 0.
    # A negative, decreasing counter is what gets passed to f_pred here.
    uidx = -1
    visited_pages = []

    for _, valid_index in iterator:
        q_i, q_m = utils.text2idx2([queries[t].lower() for t in valid_index], options['vocab'], prm.max_words_query*prm.n_consec)
        acts = [actions[t] for t in valid_index]

        #fake acts that won't be used in the prediction
        acts_p = -np.ones((prm.max_hops_pred+1, len(q_i) * prm.k), dtype=np.float32)

        root_pages = get_root_pages(acts)

        best_answer, best_page_idx, R, pages_idx = f_pred(q_i, q_m, root_pages, acts_p, uidx)

        pages_idx_ = np.swapaxes(pages_idx,0,1)
        pages_idx_ = pages_idx_.reshape(pages_idx_.shape[0],-1)

        #get pages visited (-1 entries are skipped):
        for page_idx in pages_idx_:
            visited_pages.append([idx for idx in page_idx if idx != -1])

        # A sample counts as correct only when its reward reaches 1.0.
        R_binary = np.ones_like(R)
        R_binary[R<1.0] = 0.0
        n[-1] += len(valid_index)
        valid_R[-1] += R.sum()
        valid_acc[-1] += R_binary.sum()

        # get correct page-actions.
        acts_p = get_acts(acts, prm.max_hops_pred, prm.k)

        pages_idx = pages_idx.reshape((pages_idx.shape[0],-1))

        # Check how many page actions the model got right.
        mask_pc = np.logical_or((pages_idx != -1.0), (acts_p != -1.0)).astype('float32')
        acts_pc += ((pages_idx == acts_p).astype('float32') * mask_pc).sum()
        acts_pt += mask_pc.sum() #total number of actions

        # compute accuracy per hop
        # The hop count of every sample does not depend on the hop bucket,
        # so it is computed once per mini-batch instead of once per bucket.
        n_hops = (acts_p != -1.0).astype('float32').sum(0)
        n_hops = n_hops.reshape((-1, prm.k))[:,0] # beam search use only the first n_samples actions
        for i in range(prm.max_hops_train+1):
            ih = (n_hops==i)
            valid_R[i] += R[ih].sum()
            valid_acc[i] += R_binary[ih].sum()
            n[i] += ih.astype('float32').sum()

        with open(prm.outpath, 'a') as fout:
            fout.write("\n\nQuery: " + queries[valid_index[-1]].replace("\n"," "))
            # Number of hops in the correct path of the last column.
            nh = (acts_p[:,-1] != -1.0).astype('int32').sum()
            if nh == 0:
                fout.write('\nCorrect Path: ' + options['wiki'].get_article_title(int(root_pages[-1])))
            else:
                path = ''
                for a in acts_p[:nh, -1]:
                    path += ' -> ' + options['wiki'].get_article_title(int(a))
                fout.write('\nCorrect Path: ' + path)

            fout.write('\nNumber of hops: ' + str(int(nh)))
            fout.write('\nBest answer: ' + utils.idx2text(best_answer[-1], options['vocabinv']))
            fout.write('\nBest page: ' + options['wiki'].get_article_title(best_page_idx[-1]))
            for i, pageidx in enumerate(pages_idx[:,-1]):
                fout.write('\niteration: ' +str(i) + " page idx " + str(pageidx) + ' title '+ options['wiki'].get_article_title(pageidx))

        uidx -= 1

    valid_R = valid_R / n
    valid_err = 1 - valid_acc / n
    # With an empty iterator both counters are still plain Python floats and
    # 0./0. would raise; report NaN like the NumPy divisions above do.
    if acts_pt != 0:
        acts_pc = acts_pc / acts_pt
    else:
        acts_pc = float('nan')

    return valid_err, valid_R, acts_pc, visited_pages
Beispiel #4
0
def train_lstm():

    optimizer=adam  # only adam is supported by now.
    options = locals().copy()
    with open(prm.outpath, "a") as fout:
        fout.write("parameters:" + str(options) + str(prm.__dict__))

    print "loading dictionary..."
    vocab = utils.load_vocab(prm.vocab_path, prm.n_words)
    options['vocab'] = vocab

    options['vocabinv'] = {}
    for k,v in vocab.items():
        options['vocabinv'][v] = k

    print 'Loading data...'
    options['wiki'] = wiki.Wiki(prm.pages_path)
    options['wikiemb'] = wiki_emb.WikiEmb(prm.pages_emb_path)

    #load Q&A Wiki dataset
    qpp = qp.QP(prm.qp_path)
    q_train, q_valid, q_test = qpp.get_queries()
    a_train, a_valid, a_test = qpp.get_paths()

    print 'Building model'
    # This create the initial parameters as np ndarrays.
    # Dict name (string) -> np ndarray
    params, exclude_params = init_params()

    if prm.wordemb_path:
        print 'loading pre-trained weights for word embeddings'
        params = load_wemb(params, vocab)
        options['W'] = params['W']

    if prm.reload_model:
        load_params(prm.reload_model, params)

    params_next = OrderedDict()
    if prm.learning.lower() == 'q_learning' and prm.update_freq > 0:
        # copy params to params_next
        for kk, kv in params.items():
            params_next[kk] = kv.copy()

    # This create Theano Shared Variable from the parameters.
    # Dict name (string) -> Theano Tensor Shared Variable
    # params and tparams have different copy of the weights.
    tparams = init_tparams(params)

    if prm.update_freq > 0:
        tparams_next = init_tparams(params_next)
    else:
        tparams_next = None
  
    if prm.learning.lower() == 'reinforce':
        R_mean = theano.shared(0.71*np.ones((1,)), name='R_mean')
        R_std = theano.shared(np.ones((1,)), name='R_std')
        baseline_vars = {'R_mean': R_mean, 'R_std': R_std}
    else:
        baseline_vars = {}

    iin, out, updates, is_train, sup, max_hops, k_beam, mixer, f_pred, consider_constant \
            = build_model(tparams, tparams_next, baseline_vars, options)

    #get only parameters that are not in the exclude_params list
    tparams_ = OrderedDict([(kk, vv) for kk, vv in tparams.iteritems() if kk not in exclude_params])

    grads = tensor.grad(out[0], wrt=itemlist(tparams_), consider_constant=consider_constant)

    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = optimizer(lr, tparams_, grads, iin, out, updates)

    print 'Optimization'

    if prm.train_size == -1:
        train_size = len(q_train)
    else:
        train_size = prm.train_size

    if prm.valid_size == -1:
        valid_size = len(q_valid)
    else:
        valid_size = prm.valid_size

    if prm.test_size == -1:
        test_size = len(q_test)
    else:
        test_size = prm.test_size

    with open(prm.outpath, "a") as fout:
        fout.write("\n%d train examples" % len(q_train)) 
    with open(prm.outpath, "a") as fout:
        fout.write("\n%d valid examples" % len(q_valid)) 
    with open(prm.outpath, "a") as fout:
        fout.write("\n%d test examples" % len(q_test))

    history_errs = []
    best_p = None

    if prm.validFreq == -1:
        validFreq = len(q_train) / prm.batch_size_train
    else:
        validFreq = prm.validFreq

    if prm.saveFreq == -1:
        saveFreq = len(q_train) / prm.batch_size_train
    else:
        saveFreq = prm.saveFreq

    uidx = 0  # the number of update done
    estop = False  # early stop
    start_time = time.time()
    
    experience = deque(maxlen=prm.replay_mem_size) # experience replay memory as circular buffer.
    experience_r = deque(maxlen=prm.replay_mem_size) # reward of each entry in the replay memory.

    try:
        for eidx in xrange(prm.max_epochs):
            n_samples = 0

            # Get new shuffled index for the training set.
            kf = get_minibatches_idx(len(q_train), prm.batch_size_train, shuffle=True)

            for _, train_index in kf:
                st = time.time()

                uidx += 1
                is_train.set_value(1.)
                max_hops.set_value(prm.max_hops_train) # select training dataset
                k_beam.set_value(1) # Training does not use beam search
                
                # Select the random examples for this minibatch
                queries = [q_train[t].lower() for t in train_index]
                actions = [a_train[t] for t in train_index]
                
                if prm.learning.lower() == 'supervised':
                    sup.set_value(1.) # select supervised mode
                else:
                    sup.set_value(0.)

                # Get correct actions (supervision signal)
                acts_p =  get_acts(actions, prm.max_hops_train, k_beam=1)

                # MIXER
                if prm.mixer > 0 and prm.learning.lower() == 'reinforce':
                    mixer.set_value(max(0, prm.max_hops_train - uidx // prm.mixer))
                else:
                    if prm.learning.lower() == 'supervised':
                        mixer.set_value(prm.max_hops_train+1)
                    else:
                        mixer.set_value(0)

                root_pages = get_root_pages(actions)                
                
                # Get the BoW for the queries.
                q_i, q_m = utils.text2idx2(queries, vocab, prm.max_words_query*prm.n_consec)
                n_samples += len(queries)
                
                if uidx > 1 and prm.learning.lower() == 'q_learning':
                    # Randomly select experiences and convert them to numpy arrays.
                    idxs = np.random.choice(np.arange(len(experience)), size=len(queries))
                    rvs = []
                    for j in range(len(experience[idxs[0]])):
                        rv = []
                        for idx in idxs:
                            rv.append(experience[idx][j])

                        rvs.append(np.asarray(rv))
                else:
                    rvs = [np.zeros((len(queries),prm.max_words_query*prm.n_consec),dtype=np.float32), # rs_q
                           np.zeros((len(queries),prm.max_words_query*prm.n_consec),dtype=np.float32), # rs_q_m
                           np.zeros((len(queries),prm.max_hops_train+1),dtype=np.int32), # rl_idx
                           np.zeros((len(queries),prm.max_hops_train+1),dtype=np.float32), # rt
                           np.zeros((len(queries),prm.max_hops_train+1),dtype=np.float32) # rr
                          ]

                cost, R, l_idx, pages_idx, best_page_idx, best_answer, mask, dist \
                        = f_grad_shared(q_i, q_m, root_pages, acts_p, uidx, *rvs)
                f_update(prm.lrate)

                if prm.learning.lower() == 'q_learning': 
                    # update weights of the next_q_val network.
                    if (prm.update_freq > 0 and uidx % prm.update_freq == 0) or (uidx == prm.replay_start):
                        for tk, tv in tparams.items():
                            if tk in tparams_next:
                                tparams_next[tk].set_value(tv.get_value().copy())

                # Only update memory after freeze_mem or before replay_start.
                if (uidx < prm.replay_start or uidx > prm.freeze_mem) and prm.learning.lower() == 'q_learning':
                    # Update Replay Memory.
                    t = np.zeros((len(queries), prm.max_hops_train+1))
                    rR = np.zeros((len(queries), prm.max_hops_train+1))

                    for i in range(len(queries)):
                        j = np.minimum(mask[i].sum(), prm.max_hops_train)
                        # If the agent chooses to stop or the episode ends,
                        # the reward will be the reward obtained with the chosen document.
                        rR[i,j] = R[i]
                        t[i,j] = 1.
                        
                        add = True
                        if prm.selective_mem >= 0 and uidx > 1:
                            # Selective memory: keep the percentage of memories
                            # with reward=1 approximately equal to <selective_mem>.
                            pr = float(np.asarray(experience_r).sum()) / max(1., float(len(experience_r)))
                            if (pr < prm.selective_mem) ^ (rR[i,j] == 1.): # xor
                                add = False

                        if add:
                            experience.append([q_i[i], q_m[i], l_idx[i], t[i], rR[i]])
                            experience_r.append(rR[i])

                if np.isnan(cost) or np.isinf(cost):
                    print 'NaN detected'
                    return 1., 1., 1.
    
                #if uidx % 100 == 0:
                #    vis_att(pages_idx[:,-1], queries[-1], alpha[:,-1,:], uidx, options)

                if np.mod(uidx, prm.dispFreq) == 0:
                    with open(prm.outpath, "a") as fout:
                        fout.write("\n\nQuery: " + queries[-1].replace("\n"," "))
                        fout.write('\nBest Answer: ' + utils.idx2text(best_answer[-1], options['vocabinv']))
                        fout.write('\nBest page: ' + options['wiki'].get_article_title(best_page_idx[-1]))

                        for i, page_idx in enumerate(pages_idx[:,-1]):
                            fout.write('\niteration: ' +str(i) + " page idx " + str(page_idx) + ' title: ' + options['wiki'].get_article_title(page_idx))
                       
                        fout.write('\nEpoch '+ str(eidx) + ' Update '+ str(uidx) + ' Cost ' + str(cost) + \
                                   ' Reward Mean ' + str(R.mean()) + ' Reward Max ' + str(R.max()) + \
                                   ' Reward Min ' + str(R.min()) + \
                                   ' Q-Value Max (avg per sample) ' + str(dist.max(2).mean()) + \
                                   ' Q-Value Mean ' + str(dist.mean()))
                        #fout.write("\nCost Supervised: " + str(cost_sup))
                        #fout.write("\nCost RL: " + str(cost_RL))

                        fout.write("\nTime per Minibatch Update: " + str(time.time() - st))
                       

                if prm.saveto and np.mod(uidx, saveFreq) == 0:
                    print 'Saving...',

                    if best_p is not None:
                        params = best_p
                    else:
                        params = unzip(tparams)
                    np.savez(prm.saveto, history_errs=history_errs, **params)
                    pkl.dump(options, open('%s.pkl' % prm.saveto, 'wb'), -1)
                    print 'Done'

                if np.mod(uidx, validFreq) == 0 or uidx == 1:
                    if prm.visited_pages_path:
                        shuffle = False
                    else:
                        shuffle = True
                    kf_train = get_minibatches_idx(len(q_train), prm.batch_size_pred, shuffle=shuffle, max_samples=train_size)
                    kf_valid = get_minibatches_idx(len(q_valid), prm.batch_size_pred, shuffle=shuffle, max_samples=valid_size)
                    kf_test = get_minibatches_idx(len(q_test), prm.batch_size_pred, shuffle=shuffle, max_samples=test_size)

                    is_train.set_value(0.)
                    sup.set_value(0.) # supervised mode off
                    mixer.set_value(0) # no supervision
                    max_hops.set_value(prm.max_hops_pred)
                    k_beam.set_value(prm.k)

                    with open(prm.outpath, 'a') as fout:
                        fout.write('\n\nComputing Error Training Set')
                    train_err, train_R, train_accp, visited_pages_train = pred_error(f_pred, q_train, a_train, options, kf_train)

                    with open(prm.outpath, 'a') as fout:
                        fout.write('\n\nComputing Error Validation Set')
                    valid_err, valid_R, valid_accp, visited_pages_valid = pred_error(f_pred, q_valid, a_valid, options, kf_valid)

                    with open(prm.outpath, 'a') as fout:
                        fout.write('\n\nComputing Error Test Set')
                    test_err, test_R, test_accp, visited_pages_test = pred_error(f_pred, q_test, a_test, options, kf_test)

                    if prm.visited_pages_path:
                        pkl.dump([visited_pages_train, visited_pages_valid, visited_pages_test], open(prm.visited_pages_path, 'wb'))

                    history_errs.append([valid_err[-1], test_err[-1]])

                    if (uidx == 0 or
                        valid_err[-1] <= np.array(history_errs)[:,0].min()):

                        best_p = unzip(tparams)
                        bad_counter = 0

                    with open(prm.outpath, "a") as fout:
                        fout.write('\n[{per hop}, Avg] Train err ' + str(train_err) + '  Valid err ' + str(valid_err) + '  Test err ' + str(test_err))
                        fout.write('\n[{per hop}, Avg] Train R ' + str(train_R) + '  Valid R ' + str(valid_R) + '  Test R ' + str(test_R))
                        fout.write('\nAccuracy Page Actions   Train ' + str(train_accp) + '  Valid ' + str(valid_accp) + '  Test ' + str(test_accp))

                    if (len(history_errs) > prm.patience and
                        valid_err[-1] >= np.array(history_errs)[:-prm.patience,
                                                               0].min()):
                        bad_counter += 1
                        if bad_counter > prm.patience:
                            print 'Early Stop!'
                            estop = True
                            break

            with open(prm.outpath, "a") as fout:
                fout.write('\nSeen %d samples' % n_samples)

            if estop:
                break

    except KeyboardInterrupt:
        print "Training interupted"

    end_time = time.time()
    if best_p is not None:
        zipp(best_p, tparams)
    else:
        best_p = unzip(tparams)

    is_train.set_value(0.)
    sup.set_value(0.) # supervised mode off
    mixer.set_value(0) # no supervision
    max_hops.set_value(prm.max_hops_pred)
    k_beam.set_value(prm.k)

    kf_train_sorted = get_minibatches_idx(len(q_train), prm.batch_size_train)

    train_err, train_R, train_accp, visited_pages_train = pred_error(f_pred, q_train, a_train, options, kf_train_sorted)
    valid_err, valid_R, valid_accp, visited_pages_valid = pred_error(f_pred, q_valid, a_valid, options, kf_valid)
    test_err, test_R, test_accp, visited_pages_test = pred_error(f_pred, q_test, a_test, options, kf_test)

    with open(prm.outpath, "a") as fout:
        fout.write('\n[{per hop}, Avg] Train err ' + str(train_err) + '  Valid err ' + str(valid_err) + '  Test err ' + str(test_err))
        fout.write('\n[{per hop}, Avg] Train R ' + str(train_R) + '  Valid R ' + str(valid_R) + '  Test R ' + str(test_R))
        fout.write('\nAccuracy Page Actions   Train ' + str(train_accp) + '  Valid ' + str(valid_accp) + '  Test ' + str(test_accp))

    if prm.saveto:
        np.savez(prm.saveto, train_err=train_err,
                    valid_err=valid_err, test_err=test_err,
                    history_errs=history_errs, **best_p)
    with open(prm.outpath, "a") as fout:
        fout.write('\nThe code run for %d epochs, with %f sec/epochs' % ((eidx + 1), (end_time - start_time) / (1. * (eidx + 1))))
    with open(prm.outpath, "a") as fout:
        fout.write('\nTraining took %.1fs' % (end_time - start_time))
    return train_err, valid_err, test_err
Beispiel #5
0
def train_lstm():

    optimizer=adam  # only adam is supported by now.
    options = locals().copy()
    with open(prm.outpath, "a") as fout:
        fout.write("parameters:" + str(options) + str(prm.__dict__))

    print "loading dictionary..."
    vocab = utils.load_vocab(prm.vocab_path, prm.n_words)
    options['vocab'] = vocab

    options['vocabinv'] = {}
    for k,v in vocab.items():
        options['vocabinv'][v] = k

    print 'Loading data...'
    options['wiki'] = wiki.Wiki(prm.pages_path)
    options['wikiemb'] = wiki_emb.WikiEmb(prm.pages_emb_path)
    qpp = qp.QP(prm.qp_path)
    q_train, q_valid, q_test = qpp.get_queries()
    a_train, a_valid, a_test = qpp.get_paths()

    print 'Building model'
    # This create the initial parameters as np ndarrays.
    # Dict name (string) -> np ndarray
    params, exclude_params = init_params()

    if prm.reload_model:
        load_params(prm.reload_model, params)

    if prm.wordemb_path:
        print 'loading pre-trained weights for word embeddings'
        params = load_wemb(params, vocab)
        options['W'] = params['W']

    # This create Theano Shared Variable from the parameters.
    # Dict name (string) -> Theano Tensor Shared Variable
    # params and tparams have different copy of the weights.
    tparams = init_tparams(params)

    mean = theano.shared(np.zeros((prm.dim_proj,)).astype(config.floatX)) # avg of the training set
    std = theano.shared(np.zeros((prm.dim_proj,)).astype(config.floatX)) # std of the training set
    t_samples = theano.shared(np.zeros((1,)).astype(config.floatX)) # total number of samples so far
    stats_vars = {'mean': mean, 'std': std, 't_samples': t_samples}
    
    if prm.supervised:
        baseline_vars = {}
    else:
        R_mean = theano.shared(0.71*np.ones((1,)), name='R_mean')
        R_std = theano.shared(np.ones((1,)), name='R_std')
        baseline_vars = {'R_mean': R_mean, 'R_std': R_std}


    is_train, sup, max_hops, k_beam, tq, tq_m, troot_pages, tacts_p, f_pred, cost, \
            scan_updates, baseline_updates, stats_updates, consider_constant, \
            opt_out = \
            build_model(tparams, baseline_vars, stats_vars, options)
            
            
    if prm.decay_c > 0.:
        decay_c = theano.shared(np_floatX(prm.decay_c), name='decay_c')
        weight_decay = 0.
        weight_decay += (tparams['U'] ** 2).sum()
        weight_decay *= decay_c
        cost += weight_decay
    
    #get only parameters that are not in the exclude_params list
    tparams_ = OrderedDict([(kk, vv) for kk, vv in tparams.iteritems() if kk not in exclude_params])

    grads = tensor.grad(cost, wrt=itemlist(tparams_), consider_constant=consider_constant)

    lr = tensor.scalar(name='lr')
    f_grad_shared, f_update = optimizer(lr, tparams_, grads, tq, tq_m, troot_pages, tacts_p, cost, scan_updates, baseline_updates, \
                                       stats_updates, opt_out=[opt_out['R'], opt_out['page_idx'], opt_out['best_answer'], opt_out['best_page_idx']])

    print 'Optimization'

    if prm.train_size == -1:
        train_size = len(q_train)
    else:
        train_size = prm.train_size

    if prm.valid_size == -1:
        valid_size = len(q_valid)
    else:
        valid_size = prm.valid_size

    if prm.test_size == -1:
        test_size = len(q_test)
    else:
        test_size = prm.test_size

    with open(prm.outpath, "a") as fout:
        fout.write("\n%d train examples" % len(q_train)) 
    with open(prm.outpath, "a") as fout:
        fout.write("\n%d valid examples" % len(q_valid)) 
    with open(prm.outpath, "a") as fout:
        fout.write("\n%d test examples" % len(q_test))

    history_errs = []
    best_p = None

    if prm.validFreq == -1:
        validFreq = len(q_train) / prm.batch_size_train
    else:
        validFreq = prm.validFreq

    if prm.saveFreq == -1:
        saveFreq = len(q_train) / prm.batch_size_train
    else:
        saveFreq = prm.saveFreq

    uidx = 0  # the number of update done
    estop = False  # early stop
    start_time = time.time()

    try:
        for eidx in xrange(prm.max_epochs):
            n_samples = 0

            # Get new shuffled index for the training set.
            kf = get_minibatches_idx(len(q_train), prm.batch_size_train, shuffle=True)

            for _, train_index in kf:
                st = time.time()

                uidx += 1
                is_train.set_value(1.)
                max_hops.set_value(prm.max_hops_train) # select training dataset
                k_beam.set_value(1) # Training does not use beam search
                
                # Select the random examples for this minibatch
                queries = [q_train[t].lower() for t in train_index]
                actions = [a_train[t] for t in train_index]
                
                if prm.supervised == 1:
                    sup_ = True
                elif prm.supervised > 1:
                    if uidx % (int(uidx / prm.supervised) + 1) == 0:
                        sup_ = True
                    else: 
                        sup_ = False
                else:
                    sup_ = False
                    
                if sup_:
                    sup.set_value(1.) # select supervised mode
                    # Get correct actions (supervision signal)
                    acts_p =  get_acts(actions, prm.max_hops_train, k_beam=1)
                else:
                    sup.set_value(0.) # select non-supervised mode
                    acts_p = -np.ones((prm.max_hops_train+1, len(queries)), dtype=np.float32)

                root_pages = get_root_pages(actions)
                
                # Get the BoW for the queries
                q_bow, q_m = utils.BOW2(queries, vocab, prm.max_words_query*prm.n_consec)
                n_samples += len(queries)
                cost, R, pagesidx, best_answer, best_page_idx = f_grad_shared(q_bow, q_m, root_pages, acts_p)
                f_update(prm.lrate) 
                if np.isnan(cost) or np.isinf(cost):
                    print 'NaN detected'
                    return 1., 1., 1.

                if np.mod(uidx, prm.dispFreq) == 0:
                    with open(prm.outpath, "a") as fout:
                        fout.write("\n\nQuery: " + queries[-1].replace("\n"," "))
                        fout.write('\nBest Answer: ' + utils.idx2text(best_answer[-1], options['vocabinv']))
                        fout.write('\nBest page: ' + options['wiki'].get_article_title(best_page_idx[-1]))

                        for i, pageidx in enumerate(pagesidx[:,-1]):
                            fout.write('\niteration: ' +str(i) + " page idx " + str(pageidx) + ' title: ' + options['wiki'].get_article_title(pageidx))
                       
                        fout.write('\nEpoch '+ str(eidx) + ' Update '+ str(uidx) + ' Cost ' + str(cost) + \
                                   ' Reward Mean ' + str(R.mean()) + ' Reward Max ' + str(R.max()) +  \
                                   ' Reward Min ' + str(R.min()))

                        fout.write("\nTime per Minibatch Update: " + str(time.time() - st))
                       

                if prm.saveto and np.mod(uidx, saveFreq) == 0:
                    print 'Saving...',

                    if best_p is not None:
                        params = best_p
                    else:
                        params = unzip(tparams)
                    np.savez(prm.saveto, history_errs=history_errs, **params)
                    pkl.dump(options, open('%s.pkl' % prm.saveto, 'wb'), -1)
                    print 'Done'

                if np.mod(uidx, validFreq) == 0:

                    kf_train = get_minibatches_idx(len(q_train), prm.batch_size_pred, shuffle=True, max_samples=train_size)
                    kf_valid = get_minibatches_idx(len(q_valid), prm.batch_size_pred, shuffle=True, max_samples=valid_size)
                    kf_test = get_minibatches_idx(len(q_test), prm.batch_size_pred, shuffle=True, max_samples=test_size)

                    is_train.set_value(0.)
                    sup.set_value(0.) # supervised mode off
                    max_hops.set_value(prm.max_hops_pred)
                    k_beam.set_value(prm.k)

                    with open(prm.outpath, 'a') as fout:
                        fout.write('\n\nComputing Error Training Set')
                    train_err, train_R, train_accp = pred_error(f_pred, q_train, a_train, options, kf_train)

                    with open(prm.outpath, 'a') as fout:
                        fout.write('\n\nComputing Error Validation Set')
                    valid_err, valid_R, valid_accp = pred_error(f_pred, q_valid, a_valid, options, kf_valid)

                    with open(prm.outpath, 'a') as fout:
                        fout.write('\n\nComputing Error Test Set')
                    test_err, test_R, test_accp = pred_error(f_pred, q_test, a_test, options, kf_test)

                    history_errs.append([valid_err[-1], test_err[-1]])

                    if (uidx == 0 or
                        valid_err[-1] <= np.array(history_errs)[:,0].min()):

                        best_p = unzip(tparams)
                        bad_counter = 0

                    with open(prm.outpath, "a") as fout:
                        fout.write('\n[{per hop}, Avg] Train err ' + str(train_err) + '  Valid err ' + str(valid_err) + '  Test err ' + str(test_err))
                        fout.write('\n[{per hop}, Avg] Train R ' + str(train_R) + '  Valid R ' + str(valid_R) + '  Test R ' + str(test_R))
                        fout.write('\nAccuracy Page Actions   Train ' + str(train_accp) + '  Valid ' + str(valid_accp) + '  Test ' + str(test_accp))

                    if (len(history_errs) > prm.patience and
                        valid_err[-1] >= np.array(history_errs)[:-prm.patience,
                                                               0].min()):
                        bad_counter += 1
                        if bad_counter > prm.patience:
                            print 'Early Stop!'
                            estop = True
                            break

            with open(prm.outpath, "a") as fout:
                fout.write('\nSeen %d samples' % n_samples)

            if estop:
                break

    except KeyboardInterrupt:
        print "Training interupted"

    end_time = time.time()
    if best_p is not None:
        zipp(best_p, tparams)
    else:
        best_p = unzip(tparams)

    is_train.set_value(0.)
    sup.set_value(0.) # supervised mode off
    max_hops.set_value(prm.max_hops_pred)
    k_beam.set_value(prm.k)

    kf_train_sorted = get_minibatches_idx(len(q_train), prm.batch_size_train)

    train_err, train_R, train_accp = pred_error(f_pred, q_train, a_train, options, kf_train_sorted)
    valid_err, valid_R, valid_accp = pred_error(f_pred, q_valid, a_valid, options, kf_valid)
    test_err, test_R, test_accp = pred_error(f_pred, q_test, a_test, options, kf_test)

    with open(prm.outpath, "a") as fout:
        fout.write('\n[{per hop}, Avg] Train err ' + str(train_err) + '  Valid err ' + str(valid_err) + '  Test err ' + str(test_err))
        fout.write('\n[{per hop}, Avg] Train R ' + str(train_R) + '  Valid R ' + str(valid_R) + '  Test R ' + str(test_R))
        fout.write('\nAccuracy Page Actions   Train ' + str(train_accp) + '  Valid ' + str(valid_accp) + '  Test ' + str(test_accp))

    if prm.saveto:
        np.savez(prm.saveto, train_err=train_err,
                    valid_err=valid_err, test_err=test_err,
                    history_errs=history_errs, **best_p)
    with open(prm.outpath, "a") as fout:
        fout.write('\nThe code run for %d epochs, with %f sec/epochs' % ((eidx + 1), (end_time - start_time) / (1. * (eidx + 1))))
    with open(prm.outpath, "a") as fout:
        fout.write('\nTraining took %.1fs' % (end_time - start_time))
    return train_err, valid_err, test_err