def train_agent(restore_agent_from='data/Prior.ckpt',
                scoring_function='activity_model',
                save_dir=None,
                learning_rate=0.0005,
                batch_size=64,
                n_steps=1000,
                sigma=100):
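    """Fine-tune an Agent RNN against a frozen Prior RNN (REINVENT-style):
    sample SMILES, score them with the given scoring function, and minimize the
    squared difference between the agent likelihood and the score-augmented
    prior likelihood."""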

    voc = Vocabulary(init_from_file="data/voc")
    start_time = time.time()

    Prior = RNN(voc)
    Agent = RNN(voc)

    if torch.cuda.is_available():
        Prior.rnn.load_state_dict(torch.load('data/Prior.ckpt'))
        Agent.rnn.load_state_dict(torch.load(restore_agent_from))
    else:
        Prior.rnn.load_state_dict(
            torch.load('data/Prior.ckpt',
                       map_location=lambda storage, loc: storage))
        Agent.rnn.load_state_dict(
            torch.load(restore_agent_from,
                       map_location=lambda storage, loc: storage))

    # freeze the Prior: it only supplies reference likelihoods, so only the Agent is trained
    for param in Prior.rnn.parameters():
        param.requires_grad = False

    optimizer = torch.optim.Adam(Agent.rnn.parameters(), lr=learning_rate)
    scoring_function = get_scoring_function(scoring_function=scoring_function)
    step_score = [[], []]
    print("Model initialized, starting training...")

    if not save_dir:
        save_dir = 'experiments/manuscript/1000steps_probtest_rewardonlynosmaller40_' + time.strftime(
            "%Y-%m-%d-%H_%M_%S", time.localtime())
    os.makedirs(save_dir, exist_ok=True)

    ## calculate the probability of sampled psmiles with predicted TC >= 0.4
    prob = []
    mean_ = []
    std_ = []
    for step in range(n_steps):
        seqs, agent_likelihood, entropy = Agent.sample(batch_size)
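        # keep only unique sequences so duplicate samples do not dominate the gradient step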
        unique_idxs = unique(seqs)
        seqs = seqs[unique_idxs]
        agent_likelihood = agent_likelihood[unique_idxs]
        entropy = entropy[unique_idxs]

        prior_likelihood, _ = Prior.likelihood(Variable(seqs))
        smiles = []
        for seq in seqs.cpu().numpy():
            smiles.append(voc.decode(seq))
        score = scoring_function(smiles)

        ####
        # fraction of the sampled batch with predicted score >= 0.4, plus the
        # mean/std of the passing scores (NaN when nothing passes, avoiding a warning)
        score_filter = [s for s in score if s >= 0.4]
        prob.append(len(score_filter) / batch_size)
        mean_.append(np.mean(score_filter) if score_filter else float('nan'))
        std_.append(np.std(score_filter) if score_filter else float('nan'))
        ####

        # augmented likelihood: the prior likelihood shifted by sigma * score;
        # the loss pulls the agent likelihood towards this target
        augmented_likelihood = prior_likelihood + sigma * Variable(score)
        loss = torch.pow((augmented_likelihood - agent_likelihood), 2)
        loss = loss.mean()

        # regularizer that penalizes very high agent likelihoods, discouraging collapse onto a few easy sequences
        regularization = -(1 / agent_likelihood).mean()
        loss += 5 * 1e3 * regularization

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # print out information during the training
        print("Agent    Prior    Target    Score        SMILES")
        for i in range(min(10, len(smiles))):
            print("{:6.3f}  {:6.3f}  {:6.3f}  {:6.3f}    {}".format(
                agent_likelihood[i], prior_likelihood[i],
                augmented_likelihood[i], score[i], smiles[i]))

        step_score[0].append(step + 1)
        step_score[1].append(np.mean(score))

        # if step > 98 and (step+1) % 100 == 0:
        # # if step == 0:
        #     torch.save(Agent.rnn.state_dict(), os.path.join(save_dir, 'agent_baseline_{}.ckpt'.format(step+1)))

        #     seqs, agent_likelihood, entropy = Agent.sample(1000)
        #     prior_likelihood, _ = Prior.likelihood(Variable(seqs))
        #     prior_likelihood = prior_likelihood.data.cpu().numpy()
        #     smiles = []
        #     for seq in seqs.cpu().numpy():
        #         smiles.append(voc.decode(seq))
        #     score = scoring_function(smiles)
        #     with open(os.path.join(save_dir, "sampled_{}".format(step+1)), 'w') as f:
        #         f.write("SMILES  Score  PriorLogP\n")
        #         for s, sc, pri in zip(smiles, score, prior_likelihood):
        #             f.write("{}  {:5.3f}  {:6.3f}\n".format(s, sc, pri))

    step_score_data = pd.DataFrame({
        'Step': step_score[0],
        'Score': step_score[1],
        'Prob': prob,
        'MEAN': mean_,
        'STD': std_
    })
    step_score_data.to_csv(os.path.join(save_dir, "step_score_1000step.csv"),
                           index=False)
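

# A minimal usage sketch of train_agent (illustrative; the defaults in the signature are used,
# and 'data/Prior.ckpt' / 'activity_model' are the checkpoint path and scoring-function name
# hard-coded above):
# train_agent(n_steps=1000, batch_size=64, sigma=100)
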
def main(restore_from=None, visualize=False):
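    """Pretrain the prior RNN by maximum likelihood on a filtered SMILES dataset,
    periodically sampling to report the fraction of valid SMILES and saving the
    weights to data/Prior.ckpt."""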
    # read vocabulary from a file
    voc = Vocabulary(init_from_file="data/voc")

    # create a dataset from a smiles file
    moldata = MolData("data/mols_filtered.smi", voc)
    data = DataLoader(moldata,
                      batch_size=10,
                      shuffle=True,
                      drop_last=True,
                      collate_fn=MolData.collate_fn)

    agent = RNN(voc)

    # can restore from a saved RNN
    if restore_from:
        agent.rnn.load_state_dict(
            torch.load(restore_from, map_location=torch.device('cpu')))

    optimizer = torch.optim.Adam(agent.rnn.parameters(), lr=0.001)
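    # anomaly detection helps trace which op produced NaN/Inf gradients, at the cost of slower training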
    torch.autograd.set_detect_anomaly(True)
    valid_ratios = list()
    for epoch in range(1, 2):
        for step, batch in tqdm(enumerate(data), total=len(data)):
            # sample from DataLoader
            seqs = batch.long()

            # calculate loss
            log_p, _ = agent.likelihood(seqs)
            loss = -log_p.mean()
            # print(loss)

            # calculate gradients and take a step
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # every n steps we decrease learning rate and print out some information, n can be customized
            if step % 5 == 0 and step != 0:
                decrease_learning_rate(optimizer, decrease_by=0.03)
                tqdm.write("#" * 50)
                tqdm.write("Epoch {:3d} step {:3d} loss: {:5.2f}\n".format(
                    epoch, step, loss.data))
                seqs, likelihood, _ = agent.sample(128)
                valid = 0
                for i, seq in enumerate(seqs.cpu().numpy()):
                    smile = voc.decode(seq)
                    if Chem.MolFromSmiles(smile):
                        valid += 1
                    if i < 5:
                        tqdm.write(smile)
                # report the fraction of valid SMILES over the sampled batch and checkpoint the prior
                valid_ratio = 100 * valid / len(seqs)
                valid_ratios.append(valid_ratio)
                tqdm.write("\n{:>4.1f}% valid SMILES".format(valid_ratio))
                tqdm.write("#" * 50 + "\n")
                torch.save(agent.rnn.state_dict(), "data/Prior.ckpt")
        torch.save(agent.rnn.state_dict(), "data/Prior.ckpt")
    if visualize:
        plt.plot(range(len(valid_ratios)),
                 valid_ratios,
                 color='red',
                 linewidth=5)
        # save to the working directory rather than a user-specific absolute path
        plt.savefig('validratio.png',
                    bbox_inches='tight',
                    dpi=400)
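
# A minimal invocation sketch (parameters as defined above): pretrain from scratch with
# main(), or resume from a saved checkpoint with e.g. main(restore_from='data/Prior.ckpt').
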
Example 3
def train_rnn(num_batches_per_bunch = 512, batch_size = 1, num_bunches_queue = 5, offset = 0, path_name = '/exports/work/inf_hcrc_cstr_udialogue/siva/data/'):
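    # Token-level RNN language model in Theano: each word is fed as a one-hot vector,
    # the hidden state is carried across tokens through a shared variable, and a
    # NewBob schedule controls the learning rate. The hidden size and the initial
    # learning rate are read from sys.argv[1] and sys.argv[2].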
    

    voc_list = Vocabulary(path_name + 'train')
    voc_list.vocab_create()
    vocab = voc_list.vocab
    vocab_size = voc_list.vocab_size
     
    dataprovider_train = DataProvider(path_name + 'train', vocab, vocab_size)
    dataprovider_valid = DataProvider(path_name + 'valid', vocab, vocab_size )
    dataprovider_test = DataProvider(path_name + 'test', vocab, vocab_size )
    
    print '..building the model'

    #symbolic variables for input, target vector and batch index
    index = T.lscalar('index')
    x = T.fvector('x')
    h0 = T.fvector('h0')
    y = T.ivector('y')
    learning_rate = T.fscalar('learning_rate') 

    #theano shared variables for train, valid and test
    train_set_x1 = theano.shared(numpy.empty((1,), dtype='float32'), allow_downcast = True)
    train_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    valid_set_x1 = theano.shared(numpy.empty((1,), dtype='float32'), allow_downcast = True)
    valid_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    test_set_x1 = theano.shared(numpy.empty((1,), dtype='float32'), allow_downcast = True)
    test_set_y = theano.shared(numpy.empty((1), dtype = 'int32'), allow_downcast = True)
    
    
    rng = numpy.random.RandomState()
   
    classifier = RNN(rng = rng, input = x, intial_hidden = h0, n_in = vocab_size, n_hidden = int(sys.argv[1]), n_out = vocab_size)
    
    cost = classifier.negative_log_likelihood(y)

    ht1_values = numpy.ones((int(sys.argv[1]), ), dtype = 'float32')
    
    ht1 = theano.shared(value = ht1_values, name = 'hidden_state')
    
    #constructor for learning rate class
    learnrate_schedular = LearningRateNewBob(start_rate = float(sys.argv[2]), scale_by=.5, max_epochs=9999,\
                                    min_derror_ramp_start=.01, min_derror_stop=.01, init_error=100.)

    log_likelihood = classifier.sum(y)
    likelihood = classifier.likelihood(y)
    
    #test_model
    test_model = theano.function(inputs = [], outputs = [log_likelihood, likelihood],  \
                                 givens = {x: test_set_x1,
                                           y: test_set_y,
                                           h0: ht1})
    #validation_model
    validate_model = theano.function(inputs = [], outputs = [log_likelihood], \
                                     givens = {x: valid_set_x1,
                                               y: valid_set_y,
                                               h0: ht1})

    gradient_param = []
    #calculates the gradient of cost with respect to parameters 
    for param in classifier.params:
        gradient_param.append(T.cast(T.grad(cost, param), 'float32'))
        
    updates = []
    #updates the parameters
    for param, gradient in zip(classifier.params, gradient_param):
        updates.append((param, T.cast(param - learning_rate * gradient - 0.000001 * param, dtype = 'float32')))
    
    #hidden_output = classifier.inputlayer.output
    #training_model
    train_model = theano.function(inputs = [learning_rate], outputs = [cost, classifier.inputlayer.output], updates = updates, \
                                 givens = {x: train_set_x1,
                                           y: train_set_y,
                                           h0:ht1})

    print '.....training'
    best_valid_loss = numpy.inf    
    start_time = time.time()
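    # train until the NewBob scheduler drives the learning rate to zero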
    while(learnrate_schedular.get_rate() != 0):
    
        print 'learning_rate:', learnrate_schedular.get_rate()
        print 'epoch_number:', learnrate_schedular.epoch        
        frames_showed, progress = 0, 0
        start_epoch_time = time.time()
        dataprovider_train.reset()
 
        for feats_lab_tuple in dataprovider_train:
    
            features, labels = feats_lab_tuple 
            
            if labels is None or features is None:
                continue                             
            frames_showed += features.shape[0]

            # present one token at a time as a one-hot input; the returned hidden state
            # is written back into ht1 so it carries over to the next token
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features1[temp[0]] = 1
                train_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
                train_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
                out = train_model(numpy.asarray(learnrate_schedular.get_rate(), dtype = 'float32'))
                ht1.set_value(numpy.asarray(out[1], dtype = 'float32'), borrow = True)
            progress += 1
            if progress%10000==0:
                end_time_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
            train_set_x1.set_value(numpy.empty((1, ), dtype = 'float32'))
            train_set_y.set_value(numpy.empty((1), dtype = 'int32'))
        
        end_time_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames), TIME: %f in seconds'\
                          %(progress, frames_showed,(end_time_progress-start_epoch_time))
	
        #classifier_name = 'MLP' + str(learnrate_schedular.epoch)
        #save_mlp(classifier, path+exp_name1 , classifier_name)
    
        print 'Validating...'
        valid_losses = []
        log_likelihood = []
        valid_frames_showed, progress = 0, 0
        start_valid_time = time.time()  # this also marks the end of this epoch's training time
        dataprovider_valid.reset()

        for feats_lab_tuple in dataprovider_valid:            
            features, labels = feats_lab_tuple            
            if labels is None or features is None:
                continue                             
            valid_frames_showed += features.shape[0]                
            for temp, i in zip(features, xrange(len(labels))):
                temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
                temp_features1[temp[0]] = 1
                valid_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
                valid_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
                log_likelihood.append(validate_model())
            valid_set_x1.set_value(numpy.empty((1), 'float32'))
            valid_set_y.set_value(numpy.empty((1), 'int32'))

            progress += 1
            if progress%1000==0:
                end_time_valid_progress = time.time()
                print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)
        
        end_time_valid_progress = time.time()
        print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, valid_frames_showed, end_time_valid_progress - start_valid_time)            
        entropy = (-numpy.sum(log_likelihood)/valid_frames_showed)
        print  entropy, numpy.sum(log_likelihood)

        if entropy < best_valid_loss:
            learning_rate = learnrate_schedular.get_next_rate(entropy)
            best_valid_loss = entropy
        else:
            learnrate_schedular.rate = 0.0
    end_time = time.time()
    print 'The fine tuning ran for %.2fm' %((end_time-start_time)/60.)

    print 'Testing...'
    log_likelihood = []
    likelihoods = []
    test_frames_showed, progress = 0, 0
    start_test_time = time.time()  # start timing the test pass
    dataprovider_test.reset()
    
    for feats_lab_tuple in dataprovider_test:
        
        features, labels = feats_lab_tuple 
            
        if labels is None or features is None:
            continue                             

        test_frames_showed += features.shape[0]                
        for temp, i in zip(features, xrange(len(labels))):
            temp_features1 = numpy.zeros(vocab_size, dtype = 'float32')
            temp_features1[temp[0]] = 1
            test_set_x1.set_value(numpy.asarray(temp_features1, dtype = 'float32'), borrow = True)
            test_set_y.set_value(numpy.asarray([labels[i]], dtype = 'int32'), borrow = True)
            out = test_model()
            log_likelihood.append(out[0])
            likelihoods.append(out[1])
        progress += 1
        if progress%1000==0:
           end_time_test_progress = time.time()
           print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                          %(progress, test_frames_showed, end_time_test_progress - start_test_time)
    end_time_test_progress = time.time()
    print 'PROGRESS: Processed %i bunches (%i frames),  TIME: %f in seconds'\
                    %(progress, test_frames_showed, end_time_test_progress - start_test_time)            
    #save_posteriors(log_likelihood, likelihoods, weight_path+file_name2)
    print numpy.sum(log_likelihood)