def run_experiment(desc, data, start):
    """Run the full experiment pipeline for one dataset.

    Encodes both sentence collections with their RBMs, concatenates the
    hidden states, applies dimensionality reduction, and feeds the
    result to the classifier.  The description is logged to STATUS_LOG
    before the models run.
    """
    labels, sent1, sent2 = extract_labels_data(data)

    write_to_file(STATUS_LOG, desc)

    # Encode each sentence collection with its own trained RBM.
    encoded1 = wrapper_model("Sent1", MODEL_PATH1, MODEL, sent1)
    encoded2 = wrapper_model("Sent2", MODEL_PATH2, MODEL, sent2)

    # Join the two hidden-state sets, then pass through the "Conc" RBM.
    joined = concat_hidden_states(encoded1, encoded2)
    joined = wrapper_model("Conc", CONCAT, MODEL, joined)

    # Dimensionality reduction: transpose in, run the RBM, transpose back.
    reduced = transposer(joined)
    reduced = transposer(wrapper_model("Dim_R", DIM_RED, MODEL, reduced))

    # Classifier stage.
    _classifier(desc, reduced, labels, "Pred", model_dir=CLASSIFIER)
def format_file(file, display=None):
    """Write a column-header row to *file*.

    The row is "Batch", one column per displayed batch number, and a
    closing "Avg.Batch Cost" column, each left-padded to width 10.

    NOTE(review): relies on the module-level globals ``n`` (number of
    batches) and ``display_batch`` (display interval) being set before
    this is called -- confirm against callers.

    Args:
        file: destination passed through to write_to_file.
        display: optional override for the display interval; falls back
            to the global ``display_batch`` when None.
    """
    status = '{0: <10}'.format('Batch')
    # PEP 8 idiom: `x is not None` rather than `not x is None`.
    if display is not None:
        display_b = display
    else:
        display_b = display_batch
    # One column for every batch index that would be displayed.
    for batch_no in range(n):
        if batch_no % display_b == 0:
            status += '{0: <10}'.format(str(batch_no))
    status += '{0: <10}'.format('Avg.Batch Cost')
    write_to_file(file, status)
def log_time(start):
    """Log start time, end time, and elapsed duration to STATUS_LOG.

    Both timestamps are also echoed to stdout.  The duration is
    formatted as HH:MM:SS from the difference between now and *start*.
    """
    started_str = time.asctime(time.localtime(start))
    print(started_str)
    now = time.time()
    ended_str = time.asctime(time.localtime(now))
    print(ended_str)
    elapsed_str = time.strftime('%H:%M:%S', time.gmtime(now - start))

    # Pair each padded label with its value, then join into one status
    # string (label widths match the original report layout).
    rows = [
        ('{0: <19}'.format('Program Started'), started_str),
        ('{0: <20}'.format('\nProgram Ended'), ended_str),
        ('{0: <20}'.format('\nDuration'), elapsed_str),
    ]
    status = ''.join(label + '{0: <27}'.format(': ' + value)
                     for label, value in rows)
    write_to_file(STATUS_LOG, status)
def wrapper_rbm(desc, dest_dir, num_epochs, raw_sents=None):
    """Train (or resume training of) an RBM for *num_epochs* steps.

    When *raw_sents* is given, the sentences are first embedded and
    batched into *dest_dir*; otherwise training runs on whatever data /
    checkpoint already lives there.  The average cost over all epochs is
    appended to STATUS_LOG.

    Args:
        desc: description line written to STATUS_LOG before training.
        dest_dir: directory holding batches, summaries and the checkpoint.
        num_epochs: number of training steps.
        raw_sents: optional raw sentences to preprocess first.

    Returns:
        Path to the trained model checkpoint (``dest_dir + 'model.ckpt'``).
    """
    global DIMENSION
    # Preprocess raw sentences: look up embeddings, group into batches.
    # PEP 8 idiom: `is not None` instead of `not ... is None`.
    if raw_sents is not None:
        with tf.Session(graph=tf.Graph()) as sess:
            vocab, word_embeddings = load_processed_embeddings(sess)
            get_sents_embeddings(raw_sents, word_embeddings, vocab, sess, dest_dir)

    train_writer = tf.summary.FileWriter(dest_dir)
    write_to_file(STATUS_LOG, desc)
    error = 0

    for step in range(num_epochs):
        # Fresh graph + session per step so restored variables from the
        # checkpoint do not collide with the previous step's graph.
        with tf.Session(graph=tf.Graph()) as sess:
            if is_exists_saved_model(dest_dir):
                print("training from saved model")
                model = step_restore_model(dest_dir)
                # NOTE(review): `cost` (the mean contrastive divergence)
                # is never used afterwards; only `err` is accumulated.
                cost, err = train_from_saved(model, dest_dir, train_writer, step)
                error += err
            else:
                print("training from scratch")
                cost, err = train_from_scratch(dest_dir, train_writer, step)
                error += err
            print("Step ", step, " Cost: ", "{:.5f}".format(err))

    # Right-align the "Av.Cost" label under the last displayed batch
    # column.  NOTE(review): relies on the module-level globals `n` and
    # `display_batch` being set by an earlier call -- confirm.
    m = int(n / display_batch)
    width = 7 + (10 * m)
    format_av_cost = '{0: >' + str(width) + '}'
    av_cost = format_av_cost.format('Av.Cost')
    av_err = error / num_epochs
    _status = str("{:.5f}".format(av_err))
    status = av_cost + '{0: >10}'.format(_status)
    status += '\n'
    write_to_file(STATUS_LOG, status)

    path_to_trained_model = dest_dir + 'model.ckpt'
    train_writer.close()

    return path_to_trained_model
def save_info(nProc, errorLog):
    """Write a run summary (example counts, error lines, timings) to
    STATUS_LOG.

    Updates the module-level ``nPos`` / ``nNeg`` totals from the
    per-bucket counters, then composes and writes a single status
    message.

    Args:
        nProc: number of <Abstract, Article> pairs processed.
        errorLog: iterable of line identifiers that failed processing.
    """
    global nPos, nNeg
    nPos = twos_count + threes_count + fours_count + fives_count + multi_count
    nNeg = twos_count_neg + threes_count_neg + fours_count_neg + fives_count_neg
    status_msg = "<Abstract, Article> pairs\t: " + str(nProc)
    status_msg += "\n\nPositive examples\t\t\t\t: " + str(nPos)
    status_msg += printStats()
    status_msg += "\nNegative examples\t\t\t\t: " + str(nNeg)
    status_msg += printStats_neg()
    status_msg += "\nIgnored examples\t\t\t\t: " + str(total - nPos - nNeg)
    status_msg += "\nTotal\t\t\t\t\t\t\t\t: " + str(total)
    if len(errorLog) > 0:
        # BUG FIX: `' '.join(str(errorLog))` joined the *characters* of
        # the list's repr (e.g. "[ 1 ,   2 ]"); join the stringified
        # elements instead.
        status_msg += "\nError in line(s)\t\t\t\t: " + ' '.join(str(e) for e in errorLog)

    status_msg += "\n\nProgram started on\t\t\t: " + START_TIME
    end = time.time()
    end_time = time.asctime(time.localtime(end))

    status_msg += "\nProgram ended on\t\t\t\t: " + end_time
    duration = end - START
    status_msg += "\nDuration of Program\t\t\t: " + time.strftime('%H:%M:%S', time.gmtime(duration))

    write_to_file(STATUS_LOG, status_msg)
def run_model(desc, batches, source_dir, dest_dir):
    """Run a trained RBM over every batch and collect its hidden states.

    Loads the RBM parameters from *source_dir*, performs one
    forward/reconstruction pass per batch, accumulates the
    reconstruction error, logs a per-batch cost row to STATUS_LOG, and
    returns the (axis-swapped) hidden activations for all batches.

    Args:
        desc: label used as the first column of the status row.
        batches: sequence of arrays shaped (nBatch, n_visible, embd_dim).
        source_dir: directory the RBM parameters are loaded from.
        dest_dir: unused in this body -- presumably kept for signature
            symmetry with the other wrappers; verify against callers.

    Returns:
        np.ndarray stacking the per-batch hidden activations, with the
        last two axes swapped (tf.transpose perm=[0, 2, 1]).
    """
    global DIMENSION
    nBatch, n_visible, embd_dim = batches[0].shape
    n = len(batches)
    DIMENSION = embd_dim
    w, bh, bv = getParameters(nBatch, source_dir)
    cost = 0
    processed = []
    status = '{0: <10}'.format(desc)
    for batch_no in range(n):
        # Fresh graph + session per batch keeps the TF graph from growing.
        with tf.Session(graph=tf.Graph()) as sess:
            visible = batches[batch_no]
            hidden_1, hidden_1_states = forward_prop(visible, w, bh)
            pos = positive(hidden_1, visible)

            #reconstruction
            visible_1 = back_prop(hidden_1_states, w, bv)
            neg_hidden, neg_hidden_states = forward_prop(visible_1, w, bh)
            neg = negative(neg_hidden, visible_1)

            # NOTE(review): `cd` is computed but never used afterwards.
            cd = sess.run(contrastive_divergence(pos, neg))
            err = sess.run(error(visible_1, visible))

            # Swap the last two axes of the hidden activations before storing.
            hidden_1 = tf.transpose(hidden_1, perm=[0, 2, 1])
            cost += err
            processed.append(sess.run(hidden_1))

            # Log the reconstruction error every `display_batch` batches
            # (module-level global -- TODO confirm where it is set).
            if (batch_no % display_batch == 0):
                cst = str("{:.5f}".format(err))
                status += '{0: <10}'.format(cst)

    av_batch_cost = cost / n
    format_av_batch_cost = str("{:.5f}".format(av_batch_cost))
    status += '{0: <10}'.format(format_av_batch_cost)
    write_to_file(STATUS_LOG, status)
    return np.asarray(processed)
def train_from_saved(updates,dest_dir,train_writer,step): 
    """Run one training epoch of the RBM starting from saved parameters.

    Rebuilds the batch set according to the module-level ``description``
    ("Raw" restores embeddings from a temp checkpoint, "Conc"
    concatenates the two sentence models' hidden states, anything else
    applies dimensionality reduction), then trains batch by batch,
    logging per-batch cost, and saves the updated model to *dest_dir*.

    NOTE(review): iterates over a module-level ``n`` that is never
    assigned here -- presumably set as a side effect of the batching
    helpers; confirm before reuse.

    Args:
        updates: current model parameters, unpacked as (w, bh, bv, h).
        dest_dir: directory for checkpoints and the saved model.
        train_writer: tf.summary.FileWriter for training summaries.
        step: epoch index, used in log lines.

    Returns:
        Tuple of (average contrastive divergence per batch,
        average reconstruction error per batch).
    """
    global DIMENSION
    cd = 0  #cd (Contrastive divergence)
    err = 0    
    str_step = 'Step ' + str(step)
    status = '{0: <10}'.format(str_step) 
    hidden = []
    #sess = tf.Session()
    print('batching')
    # `description` is a module-level global -- TODO confirm who sets it.
    if(description == "Raw"):
        # Restore precomputed sentence embeddings from the temp checkpoint
        # and batch them; the embedding dimension also fixes DIMENSION.
        with tf.Session(graph = tf.Graph()) as sess:         
            saver = tf.train.import_meta_graph(dest_dir+'temp.ckpt.meta')   
            saver.restore(sess,dest_dir+'temp.ckpt')            
            sent1_embed = sess.run("embeds:0")            
            n_examples,n_visible,embd_dim = sent1_embed.shape             
            DIMENSION = int(embd_dim)            
            batches = batching(sent1_embed,n_examples,sess,dest_dir)              
            
    elif(description == "Conc"):
        src_file1 = 'LOG_DIR_300/RBM_model/Sent1/model.ckpt'
        src_file2 = 'LOG_DIR_300/RBM_model/Sent2/model.ckpt'
        batches = concat_hidden_states(src_file1,src_file2) #concatenate the hidden states of sents 1 and 2
        
    else:
        src_file = 'LOG_DIR_300/RBM_model/Concantenated/model.ckpt'
        batches = dimensionality_reduction(src_file)        
    print('done batching')
    
    format_file(STATUS_LOG)    
    for batch_no in range(n):
        # Fresh graph + session per batch; train_RBM returns the refreshed
        # parameter tuple that feeds the next batch.
        with tf.Session(graph = tf.Graph()) as sess:         
            #batch = return_batch(dest_dir,batch_no) 
            batch = batches[batch_no] 
            batch_cd,er,updates = train_RBM(updates,batch,batch_no,dest_dir,train_writer,step)
            _,_,_,h = updates
            #if batch_no > 0:
                #h = sess.run(h)
            hidden.append(h)
            
            cd += batch_cd
            err+= er 
            temp_status = "Step " + str(step) + " Batch " + str(batch_no) +" Cost: " + "{:.5f}".format(er)
            write_to_file(TEMP_LOG,temp_status)
            print("Step ", step, " Batch ", batch_no," Cost: ", "{:.5f}".format(er))
            # Add a status column every `display_batch` batches (global).
            if(batch_no % display_batch == 0):            
                cost = str("{:.5f}".format(er))
                status += '{0: <10}'.format(cost) 
                
    av_batch_cost = err/n
    format_av_batch_cost = str("{:.5f}".format(av_batch_cost))
    status += '{0: <10}'.format(format_av_batch_cost)
    write_to_file(STATUS_LOG,status)
    
    #hiddn = tf.convert_to_tensor(hidden)
    #sess.run(tf.global_variables_initializer())            
    #hiddn = sess.run(hiddn)
    
    print("preparing model for saving")
    # Model layout expected by save_model: [w, bh, bv, hidden states].
    model =[]
    w,bh,bv,_ = updates
    model.append(w)
    model.append(bh)
    model.append(bv)
    model.append(hidden)
    print("saving model")    
    save_model(model,dest_dir)
    print("done saving")
    
    return cd/n, av_batch_cost
def writeToFile():
    """Flush the accumulated positive and negative example buckets to
    their respective output files.

    Each bucket group (twos/threes/fours/fives) is written only when its
    first sub-list is non-empty; items are written one per line.

    BUG FIX: the negative fives group was gated on the *positive*
    ``fives_a`` list; it now checks ``_fives_a``.
    """
    print("Writing to file...")
    if(nPos > 0):
        print("Writing positive examples to file..")
        write_to_file(POSITIVES,'\n'.join(str(u) for u in positves))
        if(len(twos_a) > 0):
            write_to_file(TWOS_A,'\n'.join(str(u) for u in twos_a))
            write_to_file(TWOS_B,'\n'.join(str(u) for u in twos_b))
            write_to_file(TWOS_F,'\n'.join(str(u) for u in twos_f))
        if(len(threes_a) > 0):
            write_to_file(THREES_A,'\n'.join(str(u) for u in threes_a))
            write_to_file(THREES_B,'\n'.join(str(u) for u in threes_b))
            write_to_file(THREES_C,'\n'.join(str(u) for u in threes_c))
            write_to_file(THREES_F,'\n'.join(str(u) for u in threes_f))
        if(len(fours_a) > 0):
            write_to_file(FOURS_A,'\n'.join(str(u) for u in fours_a))
            write_to_file(FOURS_B,'\n'.join(str(u) for u in fours_b))
            write_to_file(FOURS_C,'\n'.join(str(u) for u in fours_c))
            write_to_file(FOURS_D,'\n'.join(str(u) for u in fours_d))
            write_to_file(FOURS_F,'\n'.join(str(u) for u in fours_f))
        if(len(fives_a) > 0):
            write_to_file(FIVES_A,'\n'.join(str(u) for u in fives_a))
            write_to_file(FIVES_B,'\n'.join(str(u) for u in fives_b))
            write_to_file(FIVES_C,'\n'.join(str(u) for u in fives_c))
            write_to_file(FIVES_D,'\n'.join(str(u) for u in fives_d))
            write_to_file(FIVES_E,'\n'.join(str(u) for u in fives_e))
            write_to_file(FIVES_F,'\n'.join(str(u) for u in fives_f))

    if(nNeg > 0):
        print("Writing negative examples to file..")
        write_to_file(NEGATIVES,'\n'.join(str(u) for u in negatves))
        if(len(_twos_a) > 0):
            write_to_file(_TWOS_A,'\n'.join(str(u) for u in _twos_a))
            write_to_file(_TWOS_B,'\n'.join(str(u) for u in _twos_b))
            write_to_file(_TWOS_F,'\n'.join(str(u) for u in _twos_f))
        if(len(_threes_a) > 0):
            write_to_file(_THREES_A,'\n'.join(str(u) for u in _threes_a))
            write_to_file(_THREES_B,'\n'.join(str(u) for u in _threes_b))
            write_to_file(_THREES_C,'\n'.join(str(u) for u in _threes_c))
            write_to_file(_THREES_F,'\n'.join(str(u) for u in _threes_f))
        if(len(_fours_a) > 0):
            write_to_file(_FOURS_A,'\n'.join(str(u) for u in _fours_a))
            write_to_file(_FOURS_B,'\n'.join(str(u) for u in _fours_b))
            write_to_file(_FOURS_C,'\n'.join(str(u) for u in _fours_c))
            write_to_file(_FOURS_D,'\n'.join(str(u) for u in _fours_d))
            write_to_file(_FOURS_F,'\n'.join(str(u) for u in _fours_f))
        # BUG FIX: was `len(fives_a)` (the positive bucket).
        if(len(_fives_a) > 0):
            write_to_file(_FIVES_A,'\n'.join(str(u) for u in _fives_a))
            write_to_file(_FIVES_B,'\n'.join(str(u) for u in _fives_b))
            write_to_file(_FIVES_C,'\n'.join(str(u) for u in _fives_c))
            write_to_file(_FIVES_D,'\n'.join(str(u) for u in _fives_d))
            write_to_file(_FIVES_E,'\n'.join(str(u) for u in _fives_e))
            write_to_file(_FIVES_F,'\n'.join(str(u) for u in _fives_f))