def run_experiment(desc, data, start):
    labels, sent1, sent2 = extract_labels_data(data)
    # temporarily working with only a few datapoints
    # labels = labels[0:NUM_EXAMPLES]
    '''
    cat_lab = convert_to_categorical(labels)
    labels = np.asarray(batching(labels, len(labels), batchSize=BATCH_SIZE))
    cat_lab = np.asarray(batching(cat_lab, len(cat_lab), batchSize=BATCH_SIZE))
    n, nBatch = labels.shape
    nData = n * nBatch
    '''
    write_to_file(STATUS_LOG, desc)

    # encode each sentence with its corresponding pre-trained RBM
    sent1_ = wrapper_model("Sent1", MODEL_PATH1, MODEL, sent1)
    sent2_ = wrapper_model("Sent2", MODEL_PATH2, MODEL, sent2)

    # concatenate the two hidden representations and pass them through the joint RBM
    conc = concat_hidden_states(sent1_, sent2_)
    conc = wrapper_model("Conc", CONCAT, MODEL, conc)

    # dimensionality reduction
    dim = transposer(conc)
    dim = transposer(wrapper_model("Dim_R", DIM_RED, MODEL, dim))
    # log_time(start)

    # classifier
    '''
    weights, biases = getParameters(nBatch, CLASSIFIER, logistic=True)
    classifier = Logistic_Classifier(nData, dim, cat_lab, labels, n, CLASSIFIER)
    classifier.test_classifier(desc, weights, biases)
    '''
    _classifier(desc, dim, labels, "Pred", model_dir=CLASSIFIER)
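# Usage sketch (not part of the original pipeline): a minimal illustration of how
# run_experiment might be driven end to end. `load_sentence_pairs` and the data path
# are hypothetical placeholders; the real data-loading entry point may differ.
def _example_run_experiment():
    start = time.time()
    data = load_sentence_pairs('data/pairs.txt')  # hypothetical loader of (label, sent1, sent2) records
    run_experiment("Full pipeline: Sent1/Sent2 RBMs -> Concat -> Dim_R -> Classifier", data, start)
    log_time(start)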
def format_file(file, display=None):
    # Writes the header row of the batch-cost table; relies on the module-level
    # globals n (number of batches) and display_batch (reporting interval).
    status = '{0: <10}'.format('Batch')
    if display is not None:
        display_b = display
    else:
        display_b = display_batch
    for batch_no in range(n):
        if batch_no % display_b == 0:
            status += '{0: <10}'.format(str(batch_no))
    status += '{0: <10}'.format('Avg.Batch Cost')
    write_to_file(file, status)
def log_time(start):
    start_time = time.asctime(time.localtime(start))
    print(start_time)
    end = time.time()
    end_time = time.asctime(time.localtime(end))
    print(end_time)
    duration = time.strftime('%H:%M:%S', time.gmtime(end - start))
    status = '{0: <19}'.format('Program Started')
    status += '{0: <27}'.format(': ' + start_time)
    status += '{0: <20}'.format('\nProgram Ended')
    status += '{0: <27}'.format(': ' + end_time)
    status += '{0: <20}'.format('\nDuration')
    status += '{0: <27}'.format(': ' + duration)
    write_to_file(STATUS_LOG, status)
def wrapper_rbm(desc, dest_dir, num_epochs, raw_sents=None):
    global DIMENSION
    # preprocesses raw sentences, looks up their embeddings and groups them in batches
    if raw_sents is not None:
        with tf.Session(graph=tf.Graph()) as sess:
            vocab, word_embeddings = load_processed_embeddings(sess)
            get_sents_embeddings(raw_sents, word_embeddings, vocab, sess, dest_dir)
    train_writer = tf.summary.FileWriter(dest_dir)
    write_to_file(STATUS_LOG, desc)
    error = 0
    for step in range(num_epochs):
        with tf.Session(graph=tf.Graph()) as sess:
            if is_exists_saved_model(dest_dir):
                print("training from saved model")
                # model = restore_model(dest_dir)
                model = step_restore_model(dest_dir)
                cost, err = train_from_saved(model, dest_dir, train_writer, step)
                error += err
            else:
                print("training from scratch")
                cost, err = train_from_scratch(dest_dir, train_writer, step)
                error += err
            print("Step ", step, " Cost: ", "{:.5f}".format(err))

    # writing the average cost over all epochs to the status log
    m = int(n / display_batch)
    width = 7 + (10 * m)
    format_av_cost = '{0: >' + str(width) + '}'
    av_cost = format_av_cost.format('Av.Cost')
    # av_cost = '{0: <10}'.format('Av.Cost')
    av_err = error / num_epochs
    _status = str("{:.5f}".format(av_err))
    status = av_cost + '{0: >10}'.format(_status)
    status += '\n'
    write_to_file(STATUS_LOG, status)
    path_to_trained_model = dest_dir + 'model.ckpt'
    train_writer.close()
    # print_tensors_in_checkpoint_file(file_name=path_to_trained_model, tensor_name='', all_tensors=False)
    # tf.reset_default_graph()
    return path_to_trained_model
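# Usage sketch (assumption, not part of the original code): training the two sentence
# RBMs with wrapper_rbm. The desc strings, epoch count, and raw-sentence arguments are
# illustrative; MODEL_PATH1/MODEL_PATH2 are the destination directories used elsewhere.
def _example_train_sentence_rbms(raw_sents1, raw_sents2, num_epochs=10):
    path1 = wrapper_rbm("Sent1", MODEL_PATH1, num_epochs, raw_sents=raw_sents1)
    path2 = wrapper_rbm("Sent2", MODEL_PATH2, num_epochs, raw_sents=raw_sents2)
    return path1, path2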
def save_info(nProc, errorLog):
    global nPos, nNeg
    nPos = twos_count + threes_count + fours_count + fives_count + multi_count
    nNeg = twos_count_neg + threes_count_neg + fours_count_neg + fives_count_neg
    status_msg = "<Abstract, Article> pairs\t: " + str(nProc)
    status_msg += "\n\nPositive examples\t\t\t\t: " + str(nPos)
    status_msg += printStats()
    status_msg += "\nNegative examples\t\t\t\t: " + str(nNeg)
    status_msg += printStats_neg()
    status_msg += "\nIgnored examples\t\t\t\t: " + str(total - nPos - nNeg)
    status_msg += "\nTotal\t\t\t\t\t\t\t\t: " + str(total)
    if len(errorLog) > 0:
        # join the individual offending line numbers, not the characters of str(errorLog)
        status_msg += "\nError in line(s)\t\t\t\t: " + ' '.join(str(e) for e in errorLog)
    status_msg += "\n\nProgram started on\t\t\t: " + START_TIME
    end = time.time()
    end_time = time.asctime(time.localtime(end))
    status_msg += "\nProgram ended on\t\t\t\t: " + end_time
    duration = end - START
    status_msg += "\nDuration of Program\t\t\t: " + time.strftime('%H:%M:%S', time.gmtime(duration))
    write_to_file(STATUS_LOG, status_msg)
def run_model(desc, batches, source_dir, dest_dir):
    global DIMENSION
    nBatch, n_visible, embd_dim = batches[0].shape
    n = len(batches)
    DIMENSION = embd_dim
    w, bh, bv = getParameters(nBatch, source_dir)
    cost = 0
    processed = []
    status = '{0: <10}'.format(desc)
    for batch_no in range(n):
        with tf.Session(graph=tf.Graph()) as sess:
            visible = batches[batch_no]
            hidden_1, hidden_1_states = forward_prop(visible, w, bh)
            pos = positive(hidden_1, visible)
            # reconstruction
            visible_1 = back_prop(hidden_1_states, w, bv)
            neg_hidden, neg_hidden_states = forward_prop(visible_1, w, bh)
            neg = negative(neg_hidden, visible_1)
            cd = sess.run(contrastive_divergence(pos, neg))
            err = sess.run(error(visible_1, visible))
            hidden_1 = tf.transpose(hidden_1, perm=[0, 2, 1])
            cost += err
            processed.append(sess.run(hidden_1))
            if batch_no % display_batch == 0:
                cst = str("{:.5f}".format(err))
                status += '{0: <10}'.format(cst)
    av_batch_cost = cost / n
    format_av_batch_cost = str("{:.5f}".format(av_batch_cost))
    status += '{0: <10}'.format(format_av_batch_cost)
    write_to_file(STATUS_LOG, status)
    return np.asarray(processed)
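# Usage sketch (assumption): applying a trained RBM to pre-batched embeddings with
# run_model. The expected batch shape (nBatch, n_visible, embd_dim) follows the
# unpacking at the top of run_model; passing MODEL_PATH1 as both source and
# destination directory is illustrative only.
def _example_apply_rbm(batches):
    hidden = run_model("Sent1", batches, MODEL_PATH1, MODEL_PATH1)
    print("hidden states shape:", hidden.shape)
    return hidden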
def train_from_saved(updates, dest_dir, train_writer, step):
    global DIMENSION
    cd = 0  # cd (Contrastive divergence)
    err = 0
    str_step = 'Step ' + str(step)
    status = '{0: <10}'.format(str_step)
    hidden = []
    # sess = tf.Session()
    print('batching')
    # The module-level `description` selects which stage is being trained:
    # raw sentence embeddings, the concatenated hidden states, or dimensionality reduction.
    if description == "Raw":
        with tf.Session(graph=tf.Graph()) as sess:
            saver = tf.train.import_meta_graph(dest_dir + 'temp.ckpt.meta')
            saver.restore(sess, dest_dir + 'temp.ckpt')
            sent1_embed = sess.run("embeds:0")
            n_examples, n_visible, embd_dim = sent1_embed.shape
            DIMENSION = int(embd_dim)
            batches = batching(sent1_embed, n_examples, sess, dest_dir)
    elif description == "Conc":
        src_file1 = 'LOG_DIR_300/RBM_model/Sent1/model.ckpt'
        src_file2 = 'LOG_DIR_300/RBM_model/Sent2/model.ckpt'
        # concatenate the hidden states of sents 1 and 2
        batches = concat_hidden_states(src_file1, src_file2)
    else:
        src_file = 'LOG_DIR_300/RBM_model/Concantenated/model.ckpt'
        batches = dimensionality_reduction(src_file)
    print('done batching')
    format_file(STATUS_LOG)
    for batch_no in range(n):
        with tf.Session(graph=tf.Graph()) as sess:
            # batch = return_batch(dest_dir, batch_no)
            batch = batches[batch_no]
            batch_cd, er, updates = train_RBM(updates, batch, batch_no, dest_dir, train_writer, step)
            _, _, _, h = updates
            # if batch_no > 0:
            #     h = sess.run(h)
            hidden.append(h)
            cd += batch_cd
            err += er
            temp_status = "Step " + str(step) + " Batch " + str(batch_no) + " Cost: " + "{:.5f}".format(er)
            write_to_file(TEMP_LOG, temp_status)
            print("Step ", step, " Batch ", batch_no, " Cost: ", "{:.5f}".format(er))
            if batch_no % display_batch == 0:
                cost = str("{:.5f}".format(er))
                status += '{0: <10}'.format(cost)
    av_batch_cost = err / n
    format_av_batch_cost = str("{:.5f}".format(av_batch_cost))
    status += '{0: <10}'.format(format_av_batch_cost)
    write_to_file(STATUS_LOG, status)
    # hiddn = tf.convert_to_tensor(hidden)
    # sess.run(tf.global_variables_initializer())
    # hiddn = sess.run(hiddn)
    print("preparing model for saving")
    model = []
    w, bh, bv, _ = updates
    model.append(w)
    model.append(bh)
    model.append(bv)
    model.append(hidden)
    print("saving model")
    save_model(model, dest_dir)
    print("done saving")
    return cd / n, av_batch_cost
def writeToFile():
    print("Writing to file...")
    if nPos > 0:
        print("Writing positive examples to file..")
        write_to_file(POSITIVES, '\n'.join(str(u) for u in positves))
        if len(twos_a) > 0:
            write_to_file(TWOS_A, '\n'.join(str(u) for u in twos_a))
            write_to_file(TWOS_B, '\n'.join(str(u) for u in twos_b))
            write_to_file(TWOS_F, '\n'.join(str(u) for u in twos_f))
        if len(threes_a) > 0:
            write_to_file(THREES_A, '\n'.join(str(u) for u in threes_a))
            write_to_file(THREES_B, '\n'.join(str(u) for u in threes_b))
            write_to_file(THREES_C, '\n'.join(str(u) for u in threes_c))
            write_to_file(THREES_F, '\n'.join(str(u) for u in threes_f))
        if len(fours_a) > 0:
            write_to_file(FOURS_A, '\n'.join(str(u) for u in fours_a))
            write_to_file(FOURS_B, '\n'.join(str(u) for u in fours_b))
            write_to_file(FOURS_C, '\n'.join(str(u) for u in fours_c))
            write_to_file(FOURS_D, '\n'.join(str(u) for u in fours_d))
            write_to_file(FOURS_F, '\n'.join(str(u) for u in fours_f))
        if len(fives_a) > 0:
            write_to_file(FIVES_A, '\n'.join(str(u) for u in fives_a))
            write_to_file(FIVES_B, '\n'.join(str(u) for u in fives_b))
            write_to_file(FIVES_C, '\n'.join(str(u) for u in fives_c))
            write_to_file(FIVES_D, '\n'.join(str(u) for u in fives_d))
            write_to_file(FIVES_E, '\n'.join(str(u) for u in fives_e))
            write_to_file(FIVES_F, '\n'.join(str(u) for u in fives_f))
    if nNeg > 0:
        print("Writing negative examples to file..")
        write_to_file(NEGATIVES, '\n'.join(str(u) for u in negatves))
        if len(_twos_a) > 0:
            write_to_file(_TWOS_A, '\n'.join(str(u) for u in _twos_a))
            write_to_file(_TWOS_B, '\n'.join(str(u) for u in _twos_b))
            write_to_file(_TWOS_F, '\n'.join(str(u) for u in _twos_f))
        if len(_threes_a) > 0:
            write_to_file(_THREES_A, '\n'.join(str(u) for u in _threes_a))
            write_to_file(_THREES_B, '\n'.join(str(u) for u in _threes_b))
            write_to_file(_THREES_C, '\n'.join(str(u) for u in _threes_c))
            write_to_file(_THREES_F, '\n'.join(str(u) for u in _threes_f))
        if len(_fours_a) > 0:
            write_to_file(_FOURS_A, '\n'.join(str(u) for u in _fours_a))
            write_to_file(_FOURS_B, '\n'.join(str(u) for u in _fours_b))
            write_to_file(_FOURS_C, '\n'.join(str(u) for u in _fours_c))
            write_to_file(_FOURS_D, '\n'.join(str(u) for u in _fours_d))
            write_to_file(_FOURS_F, '\n'.join(str(u) for u in _fours_f))
        # guard on the negative-example list (_fives_a), not the positive one
        if len(_fives_a) > 0:
            write_to_file(_FIVES_A, '\n'.join(str(u) for u in _fives_a))
            write_to_file(_FIVES_B, '\n'.join(str(u) for u in _fives_b))
            write_to_file(_FIVES_C, '\n'.join(str(u) for u in _fives_c))
            write_to_file(_FIVES_D, '\n'.join(str(u) for u in _fives_d))
            write_to_file(_FIVES_E, '\n'.join(str(u) for u in _fives_e))
            write_to_file(_FIVES_F, '\n'.join(str(u) for u in _fives_f))