def comment(repo_dir, report):
    # good: all commits on all branches; bad: no commits; ugly: partial uplift
    good, bad, ugly = classify_gbu(report)
    failed_bugs = []

    def remove_and_save(bug_id):
        # Drop the commented bug from the in-memory report and persist it immediately.
        del report[bug_id]
        util.write_json(uplift.uplift_report_file, report)

    for bug_ids, comment_fn in ((good, good_bug_comment),
                                (bad, bad_bug_comment),
                                (ugly, ugly_bug_comment)):
        for bug_id in bug_ids:
            print "Commenting on bug %s" % bug_id
            try:
                comment_fn(repo_dir, bug_id, report[bug_id])
                remove_and_save(bug_id)
            except FailedToComment:
                failed_bugs.append(bug_id)

    if len(failed_bugs) > 0:
        filename = os.path.abspath('failed_comments_%s.json' % util.time_str())
        print "The following bugs had commenting failures"
        print util.e_join(failed_bugs)
        print "Creating a file to use with 'uplift comments' to retry just these bugs."
        print "Fix the issue then run: uplift comments %s" % filename
        util.write_json(filename, report)
def populate_output_dir(out_dir):
    """Populates output dir with info files."""
    # copy the model generator file to the output dir
    shutil.copy(model.__file__, os.path.join(out_dir, "model.py"))
    # copy the config file to the output dir
    shutil.copy(cfg.__file__, os.path.join(out_dir, "config.py"))
    # info file
    with open(os.path.join(out_dir, "info.txt"), "w") as f:
        print("date created (y-m-d):", util.date_str(), file=f)
        print("time created:", util.time_str(), file=f)
        print("git commit hash:", util.git_hash(), file=f)
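# A minimal sketch (not from the source) of the snapshot idiom used above:
# module.__file__ is the path of the module's source on disk, so copying it
# records the exact code a run was started with. Demonstrated with the stdlib
# json module; the 'snapshot' directory name is made up.
import json
import os
import shutil

os.makedirs("snapshot", exist_ok=True)
shutil.copy(json.__file__, os.path.join("snapshot", "json_as_used.py"))
print("snapshotted:", json.__file__)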
def populate_out_dir(out_dir, train_set, val_set):
    '''Populates output dir with info files.'''
    # info file
    with open(os.path.join(out_dir, 'etc', 'train-log', 'info.txt'), 'w') as f:
        print('date created (y-m-d):', util.date_str(), file=f)
        print('time created:', util.time_str(), file=f)
        print('git commit hash:', util.git_hash(), file=f)
    # save train/val filepaths
    with open(os.path.join(out_dir, 'input', 'train.csv'), 'w') as f:
        for path in train_set:
            print(path, file=f)
    with open(os.path.join(out_dir, 'input', 'val.csv'), 'w') as f:
        for path in val_set:
            print(path, file=f)
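# Usage sketch (hypothetical): populate_out_dir above assumes the
# 'etc/train-log' and 'input' subdirectories already exist, so a caller would
# typically create them first. 'runs/exp1' and the file lists are made-up
# examples.
out_dir = os.path.join('runs', 'exp1')
for sub in (os.path.join('etc', 'train-log'), 'input'):
    os.makedirs(os.path.join(out_dir, sub), exist_ok=True)
populate_out_dir(out_dir,
                 train_set=['data/img_000.png', 'data/img_001.png'],
                 val_set=['data/img_002.png'])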
def main(_):
    '''
    1. load data (X: list of int, y: int)
    2. create session
    3. feed data
    4. training
    (5. validation), (6. prediction)
    '''
    word2index, label2index, trainX, trainY, validX, validY, testX, testY = load_data()
    max_f1, max_p, max_r, max_acc, test_f1, test_acc, test_p, test_r = [0] * 8
    voc_size, num_class, max_time_str = len(word2index), len(label2index), ''
    print("cnn_model.voc_size: {}, num_class: {}".format(voc_size, num_class))
    num_examples, FLAGS.sentence_len = trainX.shape
    print("num_examples of training:", num_examples,
          ";sentence_len:", FLAGS.sentence_len)
    # print a few samples for debugging
    print("trainX[0:10]:", trainX[0:10])
    print("trainY[0:10]:", trainY[0:10])
    print("train_y_short:", trainY[0])

    # 2. create session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # Instantiate model
        textCNN = TextCNN(filter_sizes, FLAGS.num_filters, num_class,
                          FLAGS.learning_rate, FLAGS.batch_size,
                          FLAGS.decay_steps, FLAGS.decay_rate,
                          FLAGS.sentence_len, voc_size, FLAGS.embed_size,
                          multi_label_flag=FLAGS.multi_label_flag)
        # Initialize saver; checkpoint restore is currently disabled
        # (the original condition is kept in the comment).
        saver = tf.train.Saver()
        if False:  # if os.path.exists(FLAGS.ckpt_dir + "checkpoint"):
            print("Restoring Variables from Checkpoint.")
            saver.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir))
        else:
            print('Initializing Variables')
            sess.run(tf.global_variables_initializer())
            if FLAGS.use_embedding:
                index2word = {v: k for k, v in word2index.items()}
                load_embedding(sess, index2word, textCNN.Embedding,
                               FLAGS.embedding_name)
        current_epoch = sess.run(textCNN.epoch_step)

        # 3. feed data & training
        number_of_training_data = len(trainX)
        batch_size = FLAGS.batch_size
        iteration = 0
        for epoch in range(current_epoch, FLAGS.num_epochs):
            loss, counter = 0.0, 0
            for start, end in zip(
                    range(0, number_of_training_data, batch_size),
                    range(batch_size, number_of_training_data, batch_size)):
                iteration += 1
                if not epoch and not counter:
                    print("trainX[start:end]:", trainX[start:end])
                feed_dict = {
                    textCNN.input_x: trainX[start:end],
                    textCNN.dropout_keep_prob: 0.8,
                    textCNN.is_training_flag: FLAGS.is_training_flag
                }
                if not FLAGS.multi_label_flag:
                    feed_dict[textCNN.input_y] = trainY[start:end]
                else:
                    feed_dict[textCNN.input_y_multilabel] = trainY[start:end]
                curr_loss, lr, _ = sess.run(
                    [textCNN.loss_val, textCNN.learning_rate, textCNN.train_op],
                    feed_dict)
                loss, counter = loss + curr_loss, counter + 1
                if not counter % 50:
                    print("%s Epoch %d\tBatch %d\tTrain Loss:%.3f\tLearning rate:%.5f"
                          % (time_str(), epoch, counter,
                             loss / float(counter), lr))

            # 5. validate model
            if not epoch % FLAGS.validate_every:
                eval_loss, f1, r, p, acc = do_eval(sess, textCNN,
                                                   validX, validY, num_class)
                print("Epoch %d Validation Loss:%.3f\tR:%.3f\tP:%.3f\tF1 Score:%.3f\tacc:%.3f"
                      % (epoch, eval_loss, r * 100, p * 100,
                         f1 * 100, acc * 100))
                if r > max_r:
                    # new best recall: remember scores and evaluate on the test set
                    max_time_str = time_str()
                    max_f1, max_acc, max_p, max_r = f1, acc, p, r
                    eval_loss, test_f1, test_r, test_p, test_acc = do_eval(
                        sess, textCNN, testX, testY, num_class)
                    print("Test Loss:%.3f|%.3f|%.3f|%.3f|%.3f"
                          % (eval_loss, test_r * 100, test_p * 100,
                             test_f1 * 100, test_acc * 100))
                # save model to checkpoint
                save_path = FLAGS.ckpt_dir + "model.ckpt"
                saver.save(sess, save_path, global_step=epoch)
            sess.run(textCNN.epoch_increment)

            # 6. test model
            if not epoch % 100:
                eval_loss, f1, r, p, acc = do_eval(sess, textCNN,
                                                   testX, testY, num_class)
                print("%s Epoch %d Test Loss:%.3f\tR:%.3f\tP:%.3f\tF1 Score:%.3f\tacc:%.3f"
                      % (time_str(), epoch, eval_loss, r * 100, p * 100,
                         f1 * 100, acc * 100))

        # print best train/test scores
        print("%s Train MAX F1_micro:%.3f|%.3f|%.3f|%.3f"
              % (max_time_str, max_r * 100, max_p * 100,
                 max_f1 * 100, max_acc * 100))
        print("%s Test F1_micro:%.3f|%.3f|%.3f|%.3f"
              % (max_time_str, test_r * 100, test_p * 100,
                 test_f1 * 100, test_acc * 100))
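# A self-contained sketch of the batching idiom used in the training loop
# above: zipping two staggered ranges yields consecutive [start, end) windows.
# Note that it silently drops a trailing partial batch, which is worth keeping
# in mind when num_examples % batch_size != 0.
number_of_training_data = 10
batch_size = 3
for start, end in zip(range(0, number_of_training_data, batch_size),
                      range(batch_size, number_of_training_data, batch_size)):
    print(start, end)  # -> 0 3, 3 6, 6 9; example index 9 is never fed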
def trainIters(self, pairs, first_iter, last_iter, logger, evaluator,
               log_every=100):
    start_total_time = time.time()
    start_epoch_time = time.time()  # Reset every log_every iterations
    start_train_time = time.time()  # Reset every log_every iterations
    total_loss = 0  # Reset every log_every iterations

    avg_loss_history = []
    avg_bleu_history = []
    avg_rouge_history = []
    avg_f1_history = []
    num_unique_names_history = []

    for iter in range(first_iter, last_iter + 1):
        training_pair = random.choice(pairs)
        input_tensor = training_pair[0]
        target_tensor = training_pair[1]

        loss = self.train(input_tensor, target_tensor)
        total_loss += loss

        if iter % log_every == 0:
            train_time_elapsed = time.time() - start_train_time
            torch.save(self.state_dict(), 'results/trained_model.pt')
            with open(os.path.join(constants.LOGS_DIR, 'iters_completed.txt'),
                      'w') as f:
                f.write(str(iter))

            start_eval_time = time.time()
            names = evaluator.evaluate(self)
            eval_time_elapsed = time.time() - start_eval_time

            avg_loss_history.append(total_loss / log_every)
            avg_bleu_history.append(names['BLEU'].mean())
            avg_rouge_history.append(names['ROUGE'].mean())
            avg_f1_history.append(names['F1'].mean())
            num_unique_names_history.append(len(names['Our Name'].unique()))

            epoch_time_elapsed = time.time() - start_epoch_time
            total_time_elapsed = time.time() - start_total_time

            log_dict = OrderedDict([
                ("Iteration", '{}/{} ({:.1f}%)'.format(
                    iter, last_iter, iter / last_iter * 100)),
                ("Average loss", avg_loss_history[-1]),
                ("Average BLEU", avg_bleu_history[-1]),
                ("Average ROUGE", avg_rouge_history[-1]),
                ("Average F1", avg_f1_history[-1]),
                ("Unique names", num_unique_names_history[-1]),
                ("Epoch time", time_str(epoch_time_elapsed)),
                ("Training time", time_str(train_time_elapsed)),
                ("Evaluation time", time_str(eval_time_elapsed)),
                ("Total training time", time_str(total_time_elapsed))
            ])
            logger.write_training_log(
                log_dict, os.path.join(constants.LOGS_DIR, 'train-log.txt'))
            logger.plot_and_save_histories(avg_loss_history, avg_bleu_history,
                                           avg_rouge_history, avg_f1_history,
                                           num_unique_names_history)
            logger.save_dataframe(
                names, os.path.join(constants.RESULTS_DIR, 'valid_names.csv'))

            histories = pd.DataFrame(OrderedDict([
                ('Loss', avg_loss_history),
                ('BLEU', avg_bleu_history),
                ('ROUGE', avg_rouge_history),
                ('F1', avg_f1_history),
                ('num_names', num_unique_names_history)
            ]))
            logger.save_dataframe(
                histories, os.path.join(constants.RESULTS_DIR, 'histories.csv'))

            # Resetting counters
            total_loss = 0
            start_epoch_time = time.time()
            start_train_time = time.time()
def trainIters(self, env, evaluator):
    start_total_time = time.time() - env.total_training_time
    start_epoch_time = time.time()  # Reset every LOG_EVERY iterations
    start_train_time = time.time()  # Reset every LOG_EVERY iterations
    total_loss = 0  # Reset every LOG_EVERY iterations

    for iter in range(env.iters_completed + 1, constants.NUM_ITER + 1):
        row = env.train_methods.iloc[np.random.randint(len(env.train_methods))]
        input_tensor = row['source']
        target_tensor = row['name']

        loss = self.train(input_tensor, target_tensor)
        total_loss += loss

        if iter % constants.LOG_EVERY == 0:
            log('Completed {} iterations'.format(iter))
            train_time_elapsed = time.time() - start_train_time

            log('Evaluating on validation set')
            start_eval_time = time.time()
            names = evaluator.evaluate(self)
            # save_dataframe(names, constants.VALIDATION_NAMES_FILE)
            eval_time_elapsed = time.time() - start_eval_time

            env.history = env.history.append({
                'Loss': total_loss / constants.LOG_EVERY,
                'BLEU': names['BLEU'].mean(),
                'ROUGE': names['ROUGE'].mean(),
                'F1': names['F1'].mean(),
                'num_names': len(names['GeneratedName'].unique())
            }, ignore_index=True)

            epoch_time_elapsed = time.time() - start_epoch_time
            total_time_elapsed = time.time() - start_total_time
            env.total_training_time = total_time_elapsed

            history_last_row = env.history.iloc[-1]
            log_dict = OrderedDict([
                ("Iteration", '{}/{} ({:.1f}%)'.format(
                    iter, constants.NUM_ITER, iter / constants.NUM_ITER * 100)),
                ("Average loss", history_last_row['Loss']),
                ("Average BLEU", history_last_row['BLEU']),
                ("Average ROUGE", history_last_row['ROUGE']),
                ("Average F1", history_last_row['F1']),
                ("Unique names", int(history_last_row['num_names'])),
                ("Epoch time", time_str(epoch_time_elapsed)),
                ("Training time", time_str(train_time_elapsed)),
                ("Evaluation time", time_str(eval_time_elapsed)),
                ("Total training time", time_str(total_time_elapsed))
            ])
            write_training_log(log_dict, constants.TRAIN_LOG_FILE)
            plot_and_save_histories(env.history)

            env.iters_completed = iter
            env.save_train()

            # Resetting counters
            total_loss = 0
            start_epoch_time = time.time()
            start_train_time = time.time()
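# Compatibility note (not from the source): DataFrame.append, used for
# env.history above, was deprecated in pandas 1.4 and removed in 2.0. A
# minimal equivalent of that history update with pd.concat:
import pandas as pd

history = pd.DataFrame(columns=['Loss', 'BLEU', 'ROUGE', 'F1', 'num_names'])
new_row = {'Loss': 0.5, 'BLEU': 0.31, 'ROUGE': 0.42, 'F1': 0.37, 'num_names': 12}
history = pd.concat([history, pd.DataFrame([new_row])], ignore_index=True)
print(history.iloc[-1])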
import os
import sys

import prettytable as pt

import git
import bzapi
import branch_logic
import util
import find_commits
import reporting
import configuration as c

# Should be smarter about these cache files and either manage them in sets
# or use a single file which contains *all* the information, only ever
# appended to.
requirements_file = os.path.abspath("requirements.json")
uplift_report_file = os.path.abspath("uplift_report.json")
uplift_dated_file = os.path.abspath("uplift_outcome_%s.json" % util.time_str())
push_info_file = os.path.abspath("uplift_info_%s.json" % util.time_str())
skip_bugs_file = os.path.abspath("skip_bugs.json")


def find_bugs(queries):
    bug_data = []
    all_queries = []
    for q in queries:
        all_queries.extend(bzapi.parse_bugzilla_query(q))
    print "Running Bugzilla searches"
    for q in all_queries:
        sys.stdout.write('.')
        sys.stdout.flush()
        search_data = bzapi.search(q)
        for bug in search_data:
            # NOTE: assumed completion; the original body was cut off here.
            # Collect each bug once, even when multiple queries return it.
            if bug not in bug_data:
                bug_data.append(bug)
    return bug_data
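# Usage sketch (hypothetical): driving find_bugs above. The query URL is a
# made-up example; bzapi.parse_bugzilla_query is expected to expand it into
# one or more concrete searches.
if __name__ == '__main__':
    bugs = find_bugs(["https://bugzilla.mozilla.org/buglist.cgi?product=Example"])
    print "Found %d bugs" % len(bugs)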