def step1(data, args): print '### STEP 1: Train for classification task' pretrained_snapshot_fname = 'model_best_accuracy.th' train_loader, val_loader, test_loader = data n_samples_train = len(train_loader.dataset) n_samples_val = len(val_loader.dataset) n_samples_test = len(test_loader.dataset) num_classes = len(set(val_loader.dataset.target_tensor)) model = eval(args.modelArch)(num_classes=num_classes) best_val_acc = None test_acc = None # try to load pretrained model if step 1 has already been executed saved_model = load_model(model, pretrained_snapshot_fname, args) if saved_model is not None: print 'Loading pretrained model:', pretrained_snapshot_fname model = saved_model model.cuda() else: # else train a new model print 'Training a new model ...' logfile = open(os.path.join(args.workDir, 'log.txt'), 'wb') model.cuda() optimizer = torch.optim.Adam(model.parameters(), args.learningRate) since = time.time() for epoch in trange(1, args.nEpochs + 1, desc='Epochs'): avg_loss = train(model, optimizer, epoch, train_loader, logfile, args) val_loss, val_acc, n_correct = evaluate(model, val_loader, args) if best_val_acc is None or best_val_acc < val_acc: best_val_acc = val_acc tqdm.write('Snapshotting best model: ' + pretrained_snapshot_fname) save_model(model, pretrained_snapshot_fname, args) logline = 'Epoch {:3d}/{}] train_avg_loss = {:.4f}, val_avg_loss = {:.4f}, val_accuracy = {}/{} ({:.2f}%, Best: {:.2f}%)' tqdm.write(logline.format(epoch, args.nEpochs, avg_loss, val_loss, n_correct, n_samples_val, val_acc, best_val_acc)) time_elapsed = time.time() - since print 'Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60) model = load_model(model, pretrained_snapshot_fname, args) # TESTING ----------------- if not args.skipTest: test_loss, test_acc, n_correct = evaluate(model, test_loader, args) logline = 'TEST] test_avg_loss = {:.4f}, test_accuracy = {}/{} ({:.2f}%)' print logline.format(test_loss, n_correct, n_samples_test, test_acc) 
return model, {'BestValAccuracy': best_val_acc, 'TestAccuracy': test_acc}
def main(input_file, output_file, is_training=True):
    """Score question pairs with three embedding models and write the result.

    Args:
        input_file: Question-pair dump read with ``common.read_qp_dump``.
        output_file: Path of the text file the combined scores are written to.
        is_training: Accepted for interface compatibility; not used here.
    """
    questions = common.read_qp_dump(input_file)

    # ``with`` closes the output file even when loading a model or scoring
    # raises; previously the handle leaked on any error.
    with open(output_file, "w") as output:
        print("generating scores")

        # Only one model is held at a time: each iteration rebinds ``model``.
        scores = []
        for model_name in ("wiki", "google", "quora"):
            model = common.load_model(model_name)
            scores.append(common.generate_scores(questions, model))

        wiki_scores, google_scores, quora_scores = scores
        print_combined_scores(wiki_scores, google_scores, quora_scores, file=output)
def run_evaluations():
    """Evaluate NFL model "6" on its test file and print per-year accuracy.

    The overall evaluation is written as JSON to the HTML output folder;
    afterwards the test file is re-read grouped by ``year`` and the accuracy
    of every group is printed.
    """
    test_file = "output\\nfl\\test\\6.csv"
    model_file = "models\\nfl\\6_model.pkl"
    output_file = "output\\nfl\\html\\testdata.json"

    model = common.load_model(model_file)
    _, X, y = common.read_data_from_file(test_file, "home_win", get_feature_headers())

    # Overall evaluation, stored under the "data" key.
    summary = {"data": [evaluate.evaluate("6", model, X, y)]}
    with open(output_file, 'w') as summary_file:
        json.dump(summary, summary_file)

    # Per-year breakdown.
    groups = common.read_data_grouped(test_file, ['year'])
    for key in groups:
        X = groups[key][get_feature_headers()]
        y = groups[key]["home_win"]
        accuracy, _manual_accuracy = evaluate.calculate_accuracy(model, X, y)
        print("{}:{:.2f}".format(key, accuracy))
def main(args=sys.argv[1:]):
    """Recalibrate the intervals of a fitted model and pickle the result.

    The recalibration routine is chosen from the model's
    ``density_parametric_form``; an unknown form raises ``ValueError``.
    """
    args = parse_args(args)
    logging.basicConfig(
        format="%(message)s",
        filename=args.log_file,
        level=logging.DEBUG,
    )
    print(args)
    logging.info(args)

    # Load the full data set and pick out the recalibration subset.
    data_dict = pickle_from_file(args.data_file)
    split_dict = pickle_from_file(args.data_split_file)
    train_data = data_dict["train"]
    recalib_data = train_data.subset(split_dict["recalibrate_idxs"])

    fitted_model = load_model(args.fitted_file)
    family = fitted_model.density_parametric_form

    # Select the family-specific routine first, then call it once.
    if family == "gaussian":
        recalibrate = recalibrate_intervals_gaussian
    elif family == "bernoulli":
        recalibrate = recalibrate_intervals_bernoulli
    elif "multinomial" in family:
        recalibrate = recalibrate_intervals_multinomial
    else:
        raise ValueError("dunno what is going on")
    coverage_dict = recalibrate(fitted_model, recalib_data, args)

    print(coverage_dict)
    pickle_to_file(coverage_dict, args.out_file)
def main(args=sys.argv[1:]):
    """Run the decision-interval recalibrator for every alpha and pickle it.

    The output is a dict mapping each value in ``args.alphas`` to the
    inference dict returned by ``DecisionIntervalRecalibrator.recalibrate``.
    """
    args = parse_args(args)
    logging.basicConfig(
        format="%(message)s",
        filename=args.log_file,
        level=logging.DEBUG,
    )
    print(args)
    logging.info(args)

    # Load the full data set and pick out the recalibration subset.
    data_dict = pickle_from_file(args.data_file)
    split_dict = pickle_from_file(args.data_split_file)
    train_data = data_dict["train"]
    recalib_data = train_data.subset(split_dict["recalibrate_idxs"])

    fitted_model = load_model(args.fitted_file)

    coverage_dict = {}
    for alpha in args.alphas:
        inference_dict = DecisionIntervalRecalibrator(
            fitted_model, alpha).recalibrate(recalib_data)
        print("RECALIB INF DICT", inference_dict["cov_given_accept"])

        est_cov_given_accept = inference_dict["cov_given_accept"]["mean"]
        ideal_cov = 1 - alpha
        logging.info("Alpha %f, ideal cov %f, est cov|accept %f",
                     alpha, ideal_cov, est_cov_given_accept)
        logging.info(get_normal_ci(inference_dict["cov_given_accept"]))

        coverage_dict[alpha] = inference_dict

    pickle_to_file(coverage_dict, args.out_file)
def restore_or_create_model(num_train_examples, num_labels, global_batch_size,
                            options):
    """Return a model restored from a checkpoint, or a freshly compiled one.

    Checkpoints found in ``options.checkpoint_dir`` are tried in the order
    returned by ``get_checkpoint_files``; the first one that loads wins. A
    checkpoint that fails to load only produces a warning. When none can be
    loaded, a new model is built on top of the pretrained model and compiled.
    """
    def _log(*parts):
        # Progress messages go to stderr, flushed immediately.
        print(*parts, file=sys.stderr, flush=True)

    checkpoints = get_checkpoint_files(options.checkpoint_dir)
    _log('Found {} checkpoint files: {}'.format(len(checkpoints), checkpoints))

    for checkpoint in checkpoints:    # sorted by ctime
        _log('Restoring from checkpoint', checkpoint)
        try:
            restored = load_model(checkpoint)
        except Exception as e:
            warning('Failed to restore from checkpoint {}: {}'.format(
                checkpoint, e))
        else:
            return restored

    # No checkpoint could be loaded
    _log('Creating new model')
    pretrained_model = load_pretrained(options)
    output_offset = int(options.max_seq_length/2)
    model = create_model(pretrained_model, num_labels, output_offset,
                         options.output_layer)
    optimizer = create_optimizer(num_train_examples, global_batch_size, options)
    model.compile(
        optimizer,
        loss='sparse_categorical_crossentropy',
        metrics=['sparse_categorical_accuracy'],
    )
    return model
def main():
    """Command-line entry point for training / evaluating / predicting NER.

    Parses the options, prepares the output directory and logger, builds the
    model and then runs the phases selected by ``--do_train``, ``--do_eval``
    and ``--do_predict``.
    """
    parser = argparse.ArgumentParser()

    # NOTE(review): --do_train is a store_true flag whose default is already
    # True, so training cannot be switched off from the command line.
    parser.add_argument("--do_train", default=True, action='store_true')
    parser.add_argument('--do_eval', default=False, action='store_true')
    parser.add_argument("--do_predict", default=False, action='store_true')

    # Tagging scheme of the labels.
    parser.add_argument('--markup', default='bieos', type=str,
                        choices=['bios', 'bio', 'bieos'])
    parser.add_argument("--arch", default='bilstm_crf', type=str)
    parser.add_argument('--learning_rate', default=0.001, type=float)
    parser.add_argument('--seed', default=1234, type=int)
    parser.add_argument('--gpu', default='', type=str)
    parser.add_argument('--epochs', default=100, type=int)
    parser.add_argument('--batch_size', default=32, type=int)
    parser.add_argument('--embedding_size', default=128, type=int)
    parser.add_argument('--hidden_size', default=384, type=int)
    parser.add_argument("--grad_norm", default=5.0, type=float,
                        help="Max gradient norm.")
    parser.add_argument("--task_name", type=str, default='ner')
    args = parser.parse_args()

    args.data_dir = config.data_dir

    # Create the root output directory first. The original called
    # ``args.output_dir.mkdir()`` here, before ``args.output_dir`` existed,
    # which raised AttributeError whenever the root directory was missing.
    if not config.output_dir.exists():
        config.output_dir.mkdir()

    # Each architecture gets its own sub-directory.
    args.output_dir = config.output_dir / '{}'.format(args.arch)
    if not args.output_dir.exists():
        args.output_dir.mkdir()

    init_logger(log_file=str(
        args.output_dir / '{}-{}.log'.format(args.arch, args.task_name)))
    seed_everything(args.seed)

    # An empty --gpu value selects the CPU.
    if args.gpu != '':
        args.device = torch.device(f"cuda:{args.gpu}")
    else:
        args.device = torch.device("cpu")

    args.id2label = {i: label for i, label in enumerate(config.label2id)}
    args.label2id = config.label2id

    processor = CluenerProcessor(data_dir=config.data_dir)
    processor.get_vocab()

    model = BERT_NERModel(device=args.device, label2id=args.label2id,
                          need_birnn=True)
    model.to(args.device)

    if args.do_train:
        train(args, model, processor)
    if args.do_eval:
        # Evaluate the best snapshot, not the in-memory weights.
        model_path = args.output_dir / 'best-model.bin'
        model = load_model(model, model_path=str(model_path))
        evaluate(args, model, processor)
    if args.do_predict:
        predict(args, model, processor)
def strategy_evaluation(data_file: str, model_file: str,
                        feature_columns: List[str], summary_file: str):
    """Run every strategy over the data rows and append one summary each.

    For each strategy a row ``[name, candidates, matches, covered,
    not_covered, profits[0], profits[1]]`` is passed to
    ``add_to_json_summary``.
    """
    model = common.load_model(model_file)
    data, X, y = common.read_data_from_file(data_file, "home_win",
                                            feature_columns)

    # Computed as in the original flow; the results are not used below.
    predictions = model.predict(X)
    probabilities = model.predict_proba(X)

    strat = strategies.all_strategies()

    # Feed every data row to every strategy.
    for _, row in data.iterrows():
        for strategy in strat:
            strategy.evaluate(row)

    # Collect and store the per-strategy outcome.
    for strategy in strat:
        result = strategy.get_results()
        profits = result.profits()
        summary_row = [result.name, result.candidates, result.matches,
                       result.covered, result.not_covered]
        summary_row.append(profits[0])
        summary_row.append(profits[1])
        add_to_json_summary(summary_file, summary_row)
def main(args=sys.argv[1:]):
    """Load the fitted models and plot their accept/reject regions.

    The region plot is only produced when the recalibration data has exactly
    two feature columns.
    """
    args = parse_args(args)
    np.random.seed(args.seed)

    # Load the data and select the recalibration subset.
    orig_data_dict = pickle_from_file(args.data_file)
    split_dict = pickle_from_file(args.data_split_file)
    train_data = orig_data_dict["train"]
    recalib_data = train_data.subset(split_dict["recalibrate_idxs"])
    args.num_p = recalib_data.x.shape[1]

    # One {"model": ...} entry per fitted file.
    fitted_dicts = []
    for fitted_file in args.fitted_files:
        fitted_dicts.append({"model": load_model(fitted_file)})
    print("fitted dicts", len(fitted_dicts))

    # Fresh test data is still generated here, so the random state advances
    # exactly as before, although the sample is not used afterwards.
    new_data, _ = orig_data_dict["data_gen"].create_data(args.num_test)

    if args.num_p == 2:
        models = [entry["model"] for entry in fitted_dicts]
        plot_accepted_rejected_region(orig_data_dict, models, args)
def main(dataset_path, working_dir, testing_path, testing_working_dir,
         dimensions, batch_size, number_classes, epochs):
    """Fine-tune the stored model on the training set, save it and test it.

    Training batches come from ``imagerecognition.dataset_loader``; after
    training, all test batches are concatenated and evaluated in one call.
    """

    def generator_augmented():
        # Endless stream of batches, each doubled with one augmented copy.
        while True:
            while not dataset_loader.done():
                batch_x, batch_y = dataset_loader.get_training_batch()
                flow = datagen.flow(batch_x, batch_y, batch_size=batch_size)
                aug_x, aug_y = next(flow)
                merged_x = np.concatenate((batch_x, aug_x), axis=0)
                merged_y = np.concatenate((batch_y, aug_y), axis=0)
                yield merged_x, merged_y
            dataset_loader.reset()

    def generator():
        # Endless stream of plain batches; the loader is rewound every epoch.
        while True:
            while not dataset_loader.done():
                yield dataset_loader.get_training_batch()
            dataset_loader.reset()

    model = load_model()

    reduce_lr = ReduceLROnPlateau(monitor='loss', factor=0.1, patience=5,
                                  min_lr=0.001)
    sgd_optimizer = optimizers.SGD(lr=0.01, momentum=0.9, decay=0.0005)

    dataset_loader = imagerecognition.dataset_loader(
        dataset_path, working_dir, dimensions, batch_size)
    datagen = ImageDataGenerator(rotation_range=20,
                                 width_shift_range=0.2,
                                 height_shift_range=0.2,
                                 horizontal_flip=True,
                                 vertical_flip=True)

    model.compile(sgd_optimizer,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    steps = dataset_loader.length / batch_size
    model.fit_generator(generator(),
                        steps_per_epoch=steps,
                        epochs=epochs,
                        callbacks=[reduce_lr])
    dataset_loader.delete_from_disk()
    save_model(model)

    # The nested generators are no longer running, so the name can be reused
    # for the test-set loader.
    dataset_loader = imagerecognition.dataset_loader(
        testing_path, testing_working_dir, dimensions, batch_size)
    print('Testing...')

    all_images, all_labels = dataset_loader.get_training_batch()
    while not dataset_loader.done():
        next_images, next_labels = dataset_loader.get_training_batch()
        all_images = np.concatenate((all_images, next_images), axis=0)
        all_labels = np.concatenate((all_labels, next_labels), axis=0)

    print(model.evaluate(all_images, all_labels))
    dataset_loader.delete_from_disk()
def main(argv):
    """Classify the examples and print one tab-separated line per example.

    Columns: id, gold class, predicted class, decision score, text (cut to
    ``args.truncate`` characters when that option is given).
    """
    args = argparser().parse_args(argv[1:])
    examples = load_examples(args.data)
    clf, vecf = load_model(args.model)

    texts = [example.text for example in examples]
    X = vecf.transform(texts)
    predicted = clf.predict(X)
    scores = clf.decision_function(X)

    for example, label, score in zip(examples, predicted, scores):
        if args.truncate is None:
            text = example.text
        else:
            text = example.text[:args.truncate]
        print('{}\t{}\t{}\t{}\t{}'.format(
            example.id_, example.class_, label, score, text))
    return 0
def __init__(self,
             model_data_path=TF_FEAT_MODEL_PATH,
             image_w=TF_FEAT_IMG_W,
             image_h=TF_FEAT_IMG_H):
    """Load the model into a private graph and look up its I/O tensors.

    The tensors named ``input:0`` and ``confidence_st:0`` are stored as
    ``self.image_ph`` and ``self.score``. The load time is printed in ms.
    """
    self.image_w, self.image_h = image_w, image_h
    self.graph = tf.Graph()
    self.sess = tf.Session(graph=self.graph)

    start_time = time()
    with self.graph.as_default():
        load_model(model_data_path)
        gd = self.sess.graph.as_graph_def()
        input_tensor = tf.get_default_graph().get_tensor_by_name("input:0")
        self.image_ph = input_tensor
        score_tensor = tf.get_default_graph().get_tensor_by_name(
            "confidence_st:0")
        self.score = score_tensor
    self.model_data_path = model_data_path
    end_time = time()

    elapsed_ms = (end_time - start_time) * 1000
    print('load model use {:.2f}ms'.format(elapsed_ms))
def main(argv):
    """Load the model into the default graph/session and start the web app.

    The model, tokenizer, labels, config, session and graph are attached to
    the ``app`` object before ``app.run`` is called.
    """
    args = argument_parser('serve').parse_args(argv[1:])

    session = tf.Session()
    graph = tf.get_default_graph()

    with graph.as_default(), session.as_default():
        loaded = load_model(args.model_dir)
        model, tokenizer, labels, model_config = loaded
        app.model = model
        app.tokenizer = tokenizer
        app.labels = labels
        app.model_config = model_config
        app.session = session
        app.graph = graph
        app.run(port=args.port, debug=True)
    return 0
def main(argv):
    """Load the classifier once and run ``process`` on every data file.

    Progress messages are written to stderr.
    """
    def _report(template, value):
        # Progress line on stderr, flushed immediately.
        print(template.format(value), file=sys.stderr, flush=True)

    args = argparser().parse_args(argv[1:])

    _report('loading model from {} ...', args.model)
    clf, vecf = load_model(args.model)
    _report('loaded model from {} ...', args.model)

    for fn in args.data:
        _report('processing {} ...', os.path.basename(fn))
        process(fn, clf, vecf, args)
        _report('completed {}.', os.path.basename(fn))
    return 0
def run_evaluations(model_file: str, model_name: str, data_file: str,
                    feature_columns: List[str], summary_file: str):
    """Evaluate one model, append the result to the summary, return accuracy.

    Returns the first element of ``evaluate.calculate_accuracy``.
    """
    model = common.load_model(model_file)
    loaded = common.read_data_from_file(data_file, "home_win", feature_columns)
    _, X, y = loaded

    label = f"{model_name}"
    add_to_json_summary(summary_file, evaluate.evaluate(label, model, X, y))

    accuracy = evaluate.calculate_accuracy(model, X, y)[0]
    return accuracy
def saveData2(file_name, out_file_name):
    """Embed pickled question pairs and store the embeddings in an HDF5 file.

    Args:
        file_name: Path of the pickle file holding the question pairs.
        out_file_name: Path of the HDF5 file to create; the embeddings are
            stored in a dataset named ``'embeddings'``.
    """
    model = common.load_model("google")

    time1 = time.time()
    # NOTE(review): pickle.load executes code embedded in the file; only use
    # this on trusted dumps.
    # ``with`` closes the handle; it used to be left open.
    with open(file_name, "rb") as pickle_file:
        train_qn_pairs = pickle.load(pickle_file)
    time2 = time.time()
    print("Loaded Pickle : %f min" % ((time2 - time1) / 60))

    train_data = get_embedded_sentence(train_qn_pairs, model)
    time3 = time.time()

    # Drop the model and the raw pairs before writing to lower peak memory.
    model = None
    train_qn_pairs = None
    gc.collect()
    print("Obtained Embeddings: %f min" % ((time3 - time2) / 60))

    # The context manager closes the HDF5 file even if the write fails.
    with h5py.File(out_file_name, 'w') as h5f:
        h5f.create_dataset('embeddings', train_data.shape, data=train_data)
def main(args=sys.argv[1:]):
    """Plot diagnostics for one fitted model and check its recalibration.

    Produces the accept/reject region plot and the density plots on freshly
    generated test data, then checks the recalibrated coverage.
    """
    args = parse_args(args)

    data_dict = pickle_from_file(args.data_file)
    data_gen = data_dict["data_gen"]
    test_data, _ = data_gen.create_data(args.num_test, args.seed)
    args.num_p = test_data.x.shape[1]

    fitted_model = load_model(args.fitted_file)

    # Region the model accepts versus rejects.
    plot_accepted_rejected_region(data_dict, fitted_model, args)

    # Density estimates inside the accept region versus the reject region.
    plot_densities(test_data, fitted_model, args)

    recalibrated_dict = pickle_from_file(args.recalibrated_file)
    check_recalibration_covered(fitted_model, recalibrated_dict, test_data)
def run_on_test_data():
    """Write duplicate-question predictions for the test set to a CSV file.

    Every question pair is embedded with the "google" model, the two
    embeddings are packed into one ``(1, 2 * maxlen, embedding_dims)`` array
    and scored by the stored CNN. Pairs where either question has an empty
    embedding get the constant score 0.37.
    """
    cnn_model = keras.models.load_model('cnn_model.h5')
    test_questions = common.read_qp_dump('data/test_qn_pairs.p')
    model = common.load_model("google")

    # ``with`` closes the submission file even if a prediction fails; the
    # unused ``predictions`` array and loop index were dropped.
    with open('cnn-predictions.csv', 'w') as submission:
        print("test_id,is_duplicate", file=submission)
        for question_pair in test_questions:
            v1 = sentence2vec(question_pair.question_1, model)
            v2 = sentence2vec(question_pair.question_2, model)

            if len(v1) > 0 and len(v2) > 0:
                # First half holds question 1, second half question 2; each
                # is cut to ``maxlen`` rows and zero-padded when shorter.
                combined = np.zeros((1, 2 * maxlen, embedding_dims))
                combined[0, :min(maxlen, len(v1)), :] = v1[:maxlen, :]
                combined[0, maxlen:(maxlen + min(maxlen, len(v2))), :] = v2[:maxlen, :]
                probability = cnn_model.predict_proba(combined, verbose=False)[0][0]
                print("%s,%f" % (question_pair.id, probability), file=submission)
            else:
                print("%s,0.37" % (question_pair.id, ), file=submission)
def calibration(data_file: str, model_file: str, feature_columns: List[str]):
    """Write a calibration table for the model to ``calibration.csv``.

    Predictions are bucketed by confidence in steps of 5 percentage points.
    For each bucket with more than 20 games the table lists the bucket, the
    bucket again as the predicted rate, the observed hit rate (whole percent)
    and the number of games.
    """
    model = common.load_model(model_file)
    data, X, y = common.read_data_from_file(data_file, "home_win",
                                            feature_columns)
    predictions = model.predict(X)
    probabilities = model.predict_proba(X)

    # bucket -> (correct predictions, wrong predictions)
    calibration_map: Dict = {}
    for idx, predicted_outcome in enumerate(predictions):
        true_outcome = y[idx]
        confidence = float(max(probabilities[idx]))

        # Round the confidence down to a multiple of 5 percent.
        bucket = (int(confidence * 100) // 5) * 5

        hits, misses = calibration_map.get(bucket, (0, 0))
        if predicted_outcome == true_outcome:
            hits += 1
        else:
            misses += 1
        calibration_map[bucket] = (hits, misses)

    with open("calibration.csv", "w", newline='') as o:
        writer = csv.writer(o)
        writer.writerow(["index", "predicted", "actual", "number_of_games"])
        for pct, (hits, misses) in calibration_map.items():
            number_of_games = hits + misses
            true_pct = int(hits / number_of_games * 100)
            # don't bother with small sample size
            if number_of_games <= 20:
                continue
            writer.writerow([pct, pct, true_pct, number_of_games])
def main():
    """Capture camera frames, run detection on each and track the objects.

    The loop ends when the camera stops delivering frames. The measured
    frame rate is printed after every frame.
    """
    # Use the Edge TPU model only when the accelerator is enabled.
    if edgetpu == 1:
        mdl = model_edgetpu
    else:
        mdl = model

    # Pass the model selected above. The original always passed
    # ``model_edgetpu``, ignoring the selection.
    interpreter, labels = cm.load_model(model_dir, mdl, lbl, edgetpu)

    fps = 1

    try:
        while True:
            start_time = time.time()

            # ---------------- Capture camera frame ----------------
            ret, frame = cap.read()
            if not ret:
                break

            # Flip around both axes, then convert BGR -> RGB for PIL.
            cv2_im = frame
            cv2_im = cv2.flip(cv2_im, 0)
            cv2_im = cv2.flip(cv2_im, 1)
            cv2_im_rgb = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
            pil_im = Image.fromarray(cv2_im_rgb)

            # ---------------- Inference ----------------
            cm.set_input(interpreter, pil_im)
            interpreter.invoke()
            objs = cm.get_output(interpreter, score_threshold=threshold,
                                 top_k=top_k)

            # ---------------- Tracking ----------------
            track_object(objs, labels)

            fps = round(1.0 / (time.time() - start_time), 1)
            print("*********FPS: ", fps, "************")
    finally:
        # Release the camera and windows even if a frame fails to process.
        cap.release()
        cv2.destroyAllWindows()
def main(args=sys.argv[1:]):
    """Plot the accept/reject regions of all fitted models.

    The plot is only produced when the recalibration data has exactly two
    feature columns.
    """
    args = parse_args(args)
    np.random.seed(args.seed)

    # Load the data and select the recalibration subset.
    orig_data_dict = pickle_from_file(args.data_file)
    split_dict = pickle_from_file(args.data_split_file)
    train_data = orig_data_dict["train"]
    recalib_data = train_data.subset(split_dict["recalibrate_idxs"])
    args.num_p = recalib_data.x.shape[1]

    fitted_models = []
    for fitted_file in args.fitted_files:
        fitted_models.append(load_model(fitted_file))

    if args.num_p != 2:
        return
    plot_accepted_rejected_region(orig_data_dict, fitted_models, args)
def daily_evaluation(data_file: str, model_file: str,
                     feature_columns: List[str], summary_file: str):
    """Append one summary row per game and feed each game to all strategies.

    Each row is ``[date, away, home, winner, predicted_winner, confidence]``,
    where an outcome of 1 means the home team.
    """
    model = common.load_model(model_file)
    data, X, y = common.read_data_from_file(data_file, "home_win",
                                            feature_columns)
    predictions = model.predict(X)
    probabilities = model.predict_proba(X)
    strat = strategies.all_strategies()

    for idx, predicted_outcome in enumerate(predictions):
        true_outcome = y[idx]
        confidence = float(max(probabilities[idx]))

        game = data.iloc[idx]
        away = game["away"]
        home = game["home"]
        date = game["date"]

        winner = home if true_outcome == 1 else away
        predicted_winner = home if predicted_outcome == 1 else away

        # Each strategy receives its own freshly selected row.
        for strategy in strat:
            strategy.evaluate(data.iloc[idx])

        summary_row = [date, away, home, winner, predicted_winner, confidence]
        add_to_json_summary(summary_file, summary_row)

    for strategy in strat:
        strategy.get_results()
def main(argv):
    """Evaluate a stored model on TSV test data and print its accuracy."""
    args = argument_parser('test').parse_args(argv[1:])

    model, tokenizer, labels, config = load_model(args.model_dir)
    test_labels, test_texts = load_tsv_data(args.test_data, args)

    max_seq_len = config['max_seq_length']
    replace_span = config['replace_span']

    # label text -> index, and the reverse mapping
    label_map = dict((label, index) for index, label in enumerate(labels))
    inv_label_map = dict((index, label) for label, index in label_map.items())

    test_tok = tokenize_texts(test_texts, tokenizer)
    test_x = encode_tokenized(test_tok, tokenizer, max_seq_len, replace_span)
    test_y = [label_map[label] for label in test_labels]

    probs = model.predict(test_x, batch_size=args.batch_size)
    preds = np.argmax(probs, axis=-1)

    correct = sum(gold == pred for gold, pred in zip(test_y, preds))
    total = len(test_y)
    accuracy = correct / total
    print('Test accuracy: {:.1%} ({}/{})'.format(accuracy, correct, total))
    return 0
output.append(f"{df.iloc[idx]['away_streak']:.2f}") output.append(f"{df.iloc[idx]['home_streak']:.2f}") output.append(f"{df.iloc[idx]['away_pct']:.2f}") output.append(f"{df.iloc[idx]['home_pct']:.2f}") output.append(f"{df.iloc[idx]['away_diff']:.2f}") output.append(f"{df.iloc[idx]['home_diff']:.2f}") summary.append(output) return summary stats = importer.generate_stats(f"input\\nba\\2018.csv") model_file, feature_columns = common.read_model_definition("nba_model.csv") model = common.load_model(model_file) data = [] games = [] dt = datetime.datetime.now() lines = scraper.get_gameday_lines(dt) index = domain.GameLineIndex(lines) for g in scraper.get_games(dt): game_info = domain.NBAGame(1, date = dt) game_info.away = g[0] game_info.home = g[1]
def main(args=sys.argv[1:]):
    """Compare aggregated, independent and individual coverage estimates.

    For every prediction-interval alpha found in the coverage files this
    builds an aggregator over all fitted models, checks whether its
    confidence interval contains the coverage measured on fresh test data,
    does the same under the independence assumption and per model, and
    stores everything in a dict keyed by alpha that is pickled to
    ``args.out_file``.
    """
    args = parse_args(args)
    np.random.seed(args.seed)
    logging.basicConfig(format="%(message)s",
                        filename=args.log_file,
                        level=logging.DEBUG)
    logging.info(args)

    data_dict = pickle_from_file(args.data_file)
    test_data, _ = data_dict["data_gen"].create_data(args.num_test)

    # Load each fitted model and regroup its coverage results by alpha:
    # agg_dict[alpha] is a list with one inference dict per model.
    fitted_models = []
    agg_dict = {}
    for fitted_file, coverage_file in zip(args.fitted_files,
                                          args.coverage_files):
        fitted_model = load_model(fitted_file)
        fitted_models.append(fitted_model)
        coverage_dict = pickle_from_file(coverage_file)
        for pi_alpha, inference_dict in coverage_dict.items():
            if pi_alpha not in agg_dict:
                agg_dict[pi_alpha] = []
            agg_dict[pi_alpha].append(inference_dict)

    # NOTE(review): unif_test_data is not used again in this function.
    unif_x = data_dict["support_sim_settings"].support_unif_rvs(args.num_test)
    unif_test_data = data_dict["data_gen"].create_data_given_x(unif_x)

    coverage_agg_results = {}
    for pi_alpha, inference_dicts in agg_dict.items():
        aggregator = DecisionIntervalAggregator(fitted_models, pi_alpha,
                                                inference_dicts)

        # One freshly generated test set per fitted model, recalibrated on
        # that model to obtain its test-set coverage.
        indiv_test_datas = [
            data_dict["data_gen"].create_data(args.num_test)[0]
            for _ in fitted_models
        ]
        indiv_test_inf_dicts = [
            DecisionIntervalRecalibrator(fitted_model,
                                         pi_alpha).recalibrate(indiv_test_data)
            for fitted_model, indiv_test_data in zip(fitted_models,
                                                     indiv_test_datas)
        ]

        # Does each model's own CI contain its test-set coverage?
        individual_is_covereds = []
        for test_coverage_dict, inf_dict in zip(indiv_test_inf_dicts,
                                                inference_dicts):
            print(inf_dict)
            test_coverage = test_coverage_dict["cov_given_accept"]["mean"]
            test_coverage_ci = get_normal_ci(
                test_coverage_dict["cov_given_accept"], args.ci_alpha)
            individual_ci = get_normal_ci(inf_dict["cov_given_accept"],
                                          args.ci_alpha)
            # Inclusive bounds here; the aggregate checks below are strict.
            indiv_covered = individual_ci[
                0] <= test_coverage and test_coverage <= individual_ci[1]
            logging.info("indiv est %f ci %s",
                         inf_dict["cov_given_accept"]["mean"], individual_ci)
            logging.info("true indiv %f ci %s", test_coverage,
                         test_coverage_ci)
            logging.info("indiv is covered? %s", indiv_covered)
            individual_is_covereds.append(indiv_covered)

        # Calculate the width of the individual CI diams for comparison
        individual_ci_diams = get_individual_ci_diams(inference_dicts,
                                                      args.ci_alpha)

        # Evaluate if the true coverage value is covered
        agg_cov_given_accept_dict = aggregator.calc_agg_cover_given_accept(
            args.ci_alpha)
        true_cov_given_accept_dict = aggregator.eval_cov_given_accept(
            test_data)["cov_given_accept"]
        true_cov_given_accept = true_cov_given_accept_dict["mean"]
        agg_ci = agg_cov_given_accept_dict["ci"]
        is_covered = true_cov_given_accept > agg_ci[
            0] and true_cov_given_accept < agg_ci[1]

        # Evaluate coverage if using independence assumption
        indpt_aggregator = DecisionIntervalIndptAggregator(
            fitted_models, pi_alpha, inference_dicts)
        indpt_agg_cov_given_accept_dict = indpt_aggregator.calc_agg_cover_given_accept(
            args.ci_alpha)
        indpt_ci = indpt_agg_cov_given_accept_dict["ci"]
        indpt_is_covered = true_cov_given_accept > indpt_ci[
            0] and true_cov_given_accept < indpt_ci[1]

        # Scalar results are wrapped in one-element lists; the "individual"
        # entries hold one value per fitted model.
        coverage_agg_results[pi_alpha] = {
            "is_covered": {
                "agg": [is_covered],
                "independent": [indpt_is_covered],
                "individual": individual_is_covereds
            },
            "ci_diams": {
                "agg": [agg_ci[1] - agg_ci[0]],
                "independent": [indpt_ci[1] - indpt_ci[0]],
                "individual": individual_ci_diams
            },
            "true_cov": {
                "agg": [true_cov_given_accept],
                "independent": [true_cov_given_accept],
                "individual": [
                    test_inf_dict["cov_given_accept"]["mean"]
                    for test_inf_dict in indiv_test_inf_dicts
                ]
            }
        }

        # Evaluate local coverage
        local_coverages = assess_local_agg_coverage_true(
            aggregator, test_data, data_dict["data_gen"])
        # Merge the local results into the same per-alpha entry.
        for key, val in local_coverages.items():
            coverage_agg_results[pi_alpha][key] = val

        logging.info("PI alpha %f", pi_alpha)
        logging.info("estimated agg cover given accept %f %s",
                     agg_cov_given_accept_dict["mean"], agg_ci)
        logging.info("indepttt estimated agg cover given accept %f %s",
                     indpt_agg_cov_given_accept_dict["mean"], indpt_ci)
        logging.info("true cov given accept %f, se %f", true_cov_given_accept,
                     true_cov_given_accept_dict["se"])
        logging.info("is covered? %s", is_covered)
        logging.info("indept is covered? %s", indpt_is_covered)

    logging.info(coverage_agg_results)
    pickle_to_file(coverage_agg_results, args.out_file)
def run_main_loop(max_duration, replying_enabled):
    """The outer monitor/reply loop for communicating with Twitter.

    Repeatedly fetches and scores tweets newer than the last seen tweet id,
    records them, and replies to the replyable ones when replying is on.

        max_duration: Maximum time to run this loop for (seconds)
        replying_enabled: Reply to tweets?
    """

    logging.info('max_duration=%d, replying_enabled=%s' % (max_duration, replying_enabled))

    start_time = time.time()

    # Delay between loops
    delay = 0.1

    def elapsed():
        """Elapsed time since start"""
        return time.time() - start_time

    # Load the classification model first
    # This is critical for classifying tweets for reply
    # It is helpful for seeing which tweets would be replied to
    #  when we are running in non-replying mode
    # The model will not be available in early stage of development
    #  before tweets have been saved and labeled
    model = common.load_model()
    if replying_enabled:
        assert model, 'Cannot reply without a classification model'
    logging.info('Using %s classifier' % model.__class__.__name__)

    # Latest tweet id (an integer) is stored as text in LATEST_FILE
    # We use it to prevent re-reading tweets
    # NOTE(review): file() exists only in Python 2, and the handles opened
    # with it in this function are never closed explicitly.
    latest_tweet_id = int(file(common.LATEST_FILE, 'rt').read().strip()) if os.path.exists(common.LATEST_FILE) else 0
    logging.info('latest_tweet_id=%d' % latest_tweet_id)

    # Load the tweets that have already been replied to
    replied_tweets = load_replied_tweets()

    # Credentials are stored in CREDENTIALS_FILE as text lines of key='value'
    # The keys are: consumer_key, consumer_secret, access_token_key, access_token_secret
    RE_CREDENTIALS = re.compile(r"(\w+)='([^']+)'")
    credentials = dict((m.group(1),m.group(2)) for m in RE_CREDENTIALS.finditer(file(common.CREDENTIALS_FILE,'rt').read()))

    # Create an object that gives access to the Twitter APIs
    api = twitter.Api(**credentials)

    # Create an Activity object for generating summary tweets
    activity = Activity(api)

    # The main loop. Runs for max_duration seconds with delay seconds
    # between iterations.
    while elapsed() + delay < max_duration:
        time.sleep(delay)

        # Fetch tweets that were created since the last time we checked
        # NOTE(review): the id is incremented on every pass, including
        # passes that found nothing - confirm this drift is intended.
        latest_tweet_id += 1
        scored_tweets = fetch_latest_scored_tweets(api, model, latest_tweet_id)

        if scored_tweets:
            # Record all tweets
            # Sort order: replyable first, then highest score, then lowest id.
            scored_tweets.sort(key = lambda t: (not t._replyable, -t._score, t._id))
            latest_tweet_id = max([t._id for t in scored_tweets])
            # Persist the new high-water mark before recording the tweets.
            file(common.LATEST_FILE, 'wt').write(str(latest_tweet_id))
            record_tweets(scored_tweets)

            replyable_tweets = [t for t in scored_tweets if t._replyable]

            # Reply to all the tweets that we should reply to
            if replying_enabled and replyable_tweets:
                reply_to_tweets(api, activity, replied_tweets, replyable_tweets)

        # Back off if there were no matching tweets
        delay = 10 if scored_tweets else delay * 2
        # Cap the delay at a tenth of the remaining time, but never go below
        # 10 seconds.
        # NOTE(review): because of max(10, ...) the delay is always >= 10.
        delay = max(10, min((abs(elapsed() - max_duration)/10), delay))

        logging.info('Found %3d replyable of %3d relevant results, latest_id=%d, sleeping %5.1f sec, running %4d sec (%4d remaining)' % (
                len([t for t in scored_tweets if t._replyable]),
                len(scored_tweets), latest_tweet_id, delay,
                elapsed(), max_duration - elapsed()))
def predict(args,model,processor):
    """Tag the test set with the best snapshot and write two result files.

    ``test_prediction.json`` gets one JSON record per sentence (id, tag
    sequence, entities). ``test_submit.json`` gets, per sentence, a mapping
    ``label -> entity text -> list of [start, end]`` spans.
    """
    model_path = args.output_dir / 'best-model.bin'
    model = load_model(model, model_path=str(model_path))

    # Build the inputs: every character is a token, with a dummy 'O' tag.
    test_data = []
    with open(str(args.data_dir / "test.json"), 'r') as f:
        for idx, raw_line in enumerate(f):
            record = json.loads(raw_line.strip())
            words = list(record['text'])
            labels = ['O'] * len(words)
            test_data.append({
                'id': idx,
                'context': " ".join(words),
                'tag': " ".join(labels),
                'raw_context': "".join(words),
            })

    pbar = ProgressBar(n_total=len(test_data))
    results = []
    for step, line in enumerate(test_data):
        token_a = line['context'].split(" ")
        input_ids = [processor.vocab.to_index(w) for w in token_a]
        input_mask = [1] * len(token_a)
        input_lens = [len(token_a)]

        model.eval()
        with torch.no_grad():
            input_ids = torch.tensor([input_ids], dtype=torch.long).to(args.device)
            input_mask = torch.tensor([input_mask], dtype=torch.long).to(args.device)
            input_lens = torch.tensor([input_lens], dtype=torch.long)
            features = model.forward_loss(input_ids, input_mask, input_lens,
                                          input_tags=None)
            tags, _ = model.crf._obtain_labels(features, args.id2label,
                                               input_lens)

        label_entities = get_entities(tags[0], args.id2label)
        results.append({
            'id': step,
            'tag_seq': " ".join(tags[0]),
            'entities': label_entities,
        })
        pbar(step=step)
    print(" ")

    output_predic_file = str(args.output_dir / "test_prediction.json")
    output_submit_file = str(args.output_dir / "test_submit.json")

    with open(output_predic_file, "w") as writer:
        for record in results:
            writer.write(json.dumps(record) + '\n')

    # Re-read the raw test file to recover the original ids and texts.
    with open(str(args.data_dir / 'test.json'), 'r') as fr:
        test_text = [json.loads(raw_line) for raw_line in fr]

    test_submit = []
    for source, result in zip(test_text, results):
        words = list(source['text'])
        label_spans = {}
        for subject in result['entities']:
            tag, start, end = subject[0], subject[1], subject[2]
            word = "".join(words[start:end + 1])
            # Group spans by label first, then by entity text.
            label_spans.setdefault(tag, {}).setdefault(word, []).append(
                [start, end])
        test_submit.append({'id': source['id'], 'label': label_spans})

    json_to_text(output_submit_file, test_submit)
def update_class_file():
    """Append automatically labelled new tweets to the class file.

    We store lists of labelled tweets in *.cls files in the format
        class label | message
    e.g.
        n | If I see one more back to school commercial I'm giving my eyes a paper cut.
        y | i got lemon on my finger and it stings .-. stupid paper cut -.-
    (Only the tweet text is stored here. The other tweet information is
    not stored, as only the tweet text is used for classification.)

    common.CLASS_FILE is the labelled data file that is extended, and
    common.LATEST_CLASS_FILE stores the id of the latest tweet that has been
    labelled (see common.py for the actual names).

    This function reads common.TWEETS_FILE, which contains all the tweets
    that passed simple screening and were saved, guesses a label for every
    tweet newer than the stored id using the latest classification model,
    appends the results to common.CLASS_FILE and updates
    common.LATEST_CLASS_FILE. The previous class file is first copied to
    '<CLASS_FILE>.<previous id>'. The process exits when there is nothing
    new.
    """

    # The latest labelled tweet id (an integer) is stored as text in LATEST_CLASS_FILE
    # NOTE(review): file() exists only in Python 2; the handle is not closed
    # explicitly.
    latest_labelled_tweet_id = int(file(common.LATEST_CLASS_FILE, 'rt').read().strip()) if os.path.exists(common.LATEST_CLASS_FILE) else 0
    previous_tweet_id = latest_labelled_tweet_id

    # Read the classification model. This will be used to guess tweet classifications
    model = common.load_model()

    # Read the tweets from TWEETS_FILE, label them and store them
    # in labelled_messages
    labelled_messages = []
    fp = open(common.TWEETS_FILE, 'rt')
    for line in fp:
        line = line.strip(' \n')

        # Skip empty lines
        if not line:
            continue

        # Skip incorrectly formatted lines
        # A valid line has exactly four '|'-separated fields; only the first
        # (tweet id) and the last (message) are used.
        try:
            id_s,_,_,message = [pt.strip() for pt in line.split('|')]
            id = int(id_s)
        except ValueError:
            print 'ValueError', line
            continue

        # Skip tweets we have already saved
        # NOTE(review): the threshold rises while reading, so a tweet with a
        # lower id than an earlier line is skipped - presumably the file is
        # in ascending id order; confirm.
        if id <= latest_labelled_tweet_id:
            continue

        # Filter out messages that are not even allowed for training
        if not filters.is_allowed_for_training(message):
            continue

        cls,_ = model.classify(message)
        labelled_messages.append([definitions.AUTO_CLASSES_LABELS[cls], message])
        latest_labelled_tweet_id = max(id, latest_labelled_tweet_id)
    fp.close()

    print 'found %d new tweets' % len(labelled_messages)
    print 'before: latest_labelled_tweet_id=%d' % previous_tweet_id
    print 'after: latest_labelled_tweet_id=%d' % latest_labelled_tweet_id

    if latest_labelled_tweet_id == previous_tweet_id:
        print 'Nothing to do'
        exit()

    # Save the current labelled data file
    shutil.copyfile(common.CLASS_FILE, '%s.%d' % (common.CLASS_FILE, previous_tweet_id))

    # Add the new entries to the labelled data file
    fp = open(common.CLASS_FILE, 'at')
    for i,t in enumerate(labelled_messages):
        fp.write('%s | %s\n' % (t[0], t[1]))
    fp.close()

    print 'Added %d new labelled messages to %s' % (len(labelled_messages), common.CLASS_FILE)

    # Update the latest labelled entry id
    file(common.LATEST_CLASS_FILE, 'wt').write(str(latest_labelled_tweet_id))
def main():
    """Yield annotated camera frames as multipart JPEG chunks.

    Generator. Loads the interpreter and labels through cm.load_model(),
    choosing model_edgetpu when the module-level edgetpu flag equals 1 and
    model otherwise. Then, for every frame read from the module-level cap:
    flips it, runs inference, passes the detections to track_object(), draws
    the overlay with append_text_img1(), JPEG-encodes the result and yields it
    wrapped in a '--frame' part (the original comment marks this as Flask
    streaming).

    The loop ends when cap.read() reports failure or when cv2.waitKey() sees
    'q'; cap is then released and all OpenCV windows are destroyed.
    """
    chosen_model = model_edgetpu if edgetpu == 1 else model
    interpreter, labels = cm.load_model(model_dir, chosen_model, lbl, edgetpu)

    # Stage durations in seconds: [camera capture, inference, other].
    stage_durations = [0, 0, 0]

    while True:
        frame_started = time.time()

        # ---------------- Capture camera frame ----------------
        capture_started = time.time()
        grabbed, frame = cap.read()
        if not grabbed:
            break

        # Flip around the x axis (code 0), then around the y axis (code 1).
        cv2_im = cv2.flip(cv2.flip(frame, 0), 1)
        pil_im = Image.fromarray(cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB))
        stage_durations[0] = time.time() - capture_started

        # ---------------- Inference ----------------
        inference_started = time.time()
        cm.set_input(interpreter, pil_im)
        interpreter.invoke()
        objs = cm.get_output(
            interpreter, score_threshold=threshold, top_k=top_k)
        stage_durations[1] = time.time() - inference_started

        # ---------------- Other (tracking, overlay, encoding) ----------------
        other_started = time.time()
        track_object(objs, labels)

        pressed = cv2.waitKey(1) & 0xFF
        if pressed == ord('q'):
            break

        # stage_durations[2] still holds the previous iteration's value here;
        # it is only refreshed after the yield below.
        cv2_im = append_text_img1(
            cv2_im, objs, labels, stage_durations, arr_track_data)

        _encoded_ok, jpeg = cv2.imencode('.jpg', cv2_im)
        pic = jpeg.tobytes()

        # Flask streaming
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + pic + b'\r\n\r\n')

        # Measured after the generator is resumed, so both figures include the
        # time spent suspended at the yield.
        stage_durations[2] = time.time() - other_started
        fps = round(1.0 / (time.time() - frame_started), 1)
        print("*********FPS: ", fps, "************")

    cap.release()
    cv2.destroyAllWindows()
def step2(model, data, args):
    """Run step 2: train (or reload) the model for the ordinal regression task.

    The data is converted with to_ordinal_data() and the model is switched
    with model.to_ordinal(). If a snapshot named 'model_best_loss.th' can be
    loaded from the 'snapshots_2' subdir it is used as-is; otherwise the model
    is trained with Adam for args.nEpochs2 epochs, snapshotting whenever the
    validation accuracy improves, and the best snapshot is reloaded afterwards.

    Args:
        model: model object; must provide to_ordinal(), cuda() and
            parameters().
        data: passed to to_ordinal_data(data, args), which must return the
            (train, val, test) loaders.
        args: options object; reads workDir, learningRate2, nEpochs2 and
            skipTest.

    Returns:
        A tuple (model, results) where results is a dict with the keys
        'Best Val Rank Accuracy' and 'Test Rank Accuracy'. The first is None
        when a pretrained snapshot was loaded instead of training; the second
        is None when args.skipTest is set.
    """
    print('### STEP 2: Train for ordinal regression task')

    # NOTE: despite the file name, the snapshot is selected by best
    # validation accuracy (see the loop below).
    pretrained_snapshot_fname = 'model_best_loss.th'
    snapshot_subdir = 'snapshots_2'

    train_loader, val_loader, test_loader = to_ordinal_data(data, args)
    n_samples_val = len(val_loader.dataset)
    n_samples_test = len(test_loader.dataset)

    best_val_acc = None
    test_acc = None

    model.to_ordinal()

    # Try to load a pretrained model if step 2 has already been executed.
    saved_model = load_model(model, pretrained_snapshot_fname, args,
                             subdir=snapshot_subdir)

    if saved_model is not None:
        print('Loading pretrained model: ' + pretrained_snapshot_fname)
        model = saved_model
        model.cuda()
    else:
        model.cuda()
        optimizer = torch.optim.Adam(model.parameters(), args.learningRate2)
        logline = ('Epoch {:3d}/{}] train_avg_loss = {:.4f}, '
                   'val_avg_loss = {:.4f}, val_accuracy = {}/{} '
                   '({:.2f}%, Best: {:.2f}%)')

        since = time.time()
        # The context manager guarantees the log file is closed, even when
        # training raises.
        with open(os.path.join(args.workDir, 'log_2.txt'), 'wb') as logfile:
            # Epochs are numbered 1..nEpochs2, matching step1; the previous
            # trange(nEpochs2 + 1) ran one extra epoch starting at 0.
            for epoch in trange(1, args.nEpochs2 + 1, desc='Epochs'):
                avg_loss = train(model, optimizer, epoch, train_loader,
                                 logfile, args)
                val_loss, val_acc, n_correct = evaluate(model, val_loader,
                                                        args)

                # Draw fresh tuples for the next epoch.
                train_loader.dataset.sample_tuples()
                val_loader.dataset.sample_tuples()

                if best_val_acc is None or best_val_acc < val_acc:
                    best_val_acc = val_acc
                    tqdm.write('Snapshotting best model: '
                               + pretrained_snapshot_fname)
                    save_model(model, pretrained_snapshot_fname, args,
                               subdir=snapshot_subdir)

                tqdm.write(logline.format(epoch, args.nEpochs2, avg_loss,
                                          val_loss, n_correct, n_samples_val,
                                          val_acc, best_val_acc))

        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))

        # Continue with the best snapshot. If none could be loaded (e.g. no
        # epoch ran), keep the in-memory model instead of failing on None.
        best_model = load_model(model, pretrained_snapshot_fname, args,
                                subdir=snapshot_subdir)
        if best_model is not None:
            model = best_model
        model.cuda()

    # RANK TESTING ------------
    if not args.skipTest:
        test_loss, test_acc, n_correct = evaluate(model, test_loader, args)
        logline = ('TEST] test_avg_loss = {:.4f}, '
                   'test_accuracy = {}/{} ({:.2f}%)')
        print(logline.format(test_loss, n_correct, n_samples_test, test_acc))

    return model, {
        'Best Val Rank Accuracy': best_val_acc,
        'Test Rank Accuracy': test_acc
    }
help='use pre-trained model') parser.add_argument('--host', default="localhost", metavar='PATH', help="Location of crayon server") parser.add_argument('--port', default=8899, metavar='N', help="Port of Crayon server (Default:8899)") if __name__ == "__main__": args = parser.parse_args() cudnn.Benchmark = True model, unfreeze = load_model(args.arch, args.classes, args.pretrained) criterion = nn.CrossEntropyLoss() optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay) scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1) client = None logger = None try: client = CrayonClient(args.host, args.port) client.remove_experiment("pytorch_logging") logger = client.create_experiment("pytorch_logging")
def update_class_file():
    """Label newly saved tweets and append them to common.CLASS_FILE.

    Labelled tweets are stored in *.cls files, one per line, in the format
        class label | message
    e.g.
        n | If I see one more back to school commercial I'm giving my eyes a paper cut.
        y | i got lemon on my finger and it stings .-. stupid paper cut -.-
    (Only the tweet text is stored here. The other tweet information is not
    stored as only the tweet text is used for classification.)

    Files involved (see common.py for the actual names):
      * common.TWEETS_FILE       - input, one '|'-separated tweet per line
                                   with four fields; the first is the integer
                                   tweet id and the last is the message.
      * common.CLASS_FILE        - labelled tweets; new entries are appended.
      * common.LATEST_CLASS_FILE - the id of the newest labelled tweet, stored
                                   as text. A missing file is treated as id 0.

    Each new tweet that passes filters.is_allowed_for_training() gets a
    guessed label from the model returned by common.load_model(). Before
    appending, the current CLASS_FILE (if any) is copied to
    '<CLASS_FILE>.<previous latest id>' as a backup.

    Raises:
        SystemExit: when no new tweet was labelled ('Nothing to do').
    """
    # The latest labelled tweet id (an integer) is stored as text in
    # LATEST_CLASS_FILE.
    latest_labelled_tweet_id = 0
    if os.path.exists(common.LATEST_CLASS_FILE):
        with open(common.LATEST_CLASS_FILE, 'rt') as id_file:
            latest_labelled_tweet_id = int(id_file.read().strip())
    previous_tweet_id = latest_labelled_tweet_id

    # Read the classification model. This will be used to guess tweet
    # classifications.
    model = common.load_model()

    # Read the tweets from TWEETS_FILE, label them and collect
    # (label, message) pairs in labelled_messages.
    labelled_messages = []
    with open(common.TWEETS_FILE, 'rt') as tweets_file:
        for line in tweets_file:
            line = line.strip(' \n')

            # Skip empty lines
            if not line:
                continue

            # Skip incorrectly formatted lines (not exactly four fields, or a
            # non-integer id).
            # NOTE(review): a message that itself contains '|' yields more
            # than four fields and is skipped here - confirm this is intended.
            try:
                id_s, _, _, message = [pt.strip() for pt in line.split('|')]
                tweet_id = int(id_s)
            except ValueError:
                print('ValueError %s' % line)
                continue

            # Skip tweets we have already saved. The threshold is the running
            # maximum, so it also rises as tweets are accepted in this pass.
            if tweet_id <= latest_labelled_tweet_id:
                continue

            # Filter out messages that are not even allowed for training
            if not filters.is_allowed_for_training(message):
                continue

            predicted, _ = model.classify(message)
            labelled_messages.append(
                (definitions.AUTO_CLASSES_LABELS[predicted], message))
            latest_labelled_tweet_id = max(tweet_id, latest_labelled_tweet_id)

    print('found %d new tweets' % len(labelled_messages))
    print('before: latest_labelled_tweet_id=%d' % previous_tweet_id)
    print('after: latest_labelled_tweet_id=%d' % latest_labelled_tweet_id)

    if latest_labelled_tweet_id == previous_tweet_id:
        print('Nothing to do')
        # Terminates the process like the interactive-only exit() helper did,
        # without depending on the site module.
        raise SystemExit

    # Save the current labelled data file. On a first run there is nothing to
    # back up yet; the append below creates the file.
    if os.path.exists(common.CLASS_FILE):
        shutil.copyfile(common.CLASS_FILE,
                        '%s.%d' % (common.CLASS_FILE, previous_tweet_id))

    # Add the new entries to the labelled data file
    with open(common.CLASS_FILE, 'at') as class_file:
        class_file.writelines(
            '%s | %s\n' % (label, message)
            for label, message in labelled_messages)
    print('Added %d new labelled messages to %s'
          % (len(labelled_messages), common.CLASS_FILE))

    # Update the latest labelled entry id
    with open(common.LATEST_CLASS_FILE, 'wt') as id_file:
        id_file.write(str(latest_labelled_tweet_id))