def main(seq_file, ss_file, ss8_file, acc_file, acc20_file, src_bio_file, output_file):
    print("load data...")
    data = gen_data(seq_file, ss_file, ss8_file, acc_file, acc20_file, src_bio_file)
    x_test_seq, x_test_bi, x_test_tri, x_test_acc, x_test_acc20, x_test_ss, x_test_ss8, x_test_bio = \
        data['test']['seq'], data['test']['bigram'], data['test']['trigram'], \
        data['test']['acc'], data['test']['acc20'], data['test']['ss'], data['test']['ss8'], \
        data['test']['src_bio']

    print("make data...")
    x_test = make_data(x_test_seq, x_test_bi, x_test_tri, x_test_acc,
                       x_test_acc20, x_test_ss, x_test_ss8, x_test_bio)

    filepath = './result/model/EPSOL.hdf5'
    best_model = utils.load_model(filepath)
    pred_test, pred_prob_test = get_classification_prediction(best_model, x_test)
    save_classification_prediction(pred_test, pred_prob_test, output_file)

    print("-----------------------------------------------")
    print("EPSOL prediction finished!")
    print("-----------------------------------------------")
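# A hypothetical command-line driver for the prediction entry point above; the
# flag names simply mirror main()'s parameters and are not part of the
# original script.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='EPSOL prediction (sketch)')
    for flag in ('seq_file', 'ss_file', 'ss8_file', 'acc_file',
                 'acc20_file', 'src_bio_file', 'output_file'):
        parser.add_argument('--' + flag, required=True)
    args = parser.parse_args()
    main(args.seq_file, args.ss_file, args.ss8_file, args.acc_file,
         args.acc20_file, args.src_bio_file, args.output_file)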
def main():
    print("load data...")
    data = load_data()
    x_train_seq, x_train_bi, x_train_tri, x_train_acc, x_train_acc20, x_train_ss, x_train_ss8, x_train_bio, y_train = \
        data['train']['seq'], data['train']['bigram'], data['train']['trigram'], \
        data['train']['acc'], data['train']['acc20'], data['train']['ss'], data['train']['ss8'], \
        data['train']['src_bio'], data['train']['label']
    x_dev_seq, x_dev_bi, x_dev_tri, x_dev_acc, x_dev_acc20, x_dev_ss, x_dev_ss8, x_dev_bio, y_dev = \
        data['dev']['seq'], data['dev']['bigram'], data['dev']['trigram'], \
        data['dev']['acc'], data['dev']['acc20'], data['dev']['ss'], data['dev']['ss8'], \
        data['dev']['src_bio'], data['dev']['label']
    x_test_seq, x_test_bi, x_test_tri, x_test_acc, x_test_acc20, x_test_ss, x_test_ss8, x_test_bio, y_test = \
        data['test']['seq'], data['test']['bigram'], data['test']['trigram'], \
        data['test']['acc'], data['test']['acc20'], data['test']['ss'], data['test']['ss8'], \
        data['test']['src_bio'], data['test']['label']

    print("make data...")
    x_train, y_oh_train = make_data(
        x_train_seq, x_train_bi, x_train_tri, x_train_acc, x_train_acc20,
        x_train_ss, x_train_ss8, x_train_bio, y_train)
    x_dev, y_oh_dev = make_data(
        x_dev_seq, x_dev_bi, x_dev_tri, x_dev_acc, x_dev_acc20,
        x_dev_ss, x_dev_ss8, x_dev_bio, y_dev)
    x_test, y_oh_test = make_data(
        x_test_seq, x_test_bi, x_test_tri, x_test_acc, x_test_acc20,
        x_test_ss, x_test_ss8, x_test_bio, y_test)

    model = Models.EPSOL().get_model()
    model.compile(loss='binary_crossentropy', optimizer=utils.get_adam_optim(),
                  metrics=['accuracy'])
    print(model.summary())
    model.fit(x_train, y_oh_train, batch_size=64, epochs=10,
              validation_data=(x_dev, y_oh_dev), callbacks=get_callbacks())

    # get_callbacks() is assumed to include a ModelCheckpoint that writes the
    # best weights to `filepath`; `model_name` must be defined at module level.
    filepath = './result/model/' + model_name + '.hdf5'
    # model.save(filepath)
    # print("save model ok!")
    best_model = utils.load_model(filepath)
    pred_test, pred_prob_test = get_classification_prediction(best_model, x_test)
    save_classification_prediction(pred_test, pred_prob_test)
    print("save result ok!")
def check():
    m = load_model()
    fname = os.path.join(csv_folder, csv_name_drop_unk)
    df24 = pd.read_csv(fname, comment='#')

    kepids = set(df24['kepid'].values)
    count, total = 1, len(kepids)
    diff_count = 0
    processed = 0
    with open('diff_kepid.txt', 'w') as f:
        with open('unk_kepid.txt', 'w') as f_unk:  # reserved for unknown-label KepIDs
            for kepid in kepids:
                res = test_kepid(m, kepid, dr24=True)
                sub_df = df24[df24['kepid'] == int(kepid)]
                for plnt, prob in res.items():
                    cls = sub_df[sub_df['tce_plnt_num'] == int(plnt)]['av_training_set'].values[0]
                    processed += 1
                    # a prediction is wrong if a PC scores below 0.5 or a non-PC above 0.5
                    if (cls == 'PC' and prob < 0.5) \
                            or (cls != 'PC' and prob > 0.5):
                        diff_count += 1
                        print(f'diff rate: {diff_count / processed:.3f}')
                        print(f'{kepid}-{plnt} prob: {prob}', file=f)
                print(f'{count}/{total}')
                count += 1
import logging

from flask import Flask, render_template, request

# load_model, create_pipeline and predict are assumed to be project-level
# helpers imported from elsewhere in the repo.

# init flask app instance
app = Flask(__name__)


@app.route("/")
def index():
    print(model.summary())
    return render_template("index.html")


@app.route('/predict_message', methods=["POST"])
def predict_message():
    if request.method == "POST":
        message = request.form["message"]
        try:
            prediction = predict(message, model, pipeline)
            return render_template("index.html", prediction=prediction[0][0])
        except IndexError as e:
            logging.critical(e)
            return render_template("index.html")


if __name__ == '__main__':
    model = load_model(
        "./data/models_data/model_conv_drop_false_1_5_new_data.json",
        "./data/models_data/model_weights_conv_drop_false_1_5_new_data.h5")
    pipeline = create_pipeline(
        key_word_path="data/pickled/key_word_map_new_data_1.pkl")
    app.run(debug=True, port=4000, host="0.0.0.0")
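# A minimal client-side sketch for exercising the /predict_message endpoint
# above with the `requests` library; the example message is made up.
import requests

resp = requests.post("http://localhost:4000/predict_message",
                     data={"message": "example text to classify"})
print(resp.status_code)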
def main(opt):
    train_data, valid_data = get_train_valid_split_data_names(
        opt.img_folder, opt.ano_folder, valid_size=1 / 8)

    # Load the data
    print("load data")
    train_dataset = Phase1Dataset(train_data, load_size=(640, 640),
                                  augment=True, limit=opt.limit)
    print("train data length : %d" % (len(train_dataset)))
    valid_dataset = Phase1Dataset(valid_data, load_size=(640, 640),
                                  augment=False, limit=opt.limit)
    print("valid data length : %d" % (len(valid_dataset)))

    # Build the DataLoaders
    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=opt.batch_size,
        shuffle=True,
        num_workers=opt.num_workers,
        pin_memory=True,
        drop_last=True
    )
    valid_loader = torch.utils.data.DataLoader(
        valid_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=opt.num_workers,
        pin_memory=True,
        drop_last=True
    )

    # Select the device (PyTorch requires it to be set explicitly)
    device = torch.device('cuda' if opt.gpus > 0 else 'cpu')

    # Build the model
    heads = {'hm': 1}
    model = get_pose_net(18, heads, 256).to(device)

    # Define the optimizer
    if opt.optimizer == "SGD":
        optimizer = torch.optim.SGD(model.parameters(), lr=opt.lr)
    elif opt.optimizer == "Adam":
        optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    elif opt.optimizer == "RAdam":
        optimizer = optim.RAdam(model.parameters(), lr=opt.lr)

    start_epoch = 0
    # Resume from a checkpoint if one is given. This must come after the
    # optimizer is created, since load_model also restores its state; the
    # original code referenced `optimizer` before defining it and then reset
    # start_epoch to 0 unconditionally.
    if opt.load_model != '':
        model, optimizer, start_epoch = load_model(
            model, opt.load_model, optimizer)

    # Define the loss function
    criterion = HMLoss()

    # Define the learning-rate schedule
    scheduler = CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=1,
                                            eta_min=0.00001)

    best_validation_loss = 1e10

    # Create the output folder
    os.makedirs(os.path.join(opt.save_dir, opt.task, 'visualized'), exist_ok=True)

    # Training loop  TODO: evaluate on validation data and save the model at the end of every epoch
    for epoch in range(start_epoch + 1, opt.num_epochs + 1):
        print("learning rate : %f" % scheduler.get_last_lr()[0])
        train(train_loader, model, optimizer, criterion, device,
              opt.num_epochs, epoch)
        if opt.optimizer == "SGD":
            scheduler.step()
        # Save the latest model
        save_model(os.path.join(opt.save_dir, opt.task, 'model_last.pth'),
                   epoch, model, optimizer, scheduler)
        # Evaluate on the validation data
        validation_loss, accumulate_datas = valid(valid_loader, model,
                                                  criterion, device)
        # Save the model whenever the best score improves
        if validation_loss < best_validation_loss:
            best_validation_loss = validation_loss
            save_model(os.path.join(opt.save_dir, opt.task, 'model_best.pth'),
                       epoch, model, optimizer, scheduler)
            print("saved best model")
            visualization(os.path.join(opt.save_dir, opt.task, 'visualized'),
                          accumulate_datas)
def compare(threshold=0.5):
    global _same, _total, _all_diff, _all_fp, _all_fn, _wrong_local_view_kepids
    fname = path.join(path.dirname(__file__), 'robo.csv')
    df = pd.read_csv(fname)
    kepids_and_plnt = df[['kepid', 'tce_plnt_num', 'pred_class']]
    m = load_model()
    seen = {}
    _same = 0
    _total = 0
    fp, fn = 0, 0
    count = 1
    _wrong_local_view_kepids = []
    _all_diff = []
    _all_fp = []
    _all_fn = []
    for (kepid, plnt_num, pred_class) in __read_df(kepids_and_plnt):
        try:
            # run the model once per KepID and cache the per-planet probabilities
            if kepid not in seen:
                res = test_kepid(m, kepid)
                seen[kepid] = res
            prob_of_pc = seen[kepid][plnt_num]
            class_of_pc = '1' if float(prob_of_pc) > threshold else '0'
            if str(pred_class) == class_of_pc:
                _same += 1
            else:
                print(f"diff: {kepid}-{plnt_num}")
                _all_diff.append(f'{kepid}-{plnt_num}')
                if str(pred_class) == '1':
                    fn += 1
                    _all_fn.append(f'{kepid}-{plnt_num} ({prob_of_pc})')
                if str(pred_class) == '0':
                    fp += 1
                    _all_fp.append(f'{kepid}-{plnt_num} ({prob_of_pc})')
            _total += 1
            # "precision" here is really the agreement rate with the reference labels
            print(
                f"{count}/{len(kepids_and_plnt)}, precision: {_same / _total * 100:.3f}%"
            )
            count += 1
        except Exception as e:
            print(e)
            _wrong_local_view_kepids.append(kepid)
    _write_output()
def test(opt):
    # set device to cpu/gpu
    if opt.use_gpu:
        device = torch.device("cuda", opt.gpu_id)
    else:
        device = torch.device("cpu")

    transform_test = transforms.Compose([
        transforms.ToTensor(),
    ])

    # get CIFAR10/CIFAR100 test set
    if opt.dataset == "CIFAR10":
        test_set = CIFAR10(root="./data", train=False, download=True,
                           transform=transform_test)
    else:
        test_set = CIFAR100(root="./data", train=False, download=True,
                            transform=transform_test)
    num_classes = np.unique(test_set.targets).shape[0]

    # get test dataloader
    test_loader = DataLoader(test_set,
                             batch_size=opt.batch_size,
                             num_workers=opt.num_workers,
                             shuffle=False)

    print(
        "Dataset -- {}, Metric -- {}, Train Mode -- {}, Test Mode -- {}, Blackbox -- {}, Backbone -- {}"
        .format(opt.dataset, opt.metric, opt.train_mode, opt.test_mode,
                opt.test_bb, opt.backbone))
    print("Test iteration batch size: {}".format(opt.batch_size))
    print("Test iterations per epoch: {}".format(len(test_loader)))

    model = load_model(opt.dataset, opt.metric, opt.train_mode, opt.backbone,
                       opt.s, opt.m)
    model.to(device)
    if opt.use_gpu:
        model = DataParallel(model).to(device)

    # load black-box model for black-box attacks
    if opt.test_bb:
        # test black-box attacks for different metrics
        if opt.bb_metric != "softmax":
            # the other black-box models were trained in adversarial-training mode
            attack_model = load_model_underscore(opt.dataset, opt.bb_metric,
                                                 "clean", opt.backbone,
                                                 opt.s, opt.m)
            # the original re-wrapped `model` here; it is the freshly loaded
            # attack model that needs moving to the device
            attack_model.to(device)
            if opt.use_gpu:
                attack_model = DataParallel(attack_model).to(device)
        else:
            model_bb = load_model(opt.dataset, "bb", "", opt.backbone,
                                  opt.s, opt.m)
            model_bb.to(device)
            if opt.use_gpu:
                model_bb = DataParallel(model_bb).to(device)
            attack_model = model_bb
    else:
        attack_model = model

    # get prediction results for model
    y_true, y_pred = [], []
    for ii, data in enumerate(test_loader):
        # load data batch to device
        images, labels = data
        images = images.to(device)
        labels = labels.to(device).long()
        predictions = labels.cpu().numpy()

        # random restarts for pgd attack
        for restart_cnt in range(opt.test_restarts):
            # perform adversarial attack update to images
            if opt.test_mode == "fgsm":
                adv_images = fgsm(attack_model, images, labels, 8. / 255)
            elif opt.test_mode == "bim":
                adv_images = bim(attack_model, images, labels, 8. / 255,
                                 2. / 255, 7)
            elif opt.test_mode == "pgd_7":
                adv_images = pgd(attack_model, images, labels, 8. / 255,
                                 2. / 255, 7)
            elif opt.test_mode == "pgd_20":
                adv_images = pgd(attack_model, images, labels, 8. / 255,
                                 2. / 255, 20)
            elif opt.test_mode == "mim":
                adv_images = mim(attack_model, images, labels, 8. / 255,
                                 2. / 255, 0.9, 40)
            else:
                adv_images = images

            # get feature embedding from resnet and prediction
            _, predictions_i = model(adv_images, labels)

            # accumulate test results: an example counts as misclassified if
            # any restart fools the model
            predictions_i = torch.argmax(predictions_i, 1).cpu().numpy()
            labels_i = labels.cpu().numpy()
            wrong = np.where(predictions_i != labels_i)
            predictions[wrong] = predictions_i[wrong]

        y_true.append(labels.cpu().numpy())
        y_pred.append(predictions)

    y_true, y_pred = np.concatenate(y_true), np.concatenate(y_pred)
    print(classification_report(y_true, y_pred))
    print("Accuracy: {}".format(accuracy_score(y_true, y_pred)))
    return y_true, y_pred
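# The attack helpers used above (fgsm/bim/pgd/mim) are defined elsewhere in the
# repo. For reference, a minimal sketch of the standard FGSM update, assuming a
# model that maps images straight to logits (the repo's models instead return a
# (feature, logits) tuple); the name fgsm_sketch is hypothetical.
import torch
import torch.nn.functional as F


def fgsm_sketch(model, images, labels, eps):
    # take a single gradient-sign step away from the true labels,
    # then clamp back to the valid pixel range [0, 1]
    images = images.clone().detach().requires_grad_(True)
    loss = F.cross_entropy(model(images), labels)
    loss.backward()
    adv_images = images + eps * images.grad.sign()
    return adv_images.clamp(0.0, 1.0).detach()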
def __init__(self):
    loaded = load_best_model_exp(FLAGS.model)
    self.__dict__.update(loaded)
    self.best_model = load_model(self.best_model_dir, self.best_model_params)
    self.data_loader = create_data_loader()
    self.data_loader.load_data()
def main():
    global model_name

    print("load data...")
    data = load_data()
    x_train_seq, x_train_bi, x_train_tri, x_train_acc, x_train_acc20, x_train_ss, x_train_ss8, x_train_bio, y_train = \
        data['train']['seq'], data['train']['bigram'], data['train']['trigram'], \
        data['train']['acc'], data['train']['acc20'], data['train']['ss'], data['train']['ss8'], \
        data['train']['src_bio'], data['train']['label']
    x_dev_seq, x_dev_bi, x_dev_tri, x_dev_acc, x_dev_acc20, x_dev_ss, x_dev_ss8, x_dev_bio, y_dev = \
        data['dev']['seq'], data['dev']['bigram'], data['dev']['trigram'], \
        data['dev']['acc'], data['dev']['acc20'], data['dev']['ss'], data['dev']['ss8'], \
        data['dev']['src_bio'], data['dev']['label']

    # merge the train and dev splits so the folds cover all labeled data
    x_seq_full = np.concatenate((x_train_seq, x_dev_seq), axis=0)
    x_bi_full = np.concatenate((x_train_bi, x_dev_bi), axis=0)
    x_tri_full = np.concatenate((x_train_tri, x_dev_tri), axis=0)
    x_acc_full = np.concatenate((x_train_acc, x_dev_acc), axis=0)
    x_acc20_full = np.concatenate((x_train_acc20, x_dev_acc20), axis=0)
    x_ss_full = np.concatenate((x_train_ss, x_dev_ss), axis=0)
    x_ss8_full = np.concatenate((x_train_ss8, x_dev_ss8), axis=0)
    x_bio_full = np.concatenate((x_train_bio, x_dev_bio), axis=0)
    y_full = np.concatenate((y_train, y_dev), axis=0)

    print("make data...")
    x_full, y_oh_full = make_data(x_seq_full, x_bi_full, x_tri_full,
                                  x_acc_full, x_acc20_full, x_ss_full,
                                  x_ss8_full, x_bio_full, y_full)

    kf = KFold(n_splits=10, shuffle=True, random_state=2021)
    count = 1
    acc_list = []
    mcc_list = []
    for train_index, test_index in kf.split(y_oh_full):
        print('Starting CV Iteration: ', str(count))
        model_name = "cv_fold_" + str(count)
        print(model_name)
        x_train, y_train, x_test, y_test = make_fold(x_full, y_oh_full,
                                                     train_index, test_index)
        model = Models.EPSOL().get_model()
        model.compile(loss='binary_crossentropy',
                      optimizer=utils.get_adam_optim(), metrics=['accuracy'])
        # print(model.summary())
        model.fit(x_train, y_train, batch_size=64, epochs=10,
                  validation_split=0.1, callbacks=get_callbacks())

        # as in the training script above, get_callbacks() is assumed to
        # checkpoint the best weights of this fold to `filepath`
        filepath = './result/model/' + model_name + '.hdf5'
        best_model = utils.load_model(filepath)
        [pred_test, pred_prob_test, acc, mcc, label] = \
            get_classification_prediction(best_model, x_test, y_test)
        print("-------------------------------------------------------")
        print("ACC of fold-{} cross-validation: {}".format(str(count), acc))
        print("MCC of fold-{} cross-validation: {}".format(str(count), mcc))
        acc_list.append(acc)
        mcc_list.append(mcc)
        save_classification_prediction(pred_test, pred_prob_test, label)
        print("save result ok!")
        count = count + 1

    mean_acc = sum(acc_list) / len(acc_list)
    mean_mcc = sum(mcc_list) / len(mcc_list)
    print("-------------------------------------------------------")
    print("Mean ACC of 10 fold cross-validation: {}".format(mean_acc))
    print("Mean MCC of 10 fold cross-validation: {}".format(mean_mcc))
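# make_fold is defined elsewhere in the repo. A minimal sketch of what it is
# assumed to do, given that x_full is a list of per-input feature arrays for a
# multi-input Keras model; the name make_fold_sketch is hypothetical.
def make_fold_sketch(x_full, y_oh_full, train_index, test_index):
    # slice every input array with the same fold indices
    x_train = [x[train_index] for x in x_full]
    x_test = [x[test_index] for x in x_full]
    return x_train, y_oh_full[train_index], x_test, y_oh_full[test_index]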
def load_best_model(self):
    loaded = load_best_model_exp(self.train_eval_model.model_name)
    self.__dict__.update(loaded)
    self.best_model = load_model(self.best_model_dir, self.best_model_params)
    print("Loaded experiment with best model: {model} for data set: {data_set}"
          .format(model=FLAGS.model, data_set=FLAGS.data_set))
def run(self):
    futures = []
    if FLAGS.plot:
        progress = FloatProgress(min=0, max=1)
        display(progress)
    else:
        printProgressBar(0, self.num_samples,
                         prefix='Progress experiment {model}/{data_set}:'
                         .format(model=FLAGS.model, data_set=FLAGS.data_set),
                         suffix='Complete', length=50)
    done = 0.0
    with (SameProcessExecutor() if self.num_workers <= 0
          else concurrent.futures.ProcessPoolExecutor(self.num_workers)) as executor:
        for i in range(self.num_samples):
            inserted = False
            while not inserted:
                # submit a new sample as soon as a worker slot is free
                if len(futures) < self.num_workers or self.num_workers <= 0:
                    x = self.optimizer.ask()  # x is a list of n_points points
                    objective_fun = self.train_eval_model.create_train_eval(i)
                    args_named = self.to_named_params(x)
                    futures.append(
                        WorkItem(i, x, args_named,
                                 executor.submit(objective_fun, args=None,
                                                 **args_named)))
                    inserted = True
                # poll running tasks without blocking (zero timeout)
                for wi in list(futures):
                    try:
                        model_dir, train_eval, validation_eval, test_eval = \
                            wi.future.result(0)
                        self.train_eval_task_finished(
                            futures, wi, model_dir, train_eval,
                            validation_eval, test_eval)
                        done += 1
                        if FLAGS.plot:
                            progress.value = done / self.num_samples
                        else:
                            printProgressBar(done, self.num_samples,
                                             prefix='Progress experiment {model}/{data_set}:'
                                             .format(model=FLAGS.model,
                                                     data_set=FLAGS.data_set),
                                             suffix='Complete', length=50)
                    except concurrent.futures.TimeoutError:
                        pass
                if len(futures) != 0 and len(futures) == self.num_workers:
                    time.sleep(1)

        # all samples submitted; block on the remaining tasks
        for wi in list(futures):
            model_dir, train_eval, validation_eval, test_eval = wi.future.result()
            self.train_eval_task_finished(futures, wi, model_dir, train_eval,
                                          validation_eval, test_eval)
            done += 1
            if FLAGS.plot:
                progress.value = done / self.num_samples
            else:
                printProgressBar(done, self.num_samples,
                                 prefix='Progress experiment {model}/{data_set}:'
                                 .format(model=FLAGS.model,
                                         data_set=FLAGS.data_set),
                                 suffix='Complete', length=50)

    self.best_model = load_model(self.best_model_dir, self.best_model_params)
    # score the best model on all three splits in parallel
    predict_train, predict_valid, predict_test = invoke_in_process_pool(
        self.num_workers,
        Callable(predict_estimator, self.best_model,
                 self.train_eval_model.data_loader.train_x,
                 self.train_eval_model.data_loader.train_y),
        Callable(predict_estimator, self.best_model,
                 self.train_eval_model.data_loader.validation_x,
                 self.train_eval_model.data_loader.validation_y),
        Callable(predict_estimator, self.best_model,
                 self.train_eval_model.data_loader.test_x,
                 self.train_eval_model.data_loader.test_y))
    self.best_model_train_ll = predict_train["log_likelihood"]
    self.best_model_valid_ll = predict_valid["log_likelihood"]
    self.best_model_test_ll = predict_test["log_likelihood"]
    self.save()
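# WorkItem is assumed to be a simple record pairing a sample index and its
# hyperparameters with the pending future, e.g. as a namedtuple:
from collections import namedtuple

WorkItem = namedtuple('WorkItem', ['index', 'x', 'args_named', 'future'])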