def input():
    """Flask view for the contract-search page.

    GET renders the empty search form; POST filters ``consolidated_data2``
    by set-aside code (and optionally by PSC description) and renders the
    top-10 contracts by ``base_and_exercised_options_value`` together with
    the model metric scores for the chosen set-aside.

    NOTE(review): this view shadows the builtin ``input``; renaming would be
    cleaner but the route registration elsewhere may rely on this name.
    """
    contracts = ''
    # Distinct values feed the two dropdowns on the form.
    psc_codes = db.session.query(
        consolidated_data2.product_or_service_code_description).order_by(
            consolidated_data2.product_or_service_code_description).distinct()
    set_asides = db.session.query(
        consolidated_data2.type_of_set_aside_code).order_by(
            consolidated_data2.type_of_set_aside_code).distinct()
    # Return contracts if on post request
    if request.method == "POST":
        # Use .get for both fields so a missing field degrades gracefully
        # instead of aborting with a 400 (psc_code previously used [] access,
        # inconsistently with set_aside).
        queryString = request.form.get('set_aside')
        psc_filter = request.form.get('psc_code', 'All')
        # Precision, Recall, F1
        AccuracyScore = metrics.Accuracy(queryString)
        PrecisionScore = metrics.Precision(queryString)
        RecallScore = metrics.Recall(queryString)
        F1Score = metrics.F1(queryString)
        if psc_filter == "All":
            contracts = consolidated_data2.query.filter_by(
                type_of_set_aside_code=queryString).order_by(
                    consolidated_data2.base_and_exercised_options_value.desc()
                ).limit(10)
        else:
            contracts = consolidated_data2.query.filter_by(
                type_of_set_aside_code=queryString,
                product_or_service_code_description=psc_filter).order_by(
                    consolidated_data2.base_and_exercised_options_value.desc()
                ).limit(10)
        # Log the set aside, psc code, and the query to the server.
        # f-string avoids a TypeError when queryString is None (the old
        # code concatenated it with " : " directly).
        print(f"{queryString} : {psc_filter}")
        print("return value: " + str(contracts))
        return render_template("input.html", accuracy="90.27",
                               psc_codes=psc_codes, contracts=contracts,
                               set_asides=set_asides,
                               AccuracyScore=AccuracyScore,
                               PrecisionScore=PrecisionScore,
                               RecallScore=RecallScore, F1Score=F1Score,
                               set_aside=queryString, psc_filter=psc_filter)
    # If not a post request, just load the input page.
    else:
        return render_template("input.html", psc_codes=psc_codes,
                               contracts='', set_asides=set_asides,
                               AccuracyScore=0, PrecisionScore=0,
                               RecallScore=0, F1Score=0,
                               set_aside="None", psc_filter="All")
def default_classical_scorings(task="predict"):
    """Return the default tuple of metric objects for a classical model.

    ``task == 'predict'`` yields the standard classification metrics; any
    other value is treated as a survival task and yields only a
    concordance index.
    """
    if task != 'predict':
        return (mm.CIndex(tensor=False, hazard=True), )
    macro = dict(average='macro', tensor=False)
    return (
        mm.Accuracy(tensor=False),
        mm.BalancedAccuracy(tensor=False),
        mm.F1Score(**macro),
        mm.Precision(**macro),
        mm.Recall(**macro),
        mm.ROCAUC(**macro),
    )
def main(args):
    """Evaluate each checkpoint on tiled patches and log mean metric
    scores (one row per checkpoint) to ``patchPerforms.csv``.

    Raises:
        ValueError: if ``args.cuda`` is set but no GPU is available.
    """
    if args.cuda and not torch.cuda.is_available():
        raise ValueError("GPUs are not available, please run at cpu mode")
    # init
    data = tileData(args.root, args.img_rows, args.img_cols)
    evaluators = [
        metrics.OAAcc(), metrics.Precision(), metrics.Recall(),
        metrics.F1Score(), metrics.Kappa(), metrics.Jaccard()
    ]
    # prediction
    for checkpoint in args.checkpoints:
        model, is_multi = load_checkpoint(checkpoint, args.cuda)
        # One score list per evaluator; averaged over all tiles at the end.
        performs = [[] for _ in range(len(evaluators))]
        for idx in range(len(data)):
            print("Handling {} by {} \r".format(data.files[idx], checkpoint))
            x, y, shapes = data.slice_by_id(idx)
            # generate prediction
            with torch.set_grad_enabled(False):
                for step in range(0, x.shape[0], args.batch_size):
                    x_batch = x[step:step + args.batch_size]
                    y_batch = y[step:step + args.batch_size]
                    if args.cuda:
                        x_batch = x_batch.cuda()
                        y_batch = y_batch.cuda()
                    # Multi-output models return a tuple; element 0 is used.
                    if is_multi:
                        y_pred = model(x_batch)[0].detach()
                    else:
                        y_pred = model(x_batch).detach()
                    # get performance
                    for i, evaluator in enumerate(evaluators):
                        performs[i].append(
                            evaluator(y_pred, y_batch)[0].item())
        performs = [(sum(p) / len(p)) for p in performs]
        performs = pd.DataFrame(
            [[time.strftime("%h_%d"), checkpoint] + performs],
            columns=['time', 'checkpoint'] + [repr(x) for x in evaluators])
        # save performance; DataFrame.append was removed in pandas 2.0,
        # so use pd.concat to stack the new row onto the existing log.
        log_path = os.path.join(Result_DIR, "patchPerforms.csv")
        if os.path.exists(log_path):
            perform = pd.read_csv(log_path)
        else:
            perform = pd.DataFrame([])
        perform = pd.concat([perform, performs], ignore_index=True)
        perform.to_csv(log_path, index=False, float_format="%.3f")
def main():
    """Train a MIL CNN on the TCT bag dataset, evaluate on the held-out
    test split, and persist the model weights, instance weighter, training
    history and run configuration under the chosen save directory.
    """
    warnings.filterwarnings('ignore')
    # config
    parser = argparse.ArgumentParser()
    parser.add_argument('-s', '--save', default='./save',
                        help='保存的文件夹路径,如果有重名,会在其后加-来区别')
    parser.add_argument('-is', '--image_size', default=224, type=int,
                        help='patch会被resize到多大,默认时224 x 224')
    parser.add_argument('-vts', '--valid_test_size', default=(0.1, 0.1),
                        type=float, nargs=2,
                        help='训练集和测试集的大小,默认时0.1, 0.1')
    parser.add_argument('-bs', '--batch_size', default=32, type=int,
                        help='batch size,默认时32')
    parser.add_argument('-nw', '--num_workers', default=12, type=int,
                        help='多进程数目,默认时12')
    parser.add_argument('-lr', '--learning_rate', default=0.0001, type=float,
                        help='学习率大小,默认时0.0001')
    parser.add_argument('-e', '--epoch', default=10, type=int,
                        help='epoch 数量,默认是10')
    parser.add_argument('--reduction', default='mean',
                        help='聚合同一bag的instances时的聚合方式,默认时mean')
    parser.add_argument('--multipler', default=2.0, type=float,
                        help="为了平衡pos和neg,在weight再乘以一个大于1的数,默认是2.0")
    args = parser.parse_args()
    save = args.save
    image_size = (args.image_size, args.image_size)
    valid_size, test_size = args.valid_test_size
    batch_size = args.batch_size
    num_workers = args.num_workers
    lr = args.learning_rate
    epoch = args.epoch
    reduction = args.reduction
    multipler = args.multipler

    # ----- load data -----
    neg_dir = './DATA/TCT/negative'
    pos_dir = './DATA/TCT/positive'
    dat = MilData.from2dir(neg_dir, pos_dir)
    # Train-time augmentation: random flips; eval uses resize/normalize only.
    train_transfer = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    test_transfer = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    train_dat, valid_dat, test_dat = dat.split_by_bag(
        test_size, valid_size, train_transfer=train_transfer,
        valid_transfer=test_transfer, test_transfer=test_transfer)
    dataloaders = {
        'train': data.DataLoader(train_dat, batch_size=batch_size,
                                 num_workers=num_workers, shuffle=True),
        'valid': data.DataLoader(valid_dat, batch_size=batch_size,
                                 num_workers=num_workers),
        'test': data.DataLoader(test_dat, batch_size=batch_size,
                                num_workers=num_workers),
    }

    # ----- build network and optimizer -----
    net = NormalCnn()
    # reduction='none' keeps per-instance losses so the weighter can
    # reweight them before aggregation.
    criterion = nn.BCELoss(reduction='none')
    optimizer = optim.Adam(net.parameters(), lr=lr)
    scorings = [
        mm.Loss(),
        mm.Recall(reduction=reduction),
        mm.ROCAUC(reduction=reduction),
        mm.BalancedAccuracy(reduction=reduction),
        mm.F1Score(reduction=reduction),
        mm.Precision(reduction=reduction),
        mm.Accuracy(reduction=reduction)
    ]

    # ----- train the network -----
    try:
        net, hist, weighter = train(net, criterion, optimizer, dataloaders,
                                    epoch=epoch, metrics=scorings,
                                    weighter_multipler=multipler)
        test_hist = evaluate(net, dataloaders['test'], criterion, scorings)
    except Exception:
        # Drop into the debugger for post-mortem inspection, then re-raise:
        # previously execution fell through to the save code below with
        # `net`, `hist` and `weighter` possibly unbound (NameError).
        import ipdb
        ipdb.set_trace()  # XXX BREAKPOINT
        raise

    # save results
    dirname = check_update_dirname(save)
    torch.save(net.state_dict(), os.path.join(dirname, 'model.pth'))
    torch.save(weighter, os.path.join(dirname, 'weigher.pth'))
    pd.DataFrame(hist).to_csv(os.path.join(dirname, 'train.csv'))
    with open(os.path.join(dirname, 'config.json'), 'w') as f:
        json.dump(args.__dict__, f)
    with open(os.path.join(dirname, 'test.json'), 'w') as f:
        json.dump(test_hist, f)
# ----- script-level YOLOv3 model / loss / metric / optimizer setup -----
model = l_models.YoloV3(l_config.filters, anchors, l_config.grid_sizes,
                        l_config.class_num)
# Per-component losses plus the combined loss.
loca_loss = l_losses.LocationLoss(anchors)
conf_loss = l_losses.ConfidenceLoss()
cate_loss = l_losses.CategoricalLoss()
all_loss = l_losses.AllLoss()
# Metrics tracked for localization, objectness (overall / true / false
# positives), classification, and precision/recall.
loca_metric = l_metrics.Location()
conf_metric = l_metrics.Confidence()
true_conf_metric = l_metrics.TrueConfidence()
false_conf_metric = l_metrics.FalseConfidence()
cate_metric = l_metrics.Categorical()
precision = l_metrics.Precision()
recall = l_metrics.Recall()
optimizer = tf.keras.optimizers.SGD(1e-3, momentum=0.9)
# One forward pass on a real batch before load_weights — presumably to
# force the subclassed Keras model to build its variables; confirm.
item = next(iter(train_ds))
pred = model(item[0])
# checkpoint = tf.keras.callbacks.ModelCheckpoint(l_config.SAVE_WEIGHT_FILE)
# tensor_board = tf.keras.callbacks.TensorBoard(l_config.BOARD_LOG_DIR, update_freq=10)
model.load_weights(l_config.save_weight_file)


def draw(image, loca, conf, cate, size):
    # NOTE(review): the body of draw() appears truncated in this chunk;
    # only the batch-dimension expansion of `image` is visible here.
    images = image[tf.newaxis, ...]
def run(args):
    """Train and evaluate paired LSTM/RNN DKT models for 1..15 knowledge
    components (KCs), collect their training histories, and — on the final
    15-KC run — dump per-question (actual, predicted) pairs to CSV files
    for offline ROC analysis.
    """
    all_lstm_history = dict()
    all_rnn_history = dict()
    for i in range(1, 16):  # i = number of knowledge components
        dataset, length, nb_features, nb_skills = data_util.load_dataset(
            fn=args.f, batch_size=args.batch_size, shuffle=False, num_kc=i)
        train_set, test_set = data_util.split_dataset(
            dataset=dataset, total_size=length,
            test_fraction=args.test_split)
        print("\n[----- COMPILING ------]")
        # Same architecture twice: one with an LSTM cell, one plain RNN.
        lstm = deepkt.DKTModel(nb_features=nb_features, nb_skills=nb_skills,
                               hidden_units=args.hidden_units, LSTM=True)
        lstm.compile(optimizer='adam', metrics=[
            metrics.BinaryAccuracy(), metrics.AUC(),
            metrics.Precision(), metrics.Recall()
        ])
        rnn = deepkt.DKTModel(nb_features=nb_features, nb_skills=nb_skills,
                              hidden_units=args.hidden_units)
        rnn.compile(optimizer='adam', metrics=[
            metrics.BinaryAccuracy(), metrics.AUC(),
            metrics.Precision(), metrics.Recall()
        ])
        print(lstm.summary())
        print(rnn.summary())
        print("\n[-- COMPILING DONE --]")
        print("\n[----- TRAINING ------]")
        lstm_history = lstm.fit(dataset=train_set, epochs=args.epochs,
                                verbose=args.v)
        rnn_history = rnn.fit(dataset=train_set, epochs=args.epochs,
                              verbose=args.v)
        print("\n[--- TRAINING DONE ---]")
        print("\n[----- TESTING ------]")
        print("Number of KCs: ", i)
        lstm.evaluate(dataset=test_set, verbose=args.v)
        rnn.evaluate(dataset=test_set, verbose=args.v)
        print("\n[--- TESTING DONE ---]")
        all_lstm_history[i] = lstm_history.history
        all_rnn_history[i] = rnn_history.history
        if i == 15:
            # Final run: export predictions for both models for ROC curves.
            answers = data_util.get_answers(args.f)
            lstm_preds = lstm.get_predictions(test_set)
            rnn_preds = rnn.get_predictions(test_set)
            with open("lstm_roc.csv", 'w') as f:
                writer = csv.DictWriter(f, fieldnames=['y_actual', 'y_pred'])
                writer.writeheader()
                # NOTE(review): this inner `i` shadows the outer KC-loop
                # variable; harmless only because this is its last iteration.
                for i in range(len(answers)):
                    student_answers = answers[i]
                    student = lstm_preds[i][0]
                    for j in range(len(student)):
                        question = student_answers[j]
                        skill = question[0]  # skill index used to pick the prediction
                        y = question[1]      # observed answer correctness
                        y_pred = student[j][skill]
                        writer.writerow({'y_pred': y_pred, 'y_actual': y})
            with open("rnn_roc.csv", 'w') as f:
                writer = csv.DictWriter(f, fieldnames=['y_actual', 'y_pred'])
                writer.writeheader()
                for i in range(len(answers)):
                    student_answers = answers[i]
                    student = rnn_preds[i][0]
                    for j in range(len(student)):
                        question = student_answers[j]
                        skill = question[0]
                        y = question[1]
                        y_pred = student[j][skill]
                        writer.writerow({'y_pred': y_pred, 'y_actual': y})
    write_accuracy(all_lstm_history, all_rnn_history)
def main(args):
    """Run area-level evaluation for each checkpoint: predict every tile,
    save the merged prediction image, and append mean metric scores (one
    row per checkpoint) to ``areaPerforms.csv``.

    Raises:
        ValueError: if ``args.cuda`` is set but no GPU is available.
    """
    if args.cuda and not torch.cuda.is_available():
        raise ValueError("GPUs are not available, please run at cpu mode")
    # init
    data = tileData(args.root, args.img_rows, args.img_cols)
    evaluators = [
        metrics.OAAcc(), metrics.Precision(), metrics.Recall(),
        metrics.F1Score(), metrics.Kappa(), metrics.Jaccard()
    ]
    # prediction
    for checkpoint in args.checkpoints:
        model, is_multi = load_checkpoint(checkpoint, args.cuda)
        Save_DIR = os.path.join(Result_DIR, "area", checkpoint.split("_")[0])
        if not os.path.exists(Save_DIR):
            os.makedirs(Save_DIR)
        performs = [[] for _ in range(len(evaluators))]
        for idx in range(len(data)):
            print("Handling {} by {} \r".format(data.files[idx], checkpoint))
            x, y, shapes = data.slice_by_id(idx)
            # get prediction, batch by batch
            y_pred = []
            with torch.set_grad_enabled(False):
                for step in range(0, x.shape[0], args.batch_size):
                    x_batch = x[step:step + args.batch_size]
                    if args.cuda:
                        x_batch = x_batch.cuda()
                    # generate prediction (multi-output models return a
                    # tuple; element 0 is the segmentation output)
                    if is_multi:
                        y_pred.append(model(x_batch)[0].detach())
                    else:
                        y_pred.append(model(x_batch).detach())
            y_pred = torch.cat(y_pred, 0)
            if args.cuda:
                y_pred = y_pred.cpu()
            assert y_pred.shape[0] == x.shape[
                0], "All data should be iterated."
            del x  # free the input tensor before assembling the image
            # merge slices into image & save result image
            pred_img = vision.slices_to_img(
                vision.ytensor_to_slices(y_pred, data.cmap), shapes)
            imsave(os.path.join(Save_DIR, data.files[idx]), pred_img,
                   compress=6)
            # get performance — use a distinct loop variable; the original
            # shadowed the tile index `idx` here.
            for eidx, evaluator in enumerate(evaluators):
                performs[eidx].append(evaluator(y_pred, y)[0].item())
        performs = [(sum(p) / len(p)) for p in performs]
        performs = pd.DataFrame(
            [[time.strftime("%h_%d"), checkpoint] + performs],
            columns=['time', 'checkpoint'] + [repr(x) for x in evaluators])
        # save performance; DataFrame.append was removed in pandas 2.0,
        # so use pd.concat to stack the new row onto the existing log.
        log_path = os.path.join(Result_DIR, "areaPerforms.csv")
        if os.path.exists(log_path):
            perform = pd.read_csv(log_path)
        else:
            perform = pd.DataFrame([])
        perform = pd.concat([perform, performs], ignore_index=True)
        perform.to_csv(log_path, index=False, float_format="%.3f")
# ----- script-level DKT training setup (chunk truncated at the end) -----
# Split into train/test/validation; `dataset`, `length`, `nb_features`,
# `nb_skills`, `optimizer`, `CSV_Log` and `model_path` are presumably
# defined earlier in the file — confirm against the full source.
train_set, test_set, val_set = data_util.split_dataset(dataset=dataset,
                                                       total_size=length,
                                                       test_fraction=0.2,
                                                       val_fraction=0.2)
print('-------compiling---------')
model = dpkt.DKTModel(nb_features=nb_features, nb_skills=nb_skills,
                      hidden_units=128, dropout_rate=0.3)
model.compile(optimizer=optimizer, metrics=[
    metrics.BinaryAccuracy(), metrics.AUC(),
    metrics.Precision(), metrics.Recall()
])
print(model.summary())
print("\nCompiling Done!")
print("_____________\nTraining!__________________")
# NOTE(review): the ModelCheckpoint call is cut off in this chunk; also
# verify the keyword `callback` (vs Keras' usual `callbacks`) matches the
# custom DKTModel.fit signature.
model.fit(dataset=train_set, epochs=50, verbose=1, validation_data=val_set,
          callback=[tf.keras.callbacks.CSVLogger(CSV_Log),
                    tf.keras.callbacks.ModelCheckpoint(model_path,
def main(args):
    """For each checkpoint, run the model over its validation set, record
    metric scores, and save montage images combining the input, the
    target-vs-input canny-edge pair, and the prediction-vs-target edge pair.

    Raises:
        ValueError: if ``args.cuda`` is set but no GPU is available.
    """
    if args.cuda and not torch.cuda.is_available():
        raise ValueError("GPUs are not available, please run at cpu mode")
    evaluators = [
        metrics.OAAcc(), metrics.Precision(), metrics.Recall(),
        metrics.F1Score(), metrics.Kappa(), metrics.Jaccard()
    ]
    for checkpoint in args.checkpoints:
        print("Handling by {} ...\r".format(checkpoint))
        Save_DIR = os.path.join(Result_DIR, 'single',
                                checkpoint.split("_")[0])
        if not os.path.exists(Save_DIR):
            os.makedirs(Save_DIR)
        # initialize datasets (dataset name is encoded in the checkpoint id)
        infos = checkpoint.split('_')[0].split('-')
        _, valset = load_dataset(infos[2], "IM")
        print("Testing with {}-Dataset: {} examples".format(
            infos[2], len(valset)))
        # Load checkpoint
        model, is_multi = load_checkpoint(checkpoint, args.cuda)
        # load data one sample at a time
        data_loader = DataLoader(
            valset,
            1,
            num_workers=4,
            shuffle=False,
            pin_memory=True,
        )
        performs = [[] for _ in range(len(evaluators))]
        imgsets = []
        with torch.set_grad_enabled(False):
            for idx, sample in enumerate(data_loader):
                # get tensors from sample
                x = sample["src"]
                y = sample["tar"]
                if args.cuda:
                    x = x.cuda()
                    y = y.cuda()
                if is_multi:
                    gen_y = model(x)[0]
                else:
                    gen_y = model(x)
                # get performance
                for i, evaluator in enumerate(evaluators):
                    performs[i].append(
                        evaluator(gen_y.detach(), y.detach())[0].item())
                if args.cuda:
                    x = x.detach().cpu()
                    # BUG FIX: was `y = x.detach().cpu()`, which replaced
                    # the target with the input before visualization.
                    y = y.detach().cpu()
                    gen_y = gen_y.detach().cpu()
                # CHW -> HWC for image conversion
                x = x.numpy()[0].transpose((1, 2, 0))
                y = y.numpy()[0].transpose((1, 2, 0))
                gen_y = gen_y.numpy()[0].transpose((1, 2, 0))
                x_img = valset._src2img(x, whitespace=False)
                y_img = valset._tar2img(y, whitespace=False)
                gen_img = valset._tar2img(gen_y, whitespace=False)
                canny_x = vision.canny_edge(x_img)
                canny_y = vision.canny_edge(y_img)
                canny_gen = vision.canny_edge(gen_img)
                # mask_pair = vision.pair_to_rgb(gen_img, y_img, args.color)
                canny_pair = vision.pair_to_rgb(canny_y, canny_x, args.color,
                                                use_dilation=True,
                                                disk_value=args.disk)
                edge_pair = vision.pair_to_rgb(canny_gen, canny_y, args.color,
                                               use_dilation=True,
                                               disk_value=args.disk)
                imgsets.append([
                    vision.add_barrier(x_img, args.spaces),
                    vision.add_barrier(canny_pair, args.spaces),
                    # vision.add_barrier(mask_pair, args.spaces),
                    vision.add_barrier(edge_pair, args.spaces),
                ])
                # Stop once enough samples are collected for the montage.
                if len(imgsets) >= args.disp_cols * args.gen_nb:
                    break
        # visualization: one montage image per gen_nb, disp_cols wide
        for i in range(args.gen_nb):
            imgset = []
            for j in range(args.disp_cols):
                imgset.append(
                    np.concatenate(imgsets[i * args.disp_cols + j], axis=0))
            vis_img = np.concatenate(imgset, axis=1)
            name = "{}_canny_segmap_edge_{}.png".format(
                checkpoint.split('_')[0], i)
            imsave(os.path.join(Save_DIR, name),
                   vision.add_barrier(vis_img, args.spaces))
            print("Saving {} ...".format(name))
def main():
    """Train an expression-based network with cross validation and save
    per-split models plus aggregated train/test histories to CSV.
    """
    # ----- choose data, loss and metrics based on config.args.data -----
    if config.args.data == 'brca':
        # BRCA subtype classification (PAM50 labels).
        rna = RnaData.predicted_data(config.brca_cli, config.brca_rna,
                                     {'PAM50Call_RNAseq': 'pam50'})
        rna.transform(tf.LabelMapper(config.brca_label_mapper))
        out_shape = len(config.brca_label_mapper)
        criterion = nn.CrossEntropyLoss()
        scorings = (mm.Loss(), mm.Accuracy(), mm.BalancedAccuracy(),
                    mm.F1Score(average='macro'), mm.Precision(average='macro'),
                    mm.Recall(average='macro'), mm.ROCAUC(average='macro'))
    elif config.args.data == 'survival':
        # Pan-cancer survival; cached dataset is reused if present.
        if os.path.exists('./DATA/temp_pan.pth'):
            rna = RnaData.load('./DATA/temp_pan.pth')
        else:
            rna = RnaData.survival_data(config.pan_cli, config.pan_rna,
                                        '_OS_IND', '_OS')
        out_shape = 1
        if config.args.loss_type == 'cox':
            criterion = NegativeLogLikelihood()
        elif config.args.loss_type == 'svm':
            criterion = SvmLoss(rank_ratio=config.args.svm_rankratio)
        scorings = (mm.Loss(), mm.CIndex())
    # NOTE(review): any other config.args.data value leaves `rna`,
    # `criterion` and `scorings` unbound — presumably argparse restricts
    # the choices; confirm.
    # Feature filtering: drop mostly-zero, low-mean, low-std columns,
    # then normalize.
    rna.transform(tf.ZeroFilterCol(0.8))
    rna.transform(tf.MeanFilterCol(1))
    rna.transform(tf.StdFilterCol(0.5))
    norm = tf.Normalization()
    rna.transform(norm)
    # ----- build network and optimizer -----
    inpt_shape = rna.X.shape[1]
    if config.args.net_type == 'mlp':
        net = MLP(inpt_shape, out_shape, config.args.hidden_num,
                  config.args.block_num).cuda()
    elif config.args.net_type == 'atten':
        net = SelfAttentionNet(inpt_shape, out_shape, config.args.hidden_num,
                               config.args.bottle_num, config.args.block_num,
                               config.args.no_res, config.act,
                               config.args.no_head, config.args.no_bottle,
                               config.args.no_atten,
                               config.args.dropout_rate).cuda()
    elif config.args.net_type == 'resnet':
        net = ResidualNet(inpt_shape, out_shape, config.args.hidden_num,
                          config.args.bottle_num,
                          config.args.block_num).cuda()
    # ----- train the network with cross validation -----
    split_iterator = rna.split_cv(config.args.test_size,
                                  config.args.cross_valid)
    train_hists = []
    test_hists = []
    for split_index, (train_rna, test_rna) in enumerate(split_iterator):
        print('##### save: %s, split: %d #####' %
              (config.args.save, split_index))
        # Carve a validation subset out of train for early stopping.
        train_rna, valid_rna = train_rna.split(0.1)
        dats = {
            'train': train_rna.to_torchdat(),
            'valid': valid_rna.to_torchdat(),
        }
        dataloaders = {
            k: data.DataLoader(v, batch_size=config.args.batch_size)
            for k, v in dats.items()
        }
        test_dataloader = data.DataLoader(test_rna.to_torchdat(),
                                          batch_size=config.args.batch_size)
        # Reset parameters before each split so earlier training does not
        # leak into this fold.
        net.reset_parameters()
        # train
        optimizer = optim.Adamax(net.parameters(),
                                 lr=config.args.learning_rate)
        lrs = config.lrs(optimizer)
        net, hist = train(
            net, criterion, optimizer, dataloaders,
            epoch=config.args.epoch, metrics=scorings, l2=config.args.l2,
            standard_metric_index=config.args.standard_metric_index,
            scheduler=lrs)
        # test
        test_res = evaluate(net, criterion, test_dataloader,
                            metrics=scorings)
        # Collect per-split results into one DataFrame/list.
        hist = pd.DataFrame(hist)
        hist['split_index'] = split_index
        train_hists.append(hist)
        test_res['split_index'] = split_index
        test_hists.append(test_res)
        # Each split's model is saved to its own file.
        torch.save(net.state_dict(),
                   os.path.join(config.save_dir,
                                'model%d.pth' % split_index))
    # Save aggregated train results.
    train_hists = pd.concat(train_hists)
    train_hists.to_csv(os.path.join(config.save_dir, 'train.csv'))
    # Save aggregated test results.
    test_hists = pd.DataFrame(test_hists)
    test_hists.to_csv(os.path.join(config.save_dir, 'test.csv'))