def input():
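    """Flask view for the contract lookup page.

    GET renders the empty form; POST queries the top-10 contracts for the
    selected set-aside code (optionally filtered by PSC description) and
    reports the model's accuracy, precision, recall, and F1 scores.
    """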
    contracts = ''
    psc_codes = db.session.query(consolidated_data2.product_or_service_code_description).order_by(consolidated_data2.product_or_service_code_description).distinct()
    set_asides = db.session.query(consolidated_data2.type_of_set_aside_code).order_by(consolidated_data2.type_of_set_aside_code).distinct()

    # Accuracy, Precision, Recall, and F1 scores

    # Query and return contracts when the request is a POST
    if request.method == "POST":
        queryString = request.form.get('set_aside')
        psc_filter = request.form['psc_code']
        
        AccuracyScore = metrics.Accuracy(queryString)
        PrecisionScore = metrics.Precision(queryString)
        RecallScore = metrics.Recall(queryString)
        F1Score = metrics.F1(queryString)

        if psc_filter == "All":
            contracts = consolidated_data2.query.filter_by(type_of_set_aside_code=queryString).order_by(consolidated_data2.base_and_exercised_options_value.desc()).limit(10)
        else:
            contracts = consolidated_data2.query.filter_by(type_of_set_aside_code=queryString, product_or_service_code_description=psc_filter).order_by(consolidated_data2.base_and_exercised_options_value.desc()).limit(10)

        # Log the set aside, psc code, and the query to the server. 
        print(queryString + " : " + psc_filter)
        print("return value: "+ str(contracts))
        return render_template("input.html", accuracy="90.27", psc_codes=psc_codes, contracts=contracts, set_asides=set_asides,
                              AccuracyScore=AccuracyScore, PrecisionScore=PrecisionScore, RecallScore=RecallScore,
                               F1Score=F1Score, set_aside=queryString, psc_filter=psc_filter)
    
    # If not a post request, just load the input page. 
    else:
        return render_template("input.html", psc_codes=psc_codes, contracts='', set_asides=set_asides,
                              AccuracyScore=0, PrecisionScore=0, RecallScore=0, F1Score=0, set_aside="None", psc_filter="All")
Example #2
def default_classical_scorings(task="predict"):
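    """Return the default tuple of scoring metrics (tensor=False variants).

    For task='predict' these are accuracy, balanced accuracy, and
    macro-averaged F1, precision, recall, and ROC-AUC; for any other task a
    single concordance index (CIndex) is returned.
    """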
    if task == 'predict':
        scorings = (mm.Accuracy(tensor=False),
                    mm.BalancedAccuracy(tensor=False),
                    mm.F1Score(average='macro', tensor=False),
                    mm.Precision(average='macro', tensor=False),
                    mm.Recall(average='macro', tensor=False),
                    mm.ROCAUC(average='macro', tensor=False))
    else:
        scorings = (mm.CIndex(tensor=False, hazard=True), )
    return scorings
Example #3
def main(args):
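    """Evaluate every checkpoint patch-by-patch on the tiled dataset and
    append the averaged metric scores to Result_DIR/patchPerforms.csv."""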
    if args.cuda and not torch.cuda.is_available():
        raise ValueError("GPUs are not available; please run in CPU mode")
    # init
    data = tileData(args.root, args.img_rows, args.img_cols)
    evaluators = [
        metrics.OAAcc(),
        metrics.Precision(),
        metrics.Recall(),
        metrics.F1Score(),
        metrics.Kappa(),
        metrics.Jaccard()
    ]
    # prediction
    for checkpoint in args.checkpoints:
        model, is_multi = load_checkpoint(checkpoint, args.cuda)
        performs = [[] for i in range(len(evaluators))]
        for idx in range(len(data)):
            print("Handling {} by {} \r".format(data.files[idx], checkpoint))
            x, y, shapes = data.slice_by_id(idx)
            # generate prediction
            with torch.set_grad_enabled(False):
                for step in range(0, x.shape[0], args.batch_size):
                    x_batch = x[step:step + args.batch_size]
                    y_batch = y[step:step + args.batch_size]
                    if args.cuda:
                        x_batch = x_batch.cuda()
                        y_batch = y_batch.cuda()
                    if is_multi:
                        y_pred = model(x_batch)[0].detach()
                    else:
                        y_pred = model(x_batch).detach()
                    # get performance
                    for i, evaluator in enumerate(evaluators):
                        performs[i].append(
                            evaluator(y_pred, y_batch)[0].item())

        performs = [(sum(p) / len(p)) for p in performs]
        performs = pd.DataFrame(
            [[time.strftime("%h_%d"), checkpoint] + performs],
            columns=['time', 'checkpoint'] + [repr(x) for x in evaluators])
        # save performance
        log_path = os.path.join(Result_DIR, "patchPerforms.csv")
        if os.path.exists(log_path):
            perform = pd.read_csv(log_path)
        else:
            perform = pd.DataFrame([])
        # DataFrame.append was removed in pandas 2.x; use pd.concat instead
        perform = pd.concat([perform, performs], ignore_index=True)
        perform.to_csv(log_path, index=False, float_format="%.3f")
Example #4
def main():
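    """Train a multiple-instance-learning CNN (NormalCnn) on the TCT patch
    dataset and save the model weights, instance weighter, training history,
    and test metrics."""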
    warnings.filterwarnings('ignore')

    # config
    parser = argparse.ArgumentParser()
    parser.add_argument('-s',
                        '--save',
                        default='./save',
                        help='Directory to save results; if the name already exists, a "-" suffix is appended to distinguish it')
    parser.add_argument('-is',
                        '--image_size',
                        default=224,
                        type=int,
                        help='Size the patches are resized to; default is 224 x 224')
    parser.add_argument('-vts',
                        '--valid_test_size',
                        default=(0.1, 0.1),
                        type=float,
                        nargs=2,
                        help='Fractions for the validation and test sets; default is 0.1 0.1')
    parser.add_argument('-bs',
                        '--batch_size',
                        default=32,
                        type=int,
                        help='Batch size; default is 32')
    parser.add_argument('-nw',
                        '--num_workers',
                        default=12,
                        type=int,
                        help='Number of worker processes; default is 12')
    parser.add_argument('-lr',
                        '--learning_rate',
                        default=0.0001,
                        type=float,
                        help='Learning rate; default is 0.0001')
    parser.add_argument('-e',
                        '--epoch',
                        default=10,
                        type=int,
                        help='Number of epochs; default is 10')
    parser.add_argument('--reduction',
                        default='mean',
                        help='How to aggregate the instances of a bag; default is mean')
    parser.add_argument('--multipler',
                        default=2.0,
                        type=float,
                        help="Multiplier (>1) applied to the weight to balance positive and negative samples; default is 2.0")
    args = parser.parse_args()
    save = args.save
    image_size = (args.image_size, args.image_size)
    valid_size, test_size = args.valid_test_size
    batch_size = args.batch_size
    num_workers = args.num_workers
    lr = args.learning_rate
    epoch = args.epoch
    reduction = args.reduction
    multipler = args.multipler

    # ----- Load data -----
    neg_dir = './DATA/TCT/negative'
    pos_dir = './DATA/TCT/positive'

    dat = MilData.from2dir(neg_dir, pos_dir)
    train_transfer = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    test_transfer = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    train_dat, valid_dat, test_dat = dat.split_by_bag(
        test_size,
        valid_size,
        train_transfer=train_transfer,
        valid_transfer=test_transfer,
        test_transfer=test_transfer)
    dataloaders = {
        'train':
        data.DataLoader(train_dat,
                        batch_size=batch_size,
                        num_workers=num_workers,
                        shuffle=True),
        'valid':
        data.DataLoader(
            valid_dat,
            batch_size=batch_size,
            num_workers=num_workers,
        ),
        'test':
        data.DataLoader(
            test_dat,
            batch_size=batch_size,
            num_workers=num_workers,
        )
    }

    # ----- Build the network and optimizer -----
    net = NormalCnn()
    criterion = nn.BCELoss(reduction='none')
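    # reduction='none' keeps the per-instance losses, presumably so the trained
    # weighter can apply per-sample weights before they are aggregated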
    optimizer = optim.Adam(net.parameters(), lr=lr)
    scorings = [
        mm.Loss(),
        mm.Recall(reduction=reduction),
        mm.ROCAUC(reduction=reduction),
        mm.BalancedAccuracy(reduction=reduction),
        mm.F1Score(reduction=reduction),
        mm.Precision(reduction=reduction),
        mm.Accuracy(reduction=reduction)
    ]

    # ----- Train the network -----
    try:
        net, hist, weighter = train(net,
                                    criterion,
                                    optimizer,
                                    dataloaders,
                                    epoch=epoch,
                                    metrics=scorings,
                                    weighter_multipler=multipler)

        test_hist = evaluate(net, dataloaders['test'], criterion, scorings)
    except Exception as e:
        import ipdb
        ipdb.set_trace()  # XXX BREAKPOINT

    # Save results
    dirname = check_update_dirname(save)
    torch.save(net.state_dict(), os.path.join(dirname, 'model.pth'))
    torch.save(weighter, os.path.join(dirname, 'weigher.pth'))
    pd.DataFrame(hist).to_csv(os.path.join(dirname, 'train.csv'))
    with open(os.path.join(dirname, 'config.json'), 'w') as f:
        json.dump(args.__dict__, f)
    with open(os.path.join(dirname, 'test.json'), 'w') as f:
        json.dump(test_hist, f)
Example #5
model = l_models.YoloV3(l_config.filters, anchors, l_config.grid_sizes,
                        l_config.class_num)

loca_loss = l_losses.LocationLoss(anchors)
conf_loss = l_losses.ConfidenceLoss()
cate_loss = l_losses.CategoricalLoss()
all_loss = l_losses.AllLoss()

loca_metric = l_metrics.Location()
conf_metric = l_metrics.Confidence()
true_conf_metric = l_metrics.TrueConfidence()
false_conf_metric = l_metrics.FalseConfidence()
cate_metric = l_metrics.Categorical()

precision = l_metrics.Precision()
recall = l_metrics.Recall()

optimizer = tf.keras.optimizers.SGD(1e-3, momentum=0.9)

item = next(iter(train_ds))
pred = model(item[0])
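# Run one batch through the model first; for a subclassed Keras model this
# likely serves to build its variables so load_weights below can restore them.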

# checkpoint = tf.keras.callbacks.ModelCheckpoint(l_config.SAVE_WEIGHT_FILE)
# tensor_board = tf.keras.callbacks.TensorBoard(l_config.BOARD_LOG_DIR, update_freq=10)

model.load_weights(l_config.save_weight_file)


def draw(image, loca, conf, cate, size):
    images = image[tf.newaxis, ...]
Example #6
def run(args):
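    """Train and evaluate DKT models (an LSTM and a plain RNN) for 1-15
    knowledge components, dump per-question predictions for ROC analysis,
    and write out the accuracy histories."""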
    all_lstm_history = dict()
    all_rnn_history = dict()
    for i in range(1, 16):
        dataset, length, nb_features, nb_skills = data_util.load_dataset(
            fn=args.f, batch_size=args.batch_size, shuffle=False, num_kc=i)

        train_set, test_set = data_util.split_dataset(
            dataset=dataset, total_size=length, test_fraction=args.test_split)

        print("\n[----- COMPILING  ------]")
        lstm = deepkt.DKTModel(nb_features=nb_features,
                               nb_skills=nb_skills,
                               hidden_units=args.hidden_units,
                               LSTM=True)
        lstm.compile(optimizer='adam',
                     metrics=[
                         metrics.BinaryAccuracy(),
                         metrics.AUC(),
                         metrics.Precision(),
                         metrics.Recall()
                     ])

        rnn = deepkt.DKTModel(nb_features=nb_features,
                              nb_skills=nb_skills,
                              hidden_units=args.hidden_units)
        rnn.compile(optimizer='adam',
                    metrics=[
                        metrics.BinaryAccuracy(),
                        metrics.AUC(),
                        metrics.Precision(),
                        metrics.Recall()
                    ])

        print(lstm.summary())
        print(rnn.summary())
        print("\n[-- COMPILING DONE  --]")

        print("\n[----- TRAINING ------]")
        lstm_history = lstm.fit(dataset=train_set,
                                epochs=args.epochs,
                                verbose=args.v)

        rnn_history = rnn.fit(dataset=train_set,
                              epochs=args.epochs,
                              verbose=args.v)
        print("\n[--- TRAINING DONE ---]")

        print("\n[----- TESTING  ------]")
        print("Number of KCs: ", i)
        lstm.evaluate(dataset=test_set, verbose=args.v)
        rnn.evaluate(dataset=test_set, verbose=args.v)
        print("\n[--- TESTING DONE  ---]")

        all_lstm_history[i] = lstm_history.history
        all_rnn_history[i] = rnn_history.history

        if i == 15:
            answers = data_util.get_answers(args.f)

            lstm_preds = lstm.get_predictions(test_set)
            rnn_preds = rnn.get_predictions(test_set)

            with open("lstm_roc.csv", 'w') as f:
                writer = csv.DictWriter(f, fieldnames=['y_actual', 'y_pred'])
                writer.writeheader()
                for si in range(len(answers)):
                    student_answers = answers[si]
                    student = lstm_preds[si][0]
                    for j in range(len(student)):
                        question = student_answers[j]
                        skill = question[0]
                        y = question[1]
                        y_pred = student[j][skill]

                        writer.writerow({'y_pred': y_pred, 'y_actual': y})

            with open("rnn_roc.csv", 'w') as f:
                writer = csv.DictWriter(f, fieldnames=['y_actual', 'y_pred'])
                writer.writeheader()
                for si in range(len(answers)):
                    student_answers = answers[si]
                    student = rnn_preds[si][0]
                    for j in range(len(student)):
                        question = student_answers[j]
                        skill = question[0]
                        y = question[1]
                        y_pred = student[j][skill]

                        writer.writerow({'y_pred': y_pred, 'y_actual': y})

    write_accuracy(all_lstm_history, all_rnn_history)
Example #7
def main(args):
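    """Run every checkpoint over whole tiles, save the stitched prediction
    images, and append the averaged metric scores to Result_DIR/areaPerforms.csv."""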
    if args.cuda and not torch.cuda.is_available():
        raise ValueError("GPUs are not available; please run in CPU mode")
    # init
    data = tileData(args.root, args.img_rows, args.img_cols)
    evaluators = [
        metrics.OAAcc(),
        metrics.Precision(),
        metrics.Recall(),
        metrics.F1Score(),
        metrics.Kappa(),
        metrics.Jaccard()
    ]
    # prediction
    for checkpoint in args.checkpoints:
        model, is_multi = load_checkpoint(checkpoint, args.cuda)
        Save_DIR = os.path.join(Result_DIR, "area", checkpoint.split("_")[0])
        if not os.path.exists(Save_DIR):
            os.makedirs(Save_DIR)
        performs = [[] for i in range(len(evaluators))]
        for idx in range(len(data)):
            print("Handling {} by {} \r".format(data.files[idx], checkpoint))
            x, y, shapes = data.slice_by_id(idx)
            # get prediction
            y_pred = []
            with torch.set_grad_enabled(False):
                for step in range(0, x.shape[0], args.batch_size):
                    x_batch = x[step:step + args.batch_size]
                    if args.cuda:
                        x_batch = x_batch.cuda()
                    # generate prediction
                    if is_multi:
                        y_pred.append(model(x_batch)[0].detach())
                    else:
                        y_pred.append(model(x_batch).detach())
            y_pred = torch.cat(y_pred, 0)
            if args.cuda:
                y_pred = y_pred.cpu()
            assert y_pred.shape[0] == x.shape[0], "All data should be iterated."
            del x
            pred_img = vision.slices_to_img(
                vision.ytensor_to_slices(y_pred, data.cmap), shapes)
            # y_img = vision.slices_to_img(
            #     vision.ytensor_to_slices(y, data.cmap), data.shapes)
            # merge slices into image & save result image
            imsave(os.path.join(Save_DIR, data.files[idx]),
                   pred_img,
                   compress=6)
            # N tensor 2 one
            # pred_tensor = vision.tensors_to_tensor(y_pred, shapes)
            # y_tensor = vision.tensors_to_tensor(y, shapes)
            # get performance
            for i, evaluator in enumerate(evaluators):
                # a = evaluator(pred_tensor, y_tensor)[0].item()
                # b = evaluator(y_pred, y)[0].item()
                # print("{} => One : {} ; N : {}".format(repr(evaluator), a, b))
                performs[i].append(evaluator(y_pred, y)[0].item())

        performs = [(sum(p) / len(p)) for p in performs]
        performs = pd.DataFrame(
            [[time.strftime("%h_%d"), checkpoint] + performs],
            columns=['time', 'checkpoint'] + [repr(x) for x in evaluators])
        # save performance
        log_path = os.path.join(Result_DIR, "areaPerforms.csv")
        if os.path.exists(log_path):
            perform = pd.read_csv(log_path)
        else:
            perform = pd.DataFrame([])
        # DataFrame.append was removed in pandas 2.x; use pd.concat instead
        perform = pd.concat([perform, performs], ignore_index=True)
        perform.to_csv(log_path, index=False, float_format="%.3f")
Example #8
                                                       total_size=length,
                                                       test_fraction=0.2,
                                                       val_fraction=0.2)

print('-------compiling---------')
model = dpkt.DKTModel(nb_features=nb_features,
                      nb_skills=nb_skills,
                      hidden_units=128,
                      dropout_rate=0.3)

model.compile(optimizer=optimizer,
              metrics=[
                  metrics.BinaryAccuracy(),
                  metrics.AUC(),
                  metrics.Precision(),
                  metrics.Recall()
              ])

print(model.summary())
print("\nCompiling Done!")

print("_____________\nTraining!__________________")

model.fit(dataset=train_set,
          epochs=50,
          verbose=1,
          validation_data=val_set,
          callback=[
              tf.keras.callbacks.CSVLogger(CSV_Log),
              tf.keras.callbacks.ModelCheckpoint(model_path,
                                                 save_best_only=True,
Example #9
def main(args):
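    """Evaluate each checkpoint on its validation set, collect per-sample
    metrics, and save side-by-side canny-edge visualisations of the input,
    ground truth, and prediction."""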
    if args.cuda and not torch.cuda.is_available():
        raise ValueError("GPUs are not available; please run in CPU mode")

    evaluators = [
        metrics.OAAcc(),
        metrics.Precision(),
        metrics.Recall(),
        metrics.F1Score(),
        metrics.Kappa(),
        metrics.Jaccard()
    ]

    for checkpoint in args.checkpoints:
        print("Handling by {} ...\r".format(checkpoint))
        Save_DIR = os.path.join(Result_DIR, 'single', checkpoint.split("_")[0])
        if not os.path.exists(Save_DIR):
            os.makedirs(Save_DIR)
        # initialize datasets
        infos = checkpoint.split('_')[0].split('-')
        _, valset = load_dataset(infos[2], "IM")
        print("Testing with {}-Dataset: {} examples".format(
            infos[2], len(valset)))
        # Load checkpoint
        model, is_multi = load_checkpoint(checkpoint, args.cuda)
        # load data
        data_loader = DataLoader(
            valset,
            1,
            num_workers=4,
            shuffle=False,
            pin_memory=True,
        )
        performs = [[] for i in range(len(evaluators))]
        imgsets = []
        with torch.set_grad_enabled(False):
            for idx, sample in enumerate(data_loader):
                # get tensors from sample
                x = sample["src"]
                y = sample["tar"]
                if args.cuda:
                    x = x.cuda()
                    y = y.cuda()
                if is_multi:
                    gen_y = model(x)[0]
                else:
                    gen_y = model(x)
                # get performance
                for i, evaluator in enumerate(evaluators):
                    performs[i].append(
                        evaluator(gen_y.detach(), y.detach())[0].item())
                if args.cuda:
                    x = x.detach().cpu()
                    y = y.detach().cpu()
                    gen_y = gen_y.detach().cpu()
                x = x.numpy()[0].transpose((1, 2, 0))
                y = y.numpy()[0].transpose((1, 2, 0))
                gen_y = gen_y.numpy()[0].transpose((1, 2, 0))
                x_img = valset._src2img(x, whitespace=False)
                y_img = valset._tar2img(y, whitespace=False)
                gen_img = valset._tar2img(gen_y, whitespace=False)
                canny_x = vision.canny_edge(x_img)
                canny_y = vision.canny_edge(y_img)
                canny_gen = vision.canny_edge(gen_img)
                # mask_pair = vision.pair_to_rgb(gen_img, y_img, args.color)
                canny_pair = vision.pair_to_rgb(canny_y,
                                                canny_x,
                                                args.color,
                                                use_dilation=True,
                                                disk_value=args.disk)
                edge_pair = vision.pair_to_rgb(canny_gen,
                                               canny_y,
                                               args.color,
                                               use_dilation=True,
                                               disk_value=args.disk)
                imgsets.append([
                    vision.add_barrier(x_img, args.spaces),
                    vision.add_barrier(canny_pair, args.spaces),
                    # vision.add_barrier(mask_pair, args.spaces),
                    vision.add_barrier(edge_pair, args.spaces),
                ])
                if len(imgsets) >= args.disp_cols * args.gen_nb:
                    break
            # visualization
            for i in range(args.gen_nb):
                imgset = []
                for j in range(args.disp_cols):
                    imgset.append(
                        np.concatenate(imgsets[i * args.disp_cols + j],
                                       axis=0))
                vis_img = np.concatenate(imgset, axis=1)
                name = "{}_canny_segmap_edge_{}.png".format(
                    checkpoint.split('_')[0], i)
                imsave(os.path.join(Save_DIR, name),
                       vision.add_barrier(vis_img, args.spaces))
                print("Saving {} ...".format(name))
Example #10
def main():
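    """Cross-validated training of an expression-based model (MLP,
    self-attention, or residual net) for either BRCA subtype classification
    or pan-cancer survival, saving per-split models plus the combined
    train/test histories."""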

    # ----- Load the data, loss, and metrics according to the data argument -----
    if config.args.data == 'brca':
        rna = RnaData.predicted_data(config.brca_cli, config.brca_rna,
                                     {'PAM50Call_RNAseq': 'pam50'})
        rna.transform(tf.LabelMapper(config.brca_label_mapper))
        out_shape = len(config.brca_label_mapper)
        criterion = nn.CrossEntropyLoss()
        scorings = (mm.Loss(), mm.Accuracy(), mm.BalancedAccuracy(),
                    mm.F1Score(average='macro'), mm.Precision(average='macro'),
                    mm.Recall(average='macro'), mm.ROCAUC(average='macro'))
    elif config.args.data == 'survival':
        if os.path.exists('./DATA/temp_pan.pth'):
            rna = RnaData.load('./DATA/temp_pan.pth')
        else:
            rna = RnaData.survival_data(config.pan_cli, config.pan_rna,
                                        '_OS_IND', '_OS')
        out_shape = 1
        if config.args.loss_type == 'cox':
            criterion = NegativeLogLikelihood()
        elif config.args.loss_type == 'svm':
            criterion = SvmLoss(rank_ratio=config.args.svm_rankratio)
        scorings = (mm.Loss(), mm.CIndex())
    rna.transform(tf.ZeroFilterCol(0.8))
    rna.transform(tf.MeanFilterCol(1))
    rna.transform(tf.StdFilterCol(0.5))
    norm = tf.Normalization()
    rna.transform(norm)

    # ----- Build the network and optimizer -----
    inpt_shape = rna.X.shape[1]
    if config.args.net_type == 'mlp':
        net = MLP(inpt_shape, out_shape, config.args.hidden_num,
                  config.args.block_num).cuda()
    elif config.args.net_type == 'atten':
        net = SelfAttentionNet(inpt_shape, out_shape, config.args.hidden_num,
                               config.args.bottle_num, config.args.block_num,
                               config.args.no_res, config.act,
                               config.args.no_head, config.args.no_bottle,
                               config.args.no_atten,
                               config.args.dropout_rate).cuda()
    elif config.args.net_type == 'resnet':
        net = ResidualNet(inpt_shape, out_shape, config.args.hidden_num,
                          config.args.bottle_num,
                          config.args.block_num).cuda()

    # ----- Train the network with cross-validation -----
    split_iterator = rna.split_cv(config.args.test_size,
                                  config.args.cross_valid)
    train_hists = []
    test_hists = []
    for split_index, (train_rna, test_rna) in enumerate(split_iterator):
        print('##### save: %s, split: %d #####' %
              (config.args.save, split_index))
        # Split a validation set off from the training data to decide when to stop
        train_rna, valid_rna = train_rna.split(0.1)
        dats = {
            'train': train_rna.to_torchdat(),
            'valid': valid_rna.to_torchdat(),
        }
        dataloaders = {
            k: data.DataLoader(v, batch_size=config.args.batch_size)
            for k, v in dats.items()
        }
        test_dataloader = data.DataLoader(test_rna.to_torchdat(),
                                          batch_size=config.args.batch_size)
        # Reset the parameters before each training run to avoid carry-over from previous splits
        net.reset_parameters()
        # train
        optimizer = optim.Adamax(net.parameters(),
                                 lr=config.args.learning_rate)
        lrs = config.lrs(optimizer)
        net, hist = train(
            net,
            criterion,
            optimizer,
            dataloaders,
            epoch=config.args.epoch,
            metrics=scorings,
            l2=config.args.l2,
            standard_metric_index=config.args.standard_metric_index,
            scheduler=lrs)
        # test
        test_res = evaluate(net, criterion, test_dataloader, metrics=scorings)
        # Collect the results of all training runs into one DataFrame
        hist = pd.DataFrame(hist)
        hist['split_index'] = split_index
        train_hists.append(hist)
        # Keep the test results of each split
        test_res['split_index'] = split_index
        test_hists.append(test_res)
        # Save the model trained on each split as a separate file
        torch.save(net.state_dict(),
                   os.path.join(config.save_dir, 'model%d.pth' % split_index))
    # Save the training results
    train_hists = pd.concat(train_hists)
    train_hists.to_csv(os.path.join(config.save_dir, 'train.csv'))
    # Save the test results
    test_hists = pd.DataFrame(test_hists)
    test_hists.to_csv(os.path.join(config.save_dir, 'test.csv'))