def predict():
    """Run the saved cancer-localization transformer over the whole cancer
    test set and dump per-gene predictions to ``cancer.txt`` (CSV columns:
    gene, gt, pd)."""
    test_data = cancerloader.load_cancer_data()
    # Allow loading a pickled model whose source has drifted from the
    # current class definition.
    torch.nn.Module.dump_patches = True

    model_name = "ubploss/transformer_res18-128_size0_autoloss_alpha16"
    model_dir = os.path.join("./modeldir/%s" % model_name)
    model_pth = os.path.join(model_dir, "model.pth")

    model = torch.load(model_pth).cuda()

    model.eval()
    with torch.no_grad():
        all_gene = []
        all_gt = []
        all_pd = []
        # Single batch spanning the entire test set, so the loop runs once.
        for item in cancerloader.batch_fv(test_data, len(test_data)):
            genes, nimgs, labels, timesteps = item

            inputs = torch.from_numpy(nimgs).type(torch.cuda.FloatTensor)
            pred = model(inputs)
            # Threshold soft outputs into hard multi-label decisions.
            test_pd = torch_util.threshold_tensor_batch(pred)

            all_gene.extend(genes)
            # FIX: np.int was deprecated in NumPy 1.20 and removed in 1.24;
            # the builtin int keeps the same behavior.
            all_gt.extend(labels.astype(int))
            # FIX: np.array() cannot convert a CUDA tensor ("can't convert
            # cuda tensor to numpy") -- move it to host memory explicitly.
            all_pd.extend(test_pd.data.cpu().numpy())

    df = pd.DataFrame({'gene': all_gene, 'gt': all_gt, 'pd': all_pd})
    df.to_csv("cancer.txt", header=True, index=False, sep=',')
Exemple #2
0
def run_val(rnn, val_data, writer, val_step, criterion):
    """Validate ``rnn`` on the full validation set in one batch.

    Logs the validation loss and wall-clock time to ``writer`` at
    ``val_step`` and returns ``(loss_value, lab_f1_macro)``.
    """
    print("------run val-----------", val_step)
    rnn.eval()
    with torch.no_grad():
        st = time.time()

        # for item in fvloader.batch_fv(val_data, len(val_data)):
        # One batch covering the whole validation set, so the body runs
        # once; ``out_pack``, ``s_labels`` and ``idx`` deliberately leak
        # out of the loop and are consumed below.
        for item in matloader.batch_fv(val_data, len(val_data)):
            hidden = None
            genes, nimgs, labels, timesteps = item
            # Sort samples by decreasing timestep count -- presumably
            # required by a packed-sequence RNN; TODO confirm with model.
            idx = np.argsort(np.array(-timesteps))
            s_nimgs = torch.from_numpy(np.stack(nimgs[idx])).type(
                torch.cuda.FloatTensor)
            s_labels = torch.from_numpy(labels[idx]).type(
                torch.cuda.FloatTensor)
            s_timesteps = timesteps[idx]
            out_pack, hidden = rnn(s_nimgs, s_timesteps)

        loss = criterion(out_pack, s_labels)
        writer.add_scalar("val loss", loss.item(), val_step)

        # Threshold soft predictions into hard multi-label decisions
        # before computing the F1 metrics.
        val_pd = torch_util.threshold_tensor_batch(out_pack)
        np_pd = val_pd.data.cpu().numpy()
        lab_f1_macro = torch_util.torch_metrics(labels[idx], np_pd, writer,
                                                val_step)

        et = time.time()
        writer.add_scalar("val time", et - st, val_step)
        return loss.item(), lab_f1_macro
Exemple #3
0
def run_val(model, val_dataset, writer, val_step, criterion, dcfg):
    """Compute the mean validation loss over ``val_dataset``.

    Builds a DataLoader from the ``dcfg`` settings (batch size, workers,
    collate function), logs the averaged loss and the thresholded-
    prediction metrics to ``writer`` at ``val_step``, and returns the
    mean loss as a plain float.
    """
    model.eval()
    with torch.no_grad():
        loader = DataLoader(val_dataset,
                            batch_size=dcfg['bsize'],
                            shuffle=False,
                            num_workers=dcfg['nworker'],
                            collate_fn=dcfg['collate'])
        tot_loss = 0.0

        gt_parts = []
        pd_parts = []
        for img, label in loader:
            inputs = img.type(torch.cuda.FloatTensor)
            target = label.type(torch.cuda.FloatTensor)
            output = model(inputs)
            tot_loss += criterion(output, target)

            # Collect hard predictions and ground truth per batch.
            hard_pd = torch_util.threshold_tensor_batch(output)
            pd_parts.append(hard_pd.data.cpu().numpy())
            gt_parts.append(target.data.cpu().numpy())

        np_label = np.concatenate(gt_parts)
        np_pd = np.concatenate(pd_parts)

        # Average the summed batch losses over the number of batches.
        tot_loss = tot_loss / len(loader)
        writer.add_scalar("val loss", tot_loss.item(), val_step)
        torch_util.torch_metrics(np_label, np_pd, writer, val_step)

        return tot_loss.item()
Exemple #4
0
def run_val(model, dloader, val_data, writer, val_step, criterion):
    """Validate ``model`` on the whole validation set in one batch.

    Logs per-label losses, the mean loss, F1 metrics and timing to
    ``writer`` at ``val_step``; returns ``(loss_value, lab_f1_macro)``.
    """
    print("------run val-----------", val_step)
    model.eval()
    with torch.no_grad():
        start = time.time()

        # Single full-set batch; ``loss`` and ``lab_f1_macro`` leak out
        # of the loop and are returned below.
        for batch in dloader.batch_fv(val_data, len(val_data)):
            genes, nimgs, labels, timesteps = batch

            feats = torch.from_numpy(nimgs).type(torch.cuda.FloatTensor)
            target = torch.from_numpy(labels).type(torch.cuda.FloatTensor)
            output = model(feats)

            # Element-wise loss, reduced per label, then to one scalar.
            elem_loss = criterion(output, target)
            label_loss = elem_loss.mean(dim=0)
            loss = label_loss.mean()

            for label_idx in range(6):
                writer.add_scalar("val sl_%d_loss" % label_idx,
                                  label_loss[label_idx].item(), val_step)
            writer.add_scalar("val loss", loss.item(), val_step)

            hard_pd = torch_util.threshold_tensor_batch(output)
            lab_f1_macro = torch_util.torch_metrics(
                labels, hard_pd.data.cpu().numpy(), writer, val_step)

        writer.add_scalar("val time", time.time() - start, val_step)
        return loss.item(), lab_f1_macro
Exemple #5
0
def bag_mislocation():
    """Print side-by-side normal-tissue vs cancer-tissue predictions for a
    fixed list of genes, skipping genes whose feature files are missing."""
    genes = [
        'ENSG00000128342', 'ENSG00000023734', 'ENSG00000105705',
        'ENSG00000111011', 'ENSG00000116863', 'ENSG00000197265',
        'ENSG00000160208', 'ENSG00000159352', 'ENSG00000115539',
        'ENSG00000163946', 'ENSG00000116161'
    ]

    # Tolerate source drift in the pickled model class.
    torch.nn.Module.dump_patches = True
    model_name = "ubploss/transformer_res18-128_size0_autoloss_alpha16"
    model_dir = os.path.join("./modeldir/%s" % model_name)
    model = torch.load(os.path.join(model_dir, "model.pth")).cuda()

    cfv_dir = '/ndata/longwei/hpa/cancerfv_4tissue'
    nfv_dir = '/ndata/longwei/hpa/tissuefv/res18_128'

    model.eval()
    with torch.no_grad():
        for gene in genes:
            normal_path = os.path.join(nfv_dir, '%s.npy' % gene)
            cancer_path = os.path.join(cfv_dir, '%s.npy' % gene)
            if not os.path.exists(normal_path):
                print("normal fv for %s not exists" % gene)
                continue
            if not os.path.exists(cancer_path):
                print("cancer fv for %s not exists" % gene)
                continue

            # Normal-tissue feature vector -> single-sample batch.
            normal_in = torch.from_numpy(
                np.expand_dims(np.load(normal_path), axis=0)).type(
                    torch.cuda.FloatTensor)
            normal_hard = torch_util.threshold_tensor_batch(model(normal_in))

            time.sleep(1)
            # Cancer-tissue feature vector -> single-sample batch.
            cancer_in = torch.from_numpy(
                np.expand_dims(np.load(cancer_path), axis=0)).type(
                    torch.cuda.FloatTensor)
            cancer_hard = torch_util.threshold_tensor_batch(model(cancer_in))

            print("gene", gene)
            print("normal pd", normal_hard.data.cpu().numpy())
            print("cancer pd", cancer_hard.data.cpu().numpy())
Exemple #6
0
def run_test(model, dloader, test_data, result):
    """Score ``model`` on the whole test set in one batch and write the
    resulting metrics to ``result`` via ``npmetrics.write_metrics``."""
    model.eval()
    with torch.no_grad():
        # One batch spanning the entire test set.
        for batch in dloader.batch_fv(test_data, len(test_data)):
            genes, nimgs, labels, timesteps = batch

            feats = torch.from_numpy(nimgs).type(torch.cuda.FloatTensor)
            hard_pd = torch_util.threshold_tensor_batch(model(feats))
            npmetrics.write_metrics(labels, hard_pd.data.cpu().numpy(),
                                    result)
Exemple #7
0
def run_val(model, dloader, val_data, writer, val_step, criterion):
    """Validate ``model`` sample-by-sample (batch size 1).

    Accumulates ground truth and raw predictions across the whole
    validation set, logs per-label and mean losses plus timing, and
    returns ``(loss_value, lab_f1_macro, auc, micro_f1, macro_f1)``.
    """
    print("------run val-----------", val_step)
    model.eval()
    with torch.no_grad():
        start = time.time()
        # loss = 0
        # lab_f1_macro = 0
        n_samples = 0
        # Seed tensors with one dummy zero row (10 labels) so torch.cat
        # always has something to append to; stripped off again below.
        gt = torch.from_numpy(np.zeros((1, 10))).type(torch.cuda.FloatTensor)
        preds = torch.from_numpy(np.zeros((1, 10))).type(
            torch.cuda.FloatTensor)

        for batch in dloader.batch_fv(val_data, batch=1):
            n_samples += 1
            genes, nimgs, labels, timesteps = batch

            feats = torch.from_numpy(nimgs).type(torch.cuda.FloatTensor)
            gt = torch.cat(
                (gt, torch.from_numpy(labels).type(torch.cuda.FloatTensor)))
            preds = torch.cat((preds, model(feats)))

        # Drop the dummy seed row before computing anything.
        gt = gt[1:, :]
        preds = preds[1:, :]
        all_loss = criterion(preds, gt)
        label_loss = all_loss.mean(dim=0)
        loss = label_loss.mean()

        for i in range(num_class):
            writer.add_scalar("val sl_%d_loss" % i, label_loss[i].item(),
                              val_step)
        writer.add_scalar("val loss", loss.item(), val_step)

        # Hard multi-label decisions from the soft predictions.
        hard_pd = torch_util.threshold_tensor_batch(preds)
        np_pd = hard_pd.data.cpu().numpy()

        gt_np = gt.cpu().numpy()
        lab_f1_macro = torch_util.torch_metrics(gt_np, np_pd,
                                                writer, val_step)
        auc = evaluate.auc(gt_np, preds.cpu().numpy())
        mif1 = evaluate.micro_f1(gt_np, np_pd)
        maf1 = evaluate.macro_f1(gt_np, np_pd)

        writer.add_scalar("val time", time.time() - start, val_step)
        return loss.item(), lab_f1_macro, auc, mif1, maf1
Exemple #8
0
def run_test(rnn, test_data, result):
    """Score the RNN on the whole test set in one batch and write metrics
    to ``result`` via ``npmetrics.write_metrics``."""
    rnn.eval()
    with torch.no_grad():
        # for item in fvloader.batch_fv(test_data, len(test_data)):
        # Single batch spanning the whole test set; ``out_pack``,
        # ``labels`` and ``idx`` leak out of the loop for use below.
        for item in matloader.batch_fv(test_data, len(test_data)):
            genes, nimgs, labels, timesteps = item
            # Sort by decreasing timestep count -- presumably required by
            # a packed-sequence RNN; TODO confirm with the model.
            idx = np.argsort(np.array(-timesteps))

            s_nimgs = torch.from_numpy(np.stack(nimgs[idx])).type(
                torch.cuda.FloatTensor)
            s_timesteps = timesteps[idx]
            out_pack, hidden = rnn(s_nimgs, s_timesteps)

        test_pd = torch_util.threshold_tensor_batch(out_pack)
        # FIX: np.array() raises on a CUDA tensor ("can't convert cuda
        # tensor to numpy") -- move it to host memory explicitly, as the
        # sibling run_test/run_val functions in this file do.
        npmetrics.write_metrics(labels[idx], test_pd.data.cpu().numpy(),
                                result)
Exemple #9
0
def predict_cancer_gene(model, gene):
    """Predict localization labels for ``gene`` from its cancer-tissue
    feature vector.

    Returns the thresholded prediction array, or ``[0] * 6`` when the
    feature file is missing.
    """
    cfv_dir = '/ndata/longwei/hpa/cancerfv_4tissue'
    fv_path = os.path.join(cfv_dir, '%s.npy' % gene)
    if not os.path.exists(fv_path):
        print("cancer fv for %s not exists" % gene)
        return [0] * 6

    model.eval()
    with torch.no_grad():
        # Wrap the stored feature vector as a single-sample batch.
        batch = torch.from_numpy(
            np.expand_dims(np.load(fv_path), axis=0)).type(
                torch.cuda.FloatTensor)
        hard_pd = torch_util.threshold_tensor_batch(model(batch))

        return hard_pd.data.cpu().numpy()
Exemple #10
0
def predict_normal_gene(model, gene):
    """Predict localization labels for ``gene`` from its normal-tissue
    feature vector.

    Returns the thresholded prediction array, or ``[0] * 6`` when the
    feature file is missing.
    """
    nfv_dir = '/ndata/longwei/hpa/tissuefv/res18_128'
    fv_path = os.path.join(nfv_dir, '%s.npy' % gene)
    if not os.path.exists(fv_path):
        print("normal fv for %s not exists" % gene)
        return [0] * 6

    model.eval()
    with torch.no_grad():
        # Wrap the stored feature vector as a single-sample batch.
        batch = torch.from_numpy(
            np.expand_dims(np.load(fv_path), axis=0)).type(
                torch.cuda.FloatTensor)
        hard_pd = torch_util.threshold_tensor_batch(model(batch))

        time.sleep(1)

        return hard_pd.data.cpu().numpy()
Exemple #11
0
def run_test(model, test_dataset, result, dcfg):
    """Score ``model`` over ``test_dataset`` batch-by-batch and write the
    aggregated metrics to ``result`` via ``npmetrics.write_metrics``."""
    model.eval()
    with torch.no_grad():
        loader = DataLoader(test_dataset,
                            batch_size=dcfg['bsize'],
                            shuffle=False,
                            num_workers=dcfg['nworker'],
                            collate_fn=dcfg['collate'])
        gt_parts = []
        pd_parts = []
        for img, label in loader:
            inputs = img.type(torch.cuda.FloatTensor)
            target = label.type(torch.cuda.FloatTensor)
            hard_pd = torch_util.threshold_tensor_batch(model(inputs))
            pd_parts.append(hard_pd.data.cpu().numpy())
            gt_parts.append(target.data.cpu().numpy())

        # Stitch the per-batch arrays back into full-set arrays.
        npmetrics.write_metrics(np.concatenate(gt_parts),
                                np.concatenate(pd_parts), result)
Exemple #12
0
def run_test(model, dloader, test_data, result):
    """Score ``model`` sample-by-sample (batch size 1) and write the
    aggregated metrics to ``result`` via ``npmetrics.write_metrics``."""
    model.eval()
    with torch.no_grad():
        # Accumulate per-sample arrays and concatenate once at the end,
        # instead of growing an array seeded with a dummy [0]*10 row and
        # repeatedly calling np.concatenate (which is O(n^2) overall).
        gt_parts = []
        pd_parts = []

        for item in dloader.batch_fv(test_data, batch=1):
            genes, nimgs, labels, timesteps = item

            inputs = torch.from_numpy(nimgs).type(torch.cuda.FloatTensor)
            apd = model(inputs)
            # Hard multi-label decisions from the soft predictions.
            test_pd = torch_util.threshold_tensor_batch(apd)

            gt_parts.append(labels)
            pd_parts.append(test_pd.data.cpu().numpy())

        if gt_parts:
            gt = np.concatenate(gt_parts)
            pd = np.concatenate(pd_parts)
        else:
            # Empty test set: keep the original empty (0, 10) shape.
            gt = np.zeros((0, 10))
            pd = np.zeros((0, 10))
        npmetrics.write_metrics(gt, pd, result)
Exemple #13
0
def run_origin_train(model, dloader, imbtrain_data, writer, step, criterion):
    """Score the model on the original (imbalanced) training data.

    Runs one full-set batch in eval mode, logs per-label and mean
    losses plus metrics under the "origin" tag, and returns the mean
    loss as a float.
    """
    print("------run origin imblance train data-----------", step)
    model.eval()
    with torch.no_grad():
        start = time.time()

        # Single full-set batch; ``loss`` leaks out of the loop and is
        # returned below.
        for batch in dloader.batch_fv(imbtrain_data, len(imbtrain_data)):
            genes, nimgs, labels, timesteps = batch

            feats = torch.from_numpy(nimgs).type(torch.cuda.FloatTensor)
            target = torch.from_numpy(labels).type(torch.cuda.FloatTensor)
            output = model(feats)

            # Element-wise loss, reduced per label, then to one scalar.
            elem_loss = criterion(output, target)
            label_loss = elem_loss.mean(dim=0)
            loss = label_loss.mean()

            for label_idx in range(num_class):
                writer.add_scalar("origin sl_%d_loss" % label_idx,
                                  label_loss[label_idx].item(), step)
            writer.add_scalar("origin loss", loss.item(), step)

            hard_pd = torch_util.threshold_tensor_batch(output)
            torch_util.torch_metrics(labels,
                                     hard_pd.data.cpu().numpy(),
                                     writer,
                                     step,
                                     mode="origin")

        writer.add_scalar("origin time", time.time() - start, step)
        return loss.item()
Exemple #14
0
def train(fv, model_name, criterion, balance=False, batchsize=64, size=0):
    """Train a Transformer on pre-extracted feature vectors.

    Args:
        fv: feature source; "matlab" selects ``matloader``, anything
            else selects ``fvloader``.
        model_name: subdirectory under ./modeldir for checkpoints and
            tensorboard logs.
        criterion: element-wise loss; the code reduces it over batch
            and labels itself, so reduction is presumably 'none' --
            TODO confirm with the caller.
        balance: forwarded to the training-data loader.
        batchsize: training mini-batch size.
        size: dataset size selector forwarded to the loaders.
    """
    if fv == "matlab":
        dloader = matloader
    else:
        dloader = fvloader

    train_data = dloader.load_train_data(size=size, balance=balance, fv=fv)
    val_data = dloader.load_val_data(size=size, fv=fv)
    test_data = dloader.load_test_data(size=size, fv=fv)
    # model_name = "transformer_%s_size%d_bce" % (fv, size)
    model_dir = os.path.join("./modeldir/%s" % model_name)
    model_pth = os.path.join(model_dir, "model.pth")

    writer = tensorboardX.SummaryWriter(model_dir)

    # Resume from an existing checkpoint when one is present.
    if os.path.exists(model_pth):
        print("------load model--------")
        model = torch.load(model_pth)
    else:
        # model = Transformer(fv, NUM_HEADS=4, NUM_LAYERS=3).cuda()
        model = Transformer(fv).cuda()
    model = nn.DataParallel(model)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=0.0001, weight_decay=0.001)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    #         optimizer, factor=0.5,
    #         patience=30, min_lr=1e-4)

    epochs = 2000
    step = 1
    val_step = 1
    max_f1 = 0.0

    for e in range(epochs):
        model.train()
        print("------epoch--------", e)
        st = time.time()

        # NOTE(review): uses ``fvloader`` directly here instead of the
        # ``dloader`` selected above -- looks wrong when fv == "matlab";
        # confirm intent.
        train_shuffle = fvloader.shuffle(train_data)
        for item in fvloader.batch_fv(train_shuffle, batch=batchsize):

            # for name, param in model.named_parameters():
            #     writer.add_histogram(
            #         name, param.clone().cpu().data.numpy(), step)

            # writer.add_histogram(
            #     "grad/"+name, param.grad.clone().cpu().data.numpy(), step)
            model.zero_grad()

            genes, nimgs, labels, timesteps = item
            inputs = torch.from_numpy(nimgs).type(torch.cuda.FloatTensor)

            gt = torch.from_numpy(labels).type(torch.cuda.FloatTensor)
            pd = model(inputs)

            # Element-wise loss, reduced per label and then to a scalar.
            # loss = criterion(pd, gt)
            all_loss = criterion(pd, gt)
            label_loss = torch.mean(all_loss, dim=0)
            loss = torch.mean(label_loss)
            # for i in range(6):
            #     writer.add_scalar("train sl_%d_loss" % i,
            #                       label_loss[i].item(), step)

            # Training-set metrics on thresholded (hard) predictions.
            train_pd = torch_util.threshold_tensor_batch(pd)
            np_pd = train_pd.data.cpu().numpy()
            torch_util.torch_metrics(
                labels, np_pd, writer, step, mode="train")

            writer.add_scalar("train loss", loss, step)
            loss.backward()
            optimizer.step()
            step += 1

        et = time.time()
        writer.add_scalar("train time", et - st, e)
        for param_group in optimizer.param_groups:
            writer.add_scalar("lr", param_group['lr'], e)

        # run_origin_train(model, imbtrain_data, writer, e, criterion)

        # Validate every epoch; checkpoint on improvement.
        if e % 1 == 0:
            val_loss, val_f1 = run_val(
                model, dloader, val_data, writer, val_step, criterion)
            # scheduler.step(val_loss)
            val_step += 1
            # Seed the running minimum with the first epoch's loss.
            if e == 0:
                start_loss = val_loss
                min_loss = start_loss

            # if val_loss > 2 * min_loss:
            #     print("early stopping at %d" % e)
            #     break
            # if e % 50 == 0:
            #     pt = os.path.join(model_dir, "%d.pt" % e)
            #     torch.save(model.state_dict(), pt)
            #     result = os.path.join(model_dir, "result_epoch%d.txt" % e)
            #     run_test(model, test_data, result)

            # Save (and run the test set) whenever either the loss or
            # the macro F1 improves on its best value so far.
            if min_loss > val_loss or max_f1 < val_f1:
                if min_loss > val_loss:
                    print("---------save best----------", "loss", val_loss)
                    min_loss = val_loss
                if max_f1 < val_f1:
                    print("---------save best----------", "f1", val_f1)
                    max_f1 = val_f1
                torch.save(model, model_pth)
                result = os.path.join(model_dir, "result_epoch%d.txt" % e)
                run_test(model, dloader, test_data, result)