Example 1
def main():
    use_cuda = args.use_cuda

    train_data = UnlabeledContact(data=args.data_dir)
    print('Number of samples: {}'.format(len(train_data)))
    trainloader = DataLoader(train_data, batch_size=args.batch_size)

    # Contact matrices are 21x21
    input_size = 441
    img_height = 21
    img_width = 21

    vae = AutoEncoder(code_size=20,
                      imgsize=input_size,
                      height=img_height,
                      width=img_width)
    criterion = nn.BCEWithLogitsLoss()

    if use_cuda:
        #vae = nn.DataParallel(vae)
        vae = vae.cuda()  #.half()
        criterion = criterion.cuda()

    optimizer = optim.SGD(vae.parameters(), lr=0.01)

    clock = AverageMeter(name='clock32single', rank=0)
    epoch_loss = 0
    total_loss = 0
    end = time.time()
    for epoch in range(15):
        for batch_idx, data in enumerate(trainloader):
            inputs = data['cont_matrix']
            inputs = inputs.view(-1, 1, 21, 21)  # reshape; resize_ can corrupt a short final batch
            inputs = inputs.float()
            if use_cuda:
                inputs = inputs.cuda()  #.half()
            inputs = Variable(inputs)
            optimizer.zero_grad()
            output, code = vae(inputs)
            loss = criterion(output, inputs)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

            clock.update(time.time() - end)
            end = time.time()

            if batch_idx % args.log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * inputs.size(0), len(trainloader.dataset),
                    100. * batch_idx / len(trainloader), loss.item()))

    clock.save(path='/home/ygx/libraries/mds/molecules/molecules/conv_autoencoder/runtimes')
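
Every example on this page leans on an AverageMeter-style helper whose definition is not shown. A minimal sketch of the common pattern (an assumption: each repo ships its own variant; the name/rank/save() timing meter used above and the summary() method used in Example 3 are extensions of this core):

class AverageMeter:
    """Tracks the most recent value (.val) and a running average (.avg)."""

    def __init__(self, name=None, rank=0):
        # name and rank are accepted only to mirror the variants above;
        # this sketch does not persist anything to disk.
        self.name, self.rank = name, rank
        self.reset()

    def reset(self):
        self.val, self.avg, self.sum, self.count = 0.0, 0.0, 0.0, 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count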
Example 2
def main():
    use_cuda = args.use_cuda

    train_data = UnlabeledContact(data=args.data_dir)
    print('Number of samples: {}'.format(len(train_data)))
    trainloader = DataLoader(train_data, batch_size=args.batch_size)

    # Contact matrices are 21x21
    input_size = 441

    encoder = Encoder(input_size=input_size, latent_size=3)
    decoder = Decoder(latent_size=3, output_size=input_size)
    vae = VAE(encoder, decoder, use_cuda=use_cuda)
    criterion = nn.MSELoss()

    if use_cuda:
        # vae already holds encoder and decoder; moving it moves them too,
        # so wrapping the parts separately is unnecessary
        vae = nn.DataParallel(vae)
        vae = vae.cuda().half()
        criterion = criterion.cuda().half()

    optimizer = optim.SGD(vae.parameters(), lr=0.01)

    clock = AverageMeter(name='clock16', rank=0)
    epoch_loss = 0
    total_loss = 0
    end = time.time()
    for epoch in range(15):
        for batch_idx, data in enumerate(trainloader):
            inputs = data['cont_matrix']
            #           inputs = inputs.resize_(args.batch_size, 1, 21, 21)
            inputs = inputs.float()
            if use_cuda:
                inputs = inputs.cuda().half()
            inputs = Variable(inputs)
            optimizer.zero_grad()
            dec = vae(inputs)
            # after nn.DataParallel wrapping, module attributes live on .module
            base_vae = vae.module if isinstance(vae, nn.DataParallel) else vae
            ll = latent_loss(base_vae.z_mean, base_vae.z_sigma)
            loss = criterion(dec, inputs) + ll
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

            clock.update(time.time() - end)
            end = time.time()

            if batch_idx % args.log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * inputs.size(0), len(trainloader.dataset),
                    100. * batch_idx / len(trainloader), loss.item()))

    clock.save(path='/home/ygx/libraries/mds/molecules/molecules/linear_vae')
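
latent_loss() is not shown in this snippet; it is the KL-divergence term of the VAE objective. A sketch under the assumption that z_sigma holds the standard deviation (not the log-variance):

import torch

def latent_loss(z_mean, z_sigma):
    # KL( N(z_mean, z_sigma^2) || N(0, I) )
    #   = 0.5 * E[ mu^2 + sigma^2 - log(sigma^2) - 1 ]
    mean_sq = z_mean * z_mean
    var = z_sigma * z_sigma
    return 0.5 * torch.mean(mean_sq + var - torch.log(var) - 1.0)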
Example 3
                            if b_i == 0:
                                joint_matrix = MICriterion._p_i_j.detach().cpu().numpy()  # noqa
                                fig = plt.figure()
                                plt.imshow(joint_matrix)
                                plt.colorbar()
                                writer.add_figure(
                                    tag=f"mi_head_{head}_{head_i_epoch}",
                                    figure=fig, global_step=e_i, close=True
                                )

                    scaler.scale(loss).backward()
                    scaler.step(optimiser)
                    scaler.update()

                    avg_loss_meter.update(loss.item())
                    mi_meter.update(avg_mi_batch.item())

                    state_dict = dict(zip(
                        ("Model ind", "epoch", "avg_loss", "inst_loss", "avg_mi", "inst_mi"),
                        (
                            config.model_ind, e_i,
                            avg_loss_meter.summary(),
                            loss.item(),
                            mi_meter.summary(),
                            avg_mi_batch.item(),
                        )
                    ))
                    indicator.set_postfix(state_dict)

        avg_loss = avg_loss_meter.summary()
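
The excerpt above begins mid-loop, so the mixed-precision setup around it is not visible. A self-contained sketch of the GradScaler pattern it uses (model, data, and loss below are dummies, not from the original script):

import torch
from torch import nn, optim

device = "cuda" if torch.cuda.is_available() else "cpu"
model = nn.Linear(8, 1).to(device)
optimiser = optim.SGD(model.parameters(), lr=0.1)
scaler = torch.cuda.amp.GradScaler(enabled=(device == "cuda"))

for _ in range(3):
    x = torch.randn(4, 8, device=device)
    optimiser.zero_grad()
    with torch.cuda.amp.autocast(enabled=(device == "cuda")):
        loss = model(x).pow(2).mean()  # forward runs in fp16 where safe
    scaler.scale(loss).backward()      # scale loss so fp16 grads don't underflow
    scaler.step(optimiser)             # unscales grads; skips the step on inf/nan
    scaler.update()                    # adapts the scale factor for the next step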
Example 4
    def run_one_epoch(self, training):
        tic = time.time()
        batch_time = AverageMeter()
        losses = AverageMeter()
        accs = AverageMeter()
        if training:
            amnt = self.num_train
            dataset = self.train_loader
        else:
            dataset = self.val_loader
            amnt = self.num_valid
        with tqdm(total=amnt) as pbar:
            for i, data in enumerate(dataset):
                x, y = data
                if self.classification:
                    # classification: replicate labels across heads (assuming one-hot)
                    y = y.view(1, -1).expand(self.model.num_heads, -1)
                else:
                    # segmentation task: replicate label maps across heads
                    y = y.view(1, -1, 1, x.shape[-2], x.shape[-1]).expand(self.model.num_heads, -1, -1, -1, -1)
                if self.config.use_gpu:
                    x, y = x.cuda(), y.cuda()
                output = self.model(x)
                if training:
                    self.optimizer.zero_grad()
                loss = None

                for head in range(self.model.num_heads):
                    if loss is None:
                        loss = self.criterion(output[head], y[head])
                    else:
                        loss = loss + self.criterion(output[head], y[head])
                loss = loss / self.model.num_heads
                if training:
                    loss.backward()
                    self.optimizer.step()
                try:
                    loss_data = loss.data[0]
                except IndexError:
                    loss_data = loss.data.item()
                losses.update(loss_data)
                # measure elapsed time
                toc = time.time()
                batch_time.update(toc - tic)
                if self.classification:
                    _, predicted = torch.max(output.data, -1)
                    total = self.batch_size*self.model.num_heads
                    correct = (predicted == y).sum().item()
                    acc = correct/total
                    accs.update(acc)
                    pbar.set_description(f"{(toc - tic):.1f}s - loss: {loss_data:.3f} acc {accs.avg:.3f}")
                else:
                    pbar.set_description(f"{(toc - tic):.1f}s - loss: {loss_data:.3f}")
                pbar.update(self.batch_size)
                if training and i % 2 == 0:
                    self.model.log_illumination(self.curr_epoch, i)
                if not training and i == 0 and not self.classification:
                    y_sample = y[0, 0].view(256, 256).detach().cpu().numpy()
                    p_sample = output[0, 0].view(256, 256).detach().cpu().numpy()
                    wandb.log({f"images_epoch{self.curr_epoch}": [
                        wandb.Image(np.round(p_sample * 255), caption="prediction"),
                        wandb.Image(np.round(y_sample * 255), caption="label")]}, step=self.curr_epoch)
        return losses.avg, accs.avg
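
The per-head loss accumulation in run_one_epoch can be written more compactly. A sketch with dummy shapes (num_heads, output, and y below are stand-ins, not the trainer's real tensors):

import torch
from torch import nn

num_heads, batch, classes = 3, 4, 5
output = torch.randn(num_heads, batch, classes)
y = torch.randint(0, classes, (1, batch)).expand(num_heads, -1)
criterion = nn.CrossEntropyLoss()

# average the criterion over all heads in one expression
loss = torch.stack([criterion(output[h], y[h]) for h in range(num_heads)]).mean()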
Example 5
    def train(self,
              epoch,
              data_loader,
              opt_sn,
              opt_vn,
              mode,
              writer=None,
              print_freq=1):
        self.sn.train()
        self.vn.train()

        batch_time = AverageMeter()
        data_time = AverageMeter()
        losses_sn = AverageMeter()
        losses_vn = AverageMeter()
        ious = AverageMeter()

        end = time.time()

        for i, inputs in enumerate(data_loader):
            data_time.update(time.time() - end)

            img, lbl = self._parse_data(inputs)

            # train sn
            loss_sn, iou_, heat_map = self._forward_sn(img, lbl)
            losses_sn.update(loss_sn.item(), lbl.size(0))
            ious.update(iou_, lbl.size(0))

            if mode == 'sn':
                self.step(opt_sn, loss_sn)
            # train vn
            elif mode == 'vn':
                _, seg_pred = torch.max(heat_map, dim=1, keepdim=True)
                target_iou = iou(heat_map.data, lbl.data, average=False)

                loss_vn, iou_pred = self._forward_vn(img, heat_map, target_iou)
                losses_vn.update(loss_vn.item(), lbl.size(0))
                self.step(opt_vn, loss_vn)

            batch_time.update(time.time() - end)
            end = time.time()

            if (i + 1) % print_freq == 0:
                print('Epoch: [{}][{}/{}]\t'
                      'Time {:.3f} ({:.3f})\t'
                      'Data {:.3f} ({:.3f})\t'
                      'Loss_sn {:.3f} ({:.3f})\t'
                      'Loss_vn {:.3f} ({:.3f})\t'
                      'Prec {:.2%} ({:.2%})\t'.format(
                          epoch, i + 1, len(data_loader), batch_time.val,
                          batch_time.avg, data_time.val, data_time.avg,
                          losses_sn.val, losses_sn.avg, losses_vn.val,
                          losses_vn.avg, ious.val, ious.avg))

        if writer is not None and mode == 'vn':
            # seg_pred is only computed in 'vn' mode
            summary_output_lbl(seg_pred.data, lbl.data, writer, epoch)
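
The iou() helper called above is defined elsewhere in the repo. A plausible sketch for binary segmentation (assumptions: heat_map is (N, 2, H, W) logits, lbl is an (N, 1, H, W) 0/1 mask, and average=False returns per-sample scores, matching how target_iou is used):

import torch

def iou(heat_map, lbl, average=True, eps=1e-6):
    pred = heat_map.argmax(dim=1, keepdim=True).float()  # hard foreground mask
    lbl = lbl.float()
    inter = (pred * lbl).sum(dim=(1, 2, 3))
    union = ((pred + lbl) > 0).float().sum(dim=(1, 2, 3))
    scores = (inter + eps) / (union + eps)
    return scores.mean() if average else scores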
Example 6
def forward(data_loader,
            model,
            criterion,
            epoch,
            training,
            model_type,
            optimizer=None,
            writer=None):
    if training:
        model.train()
    else:
        model.eval()

    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()

    end = time.time()

    total_steps = len(data_loader)

    for i, (inputs, target) in enumerate(data_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        inputs = inputs.to('cuda:0')
        target = target.to('cuda:0')

        # compute output
        output = model(inputs)
        if model_type == 'int':
            # split the output into mantissa and shared exponent
            output, output_exp = output
            output = output.float()
            loss = criterion(output * (2**output_exp.float()), target)
        else:
            output_exp = 0
            loss = criterion(output, target)

        # measure accuracy and record loss
        losses.update(float(loss), inputs.size(0))
        prec1, prec5 = accuracy(output.detach(), target, topk=(1, 5))
        top1.update(float(prec1), inputs.size(0))
        top5.update(float(prec5), inputs.size(0))

        if training:
            if model_type == 'int':
                model.backward(target)

            elif model_type == 'hybrid':
                # float backward
                optimizer.update(epoch, epoch * len(data_loader) + i)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # int8 backward
                model.backward()
            else:
                optimizer.update(epoch, epoch * len(data_loader) + i)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.log_interval == 0 and training:
            logging.info('{model_type} [{0}][{1}/{2}] '
                         'Time {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                         'Data {data_time.val:.2f} '
                         'loss {loss.val:.3f} ({loss.avg:.3f}) '
                         'e {output_exp:d} '
                         '@1 {top1.val:.3f} ({top1.avg:.3f}) '
                         '@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                             epoch,
                             i,
                             len(data_loader),
                             model_type=model_type,
                             batch_time=batch_time,
                             data_time=data_time,
                             loss=losses,
                             output_exp=output_exp,
                             top1=top1,
                             top5=top5))

            if args.grad_hist:
                if args.model_type == 'int':
                    for idx, l in enumerate(model.forward_layers):
                        if hasattr(l, 'weight'):
                            grad = l.grad_int32acc
                            writer.add_histogram(
                                'Grad/' + l.__class__.__name__ + '_' +
                                str(idx), grad, epoch * total_steps + i)

                elif args.model_type == 'float':
                    for idx, l in enumerate(model.layers):
                        if hasattr(l, 'weight'):
                            writer.add_histogram(
                                'Grad/' + l.__class__.__name__ + '_' +
                                str(idx), l.weight.grad,
                                epoch * total_steps + i)
                    for idx, l in enumerate(model.classifier):
                        if hasattr(l, 'weight'):
                            writer.add_histogram(
                                'Grad/' + l.__class__.__name__ + '_' +
                                str(idx), l.weight.grad,
                                epoch * total_steps + i)

    return losses.avg, top1.avg, top5.avg
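
accuracy(output, target, topk=(1, 5)) follows the classic PyTorch ImageNet-example helper. A sketch of that standard version (an assumption: this repo's copy may differ, e.g. in return type):

import torch

def accuracy(output, target, topk=(1,)):
    """Top-k accuracy (%) for logits output of shape (N, C) and labels (N,)."""
    maxk = max(topk)
    batch_size = target.size(0)
    _, pred = output.topk(maxk, dim=1, largest=True, sorted=True)
    pred = pred.t()                                        # (maxk, N)
    correct = pred.eq(target.view(1, -1).expand_as(pred))
    res = []
    for k in topk:
        correct_k = correct[:k].reshape(-1).float().sum(0)
        res.append(correct_k.mul_(100.0 / batch_size))
    return res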
Example 7
def train_model(output_path,
                model,
                dataloaders,
                dataset_sizes,
                criterion,
                optimizer,
                num_epochs=5,
                scheduler=None):
    if not os.path.exists('iterations/' + str(output_path) + '/saved'):
        os.makedirs('iterations/' + str(output_path) + '/saved')
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    losses = AverageMeter()
    accuracies = AverageMeter()
    all_preds = []
    all_labels = []
    val_auc_all = []
    val_acc_all = []
    test_auc_all = []
    test_acc_all = []
    TPFPFN0_all = []
    TPFPFN1_all = []
    best_val_auc = 0.0
    best_epoch = 0
    for epoch in range(1, num_epochs + 1):
        print('-' * 50)
        print('Epoch {}/{}'.format(epoch, num_epochs))
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            # reset running metrics so train and val stats don't mix across phases
            losses = AverageMeter()
            accuracies = AverageMeter()
            all_preds = []
            all_labels = []
            for i, (inputs, labels) in enumerate(dataloaders[phase]):

                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                # with torch.set_grad_enabled(True):
                outputs = model(inputs)
                _, preds = torch.max(outputs.data, 1)
                labels_onehot = torch.nn.functional.one_hot(labels,
                                                            num_classes=2)
                labels_onehot = labels_onehot.type(torch.FloatTensor)

                # BCEloss = torch.nn.functional.binary_cross_entropy_with_logits(outputs.cpu(), labels_onehot, torch.FloatTensor([1.0, 1.0]))
                BCEloss = criterion(outputs.cpu(), labels_onehot)
                # print("BCEloss", BCEloss)
                BCEloss_rank = binary_crossentropy_with_ranking(
                    outputs, labels_onehot)
                # print("BCEloss_rank", BCEloss_rank)
                # BCEloss_rank.requires_grad = True
                loss = BCEloss + 0 * BCEloss_rank  # ranking term currently disabled (zero weight)
                # print("BCEloss, BCEloss_rank", BCEloss, BCEloss_rank)
                # loss = (BCEloss_rank + 1) * BCEloss

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

                losses.update(loss.item(), inputs.size(0))
                acc = float(torch.sum(preds == labels.data)) / preds.shape[0]
                accuracies.update(acc)
                all_preds += list(torch.nn.functional.softmax(
                    outputs, dim=1)[:, 1].cpu().data.numpy())
                all_labels += list(labels.cpu().data.numpy())

            auc = roc_auc_score(all_labels, all_preds)

            if phase == 'train':
                auc_t = auc
                loss_t = losses.avg
                acc_t = accuracies.avg
            if phase == 'val':
                auc_v = auc
                loss_v = losses.avg
                acc_v = accuracies.avg
                val_acc_all.append(acc_v)
                val_auc_all.append(auc_v)

        print('Train AUC: {:.8f} Loss: {:.8f} ACC: {:.8f} '.format(
            auc_t, loss_t, acc_t))
        print('Val AUC: {:.8f} Loss: {:.8f} ACC: {:.8f} '.format(
            auc_v, loss_v, acc_v))
        if auc_v > best_val_auc:
            best_val_auc = auc_v
            best_epoch = epoch
            # print(auc_v, best_val_auc)
            # print(best_epoch)
            best_model = copy.deepcopy(model)

        torch.save(
            model.module, './iterations/' + str(output_path) +
            '/saved/model_{}_epoch.pt'.format(epoch))
        # ############################################################################################################# Test
        for phase in ['test']:
            model.eval()  # Set model to evaluate mode

            # reset prediction buffers so test metrics are not mixed with train/val
            accuracies = AverageMeter()
            all_preds = []
            all_labels = []
            for i, (inputs, labels) in enumerate(dataloaders[phase]):

                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(False):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs.data, 1)

                acc = float(torch.sum(preds == labels.data)) / preds.shape[0]
                accuracies.update(acc)
                all_preds += list(torch.nn.functional.softmax(
                    outputs, dim=1)[:, 1].cpu().data.numpy())
                all_labels += list(labels.cpu().data.numpy())

            auc = roc_auc_score(all_labels, all_preds)

            auc_test = auc
            loss_test = losses.avg  # no test loss is computed; this repeats the val-phase loss
            acc_test = accuracies.avg
            test_acc_all.append(acc_test)
            test_auc_all.append(auc_test)

        print('Test AUC: {:.8f} Loss: {:.8f} ACC: {:.8f} '.format(
            auc_test, loss_test, acc_test))

        nb_classes = 2
        confusion_matrix = torch.zeros(nb_classes, nb_classes)
        with torch.no_grad():
            TrueP0 = 0
            FalseP0 = 0
            FalseN0 = 0
            TrueP1 = 0
            FalseP1 = 0
            FalseN1 = 0
            for i, (inputs, classes) in enumerate(dataloaders[phase]):
                confusion_matrix = torch.zeros(nb_classes, nb_classes)
                input = inputs.to(device)
                target = classes.to(device)
                outputs = model(input)
                _, preds = torch.max(outputs, 1)
                for t, p in zip(target.view(-1), preds.view(-1)):
                    confusion_matrix[t, p] += 1
                this_class = 0
                col = confusion_matrix[:, this_class]
                row = confusion_matrix[this_class, :]
                TP = row[this_class]
                FN = sum(row) - TP
                FP = sum(col) - TP
                # print("TP, FP, FN: ", TP, FP, FN)
                TrueP0 = TrueP0 + TP
                FalseP0 = FalseP0 + FP
                FalseN0 = FalseN0 + FN

                this_class = 1
                col = confusion_matrix[:, this_class]
                row = confusion_matrix[this_class, :]
                TP = row[this_class]
                FN = sum(row) - TP
                FP = sum(col) - TP
                # print("TP, FP, FN: ", TP, FP, FN)
                TrueP1 = TrueP1 + TP
                FalseP1 = FalseP1 + FP
                FalseN1 = FalseN1 + FN
            TPFPFN0 = [TrueP0, FalseP0, FalseN0]
            TPFPFN1 = [TrueP1, FalseP1, FalseN1]
            TPFPFN0_all.append(TPFPFN0)
            TPFPFN1_all.append(TPFPFN1)
            print("overall_TP, FP, FN for 0: ", TrueP0, FalseP0, FalseN0)
            print("overall_TP, FP, FN for 1: ", TrueP1, FalseP1, FalseN1)

    print("best_ValidationEpoch:", best_epoch)
    # print(TPFPFN0_all, val_auc_all, test_auc_all)
    TPFPFN0_best = TPFPFN0_all[best_epoch - 1][0]
    TPFPFN1_best = TPFPFN1_all[best_epoch - 1][0]
    val_auc_best = val_auc_all[best_epoch - 1]
    val_acc_best = val_acc_all[best_epoch - 1]
    test_auc_best = test_auc_all[best_epoch - 1]
    test_acc_best = test_acc_all[best_epoch - 1]

    # #################### save only the best, delete others
    file_path = './iterations/' + str(output_path) + '/saved/model_' + str(
        best_epoch) + '_epoch.pt'
    if os.path.isfile(file_path):
        for CleanUp in glob.glob('./iterations/' + str(output_path) +
                                 '/saved/*.pt'):
            if 'model_' + str(best_epoch) + '_epoch.pt' not in CleanUp:
                os.remove(CleanUp)
    # # ######################################################

    return (best_epoch, best_model, TPFPFN0_all[best_epoch - 1],
            TPFPFN1_all[best_epoch - 1], test_acc_best, test_auc_best)
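
The per-class bookkeeping in the confusion-matrix block above reads TP/FP/FN straight off the matrix's rows and columns. A compact, self-contained restatement (the predictions and targets below are dummy data for illustration):

import torch

nb_classes = 2
cm = torch.zeros(nb_classes, nb_classes)
targets = torch.tensor([0, 1, 1, 0, 1])
preds = torch.tensor([0, 1, 0, 0, 1])
for t, p in zip(targets, preds):
    cm[t, p] += 1

for c in range(nb_classes):
    tp = cm[c, c].item()
    fn = cm[c, :].sum().item() - tp  # row c: true class c predicted as anything
    fp = cm[:, c].sum().item() - tp  # column c: anything predicted as class c
    print(f"class {c}: TP={tp:.0f} FP={fp:.0f} FN={fn:.0f}")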


# def binary_crossentropy_with_ranking(y_true, y_pred):
#     """ Trying to combine ranking loss with numeric precision"""
#     # first get the log loss like normal
#     logloss = K.mean(K.binary_crossentropy(y_pred, y_true), axis=-1)
#
#     # next, build a rank loss
#
#     # clip the probabilities to keep stability
#     y_pred_clipped = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
#
#     # translate into the raw scores before the logit
#     y_pred_score = K.log(y_pred_clipped / (1 - y_pred_clipped))
#
#     # determine what the maximum score for a zero outcome is
#     y_pred_score_zerooutcome_max = K.max(y_pred_score * (y_true < 1))
#
#     # determine how much each score is above or below it
#     rankloss = y_pred_score - y_pred_score_zerooutcome_max
#
#     # only keep losses for positive outcomes
#     rankloss = rankloss * y_true
#
#     # only keep losses where the score is below the max
#     rankloss = K.square(K.clip(rankloss, -100, 0))
#
#     # average the loss for just the positive outcomes
#     rankloss = K.sum(rankloss, axis=-1) / (K.sum(y_true > 0) + 1)
#
#     # return (rankloss + 1) * logloss - an alternative to try
#     return rankloss + logloss