def __init__(self, generator=MLP(), discriminator=MLP()):
    # Note: the MLP() defaults are evaluated once, at function definition time.
    super().__init__()
    if generator is not None and discriminator is not None:
        self.generator = generator
        self.discriminator = discriminator
        self.layers = self.generator.layers + self.discriminator.layers
        self.generator.loss = CrossEntropy()
        self.discriminator.loss = CrossEntropy()
def train_variational_autoencoder(
    learning_rate: float,
    epochs: int,
    batch_size: int,
    latent_variables: int = 10,
    print_every: int = 50,
) -> None:
    print(
        f"Training a variational autoencoder for {epochs} epochs with batch size {batch_size}"
    )
    data_loader = DataLoader(batch_size)
    image_loss = CrossEntropy()
    divergence_loss = KLDivergenceStandardNormal()
    encoder_mean = Model([Linear(784, 50), ReLU(), Linear(50, latent_variables)])
    encoder_variance = Model(
        [Linear(784, 50), ReLU(), Linear(50, latent_variables), Exponential()]
    )
    reparameterization = Reparameterization()
    decoder = Model([Linear(latent_variables, 50), ReLU(), Linear(50, 784)])

    for i in range(epochs):
        # One training loop
        training_data = data_loader.get_training_data()
        for j, batch in enumerate(training_data):
            input, target = batch

            # Forward pass
            mean = encoder_mean(input)
            variance = encoder_variance(input)
            z = reparameterization(mean=mean, variance=variance)
            generated_samples = decoder(z)

            # Loss calculation
            divergence_loss_value = divergence_loss(mean, variance)
            generation_loss = image_loss(generated_samples, input)
            if j % print_every == 0:
                print(
                    f"Epoch {i+1}/{epochs}, "
                    f"training iteration {j+1}/{len(training_data)}"
                )
                print(
                    f"KL loss {np.round(divergence_loss_value, 2)}\t"
                    f"Generation loss {np.round(generation_loss, 2)}"
                )

            # Backward pass
            decoder_gradient = image_loss.gradient()
            decoder_gradient = decoder.backward(decoder_gradient)
            decoder_mean_gradient, decoder_variance_gradient = reparameterization.backward(
                decoder_gradient
            )
            encoder_mean_gradient, encoder_variance_gradient = (
                divergence_loss.gradient()
            )
            encoder_mean.backward(decoder_mean_gradient + encoder_mean_gradient)
            encoder_variance.backward(
                decoder_variance_gradient + encoder_variance_gradient
            )
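# A minimal sketch of the reparameterization trick that the Reparameterization layer
# above is assumed to implement (z = mean + sqrt(variance) * eps). This is an
# illustration under the assumption of NumPy arrays, not the project's actual class.
import numpy as np

class ReparameterizationSketch:
    def __call__(self, mean, variance):
        # Sample the noise once and cache it for the backward pass.
        self.eps = np.random.standard_normal(mean.shape)
        self.variance = variance
        return mean + np.sqrt(variance) * self.eps

    def backward(self, gradient):
        # dz/dmean = 1, dz/dvariance = eps / (2 * sqrt(variance))
        mean_gradient = gradient
        variance_gradient = gradient * self.eps / (2.0 * np.sqrt(self.variance))
        return mean_gradient, variance_gradient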
def train(net: NeuralNetwork,
          inputs: Tensor,
          targets: Tensor,
          num_epochs: int = 5000,
          iterator: DataIterator = BatchIterator(),
          loss: Loss = CrossEntropy(),
          optimizer: Optimizer = MBGD(),
          showGraph: bool = False) -> None:
    losses = []
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        for batch in iterator(inputs, targets):
            for X, Y in zip(batch.inputs, batch.targets):
                predicted = net.forward(X)
                epoch_loss += loss.loss(predicted, Y)
                grad = loss.grad(predicted, Y)
                net.backwards(grad)
            optimizer.step(net)
        print(epoch, epoch_loss)
        losses.append(epoch_loss)
        if epoch_loss < 300:
            pass  # no-op placeholder (e.g. for an early-stopping check)
    if showGraph:
        plt.plot(losses)
        plt.show()
def train(epoch, model, optim, trainloader):
    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    model.train()
    end = time.time()
    cross_entropy = CrossEntropy(num_classes=num_classes)
    triplet_loss_fn = TripletLoss(margin=margin)

    model.fc0.train(True)
    model.fc1.train(False)
    output_fc = "fc0"
    model.base.train(True)

    for batch, (imgs, pids, _) in enumerate(trainloader):
        imgs, pids = imgs.cuda(), pids.cuda()
        data_time.update(time.time() - end)
        clf_outputs, features = model(imgs)

        if isinstance(clf_outputs[output_fc], tuple):
            cross_entropy_loss = DeepSuperVision(cross_entropy, clf_outputs[output_fc], pids)
        else:
            cross_entropy_loss = cross_entropy(clf_outputs[output_fc], pids)

        if isinstance(features, tuple):
            triplet_loss = DeepSuperVision(triplet_loss_fn, features, pids)
        else:
            # The triplet loss operates on the embedding features
            # (the original passed clf_outputs[output_fc] here, which looks like a bug).
            triplet_loss = triplet_loss_fn(features, pids)

        loss = cross_entropy_loss + triplet_loss

        optim.zero_grad()
        loss.backward()
        optim.step()

        batch_time.update(time.time() - end)
        end = time.time()
        losses.update(loss.item(), pids.size(0))

        if (batch + 1) % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                      epoch + 1, batch + 1, len(trainloader),
                      batch_time=batch_time, data_time=data_time, loss=losses))
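# AverageMeter is referenced throughout these snippets but not defined here. The
# usual helper tracks the latest value and a running average; a minimal sketch
# (an assumption, the project's own class may differ):
class AverageMeterSketch:
    def __init__(self):
        self.val = 0.0    # most recent value
        self.sum = 0.0    # weighted sum of all values
        self.count = 0    # total weight (e.g. number of samples)
        self.avg = 0.0    # running average

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count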
def train_classifier(learning_rate: float, epochs: int, batch_size: int,
                     print_every: int = 50) -> None:
    data_loader = DataLoader(batch_size)
    loss = CrossEntropy()
    model = Model([Linear(784, 50), ReLU(), Linear(50, 10)])

    for i in range(epochs):
        # One training loop
        training_data = data_loader.get_training_data()
        validation_data = data_loader.get_validation_data()
        for j, batch in enumerate(training_data):
            input, target = batch
            y = model(input)
            loss(y, target)
            gradient = loss.gradient()
            model.backward(gradient)
            model.update(learning_rate)
            if j % print_every == 0:
                print(
                    f"Epoch {i+1}/{epochs}, training iteration {j+1}/{len(training_data)}"
                )

        accuracy_values = []
        loss_values = []
        # One validation loop
        for j, batch in enumerate(validation_data):
            input, target = batch
            y = model(input)
            loss_value = loss(y, target)
            accuracy = calculate_accuracy(y, target)
            accuracy_values.append(accuracy)
            loss_values.append(loss_value)
        print(
            f"Epoch {i+1}: loss {np.round(np.average(loss_values), 2)}, "
            f"accuracy {np.round(np.average(accuracy_values), 2)}"
        )
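# calculate_accuracy is not shown in this snippet. Assuming a (batch, num_classes)
# array of scores and one-hot targets, a minimal sketch could be:
import numpy as np

def calculate_accuracy_sketch(predictions, targets):
    # Fraction of samples whose argmax prediction matches the argmax target.
    predicted_classes = np.argmax(predictions, axis=1)
    target_classes = np.argmax(targets, axis=1)
    return float(np.mean(predicted_classes == target_classes))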
def train(epoch, model, optim, trainloader):
    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    model.train()
    end = time.time()
    cross_entropy = CrossEntropy(num_classes=num_classes)
    triplet_loss_fn = TripletLoss(margin=margin)

    model.fc0.train(False)
    model.fc1.train(True)
    output_fc = "fc1"
    model.base.train(True)

    ################################################
    person_per_batch = 8
    imgs_per_person = 4
    bmask = []
    l_all_pos = []
    l_all_neg = []
    pos_targets = torch.Tensor()
    neg_targets = torch.Tensor()
    C_pos = torch.zeros([train_batch, 256, 2, 4], device=device)
    C_neg = torch.zeros([train_batch, 256, 2, 4], device=device)
    ################################################

    for batch, (imgs, pids, camids) in enumerate(trainloader):
        # imgs, pids = imgs.cuda(), pids.cuda()
        pids = torch.Tensor.numpy(pids)
        camids = torch.Tensor.numpy(camids)
        uid = list(set(pids))
        mask = np.zeros(
            [2 * person_per_batch, person_per_batch * imgs_per_person])

        for i in range(len(uid)):
            sel = uid[i]
            pos = -1
            neg = -1
            k = -1
            # anchor: first image with the selected identity
            for j in range(len(pids)):
                if pids[j] == sel:
                    k = j
                    break
            # positive: same identity, different camera ID
            for j in range(len(pids)):
                if pids[k] == pids[j] and camids[k] != camids[j]:
                    pos = j
                    break
            # negative: different identity
            for j in range(len(pids)):
                if pids[k] != pids[j]:
                    neg = j
                    break
            mask[2 * i][k] = 1
            mask[2 * i][pos] = 1
            mask[2 * i + 1][k] = 1
            mask[2 * i + 1][neg] = 1

        bmask.append(mask)

        l_batch_pos = []
        l_batch_neg = []
        kl = mask  # bmask[batch]
        for i in range(len(kl)):
            l5 = []
            for j in range(len(kl[i])):
                if kl[i][j] == 1:
                    l5.append(j)
            if i % 2 < 1:
                l_batch_pos.append(l5)
            else:
                l_batch_neg.append(l5)
        l_all_pos.append(l_batch_pos)
        l_all_neg.append(l_batch_neg)

        data_time.update(time.time() - end)
        clf_outputs = model(imgs.cuda())

        f = activation['fc1.conv2']
        f = f.permute(0, 3, 1, 2)
        m = nn.AdaptiveAvgPool2d((256, 2))
        f = m(f)
        f = f.permute(0, 2, 3, 1)
        fc1 = clf_outputs[output_fc]

        # accumulate positive-pair feature differences
        for i in range(len(l_batch_pos)):
            pos_idx0 = l_batch_pos[i][0]
            pos_idx1 = l_batch_pos[i][1]
            pos_targets = torch.sub(f[pos_idx1], f[pos_idx0])
            C_pos += pos_targets

        # accumulate negative-pair feature differences
        for i in range(len(l_batch_neg)):
            neg_idx0 = l_batch_neg[i][0]
            neg_idx1 = l_batch_neg[i][1]
            neg_targets = torch.sub(f[neg_idx1], f[neg_idx0])
            C_neg += neg_targets

        g = Flatten(C_pos)
        y = Flatten(C_neg)
        u = g - y                    # (bs, 2048)
        v = torch.unsqueeze(u, 2)    # (64, 2048, 1)
        w = v.permute(0, 2, 1)       # (64, 1, 2048)
        x_net = torch.matmul(v, w)   # (64, 2048, 2048)
        y = torch.sum(x_net)
        y = F.relu(y)

        alpha = 1e-9
        beta = 0
        covariance_loss = 1 * (alpha * y - beta)

        pids = torch.from_numpy(pids)
        pids = pids.cuda()

        if isinstance(fc1, tuple):
            cross_entropy_loss = DeepSuperVision(cross_entropy, fc1, pids)
        else:
            cross_entropy_loss = cross_entropy(fc1, pids)

        # Alternative triplet-loss term kept from the original, currently unused:
        # if isinstance(f, tuple):
        #     triplet = DeepSuperVision(triplet_loss_fn, f, pids)
        # else:
        #     triplet = triplet_loss_fn(f, pids)

        # print("xent", cross_entropy_loss)
        # print("covariance", covariance_loss)
        loss = cross_entropy_loss + covariance_loss

        optim.zero_grad()
        loss.backward()
        optim.step()

        batch_time.update(time.time() - end)
        end = time.time()
        losses.update(loss.item(), pids.size(0))

        if (batch + 1) % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                      epoch + 1, batch + 1, len(trainloader),
                      batch_time=batch_time, data_time=data_time, loss=losses))
def loss_fn(num_classes, logits, labels):
    return CrossEntropy(num_classes=num_classes)(logits, labels)
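# In the re-ID snippets, CrossEntropy is constructed with num_classes, which usually
# indicates a label-smoothing variant. A minimal PyTorch sketch of such a loss
# (an assumption for illustration, not the repository's class):
import torch
import torch.nn as nn

class CrossEntropyLabelSmoothSketch(nn.Module):
    def __init__(self, num_classes, epsilon=0.1):
        super().__init__()
        self.num_classes = num_classes
        self.epsilon = epsilon
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, logits, targets):
        # logits: (batch, num_classes); targets: (batch,) integer class labels
        log_probs = self.logsoftmax(logits)
        one_hot = torch.zeros_like(log_probs).scatter_(1, targets.unsqueeze(1), 1)
        smoothed = (1 - self.epsilon) * one_hot + self.epsilon / self.num_classes
        return (-smoothed * log_probs).sum(dim=1).mean()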
def train(epoch, model, optim, trainloader):
    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()
    model.train()
    end = time.time()
    cross_entropy = CrossEntropy(num_classes=num_classes)
    triplet_loss_fn = TripletLoss(margin=margin)

    model.fc0.train(False)
    model.fc1.train(True)
    output_fc = "fc1"
    model.base.train(True)

    ################################################
    person_per_batch = 8
    imgs_per_person = 4
    bmask = []
    l_all_pos = []
    l_all_neg = []
    pos_targets = torch.Tensor()
    neg_targets = torch.Tensor()
    C_pos0 = torch.zeros([train_batch, 256, 2, 4], device=device)
    C_pos1 = torch.zeros([train_batch, 256, 2, 4], device=device)
    C_neg0 = torch.zeros([train_batch, 256, 2, 4], device=device)
    C_neg1 = torch.zeros([train_batch, 256, 2, 4], device=device)
    ################################################

    for batch, (imgs, pids, camids) in enumerate(trainloader):
        # imgs, pids = imgs.cuda(), pids.cuda()
        pids = torch.Tensor.numpy(pids)
        camids = torch.Tensor.numpy(camids)
        uid = list(set(pids))
        mask = np.zeros(
            [2 * person_per_batch, person_per_batch * imgs_per_person])

        for i in range(len(uid)):
            sel = uid[i]
            pos = -1
            neg = -1
            k = -1
            # anchor: first image with the selected identity
            for j in range(len(pids)):
                if pids[j] == sel:
                    k = j
                    break
            # positive: same identity, different camera ID
            for j in range(len(pids)):
                if pids[k] == pids[j] and camids[k] != camids[j]:
                    pos = j
                    break
            # negative: different identity
            for j in range(len(pids)):
                if pids[k] != pids[j]:
                    neg = j
                    break
            mask[2 * i][k] = 1
            mask[2 * i][pos] = 1
            mask[2 * i + 1][k] = 1
            mask[2 * i + 1][neg] = 1

        bmask.append(mask)

        l_batch_pos = []
        l_batch_neg = []
        kl = mask  # bmask[batch]
        for i in range(len(kl)):
            l5 = []
            for j in range(len(kl[i])):
                if kl[i][j] == 1:
                    l5.append(j)
            if i % 2 < 1:
                l_batch_pos.append(l5)
            else:
                l_batch_neg.append(l5)
        l_all_pos.append(l_batch_pos)
        l_all_neg.append(l_batch_neg)

        data_time.update(time.time() - end)
        clf_outputs = model(imgs.cuda())

        f0 = activation['fc0.conv2']  # (bs, 2048, 8, 4)
        f1 = activation['fc1.conv2']
        f0 = f0.permute(0, 3, 1, 2)
        f1 = f1.permute(0, 3, 1, 2)
        m = nn.AdaptiveAvgPool2d((256, 2))
        f0 = m(f0)
        f1 = m(f1)
        f0 = f0.permute(0, 2, 3, 1)
        f1 = f1.permute(0, 2, 3, 1)
        fc1 = clf_outputs[output_fc]

        # Computing positive samples
        for i in range(len(l_batch_pos)):
            pos_idx0 = l_batch_pos[i][0]
            pos_idx1 = l_batch_pos[i][1]
            pos_targets0 = torch.sub(f0[pos_idx1], f0[pos_idx0])
            pos_targets1 = torch.sub(f1[pos_idx1], f1[pos_idx0])
            C_pos0 += pos_targets0
            C_pos1 += pos_targets1

        # Computing negative samples
        for i in range(len(l_batch_neg)):
            neg_idx0 = l_batch_neg[i][0]
            neg_idx1 = l_batch_neg[i][1]
            neg_targets0 = torch.sub(f0[neg_idx1], f0[neg_idx0])
            neg_targets1 = torch.sub(f1[neg_idx1], f1[neg_idx0])
            C_neg0 += neg_targets0
            C_neg1 += neg_targets1

        g0 = Flatten(C_pos0)
        g1 = Flatten(C_pos1)
        y0 = Flatten(C_neg0)
        y1 = Flatten(C_neg1)
        u0 = g0 - y0                   # (bs, 2048)
        u1 = g1 - y1
        v0 = torch.unsqueeze(u0, 2)    # (64, 2048, 1)
        v1 = torch.unsqueeze(u1, 2)
        w0 = v0.permute(0, 2, 1)       # (64, 1, 2048)
        w1 = v1.permute(0, 2, 1)
        x_net0 = torch.matmul(v0, w0)  # (64, 2048, 2048)
        x_net1 = torch.matmul(v1, w1)
        r0 = torch.sum(x_net0)
        r1 = torch.sum(x_net1)
        r0_hinge = F.relu(r0)
        r1_hinge = F.relu(r1)

        alpha = 1e-9
        beta = 0
        covariance_loss = 1 * (alpha * r0_hinge - beta)
        domain_g = 1 * (alpha * (r1_hinge - r0_hinge) - beta)

        pids = torch.from_numpy(pids)
        pids = pids.cuda()

        if isinstance(fc1, tuple):
            cross_entropy_loss = DeepSuperVision(cross_entropy, fc1, pids)
        else:
            cross_entropy_loss = cross_entropy(fc1, pids)

        loss = cross_entropy_loss + covariance_loss + domain_g

        optim.zero_grad()
        loss.backward()
        optim.step()

        batch_time.update(time.time() - end)
        end = time.time()
        losses.update(loss.item(), pids.size(0))

        if (batch + 1) % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'.format(
                      epoch + 1, batch + 1, len(trainloader),
                      batch_time=batch_time, data_time=data_time, loss=losses))
def main(args):
    total_size = 2000
    train_size = 1000
    test_size = 1000
    data, target = generate_disc_set(total_size, random_state=1)
    train_data, train_target = data[:train_size], target[:train_size]
    test_data, test_target = data[test_size:], target[test_size:]

    colours = ['blue', 'green', 'red']

    def colour_labels(labels):
        return list(map(lambda x: colours[x], labels))

    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.scatter(train_data[:, 0], train_data[:, 1],
                c=colour_labels(train_target.argmax(1)), edgecolors='none')
    plt.title('Train Data')
    plt.xlabel(r'$x_{1}$')
    plt.ylabel(r'$x_{2}$')
    plt.subplot(1, 2, 2)
    plt.scatter(test_data[:, 0], test_data[:, 1],
                c=colour_labels(test_target.argmax(1)), edgecolors='none')
    plt.title('Test Data')
    plt.xlabel(r'$x_{1}$')
    plt.ylabel(r'$x_{2}$')
    plt.pause(1)
    plt.show(block=False)

    if args.loss == 'mse':
        net_loss = MSE()
        net = Sequential(DenseLayer(2, 25), ReLU(),
                         DenseLayer(25, 25), ReLU(),
                         DenseLayer(25, 25), ReLU(),
                         DenseLayer(25, 2))
    elif args.loss == 'softmax_loss':
        net_loss = CrossEntropy()
        net = Sequential(DenseLayer(2, 25), ReLU(),
                         DenseLayer(25, 25), ReLU(),
                         DenseLayer(25, 25), ReLU(),
                         DenseLayer(25, 2), SoftMax())
    else:
        raise ValueError(
            args.loss + " is an invalid loss. Please use either 'mse' or 'softmax_loss'.")

    def sgd(x, dx, config):
        for cur_layer_x, cur_layer_dx in zip(x, dx):
            for cur_x, cur_dx in zip(cur_layer_x, cur_layer_dx):
                cur_old_grad = config['learning_rate'] * cur_dx
                if cur_old_grad.shape[0] == 1:
                    cur_x = cur_x.reshape(cur_old_grad.shape)
                cur_x.add_(-cur_old_grad)

    def train_model(model, model_loss, train_data, train_target,
                    lr=0.005, batch_size=1, n_epoch=50):
        optimizer_config = {'learning_rate': lr}
        train_loss_history = []
        test_loss_history = []
        for i in range(n_epoch):
            loss = 0
            k = 0
            for x_batch, y_batch in get_batches(train_data, train_target, batch_size):
                model.zero_grad_params()

                # Forward
                pred = model.forward(x_batch)
                loss += model_loss.forward(pred, y_batch)

                # Backward
                lg = model_loss.backward(pred, y_batch)
                model.backward(lg)

                # Update weights
                sgd(net.get_params(), net.get_grad_params(), optimizer_config)
                k += 1
            train_loss_history.append(loss / k)

            test_pred = model.forward(test_data)
            test_loss = model_loss.forward(test_pred, test_target)
            test_loss_history.append(test_loss)
            print('#Epoch {}: current train loss = {:.4f}'.format(
                i + 1, loss.item() / k))
        return train_loss_history, test_loss_history

    print('Training started...')
    train_loss_history, test_loss_history = train_model(net, net_loss,
                                                        train_data, train_target,
                                                        n_epoch=50)
    print('Final train loss: {:.4f}'.format(train_loss_history[-1]))
    print('Final test loss: {:.4f}'.format(test_loss_history[-1]))

    plt.figure(2, figsize=(8, 6))
    plt.title("Train and Test Loss")
    plt.xlabel("#Epochs")
    plt.ylabel("loss")
    plt.plot(train_loss_history, 'b')
    plt.plot(test_loss_history, 'r')
    plt.legend(['train loss', 'test loss'])
    plt.pause(1)
    plt.show(block=False)

    train_res = net.forward(train_data)
    errors_train = compute_nb_errors(train_res, train_target)
    print("Number of errors on the train set: " + str(errors_train))
    train_res = train_res.argmax(1)
    train_res[train_res != train_target.argmax(1)] = 2

    test_res = net.forward(test_data)
    errors_test = compute_nb_errors(test_res, test_target)
    print("Number of errors on the test set: " + str(errors_test))
    test_res = test_res.argmax(1)
    test_res[test_res != test_target.argmax(1)] = 2

    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.scatter(train_data[:, 0], train_data[:, 1],
                c=colour_labels(train_res), edgecolors='none')
    plt.xlabel(r'$x_{1}$')
    plt.ylabel(r'$x_{2}$')
    plt.title(f'Train Data, {errors_train} errors')
    plt.subplot(1, 2, 2)
    plt.scatter(test_data[:, 0], test_data[:, 1],
                c=colour_labels(test_res), edgecolors='none')
    plt.xlabel(r'$x_{1}$')
    plt.ylabel(r'$x_{2}$')
    plt.title(f'Test Data, {errors_test} errors')
    plt.show()
def main():
    # generate data and translate labels
    train_features, train_targets = generate_all_datapoints_and_labels()
    test_features, test_targets = generate_all_datapoints_and_labels()
    train_labels, test_labels = convert_labels(train_targets), convert_labels(test_targets)

    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('Model: Linear + ReLU + Linear + ReLU + Linear + ReLU + Linear + Tanh')
    print('Loss: MSE')
    print('Optimizer: SGD')
    print('*************************************************************************')
    print('Training')
    print('*************************************************************************')

    # build network, loss and optimizer for Model 1
    my_model_design_1 = [Linear(2, 25), ReLU(), Linear(25, 25), Dropout(p=0.5), ReLU(),
                         Linear(25, 25), ReLU(), Linear(25, 2), Tanh()]
    my_model_1 = Sequential(my_model_design_1)
    optimizer_1 = SGD(my_model_1, lr=1e-3)
    criterion_1 = LossMSE()

    # train Model 1
    batch_size = 1
    for epoch in range(50):
        temp_train_loss_sum = 0.
        temp_test_loss_sum = 0.
        num_train_correct = 0
        num_test_correct = 0

        # trained in batch fashion: here batch size = 1
        for temp_batch in range(0, len(train_features), batch_size):
            temp_train_features = train_features.narrow(0, temp_batch, batch_size)
            temp_train_labels = train_labels.narrow(0, temp_batch, batch_size)
            for i in range(batch_size):
                # clean parameter gradient before each batch
                optimizer_1.zero_grad()
                temp_train_feature = temp_train_features[i]
                temp_train_label = temp_train_labels[i]

                # forward pass to compute loss
                temp_train_pred = my_model_1.forward(temp_train_feature)
                temp_train_loss = criterion_1.forward(temp_train_pred, temp_train_label)
                temp_train_loss_sum += temp_train_loss

                _, temp_train_pred_cat = torch.max(temp_train_pred, 0)
                _, temp_train_label_cat = torch.max(temp_train_label, 0)
                if temp_train_pred_cat == temp_train_label_cat:
                    num_train_correct += 1

                # calculate gradient according to loss gradient
                temp_train_loss_grad = criterion_1.backward(temp_train_pred, temp_train_label)
                # accumulate parameter gradient in each batch
                my_model_1.backward(temp_train_loss_grad)
                # update parameters by optimizer
                optimizer_1.step()

        # evaluate the current model on the test set (only the forward pass is needed)
        for i_test in range(len(test_features)):
            temp_test_feature = test_features[i_test]
            temp_test_label = test_labels[i_test]

            temp_test_pred = my_model_1.forward(temp_test_feature)
            temp_test_loss = criterion_1.forward(temp_test_pred, temp_test_label)
            temp_test_loss_sum += temp_test_loss

            _, temp_test_pred_cat = torch.max(temp_test_pred, 0)
            _, temp_test_label_cat = torch.max(temp_test_label, 0)
            if temp_test_pred_cat == temp_test_label_cat:
                num_test_correct += 1

        temp_train_loss_mean = temp_train_loss_sum / len(train_features)
        temp_test_loss_mean = temp_test_loss_sum / len(test_features)
        temp_train_accuracy = num_train_correct / len(train_features)
        temp_test_accuracy = num_test_correct / len(test_features)

        print("Epoch: {}/{}..".format(epoch + 1, 50),
              "Training Loss: {:.4f}..".format(temp_train_loss_mean),
              "Training Accuracy: {:.4f}..".format(temp_train_accuracy),
              "Validation/Test Loss: {:.4f}..".format(temp_test_loss_mean),
              "Validation/Test Accuracy: {:.4f}..".format(temp_test_accuracy),
              )

    # visualize the classification performance of Model 1 on the test set
    test_pred_labels_1 = []
    for i in range(1000):
        temp_test_feature = test_features[i]
        temp_test_label = test_labels[i]
        temp_test_pred = my_model_1.forward(temp_test_feature)
        _, temp_train_pred_cat = torch.max(temp_test_pred, 0)
        if test_targets[i].int() == temp_train_pred_cat.int():
            test_pred_labels_1.append(int(test_targets[i]))
        else:
            test_pred_labels_1.append(2)

    fig, axes = plt.subplots(1, 1, figsize=(6, 6))
    axes.scatter(test_features[:, 0], test_features[:, 1], c=test_pred_labels_1)
    axes.set_title('Classification Performance of Model 1')
    plt.show()

    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('*************************************************************************')
    print('Model: Linear + ReLU + Linear + Dropout + SeLU + Linear + Dropout + ReLU + Linear + Sigmoid')
    print('Loss: Cross Entropy')
    print('Optimizer: Adam')
    print('*************************************************************************')
    print('Training')
    print('*************************************************************************')

    # build network, loss function and optimizer for Model 2
    my_model_design_2 = [Linear(2, 25), ReLU(), Linear(25, 25), Dropout(p=0.5), SeLU(),
                         Linear(25, 25), Dropout(p=0.5), ReLU(), Linear(25, 2), Sigmoid()]
    my_model_2 = Sequential(my_model_design_2)
    optimizer_2 = Adam(my_model_2, lr=1e-3)
    criterion_2 = CrossEntropy()

    # train Model 2
    batch_size = 1
    epoch = 0
    while epoch < 25:
        temp_train_loss_sum = 0.
        temp_test_loss_sum = 0.
        num_train_correct = 0
        num_test_correct = 0

        # trained in batch fashion: here batch size = 1
        for temp_batch in range(0, len(train_features), batch_size):
            temp_train_features = train_features.narrow(0, temp_batch, batch_size)
            temp_train_labels = train_labels.narrow(0, temp_batch, batch_size)
            for i in range(batch_size):
                # clean parameter gradient before each batch
                optimizer_2.zero_grad()
                temp_train_feature = temp_train_features[i]
                temp_train_label = temp_train_labels[i]

                # forward pass to compute loss
                temp_train_pred = my_model_2.forward(temp_train_feature)
                temp_train_loss = criterion_2.forward(temp_train_pred, temp_train_label)
                temp_train_loss_sum += temp_train_loss

                _, temp_train_pred_cat = torch.max(temp_train_pred, 0)
                _, temp_train_label_cat = torch.max(temp_train_label, 0)
                if temp_train_pred_cat == temp_train_label_cat:
                    num_train_correct += 1

                # calculate gradient according to loss gradient
                temp_train_loss_grad = criterion_2.backward(temp_train_pred, temp_train_label)
                '''
                if (not temp_train_loss_grad[0] >= 0) and (not temp_train_loss_grad[0] < 0):
                    continue
                '''
                # accumulate parameter gradient in each batch
                my_model_2.backward(temp_train_loss_grad)
                # update parameters by optimizer
                optimizer_2.step()

        # evaluate the current model on the test set (only the forward pass is needed)
        for i_test in range(len(test_features)):
            temp_test_feature = test_features[i_test]
            temp_test_label = test_labels[i_test]

            temp_test_pred = my_model_2.forward(temp_test_feature)
            temp_test_loss = criterion_2.forward(temp_test_pred, temp_test_label)
            temp_test_loss_sum += temp_test_loss

            _, temp_test_pred_cat = torch.max(temp_test_pred, 0)
            _, temp_test_label_cat = torch.max(temp_test_label, 0)
            if temp_test_pred_cat == temp_test_label_cat:
                num_test_correct += 1

        temp_train_loss_mean = temp_train_loss_sum / len(train_features)
        temp_test_loss_mean = temp_test_loss_sum / len(test_features)
        temp_train_accuracy = num_train_correct / len(train_features)
        temp_test_accuracy = num_test_correct / len(test_features)

        # in case of a gradient explosion (NaN loss gradient), re-initialize the model
        # and restart training; this situation seldom happens
        if (not temp_train_loss_grad[0] >= 0) and (not temp_train_loss_grad[0] < 0):
            epoch = 0
            my_model_design_2 = [Linear(2, 25), ReLU(), Linear(25, 25), Dropout(p=0.5), ReLU(),
                                 Linear(25, 25), Dropout(p=0.5), ReLU(), Linear(25, 2), Sigmoid()]
            my_model_2 = Sequential(my_model_design_2)
            optimizer_2 = Adam(my_model_2, lr=1e-3)
            criterion_2 = CrossEntropy()
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('--------------------------------------------------------------------------------')
            print('Restart training because of gradient explosion')
            continue

        print("Epoch: {}/{}..".format(epoch + 1, 25),
              "Training Loss: {:.4f}..".format(temp_train_loss_mean),
              "Training Accuracy: {:.4f}..".format(temp_train_accuracy),
              "Validation/Test Loss: {:.4f}..".format(temp_test_loss_mean),
              "Validation/Test Accuracy: {:.4f}..".format(temp_test_accuracy),
              )
        epoch += 1

    # visualize the classification performance of Model 2 on the test set
    test_pred_labels_2 = []
    for i in range(1000):
        temp_test_feature = test_features[i]
        temp_test_label = test_labels[i]
        temp_test_pred = my_model_2.forward(temp_test_feature)
        _, temp_train_pred_cat = torch.max(temp_test_pred, 0)
        if test_targets[i].int() == temp_train_pred_cat.int():
            test_pred_labels_2.append(int(test_targets[i]))
        else:
            test_pred_labels_2.append(2)

    fig, axes = plt.subplots(1, 1, figsize=(6, 6))
    axes.scatter(test_features[:, 0], test_features[:, 1], c=test_pred_labels_2)
    axes.set_title('Classification Performance of Model 2')
    plt.show()
    targets, test_size=0.2)

np.seterr(all='raise')

net = NeuralNetwork([
    LinearLayer(inputSize=64, outputSize=16),
    LeakyRelu(),
    LinearLayer(inputSize=16, outputSize=10),
    LeakyRelu(),
    Softmax()
])

train(net, inputs, targets, loss=CrossEntropy(), num_epochs=600,
      optimizer=MBGD(learningRate=0.0001), showGraph=True)
net.serialize("serializedMNIST.json")
# net.loadParamsFromFile("/home/ayush/scratch/Net/aknet/serializedMNIST.json")

total = len(xtest)
correct = 0
for x, y in zip(xtest, ytest):
    predicted = net.forward(x)
    if np.argmax(predicted) == np.argmax(y):
        correct += 1
    # plt.imshow(x.reshape((28,28)))
    # plt.show()
    print(np.argmax(predicted), np.argmax(y))
    return dz


def compute_acc(X_test, Y_test, net):
    '''Accuracy for labels that are not one-hot encoded.'''
    acc = 0.0
    for i in range(X_test.shape[0]):
        y_h = net.forward(X_test[i])
        y = np.argmax(y_h)
        if y == Y_test[i]:
            acc += 1.0
    return acc / Y_test.shape[0]


if __name__ == '__main__':  # fixed: was 'main', which never matches
    loss = CrossEntropy()
    net = MnistNetMiniBatch()
    learning_rate = 0.001
    L_train = []
    L_test = []
    Acc_train = []
    Acc_test = []
    len_mini_batch = 10
    for it in range(100):
        L_acc = 0.
        sh = list(range(train_x.shape[0]))
        np.random.shuffle(sh)
        for i in range(train_x.shape[0]):
            x = train_x[sh[i]]
            y = train_y_oh[sh[i]]
            y_h = net.forward(x)
def main(cfg, gpus):
    # Network Builders
    net_encoder = ModelBuilder.build_encoder(
        arch=cfg.MODEL.arch_encoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        weights=cfg.MODEL.weights_encoder)
    net_decoder = ModelBuilder.build_decoder(
        arch=cfg.MODEL.arch_decoder.lower(),
        fc_dim=cfg.MODEL.fc_dim,
        num_class=cfg.DATASET.num_class,
        weights=cfg.MODEL.weights_decoder)

    crit = CrossEntropy()

    if cfg.MODEL.arch_decoder.endswith('deepsup'):
        segmentation_module = SegmentationModule(net_encoder, net_decoder, crit,
                                                 cfg.TRAIN.deep_sup_scale)
    else:
        segmentation_module = SegmentationModule(net_encoder, net_decoder, crit)

    # Dataset and Loader
    dataset_train = TrainDataset(cfg.DATASET.root_dataset,
                                 cfg.DATASET.list_train,
                                 cfg.DATASET,
                                 batch_per_gpu=cfg.TRAIN.batch_size_per_gpu)
    loader_train = torch.utils.data.DataLoader(
        dataset_train,
        batch_size=len(gpus),  # we have modified data_parallel
        shuffle=False,  # we do not use this param
        collate_fn=user_scattered_collate,
        num_workers=cfg.TRAIN.workers,
        drop_last=True,
        pin_memory=True)
    print('1 Epoch = {} iters'.format(cfg.TRAIN.epoch_iters))

    # create loader iterator
    iterator_train = iter(loader_train)

    # load nets into gpu
    if len(gpus) > 1:
        segmentation_module = UserScatteredDataParallel(segmentation_module,
                                                        device_ids=gpus)
        # For sync bn
        patch_replication_callback(segmentation_module)
    segmentation_module.cuda()

    # Set up optimizers
    nets = (net_encoder, net_decoder, crit)
    optimizers = create_optimizers(nets, cfg)

    # Main loop
    history = {'train': {'epoch': [], 'loss': [], 'acc': []}}
    for epoch in range(cfg.TRAIN.start_epoch, cfg.TRAIN.num_epoch):
        train(segmentation_module, iterator_train, optimizers, history, epoch + 1, cfg)
        # checkpointing
        checkpoint(nets, history, cfg, epoch + 1)

    print('Training Done!')
def train(net: NeuralNet,
          inputs,
          targets,
          num_epochs=100,
          batch_size=5,
          loss=CrossEntropy(),
          optimizer=Optimizer(),
          regularizer=False,
          validation=True,
          verbose=False):
    """
    inputs.shape = [sample_size, n_samples]
    targets.shape = [target_shape, n_samples]
    """
    reg_cost = lambda x: 0
    if regularizer:
        reg_cost = regularizer.reg
    if validation:
        validator = Validation(inputs, targets, validation_fraction=0.2)
        inputs = inputs[..., 0:validator.train_size]
        targets = targets[..., 0:validator.train_size]

    epoch_losses = []
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        starts = np.arange(0, len(inputs[-1]), batch_size)
        np.random.shuffle(starts)
        for start in starts:
            end = start + batch_size
            num_batches = len(starts)
            predicted = net.forward(inputs[..., start:end], verbose=verbose)
            epoch_loss += loss.loss(
                predicted, targets[..., start:end]) / num_batches  # + reg_cost(net)
            grad = loss.grad(predicted, targets[..., start:end])
            net.backward(grad, verbose=verbose)
            if not isinstance(regularizer, bool):
                net.backward_regularizer(regularizer.grad_func)
            optimizer.step(net)
        if validation:
            validator.validate(net, loss)
        print(epoch, epoch_loss)
        epoch_losses.append(epoch_loss)

    if validation:
        plt.plot(np.linspace(0, num_epochs, num_epochs // validator.validation_freq),
                 validator.v_errors, label="Validation")
        plt.plot(np.linspace(0, num_epochs, num_epochs), epoch_losses, label="Training")
        plt.legend()
def train_GRAM(seqFile='seqFile.txt',
               labelFile='labelFile.txt',
               treeFile='tree.txt',
               embFile='embFile.txt',
               outFile='out.txt',
               inputDimSize=100,
               numAncestors=100,
               embDimSize=100,
               hiddenDimSize=200,
               attentionDimSize=200,
               max_epochs=100,
               L2=0.,
               numClass=26679,
               batchSize=100,
               dropoutRate=0.5,
               logEps=1e-8,
               verbose=True,
               ignore_level=0):
    options = locals().copy()

    # leavesList and ancestorsList carry the ancestor (category) information of every disease code
    leavesList = []
    ancestorsList = []
    for i in range(5, 0, -1):
        leaves, ancestors = build_tree(treeFile + '.level' + str(i) + '.pk')
        leavesList.append(leaves)
        ancestorsList.append(ancestors)

    print('Building the model ... ')
    gram = GRAM(inputDimSize, numAncestors, embDimSize, hiddenDimSize,
                attentionDimSize, numClass, dropoutRate, embFile)
    # if torch.cuda.device_count() > 1:
    #     print("Let's use", torch.cuda.device_count(), "GPUs!")
    #     # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
    #     gram = nn.DataParallel(gram)
    gram.to(device)
    # gram.train()
    print(list(gram.state_dict()))

    loss_fn = CrossEntropy()
    loss_fn.to(device)

    print('Constructing the optimizer ... ')
    optimizer = torch.optim.Adadelta(gram.parameters(), lr=1, weight_decay=L2)

    print('Loading data ... ')
    trainSet, validSet, testSet = load_data(seqFile, labelFile,
                                            test_ratio=0.15, valid_ratio=0.1)
    print('Data length:', len(trainSet[0]))
    n_batches = int(np.ceil(float(len(trainSet[0])) / float(batchSize)))
    val_batches = int(np.ceil(float(len(validSet[0])) / float(batchSize)))
    test_batches = int(np.ceil(float(len(testSet[0])) / float(batchSize)))

    print('Optimization start !!')
    # set up tensorboard writers
    loss_writer = SummaryWriter('{}/{}'.format(outFile + 'TbLog', 'Loss'))
    acc_writer = SummaryWriter('{}/{}'.format(outFile + 'TbLog', 'Acc'))
    # test_writer = SummaryWriter('{}/{}'.format(outFile + 'TbLog', 'Test'))

    logFile = outFile + '.log'
    bestTrainCost = 0.0
    bestValidCost = 100000.0
    bestTestCost = 0.0
    bestTrainAcc = 0.0
    bestValidAcc = 0.0
    bestTestAcc = 0.0
    epochDuration = 0.0
    bestEpoch = 0

    # set the random seed for test
    random.seed(seed)

    # with torchsnooper.snoop():
    for epoch in range(max_epochs):
        iteration = 0
        cost_vec = []
        acc_vec = []
        startTime = time.time()
        gram.train()
        for index in random.sample(range(n_batches), n_batches):
            optimizer.zero_grad()
            batchX = trainSet[0][index * batchSize:(index + 1) * batchSize]
            batchY = trainSet[1][index * batchSize:(index + 1) * batchSize]
            x, y, mask, lengths = padMatrix(batchX, batchY, options)
            x = torch.from_numpy(x).to(device).float()
            mask = torch.from_numpy(mask).to(device).float()
            # print('x,', x.size())
            y_hat = gram(x, mask, leavesList, ancestorsList)
            # print('y_hat', y_hat.size())
            y = torch.from_numpy(y).float().to(device)
            # print('y', y.size())
            lengths = torch.from_numpy(lengths).float().to(device)
            # print(y.size(), y_hat.size())
            loss, acc = loss_fn(y_hat, y, lengths)
            loss.backward()
            optimizer.step()
            if iteration % 100 == 0 and verbose:
                buf = 'Epoch:%d, Iteration:%d/%d, Train_Cost:%f, Train_Acc:%f' % (
                    epoch, iteration, n_batches, loss, acc)
                print(buf)
            cost_vec.append(loss.item())
            acc_vec.append(acc)
            iteration += 1
        duration_optimize = time.time() - startTime

        gram.eval()
        cost = np.mean(cost_vec)
        acc = np.mean(acc_vec)
        startTime = time.time()
        with torch.no_grad():
            # calculate the loss and acc on the validation dataset
            cost_vec = []
            acc_vec = []
            for index in range(val_batches):
                validX = validSet[0][index * batchSize:(index + 1) * batchSize]
                validY = validSet[1][index * batchSize:(index + 1) * batchSize]
                val_x, val_y, mask, lengths = padMatrix(validX, validY, options)
                val_x = torch.from_numpy(val_x).float().to(device)
                mask = torch.from_numpy(mask).float().to(device)
                val_y_hat = gram(val_x, mask, leavesList, ancestorsList)
                val_y = torch.from_numpy(val_y).float().to(device)
                lengths = torch.from_numpy(lengths).float().to(device)
                valid_cost, valid_acc = loss_fn(val_y_hat, val_y, lengths)
                cost_vec.append(valid_cost.item())
                acc_vec.append(valid_acc)
            valid_cost = np.mean(cost_vec)
            valid_acc = np.mean(acc_vec)

            # calculate the loss and acc on the test dataset
            cost_vec = []
            acc_vec = []
            for index in range(test_batches):
                testX = testSet[0][index * batchSize:(index + 1) * batchSize]
                testY = testSet[1][index * batchSize:(index + 1) * batchSize]
                test_x, test_y, mask, lengths = padMatrix(testX, testY, options)
                test_x = torch.from_numpy(test_x).float().to(device)
                mask = torch.from_numpy(mask).float().to(device)
                test_y_hat = gram(test_x, mask, leavesList, ancestorsList)
                test_y = torch.from_numpy(test_y).float().to(device)
                lengths = torch.from_numpy(lengths).float().to(device)
                test_cost, test_acc = loss_fn(test_y_hat, test_y, lengths)
                cost_vec.append(test_cost.item())
                acc_vec.append(test_acc)
            test_cost = np.mean(cost_vec)
            test_acc = np.mean(acc_vec)

        # record the loss and acc
        loss_writer.add_scalar('Train Loss', cost, epoch)
        loss_writer.add_scalar('Test Loss', test_cost, epoch)
        loss_writer.add_scalar('Valid Loss', valid_cost, epoch)
        acc_writer.add_scalar('Train Acc', acc, epoch)
        acc_writer.add_scalar('Test Acc', test_acc, epoch)
        acc_writer.add_scalar('Valid Acc', valid_acc, epoch)

        # print the loss
        duration_metric = time.time() - startTime
        buf = 'Epoch:%d, Train_Cost:%f, Valid_Cost:%f, Test_Cost:%f' % (
            epoch, cost, valid_cost, test_cost)
        print(buf)
        print2file(buf, logFile)
        buf = 'Train_Acc:%f, Valid_Acc:%f, Test_Acc:%f' % (acc, valid_acc, test_acc)
        print(buf)
        print2file(buf, logFile)
        buf = 'Optimize_Duration:%f, Metric_Duration:%f' % (duration_optimize, duration_metric)
        print(buf)
        print2file(buf, logFile)

        # save the best model
        if valid_cost < bestValidCost:
            bestValidCost = valid_cost
            bestTestCost = test_cost
            bestTrainCost = cost
            bestEpoch = epoch
            bestTrainAcc = acc
            bestValidAcc = valid_acc
            bestTestAcc = test_acc
            torch.save(gram.state_dict(), outFile + f'.{epoch}')

    buf = 'Best Epoch:%d, Avg_Duration:%f, Train_Cost:%f, Valid_Cost:%f, Test_Cost:%f' % (
        bestEpoch, epochDuration / max_epochs, bestTrainCost, bestValidCost, bestTestCost)
    print(buf)
    print2file(buf, logFile)
    buf = 'Train_Acc:%f, Valid_Acc:%f, Test_Acc:%f' % (
        bestTrainAcc, bestValidAcc, bestTestAcc)
    print(buf)
    print2file(buf, logFile)
def test_whole_data(seqFile='seqFile.txt',
                    labelFile='labelFile.txt',
                    treeFile='tree.txt',
                    embFile='embFile.txt',
                    outFile='out.txt',
                    inputDimSize=100,
                    numAncestors=100,
                    embDimSize=100,
                    hiddenDimSize=200,
                    attentionDimSize=200,
                    max_epochs=100,
                    L2=0.,
                    numClass=26679,
                    batchSize=100,
                    dropoutRate=0.5,
                    logEps=1e-8,
                    verbose=True,
                    ignore_level=0):
    options = locals().copy()

    # get the best model through the log
    # with open(outFile + '.log') as f:
    #     line = f.readlines()[-2]
    #     best_epoch = line.split(',')[0].split(':')[1]
    #     print('Best parameters occur epoch:', best_epoch)

    leavesList = []
    ancestorsList = []
    for i in range(5, 0, -1):
        leaves, ancestors = build_tree(treeFile + '.level' + str(i) + '.pk')
        leavesList.append(leaves)
        ancestorsList.append(ancestors)

    print('Loading the model ... ')
    # create the model
    gram = GRAM(inputDimSize, numAncestors, embDimSize, hiddenDimSize,
                attentionDimSize, numClass, dropoutRate, '').to(device)
    # read the best parameters
    # gram.load_state_dict(torch.load(outFile + '.' + best_epoch))
    gram.load_state_dict(torch.load(embFile))

    loss_fn = CrossEntropy()
    loss_fn.to(device)

    print('Loading the data ... ')
    dataset, _, _ = load_data(seqFile, labelFile, test_ratio=0, valid_ratio=0)
    typeFile = labelFile.split('.seqs')[0] + '.types'
    types = pickle.load(open(typeFile, 'rb'))
    rTypes = dict([(v, u) for u, v in types.items()])
    print('Data length:', len(dataset[0]))
    n_batches = int(np.ceil(float(len(dataset[0])) / float(batchSize)))

    print('Calculating the result ...')
    cost_vec = []
    acc_vec = []
    num_for_each_disease = defaultdict(float)
    TP_for_each_disease = defaultdict(float)
    rank_for_each_disease = defaultdict(float)
    for index in range(n_batches):
        batchX = dataset[0][index * batchSize:(index + 1) * batchSize]
        batchY = dataset[1][index * batchSize:(index + 1) * batchSize]
        x, y, mask, lengths = padMatrix(batchX, batchY, options)
        x = torch.from_numpy(x).to(device).float()
        mask = torch.from_numpy(mask).to(device).float()
        y_hat = gram(x, mask, leavesList, ancestorsList)
        y = torch.from_numpy(y).float().to(device)
        lengths = torch.from_numpy(lengths).float().to(device)
        loss, acc = loss_fn(y_hat, y, lengths)
        cost_vec.append(loss.item())
        acc_vec.append(acc)

        # Calculating the accuracy for each disease
        y_sorted, indices = torch.sort(y_hat, dim=2, descending=True)
        # indices = indices[:, :, :20]
        for i, j, k in torch.nonzero(y, as_tuple=False):
            k = k.item()
            num_for_each_disease[k] += 1
            # search the rank for k
            m = torch.nonzero(indices[i][j] == k, as_tuple=False).view(-1).item()
            # calculate the top-20 accuracy
            if m < 20:
                TP_for_each_disease[k] += 1
            rank_for_each_disease[k] += (m + 1)

    cost = np.mean(cost_vec)
    acc = np.mean(acc_vec)
    print('Whole data average loss:%f, average accuracy@20:%f,' % (cost, acc))

    print('Recording the accuracy for each disease ...')
    acc_out_file = outFile + '_all_acc.txt'
    # sort the diseases by count
    num_for_each_disease = OrderedDict(
        sorted(num_for_each_disease.items(), key=lambda item: item[1], reverse=True))
    for disease in num_for_each_disease.keys():
        d_acc = TP_for_each_disease[disease] / num_for_each_disease[disease]
        avg_rank = rank_for_each_disease[disease] / num_for_each_disease[disease]
        buf = 'TypeNum:%d, icd_code:%s, Count:%d, avg_rank:%f, Accuracy:%f' % \
              (disease, rTypes[disease], num_for_each_disease[disease], avg_rank, d_acc)
        print2file(buf, acc_out_file)
    print('Done!')