Example #1
0
def val(val_loader, model, device, flip=False):
    '''
    Evaluate `model` on `val_loader` and return the mean per-batch F1 score.

    :param val_loader: dataloader yielding (datas, ages, sexs, labels) batches
    :param model: trained model (checkpoint) to evaluate
    :param device: torch device to run on
    :param flip: if True, also run a time-flipped copy of the input and
        average the two outputs (test-time augmentation)
    :return: float
    '''
    model.eval()
    with torch.no_grad():
        score = []
        for datas, ages, sexs, labels in val_loader:
            datas = datas.to(device)
            ages = ages.to(device)
            sexs = sexs.to(device)
            labels = labels.to(device)
            outputs = model(datas, ages, sexs)
            if flip:
                datas_flip = datas.flip([2])
                # BUG FIX: the flipped forward pass must receive the same
                # auxiliary inputs (ages, sexs) as the normal pass above —
                # the model takes three arguments.
                outputs_flip = model(datas_flip, ages, sexs)
                outputs_mean = torch.add(outputs, outputs_flip) / 2
                x = calc_f1(labels, outputs_mean)
            else:
                x = calc_f1(labels, outputs)
            score.append(x)
        test_acc = sum(score) / len(score)
    return test_acc
Example #2
0
 def train_epoch(self, model, optimizer, criterion):
     """Run one training epoch and return (mean loss, mean F1) over batches."""
     model.train()
     f1_sum = 0
     loss_sum = 0
     batches = 0
     for inputs, target in tqdm(self.trn_dataloader):
         inputs, target = inputs.to(self.device), target.to(self.device)
         # reset gradients accumulated by the previous step
         optimizer.zero_grad()
         output = model(inputs)
         loss = criterion(output, target)
         loss.backward()
         optimizer.step()
         loss_sum += loss.item()
         batches += 1
         batch_f1 = utils.calc_f1(target, torch.sigmoid(output))
         f1_sum += batch_f1
         # periodic progress line
         if batches % self.show_interval == 0:
             print("%d,loss:%.3e f1:%.3f" % (batches, loss.item(), batch_f1))
     return loss_sum / batches, f1_sum / batches
Example #3
0
def evaluate(sess, model, val_features_batches, val_support_batches,
             y_val_batches, val_mask_batches, val_data, placeholders):
  """Evaluate a GCN model over pre-partitioned validation batches.

  Loss and accuracy are averaged weighted by the number of masked nodes in
  each batch; micro/macro F1 are computed over all masked predictions.
  """
  pred_chunks, lab_chunks = [], []
  loss_sum, acc_sum = 0, 0

  for batch_id in range(len(val_features_batches)):
    features_b = val_features_batches[batch_id]
    support_b = val_support_batches[batch_id]
    y_val_b = y_val_batches[batch_id]
    val_mask_b = val_mask_batches[batch_id]
    num_data_b = np.sum(val_mask_b)
    if num_data_b == 0:
      continue  # batch contains no validation nodes
    feed_dict = utils.construct_feed_dict(features_b, support_b, y_val_b,
                                          val_mask_b, placeholders)
    outs = sess.run([model.loss, model.accuracy, model.outputs],
                    feed_dict=feed_dict)

    pred_chunks.append(outs[2][val_mask_b])
    lab_chunks.append(y_val_b[val_mask_b])
    loss_sum += outs[0] * num_data_b
    acc_sum += outs[1] * num_data_b

  total_pred = np.vstack(pred_chunks)
  total_lab = np.vstack(lab_chunks)
  loss = loss_sum / len(val_data)
  acc = acc_sum / len(val_data)

  micro, macro = utils.calc_f1(total_pred, total_lab, FLAGS.multilabel)
  return loss, acc, micro, macro
Example #4
0
def test(model, criterion, features, adj, labels, mask, device):
    """Single-graph test pass; returns (loss, accuracy, micro-F1, macro-F1).

    `adj` is an (indices, values, shape) triple converted to a torch sparse
    tensor below.  Relies on the module-level `args.multilabel` flag.
    """
    features = torch.FloatTensor(features).to(device)
    labels = torch.LongTensor(labels).to(device)
    total_correct = 0
    # NOTE(review): on CPU the inputs apparently arrive wrapped in an extra
    # outer list/batch dimension and are unwrapped here — confirm vs caller.
    if device == torch.device("cpu"):
        adj = adj[0]
        features = features[0]
        labels = labels[0]
        mask = mask[0]

    # Adj -> Torch Sparse Tensor
    i = torch.LongTensor(adj[0])  # indices
    v = torch.FloatTensor(adj[1])  # values
    adj = torch.sparse.FloatTensor(i.t(), v, adj[2]).to(device)
    model.to(device)
    output = model(adj, features)
    if args.multilabel:
        # multi-label: sigmoid threshold at 0.5; a node counts as correct
        # only when every one of its labels matches (.all(dim=1))
        loss = criterion(output, labels.type_as(output))
        pred = torch.sigmoid(output) >= 0.5
        total_correct += torch.eq(pred.squeeze(), labels.squeeze()).all(dim=1).sum().item()
    else:
        # single-label: argmax comparison restricted to the masked nodes
        loss = criterion(output, torch.max(labels, 1)[1])
        pred = output[mask].argmax(dim=1, keepdim=True)
        labels = torch.max(labels[mask], 1)[1]
        total_correct += torch.eq(pred.squeeze(), labels.squeeze()).sum().item()
    # NOTE(review): in the multilabel branch pred/labels span all nodes while
    # the denominator is sum(mask) — verify that mismatch is intentional.
    acc = total_correct / sum(mask)
    micro, macro = utils.calc_f1(pred.squeeze(), labels.squeeze(), args.multilabel)
    return loss.item(), acc, micro, macro
Example #5
0
def val(val_loader, model_list, model_weight, device, flip=False):
    '''
    Weighted-ensemble model evaluation.

    :param: val_loader: dataloader, model_list: list, model_weight: list, device:device, flip: bool
    :return: float
    '''
    model_weight = torch.Tensor(model_weight).to(device)
    with torch.no_grad():
        score_list = []
        for datas, ages, sexs, labels in tqdm(val_loader):
            datas, ages = datas.to(device), ages.to(device)
            sexs, labels = sexs.to(device), labels.to(device)
            weighted_outputs = []
            for idx, net in enumerate(model_list):
                out = net(datas, ages, sexs)
                if flip:
                    # test-time augmentation: average with the flipped input
                    flipped = net(datas.flip([2]), ages, sexs)
                    out = torch.add(out, flipped) / 2
                out *= model_weight[idx]
                weighted_outputs.append(out)
            # weighted sum over the ensemble
            ensemble_out = torch.sum(torch.stack(weighted_outputs), dim=0)
            score_list.append(calc_f1(labels, ensemble_out))
        test_acc = sum(score_list) / len(score_list)
        print('Test Accuracy: {}/{}={} %'.format(sum(score_list),
                                                 len(score_list), test_acc),
              flush=True)
    return test_acc
Example #6
0
def train_epoch(model,
                optimizer,
                criterion,
                train_dataloader,
                show_interval=10):
    """One training epoch; returns (average loss, average F1)."""
    model.train()
    running_f1 = 0
    running_loss = 0
    n_iters = 0
    for inputs, fr, target in train_dataloader:
        inputs, target, fr = inputs.to(device), target.to(device), fr.to(device)
        # reset gradients from the previous step
        optimizer.zero_grad()
        # the forward signature depends on the configured model kind
        if config.kind == 1:
            output = model(inputs, fr)
        elif config.kind == 2:
            output, _ = model(inputs)
        else:
            output = model(inputs)
        if config.kind == 2 and config.top4_DeepNN_tag:
            # restrict scoring to the top-4 tag columns
            output = output[:, config.top4_tag_list]
            target = target[:, config.top4_tag_list]
        # BCEWithLogitsLoss: applies sigmoid to the logits before BCE
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        n_iters += 1
        output = torch.sigmoid(output)
        f1 = utils.calc_f1(target, output)
        running_f1 += f1
        if n_iters % show_interval == 0:
            print("%d,loss:%.3e f1:%.3f" % (n_iters, loss.item(), f1))
    return running_loss / n_iters, running_f1 / n_iters
Example #7
0
def train_beat_epoch(model,
                     optimizer,
                     criterion,
                     train_dataloader,
                     show_interval=10):
    """One training epoch for the beat-input model; returns (avg loss, avg F1)."""
    model.train()
    running_f1, running_loss, n_iters = 0, 0, 0
    for inputs, beat, target in train_dataloader:
        inputs, beat, target = (inputs.to(device), beat.to(device),
                                target.to(device))
        # clear gradients from the previous step
        optimizer.zero_grad()
        output = model(inputs, beat)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        n_iters += 1
        batch_f1 = utils.calc_f1(target, torch.sigmoid(output))
        running_f1 += batch_f1
        if n_iters % show_interval == 0:
            print("%d,loss:%.3e f1:%.3f" % (n_iters, loss.item(), batch_f1))
    return running_loss / n_iters, running_f1 / n_iters
Example #8
0
def train_epoch(model,
                optimizer,
                criterion,
                train_dataloader,
                show_interval=10):
    """One training epoch, optionally fusing sex/age inputs per config.fuse.

    Returns (average loss, average F1).  F1 — the actual evaluation metric —
    rather than loss is what downstream model selection uses.
    """
    model.train()
    f1_meter, loss_meter, it_count = 0, 0, 0
    for inputs, target in train_dataloader:
        target, sex, age = splitTarget(target)
        inputs = inputs.to(device)
        target, sex, age = target.to(device), sex.to(device), age.to(device)
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward
        if config.fuse == 'False':
            output = model(inputs)
        elif config.fuse == 'True':
            output = model(inputs, sex, age)
        else:
            # FIX: corrected the "choise" typo in the user-facing message
            raise ValueError('Not supported choice for \'config.fuse\' item!')
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        loss_meter += loss.item()
        it_count += 1
        f1 = utils.calc_f1(target, torch.sigmoid(output))
        f1_meter += f1
        if it_count != 0 and it_count % show_interval == 0:
            print("%d,loss:%.3e f1:%.3f" % (it_count, loss.item(), f1))
    return loss_meter / it_count, f1_meter / it_count  # average loss and average f1
Example #9
0
def train_epoch(model,
                optimizer,
                criterion,
                train_dataloader,
                epoch,
                lr,
                best_f1,
                show_interval=10):
    """One training epoch with a tqdm progress bar; returns (avg loss, avg F1)."""
    model.train()
    running_f1, running_loss, n_iters = 0, 0, 0
    progress = tqdm.tqdm(total=len(train_dataloader) * config.batch_size)
    progress.set_description('epoch %d, lr %.4f, best_f:%.4f' % (epoch, lr, best_f1))

    for inputs, target in train_dataloader:
        inputs, target = inputs.to(device), target.to(device)
        # reset gradients from the previous step
        optimizer.zero_grad()
        output = model(inputs)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        n_iters += 1
        f1 = utils.calc_f1(target, torch.sigmoid(output))
        running_f1 += f1
        progress.update(config.batch_size)
        progress.set_postfix(loss="%.4f   f1:%.3f" % (loss.item(), f1))
    progress.close()
    return running_loss / n_iters, running_f1 / n_iters
Example #10
0
def val_epoch(model, criterion, val_dataloader, threshold=0.5):
    """Validate over the whole set at once; returns (loss, F1)."""
    model.eval()
    with torch.no_grad():
        # accumulate every batch's labels/logits on the active device so
        # torch.cat sees matching tensor types
        if torch.cuda.is_available():
            label_all, pred_all = torch.Tensor().cuda(), torch.Tensor().cuda()
        else:
            label_all, pred_all = torch.Tensor(), torch.Tensor()
        progress = tqdm.tqdm(total=len(val_dataloader) * config.batch_size)
        for inputs, target in val_dataloader:
            inputs, target = inputs.to(device), target.to(device)
            output = model(inputs)
            label_all = torch.cat((label_all, target), 0)
            pred_all = torch.cat((pred_all, output), 0)
            progress.update(config.batch_size)
        progress.close()
        # one loss / one F1 over the concatenated epoch
        loss_meter = criterion(pred_all, label_all).item()
        f1 = utils.calc_f1(label_all, torch.sigmoid(pred_all), threshold)
    return loss_meter, f1
Example #11
0
def evaluate(model, args, val_features_batches, val_support_batches,
             y_val_batches, val_mask_batches, val_data, pid="None"):
    """evaluate GCN model.

    Runs slave_run_evaluate on each pre-partitioned validation batch and
    aggregates loss (node-weighted), accuracy (batch-averaged), and
    micro/macro F1 over the masked predictions.
    """
    total_pred = []
    total_lab = []
    total_out = []
    total_loss = 0
    total_acc = 0

    num_batches = len(val_features_batches)
    for i in range(num_batches):

        features_b = val_features_batches[i]
        support_b = val_support_batches[i]
        y_val_b = y_val_batches[i]
        val_mask_b = val_mask_batches[i]
        num_data_b = np.sum(val_mask_b)

        if num_data_b == 0:
            # batch contains no validation nodes
            continue
        else:
            # keys follow the training-package layout expected by the slave
            package = {
                "features": features_b,
                "support": support_b,
                "y_train": y_val_b,
                "train_mask": val_mask_b
            }

            out_dict = slave_run_evaluate(model, args, package, pid=pid)

        total_pred.append(out_dict["pred"].cpu().detach().numpy()[val_mask_b])
        total_out.append(out_dict["out"].cpu().detach().numpy()[val_mask_b])
        total_lab.append(y_val_b[val_mask_b])

        # total_pred.append(out_dict["pred"].cpu().tolist())
        # total_out.append(out_dict["out"].cpu().tolist())
        # total_lab.append(y_val_b[val_mask_b])

        # total_pred.append(out_dict["pred"][val_mask_b].cpu().tolist())
        # total_out.append(out_dict["out"][val_mask_b].cpu().tolist())
        # total_lab.append(y_val_b[val_mask_b])

        total_loss += out_dict["loss"] * num_data_b
        total_acc += out_dict["acc"] #* num_data_b
    # NOTE(review): acc divides by num_batches although empty batches are
    # skipped above — if any batch is skipped the mean is biased; confirm.

    total_pred = np.vstack(total_pred)
    total_out = np.vstack(total_out)
    total_lab = np.vstack(total_lab)
    loss = total_loss / len(val_data)
    acc = total_acc / num_batches

    micro, macro = calc_f1(total_pred, total_lab, args.multilabel)

    return loss, acc, micro, macro
Example #12
0
def ada_boost(x_train, y_train, x_test, y_test):
    """Train AdaBoost ensembles of 1..14 trees and plot accuracy/F1 curves."""
    print('#ADA Boost Testing\n\n')
    shift_y_train = shiftData(y_train)
    shift_y_test = shiftData(y_test)

    train_accuracy, test_accuracy = [], []
    train_f1, test_f1 = [], []
    parameter = []

    for num_trees in range(1, 15):
        print("Testing with ", num_trees, " trees")
        booster = AdaBoostClassifier(num_trees, 1)
        booster.train(x_train, shift_y_train)

        preds_train = booster.predict(x_train)
        preds_test = booster.predict(x_test)
        parameter.append(num_trees)
        train_accuracy.append(accuracy_score(preds_train, shift_y_train))
        test_accuracy.append(accuracy_score(preds_test, shift_y_test))
        train_f1.append(calc_f1(preds_train, shift_y_train))
        test_f1.append(calc_f1(preds_test, shift_y_test))

    fig_acc = plt.figure(1)
    plt.plot(parameter, train_accuracy)
    plt.plot(parameter, test_accuracy)
    plt.title("ADA Boost Accuracy vs Number of Trees")
    plt.ylabel("Accuracy")
    plt.xlabel("Number of Trees")
    plt.legend(['Training Accuracy', 'Testing Accuracy'])
    fig_acc.show()

    fig_f1 = plt.figure(2)
    plt.plot(parameter, train_f1)
    plt.plot(parameter, test_f1)
    plt.title("ADA Boost F1 vs Number of Trees")
    plt.ylabel("F1")
    plt.xlabel("Number of Trees")
    plt.legend(['Training F1', 'Testing F1'])
    plt.show()
Example #13
0
def validate(val_loader, model):
    """Evaluation pass reporting micro-F1, precision@k and ndcg@k (k=1,3,5)."""
    data_time = AverageMeter()
    microF1 = AverageMeter()
    test_p1, test_p3, test_p5 = 0, 0, 0
    test_ndcg1, test_ndcg3, test_ndcg5 = 0, 0, 0
    model.eval()
    with torch.no_grad():
        end = time.time()
        for batch_in, target in val_loader:
            data_time.update(time.time() - end)

            batch_in = batch_in.cuda()
            target = target.cuda()
            output = model(batch_in)
            # metrics are computed on CPU
            target = target.data.cpu().float()
            output = output.data.cpu()

            top5 = output.topk(k=5)[1].numpy()
            p1, p3, p5 = precision_k(top5, target.numpy(), k=[1, 3, 5])
            test_p1 += p1
            test_p3 += p3
            test_p5 += p5

            n1, n3, n5 = ndcg_k(top5, target.numpy(), k=[1, 3, 5])
            test_ndcg1 += n1
            test_ndcg3 += n3
            test_ndcg5 += n5

            # binarize at 0.5 before computing F1
            output[output > 0.5] = 1
            output[output <= 0.5] = 0
            micro, macro = calc_f1(target, output)
            microF1.update(micro.item(), batch_in.size(0))

        np.set_printoptions(formatter={'float': '{: 0.4}'.format})
        print('the result of micro: \n', microF1.avg)
        n_batches = len(val_loader)
        test_p1 /= n_batches
        test_p3 /= n_batches
        test_p5 /= n_batches

        test_ndcg1 /= n_batches
        test_ndcg3 /= n_batches
        test_ndcg5 /= n_batches

        print("precision@1 : %.4f , precision@3 : %.4f , precision@5 : %.4f " %
              (test_p1, test_p3, test_p5))
        print("ndcg@1 : %.4f , ndcg@3 : %.4f , ndcg@5 : %.4f " %
              (test_ndcg1, test_ndcg3, test_ndcg5))
        return (microF1.avg)
Example #14
0
def random_forsest_random_seed(x_train, y_train, x_test, y_test, count):
    """Re-train the fixed random forest `count` times and plot accuracy/F1 per run."""
    print('#Random Forest Number of Trees\n\n')
    accuracy_training, accuracy_testing = [], []
    f1_training, f1_testing = [], []
    features = []
    for run in range(count):
        forest = RandomForestClassifier(max_depth=7,
                                        max_features=25,
                                        n_trees=151)
        forest.fit(x_train, y_train)
        preds_train = forest.predict(x_train)
        preds_test = forest.predict(x_test)
        features.append(run)
        accuracy_training.append(accuracy_score(preds_train, y_train))
        accuracy_testing.append(accuracy_score(preds_test, y_test))
        f1_training.append(calc_f1(preds_train, y_train))
        f1_testing.append(calc_f1(preds_test, y_test))

    fig_acc = plt.figure(1)
    plt.plot(features, accuracy_training)
    plt.plot(features, accuracy_testing)
    plt.title("Accuracy vs Seed")
    plt.ylabel("Accuracy")
    plt.xlabel("Seed Index")
    plt.legend(['Training Accuracy', 'Testing Accuracy'])
    fig_acc.show()

    fig_f1 = plt.figure(2)
    plt.plot(features, f1_training)
    plt.plot(features, f1_testing)
    plt.title("F1 vs Seed")
    plt.ylabel("F1")
    plt.xlabel("Seed Index")
    plt.legend(['Training F1', 'Testing F1'])
    plt.show()
Example #15
0
def random_forest_testing_max_features(x_train, y_train, x_test, y_test):
    """Sweep the forest's max_features setting and plot accuracy/F1 curves."""
    print('#Random Forest Number of Trees\n\n')
    accuracy_training, accuracy_testing = [], []
    f1_training, f1_testing = [], []
    features = []
    for max_features in [1, 2, 5, 8, 10, 20, 25, 35, 50]:
        forest = RandomForestClassifier(max_depth=7,
                                        max_features=max_features,
                                        n_trees=50)
        forest.fit(x_train, y_train)
        preds_train = forest.predict(x_train)
        preds_test = forest.predict(x_test)
        features.append(max_features)
        accuracy_training.append(accuracy_score(preds_train, y_train))
        accuracy_testing.append(accuracy_score(preds_test, y_test))
        f1_training.append(calc_f1(preds_train, y_train))
        f1_testing.append(calc_f1(preds_test, y_test))

    fig_acc = plt.figure(1)
    plt.plot(features, accuracy_training)
    plt.plot(features, accuracy_testing)
    plt.title("Accuracy vs Max Features")
    plt.ylabel("Accuracy")
    plt.xlabel("Max Features")
    plt.legend(['Training Accuracy', 'Testing Accuracy'])
    fig_acc.show()

    fig_f1 = plt.figure(2)
    plt.plot(features, f1_training)
    plt.plot(features, f1_testing)
    plt.title("F1 vs Max Features")
    plt.ylabel("F1")
    plt.xlabel("Max Features")
    plt.legend(['Training F1', 'Testing F1'])
    plt.show()
Example #16
0
def evaluate(model, criterion, features_batches, support_batches, labels_batches, mask_batches, nodes, device):
    """Evaluate the model over pre-partitioned batches.

    Each batch's support is an (indices, values, shape) triple converted to a
    torch sparse adjacency tensor.  Uses the module-level ``args.multilabel``
    flag to pick the loss/accuracy scheme.

    :return: (mean batch loss, accuracy over masked nodes, micro-F1, macro-F1)
    """
    total_pred = []
    total_lab = []
    total_loss = 0
    total_acc = 0
    total_nodes = 0

    num_batches = len(features_batches)
    for batch_id in range(num_batches):
        features_b = features_batches[batch_id]
        support_b = support_batches[batch_id]
        label_b = labels_batches[batch_id]
        mask_b = mask_batches[batch_id]
        num_data_b = np.sum(mask_b)
        if num_data_b == 0:
            # no evaluated nodes in this batch
            continue
        features = torch.from_numpy(features_b).to(device)
        labels = torch.LongTensor(label_b).to(device)
        # FIX: the indices tensor was previously bound to `i`, clobbering the
        # loop index variable; use distinct names to avoid latent bugs.
        idx = torch.LongTensor(support_b[0])  # indices
        val = torch.FloatTensor(support_b[1])  # values
        adj = torch.sparse.FloatTensor(idx.t(), val, support_b[2]).to(device)
        model.to(device)
        output = model(adj, features)
        if args.multilabel:
            # multi-label: logits > 0 <=> sigmoid > 0.5; a node is correct
            # only when every label matches (.all(dim=1))
            loss = criterion(output[mask_b], labels[mask_b].type_as(output))
            pred = output[mask_b]
            pred[pred > 0] = 1
            pred[pred <= 0] = 0
            labels = labels[mask_b]
            total_acc += torch.eq(pred, labels).all(dim=1).sum().item()
        else:
            # single-label: argmax comparison on the masked nodes
            loss = criterion(output[mask_b], torch.max(labels[mask_b], 1)[1])
            pred = output[mask_b].argmax(dim=1, keepdim=True)
            labels = torch.max(labels[mask_b], 1)[1]
            total_acc += torch.eq(pred.squeeze(), labels.squeeze()).sum().item()
        total_nodes += num_data_b
        total_pred.append(pred)
        total_lab.append(labels)
        total_loss += loss.item()

    total_pred = torch.cat(total_pred).cpu().squeeze().numpy()
    total_lab = torch.cat(total_lab).cpu().squeeze().numpy()
    loss = total_loss / num_batches
    acc = total_acc / total_nodes
    micro, macro = utils.calc_f1(total_pred, total_lab, args.multilabel)
    return loss, acc, micro, macro
Example #17
0
def val_epoch(model, criterion, val_dataloader, threshold=0.5):
    """Validation pass; returns (average loss, average F1)."""
    model.eval()
    total_f1, total_loss, n_batches = 0, 0, 0
    with torch.no_grad():
        for inputs, target in val_dataloader:
            inputs, target = inputs.to(device), target.to(device)
            output = model(inputs)
            total_loss += criterion(output, target).item()
            n_batches += 1
            # F1 on sigmoid probabilities, thresholded inside calc_f1
            total_f1 += utils.calc_f1(target, torch.sigmoid(output), threshold)
    return total_loss / n_batches, total_f1 / n_batches
Example #18
0
def decision_tree_testing_depth(x_train, y_train, x_test, y_test, min, max):
    """Sweep decision-tree depth over [min, max) and plot accuracy/F1 curves.

    Note: the `min`/`max` parameter names shadow builtins but are kept for
    interface compatibility with existing callers.
    """
    print('#Decision Tree Depth Testing\n\n')
    accuracyTrain = np.zeros(max - min)
    accuracyTest = np.zeros(max - min)
    f1Train = np.zeros(max - min)
    f1Test = np.zeros(max - min)
    depths = np.arange(min, max)
    index = 0
    for depth in depths:
        clf = DecisionTreeClassifier(max_depth=depth)
        clf.fit(x_train, y_train)
        preds_train = clf.predict(x_train)
        preds_test = clf.predict(x_test)
        accuracyTrain[index] = accuracy_score(preds_train, y_train)
        accuracyTest[index] = accuracy_score(preds_test, y_test)
        # BUG FIX: the train/test F1 assignments were swapped (f1Test was
        # scored on training predictions and vice versa); also removed a
        # redundant third predict(x_test) call.
        f1Train[index] = calc_f1(preds_train, y_train)
        f1Test[index] = calc_f1(preds_test, y_test)
        index += 1
    f1 = plt.figure(1)
    plt.plot(depths, accuracyTrain)
    plt.plot(depths, accuracyTest)
    plt.title("accuracy vs number of trees")
    plt.ylabel("Accuracy")
    plt.xlabel("Depth")
    plt.legend(['Training Accuracy', 'Testing Accuracy'])
    f1.show()

    f2 = plt.figure(2)
    plt.plot(depths, f1Train)
    plt.plot(depths, f1Test)
    plt.title("F1 vs number of trees")
    plt.ylabel("F1")
    plt.xlabel("Depth")
    plt.legend(['Training F1', 'Testing F1'])
    plt.show()
Example #19
0
def fine_tuning(train_loader, model, criterion, optimizer):
    """Fine-tune `model` for one epoch, tracking loss and micro/macro F1.

    Returns (average loss, average micro-F1, average macro-F1).
    Assumes CUDA is available (inputs are moved with .cuda()).
    """
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    microF1 = AverageMeter()
    macroF1 = AverageMeter()
    model.train()

    end = time.time()
    bar = Bar('Training', max=len(train_loader))
    for batch_idx, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        input = input.cuda()
        target = target.cuda()
        output = model(input)
        loss = criterion(output, target.float())
        # metrics are computed on CPU copies
        target = target.data.cpu().float()
        output = output.data.cpu()

        micro, macro = calc_f1(target, output)

        losses.update(loss.item(), input.size(0))
        microF1.update(micro.item(), input.size(0))
        macroF1.update(macro.item(), input.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        # NOTE(review): weight_norm() semantics come from the model class —
        # presumably renormalizes weights after each update; confirm.
        model.weight_norm()
        bar.suffix = '({batch}/{size}) Data: {data:.3f}s | Batch: {bt:.3f}s | Total: {total:} | ETA: {eta:} | Loss: {loss:.4f} | Micro-f1: {microF1: .4f} |Macro-f1: {macroF1: .4f}'.format(
            batch=batch_idx + 1,
            size=len(train_loader),
            data=data_time.val,
            bt=batch_time.val,
            total=bar.elapsed_td,
            eta=bar.eta_td,
            loss=losses.avg,
            microF1=microF1.avg,
            macroF1=macroF1.avg,
        )
        bar.next()
    bar.finish()
    return (losses.avg, microF1.avg, macroF1.avg)
Example #20
0
def val_epoch(model, criterion, val_dataloader, threshold=0.5):
    """Full validation pass that also appends accuracy stats to log.txt.

    Concatenates every batch's labels/logits, computes one loss and one F1
    over the whole set, and writes acc/TP/RP/PP counts to 'log.txt'.
    Returns (loss, f1, target, output) where output has sigmoid applied.
    Relies on module-level `device`, `args`, `utils`, `utils2`.
    """
    model.eval()
    # f1_meter/it_count kept for symmetry with the training loop; unused here
    f1_meter, loss_meter, it_count = 0, 0, 0
    with torch.no_grad():
        # accumulators live on GPU when available so torch.cat types match
        if torch.cuda.is_available():
            label_all = torch.Tensor().cuda()
            pred_all = torch.Tensor().cuda()
        else:
            label_all = torch.Tensor()
            pred_all = torch.Tensor()
#        tq = tqdm.tqdm(total=len(val_dataloader) * config.batch_size)
        for inputs, target in val_dataloader:
            inputs = inputs.to(device)
            target = target.to(device)
            # model returns (logits, extra); the extra output is ignored
            output, _ = model(inputs)
            it_count += 1
            label_all = torch.cat((label_all, target), 0)
            pred_all = torch.cat((pred_all, output), 0)


#            tq.update(config.batch_size)
#        tq.close()
        output = pred_all
        target = label_all
        loss = criterion(output, target)
        loss_meter = loss.item()

        output = torch.sigmoid(output)
        # pick the F1 implementation matching the model kind
        if args.model_kind == 1:
            f1 = utils.calc_f1(target, output, threshold)
        else:
            f1 = utils2.calc_f1(target, output, threshold)
        acc, true_positives, real_positives, predicted_positives = utils.calc_acc_f1(
            target, output, threshold)

        # append the summary counts to the run log
        fout = open('log.txt', 'a+', encoding='utf-8')
        fout.write('\n' + '*' * 20 + '\n')
        fout.write('acc:' + str(acc) + '\n')
        fout.write('true_positives:' + str(true_positives) + '\n')
        fout.write('real_positives:' + str(real_positives) + '\n')
        fout.write('predicted_positives:' + str(predicted_positives) + '\n')
        fout.close()

        # f1_meter += f1
    return loss_meter, f1, target, output
Example #21
0
def train_epoch(model, optimizer, criterion, train_dataloader, epoch, lr,
                best_f1, val_dataloader, model_save_dir, state, round_):
    """One training epoch with periodic mid-epoch validation/checkpointing.

    After `round_` epochs, every 127 iterations the model is validated and,
    when val F1 improves on `best_f1`, its weights are checkpointed via
    save_ckpt.  Returns (average loss, average F1, possibly-updated best_f1).
    Relies on module-level `device`, `args`, `utils`, `utils2`.
    """
    model.train()
    f1_meter, loss_meter, it_count = 0, 0, 0

    for i, (inputs, target) in enumerate(train_dataloader):
        inputs = inputs.to(device)
        target = target.to(device)
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward; the model's secondary output is ignored
        output, _ = model(inputs)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        loss_meter += loss.item()
        it_count += 1
        # NOTE(review): F1 is computed on raw logits here (no sigmoid),
        # unlike the validation path — confirm calc_f1 expects that.
        if args.model_kind == 1:
            f1 = utils.calc_f1(target, output, 0.5)
        else:
            f1 = utils2.calc_f1(target, output, 0.5)
        f1_meter += f1

        # mid-epoch validation every 127 iterations once warm-up is over
        if epoch > round_ and i % 127 == 126:
            val_loss, val_f1, _, _ = val_epoch(model, criterion,
                                               val_dataloader)
            if best_f1 < val_f1:
                best_f1 = val_f1
                state['state_dict'] = model.state_dict()
                save_ckpt(state, True, model_save_dir)
            print('#epoch:%02d  val_loss:%0.3e val_f1:%.3f' %
                  (epoch, val_loss, val_f1))
            # BUG FIX: val_epoch switches the model to eval mode; restore
            # training mode so dropout/batchnorm behave correctly for the
            # remainder of the epoch.
            model.train()

    return loss_meter / it_count, f1_meter / it_count, best_f1
def evaluate(sess, model, val_features_batches, val_support_batches,
             y_val_batches, val_mask_batches, val_data, placeholders,
             clusters_adj):
    """evaluate GCN model.

    Same batched evaluation as above, but additionally dumps the cluster
    adjacency, labels, and predictions under 'cluster/'.
    Returns (loss, accuracy, micro-F1, macro-F1).
    """
    total_pred = []
    total_lab = []
    total_loss = 0
    total_acc = 0

    num_batches = len(val_features_batches)
    for batch_id in range(num_batches):
        features_b = val_features_batches[batch_id]
        support_b = val_support_batches[batch_id]
        y_val_b = y_val_batches[batch_id]
        val_mask_b = val_mask_batches[batch_id]
        num_data_b = np.sum(val_mask_b)
        # NOTE(review): cluster_adj is selected but never used below —
        # presumably a leftover; confirm before removing.
        if clusters_adj is not None:
            cluster_adj = clusters_adj[batch_id]
        if num_data_b == 0:
            continue
        else:
            feed_dict = utils.construct_feed_dict(features_b, support_b,
                                                  y_val_b, val_mask_b,
                                                  placeholders)
            outs = sess.run([model.loss, model.accuracy, model.outputs],
                            feed_dict=feed_dict)

        total_pred.append(outs[2][val_mask_b])
        total_lab.append(y_val_b[val_mask_b])
        total_loss += outs[0] * num_data_b
        total_acc += outs[1] * num_data_b

    total_pred = np.vstack(total_pred)
    total_lab = np.vstack(total_lab)
    # import pdb; pdb.set_trace()
    # NOTE(review): sp.save_npz expects a single sparse matrix; clusters_adj
    # looks like a list of matrices here — confirm this call works as intended.
    sp.save_npz(f'cluster/clusters_adj', clusters_adj)
    np.save(f'cluster/cluster_y', total_lab)
    np.save(f'cluster/total_pred', total_pred)
    loss = total_loss / len(val_data)
    acc = total_acc / len(val_data)

    micro, macro = utils.calc_f1(total_pred, total_lab, FLAGS.multilabel)
    return loss, acc, micro, macro
Example #23
0
def val_epoch(model, criterion, val_dataloader, threshold=0.5):
    """Validation pass; returns averages of (loss, F1, accuracy, recall, precision)."""
    model.eval()
    f1_sum = acc_sum = recall_sum = prec_sum = loss_sum = 0
    n_batches = 0
    with torch.no_grad():
        for inputs, target in val_dataloader:
            inputs, target = inputs.to(device), target.to(device)
            output = model(inputs)
            loss_sum += criterion(output, target).item()
            n_batches += 1
            probs = torch.sigmoid(output)
            # all four metrics on the sigmoid probabilities
            f1_sum += utils.calc_f1(target, probs, threshold)
            acc_sum += utils.cal_accuracy_score(target, probs)
            recall_sum += utils.cal_recall_score(target, probs)
            prec_sum += utils.cal_percision_score(target, probs)
    return loss_sum / n_batches, f1_sum / n_batches, acc_sum / n_batches, recall_sum / n_batches, prec_sum / n_batches
Example #24
0
def val_epoch(model, criterion, val_dataloader, threshold=0.5):
    """Validation counterpart of the fuse-aware train_epoch; returns (avg loss, avg F1)."""
    model.eval()
    f1_meter, loss_meter, it_count = 0, 0, 0
    with torch.no_grad():
        for inputs, target in val_dataloader:
            inputs = inputs.to(device)
            target, sex, age = splitTarget(target)
            target, sex, age = target.to(device), sex.to(device), age.to(
                device)
            if config.fuse == 'False':
                output = model(inputs)
            elif config.fuse == 'True':
                # BUG FIX: the fused branch previously called model(inputs),
                # identical to the 'False' branch, while the training loop
                # calls model(inputs, sex, age) — pass the fused inputs here
                # as well.
                output = model(inputs, sex, age)
            else:
                # FIX: corrected the "choise" typo in the user-facing message
                raise ValueError(
                    'Not supported choice for \'config.fuse\' item in test phase!'
                )
            loss = criterion(output, target)
            loss_meter += loss.item()
            it_count += 1
            output = torch.sigmoid(output)
            f1 = utils.calc_f1(target, output, threshold)
            f1_meter += f1
    return loss_meter / it_count, f1_meter / it_count
Example #25
0
def val_epoch(model, criterion, val_dataloader, threshold=0.5):
    """Validate one epoch; returns (mean loss, mean F1) across batches.

    The forward pass and the scored label columns depend on
    ``config.kind`` / ``config.top4_DeepNN_tag``.
    """
    model.eval()
    loss_acc = 0.0
    f1_acc = 0.0
    n_iter = 0
    with torch.no_grad():
        for inputs, fr, target in val_dataloader:
            inputs = inputs.to(device)
            fr = fr.to(device)
            target = target.to(device)
            # Model invocation differs per configured kind.
            if config.kind == 1:
                output = model(inputs, fr)   # consumes the extra `fr` input
            elif config.kind == 2:
                output, _ = model(inputs)    # model returns (logits, aux)
            else:
                output = model(inputs)
            # Optionally restrict scoring to the configured top-4 tag columns.
            if config.kind == 2 and config.top4_DeepNN_tag:
                output = output[:, config.top4_tag_list]
                target = target[:, config.top4_tag_list]
            loss_acc += criterion(output, target).item()
            n_iter += 1
            f1_acc += utils.calc_f1(target, torch.sigmoid(output), threshold)
    return loss_acc / n_iter, f1_acc / n_iter
Exemple #26
0
    # Cross-validated evaluation: run `val` on each of the 5 folds and pool
    # all predictions/labels before computing global metrics.
    if (args.command == "val"):
        # Accumulators placed on the same device the fold outputs will be on.
        if torch.cuda.is_available():
            label_all = torch.Tensor().cuda()
            pred_all = torch.Tensor().cuda()
        else:
            label_all = torch.Tensor()
            pred_all = torch.Tensor()
        for i in range(5):
            #if i!=3:
            #    continue
            config.train_data = 'path/train'
            args.fold = i
            target, output = val(args)  # per-fold (labels, predictions)
            label_all = torch.cat((label_all, target), 0)
            pred_all = torch.cat((pred_all, output), 0)
        # Pooled metrics at a fixed 0.5 decision threshold.
        f1 = utils.calc_f1(label_all, pred_all, 0.5)
        acc, true_positives, real_positives, predicted_positives = utils.calc_acc_f1(
            label_all, pred_all, 0.5)

        # Append a run summary to a plain-text log file.
        fout = open('log.txt', 'a+', encoding='utf-8')
        fout.write('\n' + '*' * 20 + '\n')
        fout.write('acc:' + str(acc) + '\n')
        fout.write('true_positives:' + str(true_positives) + '\n')
        fout.write('real_positives:' + str(real_positives) + '\n')
        fout.write('predicted_positives:' + str(predicted_positives) + '\n')
        fout.close()
        #        acc, true_positives, real_positives, predicted_positives = utils.calc_acc_f1(target, output, 0.5)
        print('f1:%.4f' % (f1))

    if (args.command == "check"):
Exemple #27
0
def fine_tuning(train_loader, model, criterion, optimizer):
    """Run one fine-tuning pass over `train_loader`, printing multi-label
    metrics (per-class F1 over 54 labels, micro/macro P/R/F1, precision@k
    and nDCG@k for k in {1,3,5}) averaged over batches.
    """
    # Per-class F1 accumulator — assumes exactly 54 output labels (see the
    # range(54) loop below); confirm this matches the model head.
    F1 = np.zeros(54)
    score_micro = np.zeros(3)  # [precision, recall, f1] accumulators (micro)
    score_macro = np.zeros(3)  # [precision, recall, f1] accumulators (macro)
    data_time = AverageMeter()
    losses = AverageMeter()
    microF1 = AverageMeter()
    macroF1 = AverageMeter()
    model.train()
    test_p1, test_p3, test_p5 = 0, 0, 0
    test_ndcg1, test_ndcg3, test_ndcg5 = 0, 0, 0

    end = time.time()
    # bar = Bar('Training', max=len(train_loader))
    for batch_idx, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        input = input.cuda()
        target = target.cuda()
        output = model(input)
        loss = criterion(output, target.float())
        # Detach to CPU for metric computation (loss graph is kept above).
        target = target.data.cpu().float()
        output = output.data.cpu()

        micro, macro = calc_f1(target, output)
        losses.update(loss.item(), input.size(0))
        microF1.update(micro.item(), input.size(0))
        macroF1.update(macro.item(), input.size(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Ranking metrics use the raw (pre-threshold) scores via top-5 indices.
        # measure elapsed time
        _p1, _p3, _p5 = precision_k(output.topk(k=5)[1].numpy(),
                                    target.numpy(),
                                    k=[1, 3, 5])
        test_p1 += _p1
        test_p3 += _p3
        test_p5 += _p5

        _ndcg1, _ndcg3, _ndcg5 = ndcg_k(output.topk(k=5)[1].numpy(),
                                        target.numpy(),
                                        k=[1, 3, 5])
        test_ndcg1 += _ndcg1
        test_ndcg3 += _ndcg3
        test_ndcg5 += _ndcg5
        # Binarize in place AFTER the ranking metrics above — order matters.
        # NOTE(review): 0.5 is applied to raw model outputs, not sigmoid
        # probabilities — confirm the model already emits values in [0, 1].
        output[output > 0.5] = 1
        output[output <= 0.5] = 0
        for l in range(54):
            F1[l] += f1_score(target[:, l], output[:, l], average='binary')
            # precision[l] += precision_score(target[:, l], output[:, l], average='binary')
            # recall[l] += recall_score(target[:, l], output[:, l], average='binary')
        # micro, macro = calc_f1(target, output)
        # acc += accuracy_score(target, output)
        # print("acc",acc)
        score_micro += [
            precision_score(target, output, average='micro'),
            recall_score(target, output, average='micro'),
            f1_score(target, output, average='micro')
        ]
        score_macro += [
            precision_score(target, output, average='macro'),
            recall_score(target, output, average='macro'),
            f1_score(target, output, average='macro')
        ]
        # acc = calc_acc(target, output)
    np.set_printoptions(formatter={'float': '{: 0.4}'.format})
    print('the result of F1: \n', F1 / len(train_loader))
    print('the result of micro: \n', score_micro / len(train_loader))
    print('the result of macro: \n', score_macro / len(train_loader))
    test_p1 /= len(train_loader)
    test_p3 /= len(train_loader)
    test_p5 /= len(train_loader)

    test_ndcg1 /= len(train_loader)
    test_ndcg3 /= len(train_loader)
    test_ndcg5 /= len(train_loader)

    print("precision@1 : %.4f , precision@3 : %.4f , precision@5 : %.4f " %
          (test_p1, test_p3, test_p5))
    print("ndcg@1 : %.4f , ndcg@3 : %.4f , ndcg@5 : %.4f " %
          (test_ndcg1, test_ndcg3, test_ndcg5))
Exemple #28
0
def main(args):
    """Train a cluster-batched GraphSAGE model on the Amazon2M graph.

    Loads the pre-built DGL graph, partitions it into clusters for
    mini-batch training, trains for ``args.n_epochs`` epochs with periodic
    clustered validation, checkpoints the best model by validation
    micro-F1, and optionally reloads that checkpoint at the end.

    Fix vs. original: the final timing print computed
    ``start_time - end_time`` and therefore reported a negative duration;
    it now prints ``end_time - start_time``.

    Raises:
        ValueError: if ``args.gpu < 0`` (CUDA is required).
        AssertionError: if ``args.dataset`` is not ``'amazon2m'``.
    """
    # Make runs reproducible.
    torch.manual_seed(args.rnd_seed)
    np.random.seed(args.rnd_seed)
    random.seed(args.rnd_seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Only 'ppi' is treated as multi-label; with the amazon2m assert below
    # this is always single-label here, kept for structural parity.
    multitask_data = set(['ppi'])
    multitask = args.dataset in multitask_data

    # load and preprocess dataset
    assert args.dataset == 'amazon2m'
    g, graph_labels = load_graphs(
        '/yushi/dataset/Amazon2M/Amazon2M_dglgraph.bin')
    assert len(g) == 1
    g = g[0]
    data = g.ndata
    labels = torch.LongTensor(data['label'])
    # NOTE(review): the masks are only bound when torch.BoolTensor exists;
    # on an older torch build the names below would be undefined — confirm
    # the supported torch version range.
    if hasattr(torch, 'BoolTensor'):
        train_mask = data['train_mask'].bool()
        val_mask = data['val_mask'].bool()
        test_mask = data['test_mask'].bool()

    train_nid = np.nonzero(train_mask.cpu().numpy())[0].astype(np.int64)
    val_nid = np.nonzero(val_mask.cpu().numpy())[0].astype(np.int64)

    # Normalize features with statistics computed on the training nodes only.
    features = torch.FloatTensor(data['feat'])
    if args.normalize:
        train_feats = features[train_nid]
        scaler = sklearn.preprocessing.StandardScaler()
        scaler.fit(train_feats)
        features = scaler.transform(features)
    features = torch.FloatTensor(features)

    in_feats = features.shape[1]
    n_classes = 47  # Amazon2M label count (hard-coded for this dataset)
    n_edges = g.number_of_edges()

    n_train_samples = train_mask.int().sum().item()
    n_val_samples = val_mask.int().sum().item()
    n_test_samples = test_mask.int().sum().item()

    print("""----Data statistics------'
    #Edges %d
    #Classes %d
    #Train samples %d
    #Val samples %d
    #Test samples %d""" %
          (n_edges, n_classes,
           n_train_samples,
           n_val_samples,
           n_test_samples))
    # create GCN model
    if args.self_loop:
        print("adding self-loop edges")
        g = add_self_loop(g)
    # g = DGLGraph(g, readonly=True)

    # set device for dataset tensors
    if args.gpu < 0:
        cuda = False
        raise ValueError('no cuda')
    else:
        cuda = True
        torch.cuda.set_device(args.gpu)
        features = features.cuda()
        labels = labels.cuda()
        train_mask = train_mask.cuda()
        val_mask = val_mask.cuda()
        test_mask = test_mask.cuda()

    print(torch.cuda.get_device_name(0))

    g.ndata['features'] = features
    g.ndata['labels'] = labels
    g.ndata['train_mask'] = train_mask
    print('labels shape:', labels.shape)
    # Cluster iterators: mini-batched clusters for training, one cluster per
    # batch for validation (no precomputed propagation there).
    train_cluster_iterator = ClusterIter(
        args.dataset, g, args.psize, args.batch_size, train_nid, use_pp=args.use_pp)
    val_cluster_iterator = ClusterIter(
        args.dataset, g, args.psize_val, 1, val_nid, use_pp=False)

    print("features shape, ", features.shape)
    model = GraphSAGE(in_feats,
                      args.n_hidden,
                      n_classes,
                      args.n_layers,
                      F.relu,
                      args.dropout,
                      args.use_pp)

    if cuda:
        model.cuda()

    # logger and so on
    log_dir = save_log_dir(args)
    writer = SummaryWriter(log_dir)
    logger = Logger(os.path.join(log_dir, 'loggings'))
    logger.write(args)

    # Loss function
    if multitask:
        print('Using multi-label loss')
        loss_f = nn.BCEWithLogitsLoss()
    else:
        print('Using multi-class loss')
        loss_f = nn.CrossEntropyLoss()

    # use optimizer
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # set train_nids to cuda tensor
    if cuda:
        train_nid = torch.from_numpy(train_nid).cuda()
    print("current memory after model before training",
          torch.cuda.memory_allocated(device=train_nid.device) / 1024 / 1024)
    start_time = time.time()
    best_f1 = -1

    for epoch in range(args.n_epochs):
        for j, cluster in enumerate(train_cluster_iterator):
            # sync with upper level training graph
            cluster.copy_from_parent()
            model.train()
            # forward; loss is computed only on this cluster's training nodes
            pred = model(cluster)
            batch_labels = cluster.ndata['labels']
            batch_train_mask = cluster.ndata['train_mask']
            loss = loss_f(pred[batch_train_mask],
                          batch_labels[batch_train_mask])

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # in PPI case, `log_every` is chosen to log one time per epoch.
            # Choose your log freq dynamically when you want more info within one epoch
            if j % args.log_every == 0:
                print(f"epoch:{epoch}/{args.n_epochs}, Iteration {j}/"
                      f"{len(train_cluster_iterator)}:training loss", loss.item())
                writer.add_scalar('train/loss', loss.item(),
                                  global_step=j + epoch * len(train_cluster_iterator))
        print("current memory:",
              torch.cuda.memory_allocated(device=pred.device) / 1024 / 1024)

        # evaluate: average micro/macro F1 over the validation clusters
        if epoch % args.val_every == 0:
            total_f1_mic = []
            total_f1_mac = []
            model.eval()
            for j, cluster in enumerate(val_cluster_iterator):
                cluster.copy_from_parent()
                with torch.no_grad():
                    logits = model(cluster)
                    batch_labels = cluster.ndata['labels']
                    # batch_val_mask = cluster.ndata['val_mask']
                    val_f1_mic, val_f1_mac = calc_f1(batch_labels.cpu().numpy(),
                                                        logits.cpu().numpy(), multitask)
                total_f1_mic.append(val_f1_mic)
                total_f1_mac.append(val_f1_mac)

            val_f1_mic = np.mean(total_f1_mic)
            val_f1_mac = np.mean(total_f1_mac)

            print(
                "Val F1-mic{:.4f}, Val F1-mac{:.4f}". format(val_f1_mic, val_f1_mac))
            # Checkpoint on best validation micro-F1.
            if val_f1_mic > best_f1:
                best_f1 = val_f1_mic
                print('new best val f1:', best_f1)
                torch.save(model.state_dict(), os.path.join(
                    log_dir, 'best_model.pkl'))
            writer.add_scalar('val/f1-mic', val_f1_mic, global_step=epoch)
            writer.add_scalar('val/f1-mac', val_f1_mac, global_step=epoch)

    end_time = time.time()
    # BUGFIX: original printed start_time - end_time (a negative duration).
    print(f'training using time {end_time - start_time}')

    # test
    if args.use_val:
        model.load_state_dict(torch.load(os.path.join(
            log_dir, 'best_model.pkl')))
Exemple #29
0
            # Keep the un-padded per-sample labels; pad a copy for the model.
            label = ner
            ner = nn.utils.rnn.pad_sequence(ner, batch_first=True).type(
                torch.LongTensor)
            ner = ner.cuda()
            with torch.no_grad():
                pred = model(X, mask_X, length)
                loss = model.cal_loss(X, mask_X, length, label=ner)
            # Trim each prediction back to its true (un-padded) length.
            for i, item in enumerate(pred):
                pred_set.append(item[0:length.cpu().numpy()[i]])
            #pred_set.extend(pred)
            for item in label:
                label_set.append(item.numpy())
            valid_loss += loss.item()
        valid_loss = valid_loss / len(dev_X)

        # Epoch-level NER metrics over the accumulated predictions.
        acc, recall, f1, pred_result, label_result = calc_f1(
            pred_set, label_set, data_manager.ner_list)
        INFO = 'epoch %d, train loss %f, valid loss %f, acc %f, recall %f, f1 %f ' % (
            epoch, train_loss, valid_loss, acc, recall, f1)
        logging.info(INFO)
        print(INFO)
        # NOTE(review): breaks after the first epoch — looks like a
        # debugging shortcut; confirm before running full training.
        if epoch == 0:
            break

    # Decode tag-index sequences into named-entity results.
    pred_result = cal_ner_result(pred_set, data_manager.ner_list)
    label_result = cal_ner_result(label_set, data_manager.ner_list)
    #acc,recall,f1,pred_result,label_result = calc_f1(pred_set, label_set, dev_X, data_manager.ner_list)
    #INFO = 'epoch %d, train loss %f, valid loss %f, acc %f, recall %f, f1 %f '% (epoch, train_loss, valid_loss,acc,recall,f1)
    #logging.info(INFO)
    #print(INFO)
    #print(INFO+'\t'+INFO_THRE)
    # positive/negative sample analysis