Example #1
0
def slave_run_train(model, args, package, pid="None"):
    """Run local ("slave") training for one federated client.

    Trains ``model`` for ``args.slave_ep`` local epochs on the single
    pre-processed batch in ``package`` and returns the trained weights
    together with averaged metrics.

    Args:
        model: the GCN module to train (trained in place).
        args: parsed CLI namespace; uses ``lr``, ``slave_ep``, ``dataset``,
            ``device``, ``multilabel`` and ``logDir``.
        package: dict with keys ``features``, ``support`` (a COO triplet of
            (indices, values, shape) -- presumably produced by the
            preprocessing step; confirm against the caller), ``y_train``
            and ``train_mask``.
        pid: identifier of this slave/partition, used only for logging.

    Returns:
        dict with ``params`` (CPU ``state_dict``), averaged ``acc`` and
        ``loss``, the last ``pred``/``out``, and wall-clock ``time``.
    """
    model.train()
    t = time.time()

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    # optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)

    list_loss, list_acc = [], []

    # The batch content never changes across local epochs, so build the
    # tensors, the loss, and do the GPU transfers once instead of on every
    # iteration (the original rebuilt all of this inside the loop).
    if args.dataset == "Amazon":
        feature = torch.tensor(package["features"], dtype=torch.float32)
    else:
        # Non-Amazon datasets use one-hot (identity) node features.
        feature = torch.eye(len(package["features"]), dtype=torch.float32)
    support = to_torch_sparse_tensor(coo_matrix((package["support"][1], (package["support"][0][:, 0],
                                                                            package["support"][0][:, 1])),
                                                shape=package["support"][2]))
    label = torch.tensor(package["y_train"], dtype=torch.float32)
    mask = torch.tensor(package["train_mask"].astype(int), dtype=torch.float32)
    criterion = torch.nn.CrossEntropyLoss()

    if args.device >= 0:
        model = model.cuda()
        criterion = criterion.cuda()
        feature = feature.cuda()
        support = support.cuda()
        # CrossEntropyLoss expects integer class indices as targets.
        label = label.cuda().to(dtype=torch.int64)
        mask = mask.cuda()

    # `step` replaces the original loop variable `iter`, which shadowed the
    # `iter` builtin.
    for step in range(args.slave_ep):
        model.zero_grad()
        out = model(support, feature)

        loss, pred, acc = _metrics(out, label, mask, criterion, args.multilabel)

        # update model
        loss.backward()
        optimizer.step()

        list_loss.append(loss.item())
        list_acc.append(acc.item())

    # Measured once after all local epochs (the original assigned this
    # inside the loop and read it afterwards).
    time_cost = time.time() - t

    log_str = "Slave-" + str(pid) + " Done. Total time cost:" + str(time_cost) +\
              " average acc: " + str(sum(list_acc)/len(list_acc)) + ". average loss: " + \
              str(sum(list_loss) / len(list_loss))
    print2file(log_str, args.logDir, True)
    return {"params": model.cpu().state_dict(),
            "acc": sum(list_acc) / len(list_acc),
            "pred": pred,
            "out": out,
            "loss": sum(list_loss) / len(list_loss),
            "time": time_cost}
Example #2
0
def slave_run_evaluate(model, args, package, pid="None"):
    """Evaluate ``model`` on one pre-processed batch without training.

    Mirrors :func:`slave_run_train` but performs a single forward pass.
    NOTE(review): it reads the ``y_train``/``train_mask`` keys of
    ``package`` -- presumably the caller packs the split to evaluate under
    these names; confirm against the call site.

    Args:
        model: the GCN module to evaluate.
        args: parsed CLI namespace; uses ``dataset``, ``device``,
            ``multilabel`` and ``logDir``.
        package: dict with keys ``features``, ``support`` (COO triplet of
            (indices, values, shape)), ``y_train`` and ``train_mask``.
        pid: identifier of this slave/partition, used only for logging.

    Returns:
        dict with ``params`` (CPU ``state_dict``), scalar ``acc`` and
        ``loss``, and the raw ``pred``/``out``.
    """
    model.eval()
    t = time.time()

    if args.dataset == "Amazon":
        feature = torch.tensor(package["features"], dtype=torch.float32)
    else:
        # Non-Amazon datasets use one-hot (identity) node features.
        feature = torch.eye(len(package["features"]), dtype=torch.float32)
    support = to_torch_sparse_tensor(coo_matrix((package["support"][1], (package["support"][0][:, 0],
                                                                            package["support"][0][:, 1])),
                                                shape=package["support"][2]))
    label = torch.tensor(package["y_train"], dtype=torch.float32)
    mask = torch.tensor(package["train_mask"].astype(int), dtype=torch.float32)
    criterion = torch.nn.CrossEntropyLoss()

    if args.device >= 0:
        model = model.cuda()
        criterion = criterion.cuda()
        feature = feature.cuda()
        support = support.cuda()
        # CrossEntropyLoss expects integer class indices as targets.
        label = label.cuda().to(dtype=torch.int64)
        mask = mask.cuda()

    # Pure evaluation: disable autograd so no graph is built (the original
    # ran the forward pass with gradients enabled, wasting memory/compute).
    with torch.no_grad():
        out = model(support, feature)
        loss, pred, acc = _metrics(out, label, mask, criterion, args.multilabel)

    log_str = "Slave-" + str(pid) + " Done. Total time cost:" + str(time.time()-t) +\
              " average acc: " + str(acc.item()) + ". average loss: " + \
              str(loss.item())
    print2file(log_str, args.logDir, True)
    return {"params": model.cpu().state_dict(),
            "acc": acc.item(),
            "pred": pred,
            "out": out,
            "loss": loss.item()}
Example #3
0
def train_net(net, optimizer, device, args, LOG_FILE, MODEL_FILE):
    """Train `net` for `args.epoch` epochs, tracking train/valid metrics.

    NOTE(review): relies on module-level globals `trainloader`,
    `validloader`, `TRAIN_FILES` and `VALID_FILES` -- presumably set up by
    the caller before invocation; confirm.

    Args:
        net: segmentation network, trained in place.
        optimizer: optimizer already bound to net's parameters.
        device: torch device the input batches are moved to.
        args: namespace; uses `sch` (0 = no scheduler, 1 = multi-step,
            2 = cosine), `epoch`, `accumulate`, `batch`, `swa`, `category`.
        LOG_FILE: path given to `print2file` for the per-epoch log line.
        MODEL_FILE: path where the best-validation state_dict is saved.

    Returns:
        (state_dict, history): SWA-averaged weights when `args.swa > 0`,
        otherwise the final weights, plus the per-epoch metric history.
    """
    # output regression information
    history = {
        'Train_loss': [],
        'Train_dice': [],
        'Train_other': [],
        'Valid_loss': [],
        'Valid_dice': [],
        'Valid_other': []
    }

    # scheduler
    # No scheduler object is created when args.sch == 0; the step call at
    # the bottom of the epoch loop is guarded by `args.sch > 0`.
    if args.sch == 1:
        scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer,
            milestones=[args.epoch // 2, args.epoch * 3 // 4],
            gamma=0.35)
    elif args.sch == 2:
        scheduler = optim.lr_scheduler.CosineAnnealingLR(
            optimizer, args.epoch, 1e-4)

    val_dice_best = -float('inf')
    # main iteration
    for epoch in range(args.epoch):  # loop over the dataset multiple times
        net.train()
        running_loss, running_dice, running_other = 0.0, 0.0, 0.0
        tk0 = tqdm(enumerate(trainloader), total=len(trainloader), leave=False)

        # zero the gradient
        optimizer.zero_grad()
        # iterate over all samples
        for i, data in tk0:
            # get the inputs; data is a list of [inputs, labels]
            # NHWC -> NCHW for the network.
            images = data[0].to(device).permute(0, 3, 1, 2)

            # forward + backward + optimize
            outputs = net(images)
            # do not accumulate the gradient
            if not args.accumulate:
                # different ways of handling the outputs
                loss = compute_loss(args, outputs, data, acc_step=1)
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
                batch_loss = loss.item()
            # do accumulation
            else:
                # Accumulate towards an effective batch size of 64; the loss
                # is pre-scaled by compute_loss via acc_step.
                acc_step = 64 // args.batch
                loss = compute_loss(args, outputs, data, acc_step=acc_step)
                loss.backward()
                if (i + 1) % acc_step == 0:
                    optimizer.step()
                    optimizer.zero_grad()
                # NOTE(review): gradients of a trailing remainder (when the
                # loader length is not divisible by acc_step) are never
                # stepped; they are cleared by zero_grad next epoch.
                batch_loss = loss.item() * acc_step

            # print statistics
            batch_dice, batch_other = evaluate_batch(data, outputs, args)
            running_loss += batch_loss
            running_dice += batch_dice
            running_other += batch_other
            tk0.set_postfix(
                info='Loss {:.3f}, Dice {:.3f}, Other {:.3f}'.format(
                    batch_loss, batch_dice, batch_other))

        # stochastic weight averaging
        # Running average of the weights over the final `args.swa` epochs.
        if args.swa > 0 and epoch >= args.epoch - args.swa:
            epoch_tmp = args.epoch - args.swa
            if epoch == epoch_tmp:
                net_swa = copy.deepcopy(net.state_dict())
            else:
                # Incremental mean: after k+1 contributing epochs each
                # parameter is the average of the k+1 snapshots.
                for key, val in net_swa.items():
                    net_swa[key] = (
                        (epoch - epoch_tmp) * val +
                        net.state_dict()[key]) / (epoch - epoch_tmp + 1)

        # after every epoch, print the statistics
        net.eval()
        val_loss, val_dice, val_other = evaluate_loader(
            net, device, validloader, args)

        # save the best up to now
        # Comparison uses the raw (unnormalized) dice sum; only the printed
        # values are normalized per file and category.
        if val_dice > val_dice_best:
            print('Improving val_dice from {:.3f} to {:.3f}, saving the model'.
                  format(val_dice_best / len(VALID_FILES) / args.category,
                         val_dice / len(VALID_FILES) / args.category))
            val_dice_best = val_dice
            torch.save(net.state_dict(), MODEL_FILE)

        # update the learning rate
        if args.sch > 0:
            scheduler.step()

        # update the history and output message
        history['Train_loss'].append(running_loss / len(trainloader))
        history['Valid_loss'].append(val_loss / len(validloader))
        history['Train_dice'].append(running_dice / len(TRAIN_FILES) /
                                     args.category)  # four categories
        history['Valid_dice'].append(val_dice / len(VALID_FILES) /
                                     args.category)
        history['Train_other'].append(running_other / len(TRAIN_FILES) /
                                      args.category)
        history['Valid_other'].append(val_other / len(VALID_FILES) /
                                      args.category)
        sout = '\nEpoch {:d} :'.format(epoch) + ' '.join(
            key + ':{:.3f}'.format(val[-1]) for key, val in history.items())
        print2file(sout, LOG_FILE)
        print(sout)
    if args.swa > 0:
        return net_swa, history
    else:
        return net.state_dict(), history
Example #4
0
    # load validation id
    # Validation indices are read back from a previously written CSV so the
    # split is reproducible across runs.
    X_valid = list(pd.read_csv('validID.csv')['Valid'])[:rows]
    # Train set = everything not in the validation set.  NOTE(review): set
    # difference has no guaranteed order before the [:rows] truncation --
    # presumably acceptable here, but confirm if determinism matters.
    X_train = list(set(np.arange(len(TRAIN_FILES_ALL))) - set(X_valid))[:rows]

    # get the train and valid files
    TRAIN_FILES = [TRAIN_FILES_ALL[i] for i in X_train]
    VALID_FILES = [TRAIN_FILES_ALL[i] for i in X_valid]

    steel_ds_valid = SteelDataset(VALID_FILES, args, mask_df=mask_df)
    stat_df_valid = steel_ds_valid.stat_images(rows)

    # print statistics
    sout = '======== Validation Stat ==========\n' + analyze_labels(
        stat_df_valid) + '\n'
    print2file(sout, LOG_FILE)

    # not using sophisticated normalize
    if not args.normalize:
        train_mean, train_std = 0, 1
        test_mean, test_std = 0, 1
    else:
        # Hard-coded dataset statistics -- presumably precomputed offline
        # for this dataset; confirm if the data changes.
        train_mean, train_std = 0.3438812517320016, 0.056746666005067205
        test_mean, test_std = 0.25951299299868136, 0.051800296725619116

    sout = 'Train/Test {:d}/{:d}\n'.format(len(TRAIN_FILES_ALL), len(TEST_FILES)) + \
      'Train mean/std {:.3f}/{:.3f}\n'.format(train_mean, train_std) + \
      'Test mean/std {:.3f}/{:.3f}\n'.format(test_mean, test_std) +\
      'Train num/sample {:d}'.format(len(TRAIN_FILES)) + ' '.join(TRAIN_FILES[:2]) + \
      '\nValid num/sample {:d}'.format(len(VALID_FILES)) + ' '.join(VALID_FILES[:2])+'\n'
    print2file(sout, LOG_FILE)
Example #5
0
                        default='data/wordvec_model',
                        type=str,
                        help='word vector model')
    parser.add_argument('-o',
                        '--output',
                        default='data/test',
                        type=str,
                        help='output file name')
    args = parser.parse_args()

    # dict = args.dict
    # wordvec_source = args.wordvec
    # tag = args.tag
    # id = args.carID
    # sql = '''select `comment` from order_reviews where carID = %s''' % id
    # pydb = mydb.get_db()
    # comments = pydb.exec_sql(sql)
    # comments = [c['comment'] for c in comments]
    # make_tag = tagging(dict,wordvec_source,tag)

    # make_tag = tagging('test_dict','data/wordvec_model','artificial_tag')
    # comments = [
    #     '驾驶轻松动力足,gps等配备齐全,空间巨大,坐五个人一点都不挤。后备箱也超大,只有塞不满没有装不下。',
    #     '车况好 商务车 车主特别好',
    #     '车主人好,车也很好用。车辆省油,整洁干净,车主和气,很好',
    #     '车很好开,车主人也很好说话~',
    #     '感觉这车开的挺舒服的,车主和PP网都挺好的,下次还选PP,加油!'
    # ]
    # result = map(make_tag.tag_comments,comments)
    # NOTE(review): `result` is only defined in the commented-out pipeline
    # above, so this call raises NameError as written -- confirm intent.
    utils.print2file('test', result)
             int(x['Class 2'] != 0) + \
             int(x['Class 3'] != 0) + \
             int(x['Class 4'] != 0) != 0, axis=1))
        # save the statistics
        # Stratified-ish split on the any-defect label computed above; the
        # fixed random_state keeps the split reproducible.
        X_train, X_valid, _, _ = train_test_split(np.arange(stat_df.shape[0]),
                                                  labels,
                                                  test_size=0.16,
                                                  random_state=1234)
        # Persist the validation indices so later runs can reload the same
        # split (see the companion code that reads 'validID.csv').
        valid_df = pd.DataFrame({'Valid': X_valid})
        valid_df.to_csv(VALID_ID_FILE)
        stat_df_valid = stat_df.iloc[X_valid, :]

        # print statistics
        sout =  '\n========   Train Stat ==========\n' + analyze_labels(stat_df.iloc[X_train,:]) +\
          '======== Validation Stat ==========\n' + analyze_labels(stat_df_valid)+'\n'
        print2file(sout, LOG_FILE)

        # plot the distributions
        # Overlay train vs. test mean/std distributions to spot domain shift.
        fig, axs = plt.subplots(1, 2, figsize=(16, 5))
        sns.distplot(stat_df['mean'], ax=axs[0], kde_kws={"label": "Train"})
        axs[0].set_title('Distribution of mean')
        sns.distplot(stat_df['std'], ax=axs[1], kde_kws={"label": "Train"})
        axs[1].set_title('Distribution of std')
        sns.distplot(stat_df_test['mean'],
                     ax=axs[0],
                     kde_kws={"label": "Test"})
        sns.distplot(stat_df_test['std'], ax=axs[1], kde_kws={"label": "Test"})
        plt.savefig('../output/Distribution.png')

        # get the train and valid files
        TRAIN_FILES = [TRAIN_FILES_ALL[i] for i in X_train]
Example #7
0
    # CLI for the comment-tagging tool: paths to the tag definitions, the
    # user dictionary, the word-vector model, and the output file.
    parser = argparse.ArgumentParser(description='tag comments')
    parser.add_argument('-i', '--carID',type=str,help ='car ID')
    parser.add_argument('-t', '--tag', default= 'data/tags',type=str,help ='tag file path')
    parser.add_argument('-u', '--dict', default= 'data/udf_dict',type=str,help ='UDF dict path')
    parser.add_argument('-w', '--wordvec', default= 'data/wordvec_model',type=str,help ='word vector model')
    parser.add_argument('-o', '--output', default= 'data/test',type=str,help ='output file name')
    args = parser.parse_args()


    # dict = args.dict
    # wordvec_source = args.wordvec
    # tag = args.tag
    # id = args.carID
    # sql = '''select `comment` from order_reviews where carID = %s''' % id
    # pydb = mydb.get_db()
    # comments = pydb.exec_sql(sql)
    # comments = [c['comment'] for c in comments]
    # make_tag = tagging(dict,wordvec_source,tag)


    # make_tag = tagging('test_dict','data/wordvec_model','artificial_tag')
    # comments = [
    #     '驾驶轻松动力足,gps等配备齐全,空间巨大,坐五个人一点都不挤。后备箱也超大,只有塞不满没有装不下。',
    #     '车况好 商务车 车主特别好',
    #     '车主人好,车也很好用。车辆省油,整洁干净,车主和气,很好',
    #     '车很好开,车主人也很好说话~',
    #     '感觉这车开的挺舒服的,车主和PP网都挺好的,下次还选PP,加油!'
    # ]
    # result = map(make_tag.tag_comments,comments)
    # NOTE(review): `result` is only defined in the commented-out pipeline
    # above, so this call raises NameError as written -- confirm intent.
    utils.print2file('test',result)
Example #8
0
def main():
    """Entry point: federated training of a StackedGCN over graph partitions.

    Pipeline: load and partition the graph data, then per epoch train a
    local model copy on every batch/partition ("slave"), aggregate the
    local weights into the global model (plain or attention-weighted
    averaging), periodically validate and checkpoint the best model, and
    finally evaluate both the last and the best-validation model on the
    test set.
    """
    print("Program start, environment initializing ...")
    torch.autograd.set_detect_anomaly(True)
    args = parameter_parser()
    utils.print2file(str(args), args.logDir, True)

    if args.device >= 0:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(args.device)

    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    pic = {}

    # check if pickles, otherwise load data
    # pickle_name = args.data_prefix+args.dataset+"-"+str(args.bsize)+"-"+str(args.num_clusters)+"_main"+".pickle"
    # if os.path.isfile(pickle_name):
    #     print("Loading Pickle.")
    #     load_time = time.time()
    #     pic = pickle.load(open(pickle_name, "rb"))
    #     print("Loading Done. " + str(time.time()-load_time) + " seconds.")
    # else:
    if True:
        print("Data Pre-processing")
        # Load data
        (pic["train_adj"], full_adj, pic["train_feats"], pic["test_feats"],
         pic["y_train"], y_val, y_test, pic["train_mask"], pic["val_mask"],
         test_mask, _, pic["val_data"], pic["test_data"], num_data,
         visible_data) = utils.load_data(args.data_prefix,
                                         args.dataset,
                                         args.precalc,
                                         amazon=True)

        print("Partition graph and do preprocessing")
        # bsize > 1: group several clusters per batch (multi-cluster);
        # otherwise one cluster per batch.
        if args.bsize > 1:
            _, pic["parts"] = partition_utils.partition_graph(
                pic["train_adj"], visible_data, args.num_clusters)
            pic["parts"] = [np.array(pt) for pt in pic["parts"]]

            (pic["features_batches"], pic["support_batches"],
             pic["y_train_batches"],
             pic["train_mask_batches"]) = utils.preprocess_multicluster_v2(
                 pic["train_adj"], pic["parts"], pic["train_feats"],
                 pic["y_train"], pic["train_mask"], args.num_clusters,
                 args.bsize, args.diag_lambda)

        else:
            (pic["parts"], pic["features_batches"], pic["support_batches"],
             pic["y_train_batches"],
             pic["train_mask_batches"]) = utils.preprocess(
                 pic["train_adj"], pic["train_feats"], pic["y_train"],
                 pic["train_mask"], visible_data, args.num_clusters,
                 args.diag_lambda)

        # Validation/test batches are built from the full adjacency so all
        # nodes are visible at evaluation time.
        (_, pic["val_features_batches"], pic["val_support_batches"],
         pic["y_val_batches"], pic["val_mask_batches"]) = utils.preprocess(
             full_adj, pic["test_feats"], y_val, pic["val_mask"],
             np.arange(num_data), args.num_clusters_val, args.diag_lambda)

        (_, pic["test_features_batches"], pic["test_support_batches"],
         pic["y_test_batches"], pic["test_mask_batches"]) = utils.preprocess(
             full_adj, pic["test_feats"], y_test, test_mask,
             np.arange(num_data), args.num_clusters_test, args.diag_lambda)

        # pickle.dump(pic, open(pickle_name, "wb"))

    print("Preparing model ...")
    model = StackedGCN(args,
                       pic["test_feats"].shape[1],
                       pic["y_train"].shape[1],
                       precalc=args.precalc,
                       num_layers=args.num_layers,
                       norm=args.layernorm)

    # Global ("server") weights shared with every slave each round.
    w_server = model.cpu().state_dict()

    print("Start training ...")
    model_saved = "./model/" + args.dataset + "-" + args.logDir[6:-4] + ".pt"

    try:
        # BUG FIX: best_val_acc was previously reset to 0 inside the epoch
        # loop, so the "best validation" checkpoint never tracked the best
        # accuracy across epochs.  It must persist for the whole run.
        best_val_acc = 0
        for epoch in range(args.epochs):
            # Training process
            w_locals, loss_locals, epoch_acc = [], [], []
            all_time = []

            for pid in range(len(pic["features_batches"])):
                # Use preprocessed batch data
                package = {
                    "features": pic["features_batches"][pid],
                    "support": pic["support_batches"][pid],
                    "y_train": pic["y_train_batches"][pid],
                    "train_mask": pic["train_mask_batches"][pid]
                }

                # Every slave starts from the current global weights.
                model.load_state_dict(w_server)
                out_dict = slave_run_train(model, args, package, pid)

                w_locals.append(copy.deepcopy(out_dict['params']))
                loss_locals.append(copy.deepcopy(out_dict['loss']))
                all_time.append(out_dict["time"])
                epoch_acc.append(out_dict["acc"])

            # update global weights
            a_start_time = time.time()
            if args.agg == 'avg':
                w_server = average_agg(w_locals, args.dp)
            elif args.agg == 'att':
                w_server = weighted_agg(w_locals,
                                        w_server,
                                        args.epsilon,
                                        args.ord,
                                        dp=args.dp)
            else:
                exit('Unrecognized aggregation')

            model.load_state_dict(w_server)
            # agg_time = time.time() - a_start_time
            # print(str(sum(all_time)/len(all_time) + agg_time))
            print2file(
                'Epoch: ' + str(epoch) + ' Average Train acc: ' +
                str(sum(epoch_acc) / len(epoch_acc)), args.logDir, True)

            if epoch % args.val_freq == 0:
                val_cost, val_acc, val_micro, val_macro = evaluate(
                    model,
                    args,
                    pic["val_features_batches"],
                    pic["val_support_batches"],
                    pic["y_val_batches"],
                    pic["val_mask_batches"],
                    pic["val_data"],
                    pid="validation")

                log_str = 'Validation set results: ' + 'cost= {:.5f} '.format(
                    val_cost) + 'accuracy= {:.5f} '.format(
                        val_acc) + 'mi F1= {:.5f} ma F1= {:.5f}'.format(
                            val_micro, val_macro)
                print2file(log_str, args.logDir, True)

                # Checkpoint whenever validation accuracy improves on the
                # best seen so far in the whole run.
                if val_acc > best_val_acc:
                    best_val_acc = val_acc
                    torch.save(model.state_dict(), model_saved)
                    print2file(
                        "Best val_acc: " + str(best_val_acc) +
                        " with epoch: " + str(epoch), args.logDir, True)

        torch.save(
            model.state_dict(),
            "./model/" + args.dataset + "-" + args.logDir[6:-4] + "Done.pt")
        print2file("Training Done. Model Saved.", args.logDir, True)
        # Test Model
        # Perform two tests: one with the last model, another with the
        # best-validation-accuracy model.
        # 1) last model
        test_cost, test_acc, micro, macro = evaluate(
            model,
            args,
            pic["test_features_batches"],
            pic["test_support_batches"],
            pic["y_test_batches"],
            pic["test_mask_batches"],
            pic["test_data"],
            pid="Final test")

        log_str = 'Test set results: ' + 'cost= {:.5f} '.format(
            test_cost) + 'accuracy= {:.5f} '.format(
                test_acc) + 'mi F1= {:.5f} ma F1= {:.5f}'.format(micro, macro)
        print2file(log_str, args.logDir, True)

        # 2) best checkpoint, loaded into a fresh model
        test_model = StackedGCN(args,
                                pic["test_feats"].shape[1],
                                pic["y_train"].shape[1],
                                precalc=args.precalc,
                                num_layers=args.num_layers,
                                norm=args.layernorm)
        test_model.load_state_dict(torch.load(model_saved))
        test_model.eval()
        test_cost, test_acc, micro, macro = evaluate(
            test_model,
            args,
            pic["test_features_batches"],
            pic["test_support_batches"],
            pic["y_test_batches"],
            pic["test_mask_batches"],
            pic["test_data"],
            pid="Best test")

        log_str = 'Test set results: ' + 'cost= {:.5f} '.format(
            test_cost) + 'accuracy= {:.5f} '.format(
                test_acc) + 'mi F1= {:.5f} ma F1= {:.5f}'.format(micro, macro)
        print2file(log_str, args.logDir, True)

    except KeyboardInterrupt:
        # Allow a clean manual stop with Ctrl-C.
        print("==" * 20)
        print("Exiting from training earlier than planned.")

    print("End..so far so good.")