Example #1
def test(args):
    with open('data/word_dict.pkl', 'rb') as file:
        word_dict = pkl.load(file)
    args.num_vocab = word_dict.word_num
    test_set = dataloader.MyDataset('data/test_data.pkl',
                                    None,
                                    feature_path='./data/test_words_attributes_100.pkl',
                                    word_dict=word_dict,
                                    max_len=args.max_len)
    test_iter = DataLoader(test_set, batch_size=args.batch_size, shuffle=False)
    ckpt = torch.load("./model/best_transformer_248.pt")
    # alternatively, rebuild the constructor arguments from the checkpoint,
    # as Example #3 does: model_args = type('args', (object,), ckpt['model_args'])
    quora_model = trfencoder.Transformer(num_layer=3,
                                         middle_dim=10,
                                         modeldim=args.embedding_dim,
                                         dff=args.hidden_size * 4)
    quora_model.to(device)
    quora_model.load_state_dict(ckpt["model_state_dict"])
    embeddings = word_dict.embedding
    embeddings.to(device)

    preds = []
    quora_model.eval()  # switch off dropout/batch-norm updates for inference
    with torch.no_grad():
        for data, feature in test_iter:
            data, feature = data.to(device), feature.to(device)
            data = embeddings(data)
            outputs = quora_model(data, feature)
            pred = torch.argmax(outputs, dim=1).cpu().numpy().tolist()
            preds.extend(pred)
    print(preds)

    with open("data/json_ans.json", 'w') as fr:
        json.dump(preds, fr)
        print("test ans saved")
def visualize(model_path):
    quora_model = TestModel(args)

    check_point = torch.load(model_path)
    quora_model.load_state_dict(check_point['model_state_dict'])

    with open('data/word_dict.pkl', 'rb') as file:
        word_dict = pkl.load(file)

    embeddings = word_dict.embedding
    embeddings.to(device)
    quora_model.to(device)

    valid_set = dataloader.MyDataset('data/valid_data.pkl',
                                     'data/valid_label.pkl',
                                     word_dict=word_dict,
                                     max_len=args.max_len)
    valid_iter = DataLoader(valid_set,
                            batch_size=args.batch_size,
                            shuffle=False)
    print(len(valid_set))
    attention = []
    ans = []

    quora_model.eval()
    with torch.no_grad():
        f1s = []
        for i, (data, label) in enumerate(valid_iter):
            data, label = data.to(device), label.to(device)
            idx = padding_idx(data, 62706)  # 62706: index of the padding token
            word = data
            data = embeddings(data)

            outputs = quora_model(data, idx)
            pred = list(np.argmax(outputs.cpu().numpy(), axis=-1))
            ans = ans + pred
            attention.append(quora_model.att.squeeze(-1).cpu().data.numpy())
            f1 = F1score(outputs, label)
            f1s.append(f1)
            if i % 1000 == 0:
                print('attention:', attention[i][0, :])
                print('word', word[0, :])

        val_f1 = sum(f1s) / len(f1s)
        print('val_f1:', val_f1)

    attention = np.vstack(attention)

    with open('./data/attention.pkl', 'wb') as file:
        pkl.dump(attention, file)
    with open('./data/ans.pkl', 'wb') as file:
        pkl.dump(ans, file)
    print(len(ans))
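
padding_idx is likewise not defined here. Given that it takes the raw index tensor plus the padding token index, and its output is fed to the model together with the embedded input, a plausible sketch is a boolean padding mask (an assumption; the real helper may return positions instead):

def padding_idx(data, pad):
    # Hypothetical helper: True wherever a position holds the padding token,
    # letting the model mask attention over padded positions.
    return data == pad
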
Example #3
def main(args):
    with open('data/word_dict.pkl', 'rb') as f:
        word_dict = pkl.load(f)
    embeddings = word_dict.embedding
    embeddings.to(device)
    valid_set = dataloader.MyDataset(
        'data/valid_data.pkl',
        'data/valid_label.pkl',
        feature_path="./data/valid_words_attributes_100.pkl",
        word_dict=word_dict,
        max_len=args.max_len)
    test_set = dataloader.MyDataset(
        'data/test_data.pkl',
        None,
        feature_path="./data/test_words_attributes_100.pkl",
        word_dict=word_dict,
        max_len=args.max_len)
    valid_iter = DataLoader(valid_set,
                            batch_size=args.batch_size,
                            shuffle=False)
    test_iter = DataLoader(test_set, batch_size=args.batch_size, shuffle=False)

    model_types = args.model
    model_paths = args.model_path
    checkpoints = [torch.load(path) for path in model_paths]
    # rebuild an args-like namespace from each checkpoint's stored model_args
    # (the type() call already copies the dict into class attributes; the
    # setattr loop below is redundant but harmless)
    model_args = [
        type('args', (object, ), c['model_args']) for c in checkpoints
    ]
    for ind, checkpoint in enumerate(checkpoints):
        for key, value in checkpoint["model_args"].items():
            setattr(model_args[ind], key, value)
    eval_models = []
    for i, typ in enumerate(model_types):
        if typ == "test":
            eval_model = model.TestModel(model_args[i])
            eval_model.to(device)
            eval_model.load_state_dict(checkpoints[i]["model_state_dict"])
            eval_models.append(eval_model)
        elif typ == "quo":
            eval_model = model.QuoraModel(model_args[i])
            eval_model.to(device)
            eval_model.load_state_dict(checkpoints[i]["model_state_dict"])
            eval_models.append(eval_model)

    with torch.no_grad():
        # validation set
        f1s = []
        mats = np.array([[0, 0], [0, 0]])
        predicts = torch.tensor([], device=device).long()
        for i, (data, label, feature) in enumerate(valid_iter):
            data, label, feature = data.to(device), label.to(
                device), feature.to(device)
            data = embeddings(data)
            final_output = torch.tensor([], device=device)
            for eval_model in eval_models:
                eval_model.eval()
                outputs = eval_model(data, feature)
                final_output = torch.cat((final_output, outputs), 0)
            # average the ensemble: stack per-model outputs, then mean over models
            outputs = torch.mean(
                final_output.view(-1, outputs.size(0), outputs.size(1)), 0)
            f1, mat, predicted = F1score(outputs, label)
            predicts = torch.cat((predicts, predicted), 0)
            f1s.append(f1)
            mats += mat
        val_f1 = sum(f1s) / len(f1s)
        print("validation F1 score: %.3f" % (val_f1))
        print("confusion matrix: \n", mats)
        with open("model/" + args.save + "evaluation.txt", 'w') as f:
            f.write(",".join([str(c) for c in predicts.cpu().tolist()]))

        # test set
        submit_ans = torch.tensor([], device=device).long()
        for data, feature in test_iter:
            data, feature = data.to(device), feature.to(device)
            data = embeddings(data)
            final_output = torch.tensor([], device=device)
            for eval_model in eval_models:
                outputs = eval_model(data, feature)
                final_output = torch.cat((final_output, outputs), 0)
            outputs = torch.mean(
                final_output.view(-1, outputs.size(0), outputs.size(1)), 0)
            _, predicted = torch.max(F.softmax(outputs, dim=1), 1)
            submit_ans = torch.cat((submit_ans, predicted), 0)
        with open("data/json_ans.json", 'w') as fr:
            json.dump(submit_ans.cpu().numpy().tolist(), fr)
            print("test ans saved")
Example #4
def train(quora_model, args, word_dict):
    # collect all non-dunder attributes of the args class into a plain dict
    # so they can be stored inside the checkpoint
    args_dict = dict((name, getattr(args(), name)) for name in dir(args())
                     if not name.startswith("__"))
    train_set = dataloader.MyDataset('data/train_data.pkl',
                                     'data/train_label.pkl',
                                     feature_path="./data/train_words_attributes_100.pkl",
                                     word_dict=word_dict,
                                     max_len=args.max_len)
    valid_set = dataloader.MyDataset('data/valid_data.pkl',
                                     'data/valid_label.pkl',
                                     feature_path='./data/valid_words_attributes_100.pkl',
                                     word_dict=word_dict,
                                     max_len=args.max_len)
    test_set = dataloader.MyDataset('data/test_data.pkl',
                                    None,
                                    feature_path='./data/test_words_attributes_100.pkl',
                                    word_dict=word_dict,
                                    max_len=args.max_len)
    valid_iter = DataLoader(valid_set, batch_size=args.batch_size, shuffle=False)
    test_iter = DataLoader(test_set, batch_size=args.batch_size, shuffle=False)
    bias = train_set.labels.sum().item() / len(train_set.labels)
    with open('data/train_label.pkl', 'rb') as file:
        train_labels = pkl.load(file)

    # rebalance the classes: weight each positive example by the
    # negative/positive ratio, scaled by args.sample_weight
    rate_0vs1 = float(len(train_labels) - sum(train_labels)) / float(sum(train_labels))
    rate_0vs1 *= args.sample_weight
    weights = [rate_0vs1 if label == 1 else 1 for label in train_labels]
    from torch.utils.data.sampler import WeightedRandomSampler
    sampler = WeightedRandomSampler(weights, num_samples=len(train_labels), replacement=True)

    # shuffle must stay False when a sampler is supplied
    train_iter = DataLoader(train_set, batch_size=args.batch_size, shuffle=False, sampler=sampler)
    
    len_train_iter = len(train_iter)
    embeddings = word_dict.embedding
    optimizer = optim.Adam(quora_model.parameters(), lr=args.lr)
    # alternative: a NoamOpt warm-up schedule wrapping Adam
    # optimizer = trfencoder.NoamOpt(args.embedding_dim, 1, args.dcstep,
    #                                torch.optim.Adam(quora_model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=args.lr_decay)
    criterion = nn.CrossEntropyLoss()
    lossn = 0
    start_time = time.time()
    best_score = 0.0
    for epoch in range(args.num_epochs):
        running_loss = 0
        quora_model.train()
        for i, (data, label, feature) in enumerate(train_iter):
            data, label, feature = data.to(device), label.to(device), feature.to(device)
            data = embeddings(data)
            quora_model.zero_grad()

            outputs = quora_model(data, feature)
            loss = criterion(outputs, label)
            loss.backward()
            optimizer.step()

            f1 = F1score(outputs, label)
            
            running_loss += loss.item()

            if i % args.disp_freq == 0 and i > 0:
                average_loss = running_loss / args.disp_freq
                lossn += 1
                if pargs.visdom:
                    vis.line(np.array([[average_loss]]), np.array([lossn]), win="loss", update="append")
                print("%.3f training loss: %.3f training F1 score: %.3f" % (i / len_train_iter, average_loss, f1))

                dur_time = time.time() - start_time
                print("%s / %s batches trained, %d batches /s" % (i, len_train_iter, args.disp_freq / dur_time))
                start_time = time.time()

                running_loss = 0

                if i % (args.disp_freq * 10) == 0:
                    quora_model.eval()
                    with torch.no_grad():
                        f1s = []
                        for data, label, feature in valid_iter:
                            data, label, feature = data.to(device), label.to(device), feature.to(device)
                            data = embeddings(data)

                            outputs = quora_model(data, feature)
                            f1 = F1score(outputs, label)
                            f1s.append(f1)
                        val_f1 = sum(f1s) / len(f1s)
                        if pargs.visdom:
                            vis.line(np.array([[val_f1]]), np.array([lossn]), win="f1", update="append")
                        print("EPOCH %s, %s validation F1 score: %.3f, previous best score: %.3f" % (epoch, lossn, val_f1, best_score))
                        start_time = time.time()

                    if val_f1 < best_score:
                        # validation F1 dropped: decay the learning rate
                        scheduler.step()
                    else:
                        best_score = val_f1
    
                        torch.save(
                            {
                                "model_args": args_dict,
                                "model_state_dict": quora_model.state_dict()
                            }, "model/" + "best" + args.name + "_" +
                            quora_model.name + "_" + str(lossn) + ".pt")
                    quora_model.train()
                    print("current lr: %s" % scheduler.get_lr())
Example #5
def train(args):
    print('Start')

    if torch.cuda.is_available():
        device = 'cuda'
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        device = 'cpu'

    hair_colors = [
        "blonde hair", "brown hair", "black hair", "blue hair", "pink hair",
        "purple hair", "green hair", "red hair", "silver hair", "white hair",
        "orange hair", "aqua hair", "grey hair"
    ]

    image_tag_df = pd.read_csv(os.path.join(args.data_dir, 'image_tag.csv'))
    image_tag_df = image_tag_df.dropna(how='any')

    all_img_path_list = glob.glob(args.data_dir + '/face_images/*/*.png')
    # str.lstrip() strips a *character set*, not a prefix, and would mangle
    # file names here; strip the directory prefix with os.path.relpath instead
    all_img_name_list = [
        os.path.relpath(all_img_path, os.path.join(args.data_dir, 'face_images'))
        for all_img_path in all_img_path_list
    ]

    img_name_list = list(
        set(image_tag_df['image name']) & set(all_img_name_list))
    img_path_list = [
        args.data_dir + '/face_images/' + img_name
        for img_name in img_name_list
    ]

    n_iter = 0
    lr = args.lr
    beta1 = args.beta1
    beta2 = args.beta2
    train_epoch = args.train_epoch
    n_dis = args.n_dis
    batch_size = args.batch_size
    num_classes = len(hair_colors)

    gen_num_features = args.gen_num_features
    gen_dim_z = args.gen_dim_z
    gen_bottom_width = args.gen_bottom_width
    gen_distribution = args.gen_distribution

    dis_num_features = args.dis_num_features

    dataset = dataloader.MyDataset(img_name_list, img_path_list, image_tag_df,
                                   hair_colors)
    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=False)

    gen = model.ResNetGenerator(gen_num_features,
                                gen_dim_z,
                                gen_bottom_width,
                                activation=F.relu,
                                num_classes=num_classes).to(device)

    dis = model.SNResNetProjectionDiscriminator(dis_num_features, num_classes,
                                                F.relu).to(device)

    opt_gen = optim.Adam(gen.parameters(), lr, (beta1, beta2))
    opt_dis = optim.Adam(dis.parameters(), lr, (beta1, beta2))

    gen_criterion = model.GenLoss()
    dis_criterion = model.DisLoss()

    for epoch in range(train_epoch):
        print('Epoch : ', epoch)

        for x_batch, y_batch in train_loader:
            n_iter += 1
            print('n_iter : ', n_iter)

            for i in range(n_dis):
                if i == 0:
                    fake, pseudo_y, _ = dataloader.sample_from_gen(
                        num_classes, batch_size, gen_dim_z, device, gen)
                    dis_fake = dis(fake, pseudo_y)
                    loss_gen = gen_criterion(dis_fake, None)

                    gen.zero_grad()
                    loss_gen.backward()
                    opt_gen.step()

                fake, pseudo_y, _ = dataloader.sample_from_gen(
                    num_classes, batch_size, gen_dim_z, device, gen)
                real, y = x_batch.type(
                    torch.float32).to(device), y_batch.to(device)
                dis_fake, dis_real = dis(fake, pseudo_y), dis(real, y)
                loss_dis = dis_criterion(dis_fake, dis_real)

                dis.zero_grad()
                loss_dis.backward()
                opt_dis.step()

        # epoch % 1 == 0 is always true, i.e. checkpoint the generator every epoch
        if epoch % 1 == 0:
            save_model(gen, args.model_dir, epoch)
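
dataloader.sample_from_gen is not included. In this conditional example it takes (num_classes, batch_size, gen_dim_z, device, gen) and returns a fake batch, the sampled pseudo-labels, and a third value, presumably the latent codes (Example #6 uses an unconditional variant with a different signature). A sketch under those assumptions:

import torch


def sample_from_gen(num_classes, batch_size, dim_z, device, gen):
    # Hypothetical reconstruction: draw z from a standard normal prior
    # (the gen_distribution argument above suggests this is configurable),
    # sample random class labels, and run the conditional generator.
    z = torch.randn(batch_size, dim_z, device=device)
    pseudo_y = torch.randint(num_classes, (batch_size,), device=device)
    fake = gen(z, pseudo_y)
    return fake, pseudo_y, z
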
Example #6
def train(args):
    if torch.cuda.is_available():
        device = torch.device('cuda')
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        device = torch.device('cpu')
        torch.set_default_tensor_type('torch.FloatTensor')

    img_path_list = glob.glob(args.data_dir + '/face_images/*/*.png')

    n_iter = 0
    lr = args.lr
    beta1 = args.beta1
    beta2 = args.beta2
    train_epoch = args.train_epoch
    n_dis = args.n_dis
    batch_size = args.batch_size
    coef = args.coef

    gen_num_features = args.gen_num_features
    gen_dim_z = args.gen_dim_z
    gen_bottom_width = args.gen_bottom_width

    dis_num_features = args.dis_num_features

    wd_list = []

    gen = model.ResNetGenerator(gen_num_features, gen_dim_z, gen_bottom_width,
                                F.relu).to(device)
    dis = model.ResNetDiscriminator(dis_num_features, F.relu).to(device)

    opt_gen = optim.Adam(gen.parameters(), lr=lr, betas=(beta1, beta2))
    opt_dis = optim.Adam(dis.parameters(), lr=lr, betas=(beta1, beta2))

    dataset = dataloader.MyDataset(img_path_list)
    train_loader = torch.utils.data.DataLoader(dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=False)

    for epoch in range(train_epoch):
        print('Epoch: ', epoch)

        for batch in train_loader:
            if batch.shape[0] != batch_size:
                print('Skip')
                continue

            n_iter += 1
            print('n_iter: ', n_iter)

            for i in range(n_dis):
                if i == 0:
                    gen.zero_grad()
                    fake, _ = dataloader.sample_from_gen(
                        batch_size, gen_dim_z, gen, device)
                    dis_fake = dis(fake).mean()
                    loss_gen = loss.cal_loss_gen(dis_fake)
                    loss_gen.backward()
                    opt_gen.step()

                dis.zero_grad()
                fake, _ = dataloader.sample_from_gen(batch_size, gen_dim_z,
                                                     gen, device)
                real = batch.type(torch.float32).to(device)
                dis_fake, dis_real = dis(fake).mean(), dis(real).mean()
                gradient_penalty = loss.cal_gradient_penalty(
                    dis, real, fake, coef, device)
                loss_dis = loss.cal_loss_dis(dis_fake, dis_real,
                                             gradient_penalty)
                loss_dis.backward()
                opt_dis.step()

        # save the generator after every epoch (epoch % 1 == 0 is always true)
        if epoch % 1 == 0:
            save_model(gen, args.model_dir, epoch)
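
loss.cal_gradient_penalty together with the fake/real critic means points to a WGAN-GP objective. A standard gradient-penalty implementation matching the call signature (dis, real, fake, coef, device), offered as a sketch rather than the author's exact code:

import torch


def cal_gradient_penalty(dis, real, fake, coef, device):
    # Standard WGAN-GP penalty: evaluate the critic on random interpolates of
    # real and fake images, then penalize gradient norms that deviate from 1.
    batch_size = real.size(0)
    alpha = torch.rand(batch_size, 1, 1, 1, device=device)
    interpolates = (alpha * real + (1 - alpha) * fake.detach()).requires_grad_(True)
    critic_out = dis(interpolates)
    grads = torch.autograd.grad(outputs=critic_out.sum(),
                                inputs=interpolates,
                                create_graph=True)[0]
    grad_norm = grads.view(batch_size, -1).norm(2, dim=1)
    return coef * ((grad_norm - 1) ** 2).mean()
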
def train(quora_model, args, word_dict):
    args_dict = dict((name, getattr(args(), name)) for name in dir(args())
                     if not name.startswith("__"))
    train_set = dataloader.MyDataset(
        'data/train_data.pkl',
        'data/train_label.pkl',
        feature_path="./data/train_words_attributes_100.pkl",
        word_dict=word_dict,
        max_len=args.max_len)
    valid_set = dataloader.MyDataset('data/valid_data.pkl',
                                     'data/valid_label.pkl',
                                     feature_path='./data/valid_words_attributes_100.pkl',
                                     word_dict=word_dict,
                                     max_len=args.max_len)
    test_set = dataloader.MyDataset(
        'data/test_data.pkl',
        None,
        feature_path='./data/test_words_attributes_100.pkl',
        word_dict=word_dict,
        max_len=args.max_len)
    valid_iter = DataLoader(valid_set,
                            batch_size=args.batch_size,
                            shuffle=False)
    test_iter = DataLoader(test_set, batch_size=args.batch_size, shuffle=False)
    bias = train_set.labels.sum().item() / len(train_set.labels)
    with open('data/train_label.pkl', 'rb') as file:
        train_labels = pkl.load(file)

    rate_0vs1 = float(len(train_labels) - sum(train_labels)) / float(
        sum(train_labels))

    embeddings = word_dict.embedding
    optimizer = optim.Adam(quora_model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          step_size=3,
                                          gamma=args.lr_decay)
    criterion = nn.CrossEntropyLoss()
    lossn = 0
    losses = []
    training_f1s = []
    val_f1s = []
    start_time = time.time()
    best_score = 0.0
    for epoch in range(args.num_epochs):
        print("EPOCH %s: sample_weight: %.3f" %
              (epoch + 1, args.sample_weight))
        train_iter = get_train_iter(rate_0vs1, args, train_set, train_labels)
        len_train_iter = len(train_iter)
        args.sample_weight *= args.sample_weight_decay
        running_loss = 0
        quora_model.train()
        precision, recall = 0, 0
        for i, (data, label, feature) in enumerate(train_iter):
            data, label, feature = data.to(device), label.to(
                device), feature.to(device)
            data = embeddings(data)

            quora_model.zero_grad()

            outputs = quora_model(data, feature)
            loss = criterion(outputs, label)
            loss.backward()
            optimizer.step()

            p, r, f1 = F1score(outputs, label)
            precision += p
            recall += r

            running_loss += loss.item()

            if i % args.disp_freq == 0 and i > 0:
                average_loss = running_loss / args.disp_freq
                lossn += 1
                losses.append(average_loss)
                if pargs.visdom:
                    vis.line(np.array([[average_loss]]),
                             np.array([lossn]),
                             win="loss",
                             update="append")
                print("%.3f training loss: %.3f training F1 score: %.3f" %
                      (i / len_train_iter, average_loss, f1))

                dur_time = time.time() - start_time
                print("%s / %s batches trained, %d batches /s" %
                      (i, len_train_iter, args.disp_freq / dur_time))
                start_time = time.time()

                running_loss = 0

                if i % (args.disp_freq * 10) == 0:
                    quora_model.eval()
                    with torch.no_grad():
                        f1s = []
                        for data, label, feature in valid_iter:
                            data, label, feature = data.to(device), label.to(
                                device), feature.to(device)
                            data = embeddings(data)

                            outputs = quora_model(data, feature)
                            _, _, f1 = F1score(outputs, label)
                            f1s.append(f1)
                        val_f1 = sum(f1s) / len(f1s)
                        val_f1s.append(val_f1)
                        if pargs.visdom:
                            vis.line(np.array([[val_f1]]),
                                     np.array([lossn]),
                                     win="f1",
                                     update="append")
                        # averages over the disp_freq * 10 batches since the
                        # last evaluation; E is assumed to be a small module-level
                        # epsilon guarding against division by zero
                        rec = recall / (args.disp_freq * 10)
                        prec = precision / (args.disp_freq * 10)
                        training_f1 = (2 * rec * prec + E) / (rec + prec + E)
                        training_f1s.append(training_f1)
                        precision, recall = 0, 0
                        print(
                            "EPOCH %s, %s training F1 score: %.3f, validation F1 score: %.3f\nprevious best score: %.3f"
                            % (epoch, lossn, training_f1, val_f1, best_score))
                        start_time = time.time()

                    if val_f1 < best_score:
                        scheduler.step()
                    else:
                        best_score = val_f1

                        torch.save(
                            {
                                "model_args": args_dict,
                                "model_state_dict": quora_model.state_dict()
                            }, "model/" + args.name + "_" + quora_model.name +
                            "_" + str(lossn) + ".pt")
                    quora_model.train()
                    print("current lr: %s" % scheduler.get_lr()[0])
        with open("model/" + args.name + "_" + "train.txt", "w") as f:
            f.write("training_loss,")
            f.write(",".join([str(item) for item in losses]))
            f.write("\n")
            f.write("training_f1,")
            f.write(",".join([str(item) for item in training_f1s]))
            f.write("\n")
            f.write("eval_f1,")
            f.write(",".join([str(item) for item in val_f1s]))