Example #1
def main():
    print(args)

    N_EPOCHS = args.n_epochs
    N_TRAIN = args.n_train
    N_VALID = args.n_valid
    BATCH_SIZE = args.batch_size
    EMB_DIM = args.emb_dim
    HID_DIM = args.hid_dim
    MAXOUT_DIM = args.maxout_dim

    TRAIN_X = 'PART_I.article'
    TRAIN_Y = 'PART_I.summary'
    VALID_X = 'PART_III.article'
    VALID_Y = 'PART_III.summary'

    trainX = utils.getDataLoader(TRAIN_X,
                                 max_len=100,
                                 n_data=N_TRAIN,
                                 batch_size=BATCH_SIZE)
    trainY = utils.getDataLoader(TRAIN_Y,
                                 max_len=25,
                                 n_data=N_TRAIN,
                                 batch_size=BATCH_SIZE)
    validX = utils.getDataLoader(VALID_X,
                                 max_len=100,
                                 n_data=N_VALID,
                                 batch_size=BATCH_SIZE)
    validY = utils.getDataLoader(VALID_Y,
                                 max_len=25,
                                 n_data=N_VALID,
                                 batch_size=BATCH_SIZE)

    vocab = json.load(open('data/vocab.json'))
    model = Seq2SeqAttention(len(vocab),
                             EMB_DIM,
                             HID_DIM,
                             BATCH_SIZE,
                             vocab,
                             device,
                             max_trg_len=25).cuda(device)

    model_file = args.model_file
    if os.path.exists(model_file):
        model.load_state_dict(torch.load(model_file))
        print('Loaded model parameters from %s' % model_file)

    optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=20000,
                                                gamma=0.3)

    train(trainX, trainY, validX, validY, model, optimizer, scheduler,
          N_EPOCHS)
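The train() function is defined elsewhere in this repository. As a rough sketch only, a loop matching this call signature could look like the following; the assumption that the model's forward pass on a (source, target) batch pair returns a scalar loss is mine, not confirmed by this page.

# Hypothetical sketch, not the repository's actual train():
# assumes model(src, trg) returns a scalar training loss.
def train(trainX, trainY, validX, validY, model, optimizer, scheduler,
          n_epochs):
    for epoch in range(n_epochs):
        model.train()
        for src, trg in zip(trainX, trainY):  # paired article/summary batches
            optimizer.zero_grad()
            loss = model(src, trg)
            loss.backward()
            optimizer.step()
            scheduler.step()  # StepLR(step_size=20000) counts batches, not epochs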
Example #2
def test(model, dataset, batch_size):
    model.eval()

    gallery_dataloader = utils.getDataLoader(dataset,
                                             batch_size,
                                             'gallery',
                                             shuffle=False,
                                             augment=False)
    query_dataloader = utils.getDataLoader(dataset,
                                           batch_size,
                                           'query',
                                           shuffle=False,
                                           augment=False)

    gallery_cams, gallery_labels = get_cam_label(
        gallery_dataloader.dataset.imgs)
    query_cams, query_labels = get_cam_label(query_dataloader.dataset.imgs)

    # Extract feature
    gallery_features = []
    query_features = []

    for inputs, _ in gallery_dataloader:
        gallery_features.append(
            extract_feature(model, inputs, requires_norm=True,
                            vectorize=True).cpu().data)
    gallery_features = torch.cat(gallery_features, dim=0).numpy()

    for inputs, _ in query_dataloader:
        query_features.append(
            extract_feature(model, inputs, requires_norm=True,
                            vectorize=True).cpu().data)
    query_features = torch.cat(query_features, dim=0).numpy()

    CMC, mAP, (sorted_index_list, sorted_y_true_list,
               junk_index_list) = evaluate(query_features, query_labels,
                                           query_cams, gallery_features,
                                           gallery_labels, gallery_cams)

    rank_list_fig = utils.save_rank_list_img(query_dataloader,
                                             gallery_dataloader,
                                             sorted_index_list,
                                             sorted_y_true_list,
                                             junk_index_list)

    return CMC, mAP, rank_list_fig
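get_cam_label() is not shown on this page. A plausible sketch, assuming Market-1501-style filenames such as 0002_c1s1_000451_03.jpg (person ID before the first underscore, camera digit after the 'c'):

import os

# Hypothetical sketch of get_cam_label; the filename layout is an
# assumption based on the Market-1501 naming convention.
def get_cam_label(img_paths):
    cams, labels = [], []
    for path, _ in img_paths:  # ImageFolder.imgs holds (path, class) pairs
        name = os.path.basename(path)
        labels.append(int(name.split('_')[0]))
        cams.append(int(name.split('_')[1][1]))
    return cams, labels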
Example #3
def main():

    N_VALID = args.n_valid
    BATCH_SIZE = args.batch_size
    EMB_DIM = args.emb_dim
    HID_DIM = args.hid_dim

    VALID_X = 'PART_III.article'
    VALID_Y = 'PART_III.summary'

    validX = utils.getDataLoader(VALID_X,
                                 max_len=100,
                                 n_data=N_VALID,
                                 batch_size=BATCH_SIZE)
    validY = utils.getDataLoader(VALID_Y,
                                 max_len=25,
                                 n_data=N_VALID,
                                 batch_size=BATCH_SIZE)

    vocab = json.load(open('data/vocab.json'))
    model = Seq2SeqAttention(len(vocab),
                             EMB_DIM,
                             HID_DIM,
                             BATCH_SIZE,
                             vocab,
                             device,
                             max_trg_len=25).cuda(device)

    file = args.model_file
    if os.path.exists(file):
        model.load_state_dict(torch.load(file))
        print('Loaded model parameters from %s' % file)

    mytest(validX, validY, model)
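mytest() is defined elsewhere in the repository. A minimal evaluation-loop sketch matching this signature, again assuming model(src, trg) returns a scalar loss:

import torch

# Hypothetical sketch, not the repository's actual mytest().
def mytest(validX, validY, model):
    model.eval()
    total_loss, n_batches = 0.0, 0
    with torch.no_grad():
        for src, trg in zip(validX, validY):
            total_loss += model(src, trg).item()
            n_batches += 1
    print('Validation loss: %.4f' % (total_loss / n_batches))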
Example #4
def test(model, dataset, batch_size):
    model.eval()

    gallery_dataloader = utils.getDataLoader(dataset,
                                             batch_size,
                                             'gallery',
                                             shuffle=False,
                                             augment=False)
    query_dataloader = utils.getDataLoader(dataset,
                                           batch_size,
                                           'query',
                                           shuffle=False,
                                           augment=False)

    gallery_cams, gallery_labels = get_cam_label(
        gallery_dataloader.dataset.imgs)
    query_cams, query_labels = get_cam_label(query_dataloader.dataset.imgs)

    # Extract feature
    gallery_features = []
    query_features = []

    for inputs, _ in gallery_dataloader:
        gallery_features.append(
            extract_feature(model, inputs, requires_norm=True,
                            vectorize=True).cpu().data)
    gallery_features = torch.cat(gallery_features, dim=0).numpy()

    for inputs, _ in query_dataloader:
        query_features.append(
            extract_feature(model, inputs, requires_norm=True,
                            vectorize=True).cpu().data)
    query_features = torch.cat(query_features, dim=0).numpy()

    return evaluate(query_features, query_labels, query_cams, gallery_features,
                    gallery_labels,
                    gallery_cams), gallery_features, query_features
Example #5
    def __init__(self, args):

        self.params = args

        (self.trainLoader, self.testLoader, self.D_in, self.D_out) = getDataLoader(
            trainRatio=self.params.trainRatio,
            batchSize=self.params.batchSize,
            accidentType=self.params.accidentType,
        )
        self.model = Network(self.D_in, self.D_out).to(DEVICE)

        self.opt = optim.Adam(
            self.model.parameters(), lr=self.params.learningRate, weight_decay=1e-5
        )
        self._criterion = torch.nn.CrossEntropyLoss()
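The class's training loop is not shown. One epoch using the objects built above might look like this sketch (train_epoch is a hypothetical method name; DEVICE is the module-level constant already used in __init__):

# Hypothetical sketch of one training epoch for this class.
def train_epoch(self):
    self.model.train()
    for inputs, targets in self.trainLoader:
        inputs, targets = inputs.to(DEVICE), targets.to(DEVICE)
        self.opt.zero_grad()
        loss = self._criterion(self.model(inputs), targets)
        loss.backward()
        self.opt.step()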
Example #6
def main():

    N_VALID = args.n_valid
    BATCH_SIZE = args.batch_size
    EMB_DIM = args.emb_dim
    HID_DIM = args.hid_dim

    VALID_X = 'sumdata/Giga/input.txt'
    VALID_Y = 'sumdata/Giga/task1_ref0.txt'

    vocab = json.load(open('sumdata/vocab.json'))
    validX = utils.getDataLoader(VALID_X,
                                 vocab,
                                 n_data=N_VALID,
                                 batch_size=BATCH_SIZE)
    validY = utils.getDataLoader(VALID_Y,
                                 vocab,
                                 n_data=N_VALID,
                                 batch_size=BATCH_SIZE)

    model = Seq2SeqAttention(len(vocab),
                             EMB_DIM,
                             HID_DIM,
                             BATCH_SIZE,
                             vocab,
                             device,
                             max_trg_len=25)
    if args.gpu != -1:
        model = model.cuda(device)

    file = args.model_file
    if os.path.exists(file):
        model.load_state_dict(torch.load(file))
        print('Loaded model parameters from %s' % file)

    mytest(validX, validY, model)
Example #7
    def __init__(self,
                 batch_size,
                 image_size,
                 latent_dim,
                 epochs,
                 discriminator_filters,
                 generator_filters,
                 device,
                 mixed_probability=0.9,
                 pl_beta=0.9):
        self.StyleGan = StyleGan(batch_size, image_size, latent_dim,
                                 discriminator_filters, generator_filters,
                                 device).to(device)
        self.image_size = image_size
        self.num_layers = np.log2(image_size)
        self.latent_dim = latent_dim
        self.batch_size = batch_size
        assert image_size in [2**x for x in range(5, 11)]
        self.discriminator_loss = torch.tensor(0.).to(device)
        self.generator_loss = torch.tensor(0.).to(device)
        self.dataLoader = utils.getDataLoader(batch_size, image_size)
        self.mixed_probability = mixed_probability
        self.epochs = epochs
        self.loss_fn = nn.BCEWithLogitsLoss()
        self.average_pl_length = None
        self.pl_beta = pl_beta
        self.device = device
        self.tensorboard_summary = SummaryWriter('runs7/stylegan2')
        self.checkpoint = 0
        self.apex_available = apex_available
        self.constant_style = utils.createStyleMixedNoiseList(
            self.batch_size, self.latent_dim, self.num_layers,
            self.StyleGan.styleNetwork, self.device)
        self.constant_noise = utils.create_image_noise(self.batch_size,
                                                       self.image_size,
                                                       self.device)
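The average_pl_length and pl_beta fields suggest StyleGAN2's path-length regularizer. A sketch of the moving-average update (my reading of StyleGAN2, not code from this repository):

# Hypothetical sketch: exponential moving average of the generator's
# path length, as used by StyleGAN2's path-length regularizer.
def update_average_pl_length(self, path_lengths):
    mean_pl = path_lengths.detach().mean().item()
    if self.average_pl_length is None:
        self.average_pl_length = mean_pl
    else:
        self.average_pl_length = (self.pl_beta * self.average_pl_length +
                                  (1 - self.pl_beta) * mean_pl)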
Example #8
                        choices=['market1501', 'cuhk03', 'duke'])
    parser.add_argument('--batch_size',
                        default=512,
                        type=int,
                        help='batchsize')
    parser.add_argument('--share_conv', action='store_true')
    arg = parser.parse_args()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    save_dir_path = os.path.join(arg.save_path, arg.dataset)
    logger = utils.Logger(save_dir_path)
    logger.info(vars(arg))

    # Just to get the correct num_classes of classifier fc
    train_dataloader = utils.getDataLoader(arg.dataset, arg.batch_size,
                                           'train')

    model = utils.load_network(
        PCBModel(num_classes=len(train_dataloader.dataset.classes),
                 share_conv=arg.share_conv,
                 return_features=True), save_dir_path, arg.which_epoch)
    model = model.to(device)
    CMC, mAP, rank_list_fig = test(model, arg.dataset, arg.batch_size)

    logger.info('Testing: top1:%.2f top5:%.2f top10:%.2f mAP:%.2f' %
                (CMC[0], CMC[4], CMC[9], mAP))
    logger.save_img(rank_list_fig)

    torch.cuda.empty_cache()
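utils.load_network restores weights saved by utils.save_network. A minimal sketch under the assumption that checkpoints are named net_<epoch>.pth (the naming is not confirmed by this page):

import os
import torch

# Hypothetical sketch of utils.load_network; the checkpoint filename
# pattern is an assumption.
def load_network(model, save_dir_path, which_epoch):
    ckpt = os.path.join(save_dir_path, 'net_%s.pth' % which_epoch)
    model.load_state_dict(torch.load(ckpt, map_location='cpu'))
    return model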
Example #9
image_size = args.img_size
batch_size = args.batch_size
epochs = args.epochs
quiet = bool(args.quiet)
verbose = not quiet
latent_dim = 256
mixed_probability = 0.9
discriminator_filters = 8
generator_filters = 8
pl_beta = 0.99

if __name__ == '__main__':
    device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
    print("Using device: ", device)
    #model = StyleGan2Model()
    dataLoader = utils.getDataLoader(batch_size, image_size)
    # print(len(dataLoader) / 10)
    Trainer = models.Trainer(batch_size, image_size, latent_dim, epochs, discriminator_filters, generator_filters, device, mixed_probability, pl_beta)
    # print(Trainer.StyleGan)
    # print(Trainer.StyleGan.generator.state_dict())
    # print(sum(p.numel() for p in Trainer.StyleGan.parameters()))
    # print(Trainer.StyleGan.discriminator.state_dict())

    print(Trainer.StyleGan.generator.state_dict()['generatorBlocks.2.style_to_input_channels.weight'][0][0].item())
    # print(Trainer.StyleGan.discriminator.state_dict()[])
    print("Apex available: ", Trainer.apex_available)
    # Trainer.resetSaves()
    # x, y = next(enumerate(dataLoader))
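    # Note: next(enumerate(dataLoader)) yields (batch_index, batch), so x is
    # the index 0 and y is the (images, labels) pair used below.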
    x, y = next(enumerate(dataLoader))
    # print(y[0])
    # utils.showImage(y[0][0].expand(3, -1, -1))
Example #10
def main(args):
    # Setting
    warnings.simplefilter("ignore", UserWarning)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Args Parser
    hj_method = args.hj_method
    kr_method = args.kr_method
    batch_size = args.batch_size
    beam_size = args.beam_size
    hidden_size = args.hidden_size
    embed_size = args.embed_size
    vocab_size = args.vocab_size
    max_len = args.max_len
    padding_index = args.pad_id
    n_layers = args.n_layers
    stop_ix = args.stop_ix

    # Load saved model & Word2vec
    save_path = 'save_{}_{}_{}_maxlen_{}'.format(vocab_size, hj_method,
                                                 kr_method, max_len)
    save_list = sorted(glob.glob(f'./save/{save_path}/*.*'))
    save_pt = save_list[-1]
    print('Will load {} pt file...'.format(save_pt))
    word2vec_hj = Word2Vec.load('./w2v/word2vec_hj_{}_{}.model'.format(
        vocab_size, hj_method))

    # SentencePiece model load
    spm_kr = spm.SentencePieceProcessor()
    spm_kr.Load("./spm/m_korean_{}.model".format(vocab_size))

    # Test data load
    with open('./test_dat.pkl', 'rb') as f:
        test_dat = pickle.load(f)

    test_dataset = CustomDataset(test_dat['test_hanja'],
                                 test_dat['test_korean'])
    test_loader = getDataLoader(test_dataset,
                                pad_index=padding_index,
                                shuffle=False,
                                batch_size=batch_size)

    # Model load
    print('Model loading...')
    encoder = Encoder(vocab_size,
                      embed_size,
                      hidden_size,
                      word2vec_hj,
                      n_layers=n_layers,
                      padding_index=padding_index)
    decoder = Decoder(embed_size,
                      hidden_size,
                      vocab_size,
                      n_layers=n_layers,
                      padding_index=padding_index)
    seq2seq = Seq2Seq(encoder, decoder, beam_size).cuda()
    #optimizer = optim.Adam(seq2seq.parameters(), lr=lr, weight_decay=w_decay)
    #scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=scheduler_step_size, gamma=lr_decay)
    print(seq2seq)

    print('Testing...')
    start_time = time.time()
    results = test(seq2seq,
                   test_loader,
                   vocab_size,
                   load_pt=save_pt,
                   stop_ix=stop_ix)
    print(time.time() - start_time)
    print('Done!')

    print("Decoding...")
    pred_list = list()
    for result_text in tqdm(results):
        text = torch.Tensor(result_text).squeeze().tolist()
        text = [int(x) for x in text]
        prediction_sentence = spm_kr.decode_ids(text).strip()  # Decode with strip
        pred_list.append(prediction_sentence)
    ref_list = list()
    for ref_text in tqdm(test_dat['test_korean'][:stop_ix]):
        ref_list.append(spm_kr.decode_ids(ref_text).strip())
    print('Done!')

    with open(f'./save/{save_path}/test_result.pkl', 'wb') as f:
        pickle.dump({
            'pred': pred_list,
            'reference': ref_list,
        }, f)
    print(f'Saved results to ./save/{save_path}/test_result.pkl')

    # Calculate BLEU score; corpus_bleu expects tokenized references and
    # hypotheses, and the decoded sentences live in ref_list / pred_list
    print('Calculate BLEU4, METEOR, Rouge-L...')
    chencherry = SmoothingFunction()
    bleu4 = corpus_bleu([[ref.split()] for ref in ref_list],
                        [pred.split() for pred in pred_list],
                        smoothing_function=chencherry.method4)
    print('BLEU Score is {}'.format(bleu4))

    # Calculate METEOR score; nltk's meteor_score is sentence-level,
    # so average it over the test set
    meteor = sum(meteor_score([ref], pred)
                 for ref, pred in zip(ref_list, pred_list)) / len(ref_list)
    print('METEOR Score is {}'.format(meteor))

    # Calculate Rouge-L score
    r = Rouge()
    total_test_length = len(ref_list)
    precision_all = 0
    recall_all = 0
    f_score_all = 0
    for i in range(total_test_length):
        [precision, recall, f_score] = r.rouge_l([ref_list[i]],
                                                 [pred_list[i]])
        precision_all += precision
        recall_all += recall
        f_score_all += f_score
    print('Precision : {}'.format(round(precision_all / total_test_length, 4)))
    print('Recall : {}'.format(round(recall_all / total_test_length, 4)))
    print('F Score : {}'.format(round(f_score_all / total_test_length, 4)))
Example #11
        time_elapsed = time.time() - start_time
        logger.info('Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))

        # Save final model weights
        utils.save_network(model, save_dir_path, 'final')

    # For debugging
    # inputs, classes = next(iter(dataloaders['train']))

    # ---------------------- Training settings ----------------------
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_dataloader = utils.getDataLoader(arg.dataset,
                                           arg.batch_size,
                                           'train',
                                           shuffle=True,
                                           augment=True)
    model = PCBModel(num_classes=len(train_dataloader.dataset.classes),
                     num_stripes=arg.stripes,
                     share_conv=arg.share_conv,
                     return_features=False)

    criterion = nn.CrossEntropyLoss()

    # Finetune the net
    optimizer = optim.SGD([{
        'params': model.backbone.parameters(),
        'lr': arg.learning_rate / 10
    }, {
        'params':
Example #12
def main():
    print(args)

    N_EPOCHS = args.n_epochs
    N_TRAIN = args.n_train
    N_VALID = args.n_valid
    BATCH_SIZE = args.batch_size
    EMB_DIM = args.emb_dim
    HID_DIM = args.hid_dim
    MAXOUT_DIM = args.maxout_dim

    data_dir = 'sumdata/'
    TRAIN_X = 'sumdata/train/train.article.txt'
    TRAIN_Y = 'sumdata/train/train.title.txt'
    VALID_X = 'sumdata/train/valid.article.filter.txt'
    VALID_Y = 'sumdata/train/valid.title.filter.txt'

    vocab_file = os.path.join(data_dir, "vocab.json")
    if not os.path.exists(vocab_file):
        utils.build_vocab([TRAIN_X, TRAIN_Y], vocab_file)
    vocab = json.load(open(vocab_file))

    trainX = utils.getDataLoader(TRAIN_X,
                                 vocab,
                                 n_data=N_TRAIN,
                                 batch_size=BATCH_SIZE)
    trainY = utils.getDataLoader(TRAIN_Y,
                                 vocab,
                                 n_data=N_TRAIN,
                                 batch_size=BATCH_SIZE)
    validX = utils.getDataLoader(VALID_X,
                                 vocab,
                                 n_data=N_VALID,
                                 batch_size=BATCH_SIZE)
    validY = utils.getDataLoader(VALID_Y,
                                 vocab,
                                 n_data=N_VALID,
                                 batch_size=BATCH_SIZE)

    model = Seq2SeqAttention(len(vocab),
                             EMB_DIM,
                             HID_DIM,
                             BATCH_SIZE,
                             vocab,
                             device,
                             max_trg_len=25,
                             dropout=0.5)
    if args.gpu != -1:
        model = model.cuda(device)

    model_file = args.model_file
    if os.path.exists(model_file):
        model.load_state_dict(torch.load(model_file))
        logging.info('Loaded model parameters from %s' % model_file)
        # print('Load model parameters from %s' % model_file)

    optimizer = torch.optim.Adam(model.parameters(), lr=0.0003)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=20000,
                                                gamma=0.3)

    train(trainX, trainY, validX, validY, model, optimizer, scheduler,
          N_EPOCHS)