Esempio n. 1
0
import argparse
import datetime
import os
import pprint

import dateutil.tz
from torch.utils.tensorboard import SummaryWriter

from miscc.config import cfg, cfg_from_file
from utils.data_utils import CUBDataset
from utils.log import create_logger
from utils.trainer import condGANTrainer as trainer

# Choose the YAML config on the command line: 'train_birds.yml' for training,
# 'eval_birds.yml' for evaluation.
parser = argparse.ArgumentParser(description='xxx')
parser.add_argument('--config', type=str, default='cfg/train_birds.yml')
args = parser.parse_args()
cfg_from_file(args.config)

# Environment values must be strings; str() keeps this assignment working
# when the config stores GPU_ID as an integer instead of raising TypeError.
os.environ['CUDA_VISIBLE_DEVICES'] = str(cfg.GPU_ID)

# Set directories and logger. Both the sample directory name and the log
# directory name carry the dataset name, config name and a local timestamp.
now = datetime.datetime.now(dateutil.tz.tzlocal())
timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
output_dir = 'sample/%s_%s_%s' % (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)
log_dir = os.path.join(
    cfg.LOG_DIR, '%s_%s_%s' % (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp))
os.makedirs(log_dir, exist_ok=True)
log_filename = os.path.join(log_dir, 'train.log')
# The text log and the TensorBoard event files share the same run directory.
log, logclose = create_logger(log_filename=log_filename)
writer = SummaryWriter(log_dir=log_dir)

with open(log_filename, 'w+') as logFile:
    parser.add_argument('--cfg',
                        dest='cfg_file',
                        help='optional config file',
                        default='cfg/birds_proGAN.yml',
                        type=str)
    parser.add_argument('--gpu', dest='gpu_id', type=str, default='-1')
    parser.add_argument('--data_dir', dest='data_dir', type=str, default='')
    parser.add_argument('--manualSeed', type=int, help='manual seed')
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    args = parse_args()

    # Load the config file named on the command line, if one was given.
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)

    # '-1' is the "no GPU" sentinel: it switches CUDA off; any other value
    # is stored as the GPU id.
    if args.gpu_id == '-1':
        cfg.CUDA = False
    else:
        cfg.GPU_ID = args.gpu_id

    # A non-empty --data_dir replaces the configured data directory.
    if args.data_dir != '':
        cfg.DATA_DIR = args.data_dir

    # Seed selection: non-training runs always use 100; training runs keep
    # an explicit --manualSeed and otherwise draw one at random.
    if cfg.TRAIN.FLAG:
        if args.manualSeed is None:
            args.manualSeed = random.randint(1, 10000)
    else:
        args.manualSeed = 100
Esempio n. 3
0
def testproc2():
    """Replay the word-level attention computation on one batch, printing shapes.

    Debugging walk-through. It parses the command line, applies the
    configuration, seeds the random generators, builds the training
    ``FashionTextDataset`` loader, fetches a single batch, runs it through
    the image and text encoders returned by ``build_models`` and then
    re-computes, inline, the steps labelled Eq. (7)-(10) below for the
    caption at a fixed batch index. Intermediate tensor sizes are printed.

    Takes no arguments and returns ``None``. As a side effect it creates the
    ``Model`` and ``Image`` directories under a timestamped ``../output``
    folder.
    """
    args = parse_args()
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)

    # --gpu may be declared as an int or as a string by parse_args; compare
    # on the string form so that both -1 and '-1' disable CUDA.
    if str(args.gpu_id) == '-1':
        cfg.CUDA = False
    else:
        cfg.GPU_ID = args.gpu_id

    if args.data_dir != '':
        cfg.DATA_DIR = args.data_dir
    print('Using config:')
    pprint.pprint(cfg)

    # Non-training runs use a fixed seed; training runs keep an explicit
    # seed and otherwise draw one at random.
    if not cfg.TRAIN.FLAG:
        args.manualSeed = 100
    elif args.manualSeed is None:
        args.manualSeed = random.randint(1, 10000)
    random.seed(args.manualSeed)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if cfg.CUDA:
        torch.cuda.manual_seed_all(args.manualSeed)

    # Output directories #################################################
    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
    output_dir = '../output/%s_%s_%s' % \
        (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)

    model_dir = os.path.join(output_dir, 'Model')
    image_dir = os.path.join(output_dir, 'Image')
    mkdir_p(model_dir)
    mkdir_p(image_dir)

    torch.cuda.set_device(cfg.GPU_ID)
    cudnn.benchmark = True

    # Get data loader ##################################################
    imsize = cfg.TREE.BASE_SIZE * (2**(cfg.TREE.BRANCH_NUM - 1))
    batch_size = cfg.TRAIN.BATCH_SIZE
    image_transform = transforms.Compose([
        transforms.Resize(int(imsize * 76 / 64)),
        transforms.RandomCrop(imsize),
        transforms.RandomHorizontalFlip()
    ])

    dataset = FashionTextDataset(cfg.DATA_DIR,
                                 'train',
                                 base_size=cfg.TREE.BASE_SIZE,
                                 transform=image_transform)

    print(dataset.n_words, dataset.embeddings_num)
    assert dataset
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             drop_last=True,
                                             shuffle=True,
                                             num_workers=int(cfg.WORKERS))

    # Encoders and one batch ##############################################
    rnn_model, cnn_model, labels, start_epoch = build_models()

    data = next(iter(dataloader))
    imgs, captions, cap_lens, class_ids, keys = prepare_data(data)

    # check last item from images list.
    print(imgs[-1].shape)
    print("labels", labels)

    words_features, sent_code = cnn_model(imgs[-1])
    print("words features shape", words_features.shape)
    print("sent code shape", sent_code.shape)
    # --> batch_size x nef x 17*17
    nef, att_sze = words_features.size(1), words_features.size(2)
    print("nef att_sze", nef, att_sze)

    hidden = rnn_model.init_hidden(batch_size)
    for layer_no, h in enumerate(hidden, start=1):
        print("hidden size", layer_no, h.size())

    # words_emb: batch_size x nef x seq_len
    # sent_emb: batch_size x nef
    print("train captions", captions.size())
    print("train cap_lens", cap_lens.size())

    words_emb, sent_emb = rnn_model(captions, cap_lens, hidden)
    print("words_emb shape", words_emb.size())
    print("sent_emb shape", sent_emb.size())

    # Fixed batch position whose caption is inspected below; the batch must
    # therefore hold more than 10 samples.
    i = 10
    masks = []
    if class_ids is not None:
        # Flag the other samples that share the inspected sample's class id.
        mask = (class_ids == class_ids[i]).astype(np.uint8)
        mask[i] = 0
        masks.append(mask.reshape((1, -1)))

    print("no masks, if class ids are sequential.", masks)

    # Get the i-th text description
    words_num = cap_lens[i]
    print(words_num)
    # -> 1 x nef x words_num
    word = words_emb[i, :, :words_num].unsqueeze(0).contiguous()
    print(word.size())
    # -> batch_size x nef x words_num
    word = word.repeat(batch_size, 1, 1)
    print(word.size())

    context = words_features.clone()
    query = word.clone()

    batch_size, queryL = query.size(0), query.size(2)
    ih, iw = context.size(2), context.size(3)
    sourceL = ih * iw

    # Flatten the spatial grid: batch x ndf x sourceL, and its transpose
    # batch x sourceL x ndf.
    context = context.view(batch_size, -1, sourceL)
    contextT = torch.transpose(context, 1, 2).contiguous()

    # Get attention
    # (batch x sourceL x ndf)(batch x ndf x queryL)
    # -->batch x sourceL x queryL
    attn = torch.bmm(contextT, query)  # Eq. (7) in AttnGAN paper
    # --> batch*sourceL x queryL
    attn = attn.view(batch_size * sourceL, queryL)

    # Eq. (8): normalise over the words (columns) of each location row.
    # dim=0 would instead normalise across the flattened batch*location
    # rows, mixing different samples.
    attn = nn.Softmax(dim=1)(attn)
    print("attn size", attn.size())

    # --> batch x sourceL x queryL
    attn = attn.view(batch_size, sourceL, queryL)
    # --> batch*queryL x sourceL
    attn = torch.transpose(attn, 1, 2).contiguous()
    attn = attn.view(batch_size * queryL, sourceL)
    print("attn size", attn.size())

    # Eq. (9): sharpen with GAMMA1, then normalise over the locations
    # (columns) of each word row.
    attn = attn * cfg.TRAIN.SMOOTH.GAMMA1
    attn = nn.Softmax(dim=1)(attn)
    attn = attn.view(batch_size, queryL, sourceL)
    # --> batch x sourceL x queryL
    attnT = torch.transpose(attn, 1, 2).contiguous()

    # (batch x ndf x sourceL)(batch x sourceL x queryL)
    # --> batch x ndf x queryL
    weightedContext = torch.bmm(context, attnT)
    print("weight size", weightedContext.size())

    attn = attn.view(batch_size, -1, ih, iw)
    print("attn size after Eq9", attn.size())

    att_maps = []
    att_maps.append(attn[i].unsqueeze(0).contiguous())
    # --> batch_size x words_num x nef
    word = word.transpose(1, 2).contiguous()
    weightedContext = weightedContext.transpose(1, 2).contiguous()
    # --> batch_size*words_num x nef
    word = word.view(batch_size * words_num, -1)
    weightedContext = weightedContext.view(batch_size * words_num, -1)
    print("weight size after Eq.10", weightedContext.size())

    # -->batch_size*words_num
    row_sim = cosine_similarity(word, weightedContext)
    print("row similarities", row_sim.size())
    # --> batch_size x words_num
    row_sim = row_sim.view(batch_size, words_num)

    # Eq. (10): log of the summed exp(GAMMA2 * similarity) over the words.
    row_sim.mul_(cfg.TRAIN.SMOOTH.GAMMA2).exp_()
    row_sim = row_sim.sum(dim=1, keepdim=True)
    row_sim = torch.log(row_sim)

    print(row_sim)
Esempio n. 4
0
                        type=int,
                        default=100,
                        help='number of images to be created')
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    args = parse_args()

    # This entry point ignores any config path from the command line and
    # always loads the bird cycle config, then pins the GPU and data dir.
    cfg_path = "cfg/bird_cycle.yaml"

    cfg_from_file(cfg_path)
    cfg.GPU_ID = 0
    cfg.DATA_DIR = 'data/birds'

    # Use the training split when training, the test split otherwise.
    split_dir, bshuffle = 'train', True
    if not cfg.TRAIN.FLAG:
        split_dir = 'test'

    # Get data loader
    imsize = cfg.TREE.BASE_SIZE * (2**(cfg.TREE.BRANCH_NUM - 1))
    image_transform = transforms.Compose([
        # transforms.Scale is the deprecated former name of Resize and is
        # absent from current torchvision releases.
        transforms.Resize(int(imsize * 76 / 64)),
        transforms.RandomCrop(imsize),
        transforms.RandomHorizontalFlip()
    ])
    dataset = TextDataset(cfg.DATA_DIR,
Esempio n. 5
0
def testproc():
    """Run a short smoke-test training pass over the text and image encoders.

    Parses the command line, applies the configuration, seeds the random
    generators, builds the training ``FashionTextDataset`` loader, prints
    the shape of the first image tensor of one batch, and then calls
    ``trainSingle`` for every epoch in ``range(start_epoch, 1)``, so at most
    the epochs before epoch 1 are executed.

    Takes no arguments and returns ``None``. As a side effect it creates the
    ``Model`` and ``Image`` directories under a timestamped ``../output``
    folder. Ctrl + C during training is caught and reported instead of
    propagating.
    """
    args = parse_args()
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)

    # --gpu may be declared as an int or as a string by parse_args; compare
    # on the string form so that both -1 and '-1' disable CUDA.
    if str(args.gpu_id) == '-1':
        cfg.CUDA = False
    else:
        cfg.GPU_ID = args.gpu_id

    if args.data_dir != '':
        cfg.DATA_DIR = args.data_dir
    print('Using config:')
    pprint.pprint(cfg)

    # Non-training runs use a fixed seed; training runs keep an explicit
    # seed and otherwise draw one at random.
    if not cfg.TRAIN.FLAG:
        args.manualSeed = 100
    elif args.manualSeed is None:
        args.manualSeed = random.randint(1, 10000)
    random.seed(args.manualSeed)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if cfg.CUDA:
        torch.cuda.manual_seed_all(args.manualSeed)

    # Output directories #################################################
    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
    output_dir = '../output/%s_%s_%s' % \
        (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)

    model_dir = os.path.join(output_dir, 'Model')
    image_dir = os.path.join(output_dir, 'Image')
    mkdir_p(model_dir)
    mkdir_p(image_dir)

    torch.cuda.set_device(cfg.GPU_ID)
    cudnn.benchmark = True

    # Get data loader ##################################################
    imsize = cfg.TREE.BASE_SIZE * (2**(cfg.TREE.BRANCH_NUM - 1))
    batch_size = cfg.TRAIN.BATCH_SIZE
    image_transform = transforms.Compose([
        transforms.Resize(int(imsize * 76 / 64)),
        transforms.RandomCrop(imsize),
        transforms.RandomHorizontalFlip()
    ])

    dataset = FashionTextDataset(cfg.DATA_DIR,
                                 'train',
                                 base_size=cfg.TREE.BASE_SIZE,
                                 transform=image_transform)

    print(dataset.n_words, dataset.embeddings_num)
    assert dataset
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             drop_last=True,
                                             shuffle=True,
                                             num_workers=int(cfg.WORKERS))

    # Peek at one batch; only the image list is inspected, but the five-way
    # unpacking still checks the batch layout.
    imgs, _caps, _cap_len, _cls_id, _key = next(iter(dataloader))
    print(imgs[0].shape)

    # Train ##############################################################
    text_encoder, image_encoder, labels, start_epoch = build_models(
        dataset, batch_size)
    # Optimise every text-encoder parameter plus the image-encoder
    # parameters that still require gradients.
    para = list(text_encoder.parameters())
    para.extend(v for v in image_encoder.parameters() if v.requires_grad)

    lr = cfg.TRAIN.ENCODER_LR
    # At any point you can hit Ctrl + C to break out of training early.
    try:
        for epoch in range(start_epoch, 1):
            # A fresh optimizer is created for every epoch.
            optimizer = optim.Adam(para, lr=lr, betas=(0.5, 0.999))
            trainSingle(dataloader, image_encoder, text_encoder,
                        batch_size, labels, optimizer, epoch,
                        dataset.ixtoword, image_dir)
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')
Esempio n. 6
0
def loading_model(dataset_name='bird', gpu_id=-1, data_dir=''):
    """Load the evaluation config and the pretrained text encoder and generator.

    Args:
        dataset_name: ``'bird'`` selects ``cfg/eval_bird.yml``; any other
            value selects ``cfg/eval_coco.yml``. Both paths are resolved
            against ``current_dir``.
        gpu_id: GPU index stored in ``cfg.GPU_ID``. The default ``-1``
            sets ``cfg.CUDA`` to ``False`` instead, so everything stays on
            the CPU.
        data_dir: when non-empty, replaces ``cfg.DATA_DIR``.

    Returns:
        The list ``[algo, text_encoder, netG, dataset]``: the trainer
        object, the text encoder and generator (both switched to eval mode
        with their weights loaded), and the dataset.
    """
    config_name = "cfg/eval_bird.yml" if dataset_name == 'bird' \
        else "cfg/eval_coco.yml"
    cfg_from_file(os.path.join(current_dir, config_name))

    if gpu_id != -1:
        cfg.GPU_ID = gpu_id
    else:
        cfg.CUDA = False

    if data_dir != '':
        cfg.DATA_DIR = data_dir

    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
    output_dir = '../output/%s_%s_%s' % \
        (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)

    # Use the training split when training, the test split otherwise;
    # shuffling stays enabled in both cases.
    split_dir, bshuffle = 'train', True
    if not cfg.TRAIN.FLAG:
        split_dir = 'test'

    # Get data loader
    imsize = cfg.TREE.BASE_SIZE * (2 ** (cfg.TREE.BRANCH_NUM - 1))
    image_transform = transforms.Compose([
        # transforms.Scale is the deprecated former name of Resize and is
        # absent from current torchvision releases.
        transforms.Resize(int(imsize * 76 / 64)),
        transforms.RandomCrop(imsize),
        transforms.RandomHorizontalFlip()])
    dataset = TextDataset(cfg.DATA_DIR, split_dir,
                          base_size=cfg.TREE.BASE_SIZE,
                          transform=image_transform)
    assert dataset
    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=cfg.TRAIN.BATCH_SIZE,
        drop_last=True, shuffle=bshuffle, num_workers=int(cfg.WORKERS))

    # Trainer object handed back to the caller.
    algo = trainer(output_dir, dataloader, dataset.n_words, dataset.ixtoword)

    # Text encoder: weights come from cfg.TRAIN.NET_E and are mapped onto
    # the CPU first, then moved to the GPU only when CUDA is enabled.
    text_encoder = RNN_ENCODER(algo.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
    state_dict = torch.load(cfg.TRAIN.NET_E,
                            map_location=lambda storage, loc: storage)
    text_encoder.load_state_dict(state_dict)
    if cfg.CUDA:
        text_encoder = text_encoder.cuda()
    text_encoder.eval()

    # Generator: same loading scheme, weights come from cfg.TRAIN.NET_G.
    netG = G_NET()
    model_dir = cfg.TRAIN.NET_G
    state_dict = torch.load(model_dir,
                            map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict)
    if cfg.CUDA:
        netG.cuda()
    netG.eval()

    return [algo, text_encoder, netG, dataset]
Esempio n. 7
0
def parse_args():
    """Parse the command-line options of the GAN training script.

    Returns:
        The ``argparse.Namespace`` with ``cfg_file`` (default
        ``'birds_stage1.yml'``), ``gpu_id`` (string, default ``'0'``),
        ``data_dir`` (default ``''``) and ``manualSeed`` (int, ``None``
        when not supplied).
    """
    cli = argparse.ArgumentParser(description='Train a GAN network')
    cli.add_argument(
        '--cfg',
        dest='cfg_file',
        type=str,
        default='birds_stage1.yml',
        help='optional config file',
    )
    cli.add_argument('--gpu', dest='gpu_id', type=str, default='0')
    cli.add_argument('--data_dir', dest='data_dir', type=str, default='')
    cli.add_argument('--manualSeed', type=int, help='manual seed')
    return cli.parse_args()

if __name__ == "__main__":
    args = parse_args()
    # Load the config file named on the command line, if one was given.
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    # --gpu is parsed as a string, so the "-1 means no override" sentinel is
    # compared on the string form; comparing with the int -1 was always true.
    if str(args.gpu_id) != '-1':
        cfg.GPU_ID = args.gpu_id
    if args.data_dir != '':
        cfg.DATA_DIR = args.data_dir
    print('Using config:')
    pprint.pprint(cfg)
    # Draw a seed when none was supplied, then seed Python and torch (and
    # every CUDA device when CUDA is enabled).
    if args.manualSeed is None:
        args.manualSeed = random.randint(1, 10000)
    random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if cfg.CUDA:
        torch.cuda.manual_seed_all(args.manualSeed)
    # Local-time timestamp for naming this run.
    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
    output_dir = '../output/%s_%s_%s' % \
Esempio n. 8
0
                        help='To use new arch or not',
                        dest='new_arch',
                        default=0)
    parser.add_argument('--use_cap_model',
                        type=int,
                        help='To use (1) Captioning Model or not (0)',
                        dest='cap_flag',
                        default=0)
    args = parser.parse_args()
    return args


if __name__ == "__main__":
    # Script entry point: read the command-line options first.
    opt = parse_args()
    # Load the config file named on the command line, if one was given.
    if opt.cfg_file is not None:
        cfg_from_file(opt.cfg_file)
    # Any gpu_id other than -1 replaces cfg.GPU_ID.
    # NOTE(review): if parse_args declares --gpu as a string, this int
    # comparison is always true — confirm against the parser definition.
    if opt.gpu_id != -1:
        cfg.GPU_ID = opt.gpu_id

    # sys.path.append(os.path.abspath(opt.cap_dir))
    # sys.path.append(os.path.abspath(os.path.join(opt.cap_dir,'misc')))

    # import opts as opts
    # import models as models
    # from dataloader import *
    # from dataloaderraw import *
    # import eval_utils as eval_utils
    # import misc.utils as utils
    # import resnet
    # from resnet_utils import myResnet
    # if torch.cuda.is_available():
Esempio n. 9
0
from miscc.config import cfg, cfg_from_file
from miscc.utils import build_super_images2
from model import RNN_ENCODER, G_NET
import tensorflow as tf
from ISR.models import RDN

if sys.version_info[0] == 2:
    import cPickle as pickle
else:
    import pickle

from cachelib import SimpleCache

cache = SimpleCache()

cfg_from_file(r'E:\Projects\digital_writer\AttnGAN\code\cfg\eval_coco.yml')

# Let TensorFlow grow its GPU memory on demand; otherwise it allocates all
# memory and no memory is left for pytorch. Iterating over the device list
# (instead of indexing element 0) also works on machines without a GPU and
# applies the same setting to every listed GPU.
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
for gpu_device in gpu_devices:
    tf.config.experimental.set_memory_growth(gpu_device, True)
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
session = tf.compat.v1.Session(config=config)
# Super-resolution model; 'psnr-small' is the alternative weight set.
rdn = RDN(weights='psnr-large')


def vectorize_caption(wordtoix, caption, copies=2):
    # create caption vector
    tokens = caption.split(' ')
    cap_v = []