from miscc.config import cfg, cfg_from_file import pprint import datetime import dateutil.tz from torch.utils.tensorboard import SummaryWriter from utils.log import create_logger from utils.data_utils import CUBDataset from utils.trainer import condGANTrainer as trainer # Set a config file as 'train_birds.yml' in training, as 'eval_birds.yml' for evaluation parser = argparse.ArgumentParser(description='xxx') parser.add_argument('--config', type=str, default='cfg/train_birds.yml') args = parser.parse_args() cfg_from_file(args.config) os.environ['CUDA_VISIBLE_DEVICES'] = cfg.GPU_ID # Set directories and logger now = datetime.datetime.now(dateutil.tz.tzlocal()) timestamp = now.strftime('%Y_%m_%d_%H_%M_%S') output_dir = 'sample/%s_%s_%s' % (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp) log_dir = os.path.join( cfg.LOG_DIR, '%s_%s_%s' % (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)) os.makedirs(log_dir, exist_ok=True) log_filename = os.path.join(log_dir, 'train.log') log, logclose = create_logger(log_filename=log_filename) writer = SummaryWriter(log_dir=log_dir) with open(log_filename, 'w+') as logFile:
parser.add_argument('--cfg', dest='cfg_file', help='optional config file', default='cfg/birds_proGAN.yml', type=str) parser.add_argument('--gpu', dest='gpu_id', type=str, default='-1') parser.add_argument('--data_dir', dest='data_dir', type=str, default='') parser.add_argument('--manualSeed', type=int, help='manual seed') args = parser.parse_args() return args if __name__ == "__main__": args = parse_args() if args.cfg_file is not None: cfg_from_file(args.cfg_file) if args.gpu_id != '-1': cfg.GPU_ID = args.gpu_id else: cfg.CUDA = False if args.data_dir != '': cfg.DATA_DIR = args.data_dir #print('Using config:') # pprint.pprint(cfg) if not cfg.TRAIN.FLAG: args.manualSeed = 100 elif args.manualSeed is None: args.manualSeed = random.randint(1, 10000)
def testproc2():
    """Debug walk-through of the DAMSM word/image attention computation.

    Parses CLI args, applies the config file, seeds all RNGs, builds the
    fashion text/image dataset and the RNN/CNN encoders, pulls a single
    batch, then manually replays the attention math from the AttnGAN
    paper (Eqs. 7-10) while printing tensor shapes at each step.

    Side effects: mutates the global ``cfg``, creates timestamped output
    directories, selects the CUDA device and enables cudnn benchmarking.
    """
    args = parse_args()
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    # NOTE(review): the parsers in this file declare ``--gpu`` with
    # type=str, so this int comparison is always False and CUDA is never
    # disabled here -- confirm intent (compare against the string '-1'?).
    if args.gpu_id == -1:
        cfg.CUDA = False
    else:
        cfg.GPU_ID = args.gpu_id
    if args.data_dir != '':
        cfg.DATA_DIR = args.data_dir
    print('Using config:')
    pprint.pprint(cfg)

    # Reproducibility: fixed seed outside training, random one otherwise.
    if not cfg.TRAIN.FLAG:
        args.manualSeed = 100
    elif args.manualSeed is None:
        args.manualSeed = random.randint(1, 10000)
    random.seed(args.manualSeed)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if cfg.CUDA:
        torch.cuda.manual_seed_all(args.manualSeed)

    ##########################################################################
    # Timestamped output directories for model checkpoints and images.
    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
    output_dir = '../output/%s_%s_%s' % \
        (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)
    model_dir = os.path.join(output_dir, 'Model')
    image_dir = os.path.join(output_dir, 'Image')
    mkdir_p(model_dir)
    mkdir_p(image_dir)

    torch.cuda.set_device(cfg.GPU_ID)
    cudnn.benchmark = True

    # Get data loader ##################################################
    # Final image size doubles once per additional branch of the tree.
    imsize = cfg.TREE.BASE_SIZE * (2**(cfg.TREE.BRANCH_NUM - 1))
    batch_size = cfg.TRAIN.BATCH_SIZE
    image_transform = transforms.Compose([
        transforms.Resize(int(imsize * 76 / 64)),
        transforms.RandomCrop(imsize),
        transforms.RandomHorizontalFlip()
    ])
    dataset = FashionTextDataset(cfg.DATA_DIR, 'train',
                                 base_size=cfg.TREE.BASE_SIZE,
                                 transform=image_transform)
    print(dataset.n_words, dataset.embeddings_num)
    assert dataset
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             drop_last=True,
                                             shuffle=True,
                                             num_workers=int(cfg.WORKERS))

    # Train ##############################################################
    rnn_model, cnn_model, labels, start_epoch = build_models()
    # Collect RNN parameters plus only the trainable CNN parameters.
    para = list(rnn_model.parameters())
    for v in cnn_model.parameters():
        if v.requires_grad:
            para.append(v)

    # Pull a single batch to trace through the attention computation.
    data = next(iter(dataloader))
    imgs, captions, cap_lens, class_ids, keys = prepare_data(data)
    # check last item from images list.
    print(imgs[-1].shape)
    print("labels", labels)
    #print("class ids",class_ids)

    # Image encoder: local region features plus a global sentence code.
    words_features, sent_code = cnn_model(imgs[-1])
    print("words features shape", words_features.shape)
    print("sent code shape", sent_code.shape)
    # --> batch_size x nef x 17*17
    nef, att_sze = words_features.size(1), words_features.size(2)
    print("nef att_sze", nef, att_sze)
    # words_features = words_features.view(batch_size, nef, -1)

    hidden = rnn_model.init_hidden(batch_size)
    for i, h in enumerate(hidden):
        print("hidden size", i + 1, h.size())  # 2 x batch_size x hidden_size

    # words_emb: batch_size x nef x seq_len
    # sent_emb: batch_size x nef
    print("train captions", captions.size())
    print("train cap_lens", cap_lens.size())
    #print("train word features", words_features.size() )
    #print("train sent_code", sent_code.size() )
    words_emb, sent_emb = rnn_model(captions, cap_lens, hidden)
    print("words_emb shape", words_emb.size())
    print("sent_emb shape", sent_emb.size())

    # Mask the i-th sample out of its own same-class match set.
    i = 10
    masks = []
    if class_ids is not None:
        mask = (class_ids == class_ids[i]).astype(np.uint8)
        mask[i] = 0
        masks.append(mask.reshape((1, -1)))
    print("no masks, if class ids are sequential.", masks)
    #data_dir = "/home/donchan/Documents/DATA/CULTECH_BIRDS/CUB_200_2011/train"
    #if os.path.isfile(data_dir + '/class_info.pickle'):
    #    with open(data_dir + '/class_info.pickle', 'rb') as f:
    #        class_id = pickle.load(f, encoding="latin1")

    # Get the i-th text description
    words_num = cap_lens[i]
    print(words_num)
    # -> 1 x nef x words_num
    word = words_emb[i, :, :words_num].unsqueeze(0).contiguous()
    print(word.size())
    # -> batch_size x nef x words_num
    word = word.repeat(batch_size, 1, 1)
    print(word.size())
    #print(word)

    context = words_features.clone()
    query = word.clone()
    batch_size, queryL = query.size(0), query.size(2)
    ih, iw = context.size(2), context.size(3)
    sourceL = ih * iw
    # --> batch x sourceL x ndf
    context = context.view(batch_size, -1, sourceL)
    contextT = torch.transpose(context, 1, 2).contiguous()

    # Get attention
    # (batch x sourceL x ndf)(batch x ndf x queryL)
    # -->batch x sourceL x queryL
    attn = torch.bmm(contextT, query)  # Eq. (7) in AttnGAN paper
    # --> batch*sourceL x queryL
    attn = attn.view(batch_size * sourceL, queryL)
    #print("attn on Eq.8 on GlobalAttention", attn.size() , attn.data.cpu().sum() ) # 13872, 6 / 13872, 7 ??
    # NOTE(review): dim=0 normalizes down the flattened batch*sourceL
    # axis; the AttnGAN reference softmaxes across the word axis of each
    # row -- confirm the intended dim before reusing this code.
    attn = nn.Softmax(dim=0)(attn)  # Eq. (8)
    print("attn size", attn.size())
    # --> batch x sourceL x queryL
    attn = attn.view(batch_size, sourceL, queryL)
    # --> batch*queryL x sourceL
    attn = torch.transpose(attn, 1, 2).contiguous()
    attn = attn.view(batch_size * queryL, sourceL)
    print("attn size", attn.size())
    #print("attn on Eq.9 on GlobalAttention", attn.size() , attn.data.cpu().sum() ) # 288, 289 / 336 , 289 ?
    # Eq. (9)
    attn = attn * cfg.TRAIN.SMOOTH.GAMMA1
    attn = nn.Softmax(dim=0)(attn)
    attn = attn.view(batch_size, queryL, sourceL)
    # --> batch x sourceL x queryL
    attnT = torch.transpose(attn, 1, 2).contiguous()

    # (batch x ndf x sourceL)(batch x sourceL x queryL)
    # --> batch x ndf x queryL
    weightedContext = torch.bmm(context, attnT)
    print("weight size", weightedContext.size())
    attn = attn.view(batch_size, -1, ih, iw)
    print("attn size after Eq9", attn.size())

    att_maps = []
    #weiContext, attn = func_attention(word, context, cfg.TRAIN.SMOOTH.GAMMA1)
    att_maps.append(attn[i].unsqueeze(0).contiguous())

    # --> batch_size x words_num x nef
    word = word.transpose(1, 2).contiguous()
    weightedContext = weightedContext.transpose(1, 2).contiguous()
    # --> batch_size*words_num x nef
    word = word.view(batch_size * words_num, -1)
    weightedContext = weightedContext.view(batch_size * words_num, -1)
    print("weight size after Eq.10", weightedContext.size())
    #
    # -->batch_size*words_num
    row_sim = cosine_similarity(word, weightedContext)
    print("row similarities", row_sim.size())
    # --> batch_size x words_num
    row_sim = row_sim.view(batch_size, words_num)
    # Eq. (10)
    row_sim.mul_(cfg.TRAIN.SMOOTH.GAMMA2).exp_()
    row_sim = row_sim.sum(dim=1, keepdim=True)
    row_sim = torch.log(row_sim)
    print(row_sim)
type=int, default=100, help='number of images to be created') args = parser.parse_args() return args if __name__ == "__main__": # print(os.getcwd()) # exit(0) args = parse_args() cfg_path = "cfg/bird_cycle.yaml" cfg_from_file(cfg_path) cfg.GPU_ID = 0 cfg.DATA_DIR = 'data/birds' split_dir, bshuffle = 'train', True if not cfg.TRAIN.FLAG: split_dir = 'test' # Get data loader imsize = cfg.TREE.BASE_SIZE * (2**(cfg.TREE.BRANCH_NUM - 1)) image_transform = transforms.Compose([ transforms.Scale(int(imsize * 76 / 64)), transforms.RandomCrop(imsize), transforms.RandomHorizontalFlip() ]) dataset = TextDataset(cfg.DATA_DIR,
def testproc():
    """Smoke-test the DAMSM encoder training pipeline end to end.

    Parses CLI args, applies the config file, seeds all RNGs, builds the
    fashion text/image dataset and data loader, constructs the text (RNN)
    and image (CNN) encoders via ``build_models``, and runs training
    epochs through ``trainSingle``.  Ctrl+C aborts training early.

    Side effects: mutates the global ``cfg``, creates timestamped output
    directories, selects the CUDA device and enables cudnn benchmarking.
    """
    args = parse_args()
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    # BUG FIX: ``--gpu`` is declared with type=str in this file's arg
    # parsers, so the old ``args.gpu_id == -1`` int comparison was always
    # False and CUDA could never be disabled.  Normalize before comparing.
    if str(args.gpu_id) == '-1':
        cfg.CUDA = False
    else:
        cfg.GPU_ID = args.gpu_id
    if args.data_dir != '':
        cfg.DATA_DIR = args.data_dir
    print('Using config:')
    pprint.pprint(cfg)

    # Reproducibility: fixed seed outside training, random one otherwise.
    if not cfg.TRAIN.FLAG:
        args.manualSeed = 100
    elif args.manualSeed is None:
        args.manualSeed = random.randint(1, 10000)
    random.seed(args.manualSeed)
    np.random.seed(args.manualSeed)
    torch.manual_seed(args.manualSeed)
    if cfg.CUDA:
        torch.cuda.manual_seed_all(args.manualSeed)

    ##########################################################################
    # Timestamped output directories for model checkpoints and images.
    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
    output_dir = '../output/%s_%s_%s' % \
        (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)
    model_dir = os.path.join(output_dir, 'Model')
    image_dir = os.path.join(output_dir, 'Image')
    mkdir_p(model_dir)
    mkdir_p(image_dir)

    torch.cuda.set_device(cfg.GPU_ID)
    cudnn.benchmark = True

    # Get data loader ##################################################
    # Final image size doubles once per additional branch of the tree.
    imsize = cfg.TREE.BASE_SIZE * (2**(cfg.TREE.BRANCH_NUM - 1))
    batch_size = cfg.TRAIN.BATCH_SIZE
    image_transform = transforms.Compose([
        transforms.Resize(int(imsize * 76 / 64)),
        transforms.RandomCrop(imsize),
        transforms.RandomHorizontalFlip()
    ])
    dataset = FashionTextDataset(cfg.DATA_DIR, 'train',
                                 base_size=cfg.TREE.BASE_SIZE,
                                 transform=image_transform)
    print(dataset.n_words, dataset.embeddings_num)
    assert dataset
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=batch_size,
                                             drop_last=True,
                                             shuffle=True,
                                             num_workers=int(cfg.WORKERS))
    # Peek at one batch to verify the loader produces sensible shapes.
    imgs, caps, cap_len, cls_id, key = next(iter(dataloader))
    print(imgs[0].shape)

    # Train ##############################################################
    text_encoder, image_encoder, labels, start_epoch = build_models(
        dataset, batch_size)
    # Optimize the RNN parameters plus only the trainable CNN parameters.
    para = list(text_encoder.parameters())
    for v in image_encoder.parameters():
        if v.requires_grad:
            para.append(v)
    # At any point you can hit Ctrl + C to break out of training early.
    try:
        lr = cfg.TRAIN.ENCODER_LR
        for epoch in range(start_epoch, 1):
            optimizer = optim.Adam(para, lr=lr, betas=(0.5, 0.999))
            trainSingle(dataloader, image_encoder, text_encoder,
                        batch_size, labels, optimizer, epoch,
                        dataset.ixtoword, image_dir)
    except KeyboardInterrupt:
        print('-' * 89)
        print('Exiting from training early')
def loading_model(dataset_name='bird'):
    """Load eval config, dataset, text encoder and generator for sampling.

    Args:
        dataset_name: 'bird' selects cfg/eval_bird.yml; any other value
            falls back to cfg/eval_coco.yml.

    Returns:
        list: ``[algo, text_encoder, netG, dataset]`` with both networks
        loaded from the checkpoints named in the config and switched to
        eval() mode.  Runs on CPU unless ``gpu_id`` below is changed.

    Side effects: mutates the global ``cfg`` via ``cfg_from_file``.
    """
    #IMPORTANT ARGUMENTS
    if (dataset_name=='bird') :
        cfg_file=os.path.join(current_dir,"cfg/eval_bird.yml")
    else :
        cfg_file=os.path.join(current_dir,"cfg/eval_coco.yml")
    gpu_id=-1 #change it to 0 or more when using gpu
    data_dir=''
    # NOTE(review): manualSeed is assigned but never used in this function.
    manualSeed = 100
    #cfg file set
    if cfg_file is not None:
        cfg_from_file(cfg_file)
    if gpu_id != -1:
        cfg.GPU_ID = gpu_id
    else:
        cfg.CUDA = False
    if data_dir != '':
        cfg.DATA_DIR = data_dir

    # Timestamped output directory name (passed to the trainer below).
    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
    output_dir = '../output/%s_%s_%s' % \
        (cfg.DATASET_NAME, cfg.CONFIG_NAME, timestamp)

    split_dir, bshuffle = 'train', True
    if not cfg.TRAIN.FLAG:
        # bshuffle = False
        split_dir = 'test'

    # Get data loader
    # Final image size doubles once per additional branch of the tree.
    imsize = cfg.TREE.BASE_SIZE * (2 ** (cfg.TREE.BRANCH_NUM - 1))
    # NOTE(review): transforms.Scale is deprecated/removed in modern
    # torchvision -- transforms.Resize is the replacement; confirm the
    # pinned torchvision version before upgrading.
    image_transform = transforms.Compose([
        transforms.Scale(int(imsize * 76 / 64)),
        transforms.RandomCrop(imsize),
        transforms.RandomHorizontalFlip()])
    dataset = TextDataset(cfg.DATA_DIR, split_dir,
                          base_size=cfg.TREE.BASE_SIZE,
                          transform=image_transform)
    assert dataset
    dataloader = torch.utils.data.DataLoader(
        dataset, batch_size=cfg.TRAIN.BATCH_SIZE,
        drop_last=True, shuffle=bshuffle, num_workers=int(cfg.WORKERS))

    ###setting up ALGO
    # Define models and go to train/evaluate
    algo = trainer(output_dir, dataloader, dataset.n_words, dataset.ixtoword)

    #loading text ENCODER
    text_encoder = RNN_ENCODER(algo.n_words, nhidden=cfg.TEXT.EMBEDDING_DIM)
    # map_location keeps CPU-only loading working for GPU-saved weights.
    state_dict = torch.load(cfg.TRAIN.NET_E,
                            map_location=lambda storage, loc: storage)
    #TRAIN.NET_E path can be given directly
    text_encoder.load_state_dict(state_dict)
    # print('Load text encoder from:', cfg.TRAIN.NET_E) ###edited here
    if cfg.CUDA:
        text_encoder = text_encoder.cuda()
    text_encoder.eval()

    #LOADING Generator
    netG = G_NET()
    model_dir = cfg.TRAIN.NET_G #directory for model can be given directly as well
    state_dict = torch.load(model_dir,
                            map_location=lambda storage, loc: storage)
    netG.load_state_dict(state_dict)
    # print('Load G from: ', model_dir) ###edited here
    if cfg.CUDA:
        netG.cuda()
    netG.eval()

    return [algo,text_encoder,netG,dataset]
def parse_args(): parser = argparse.ArgumentParser(description='Train a GAN network') parser.add_argument('--cfg', dest='cfg_file', help='optional config file', default='birds_stage1.yml', type=str) parser.add_argument('--gpu', dest='gpu_id', type=str, default='0') parser.add_argument('--data_dir', dest='data_dir', type=str, default='') parser.add_argument('--manualSeed', type=int, help='manual seed') args = parser.parse_args() return args if __name__ == "__main__": args = parse_args() if args.cfg_file is not None: cfg_from_file(args.cfg_file) if args.gpu_id != -1: cfg.GPU_ID = args.gpu_id if args.data_dir != '': cfg.DATA_DIR = args.data_dir print('Using config:') pprint.pprint(cfg) if args.manualSeed is None: args.manualSeed = random.randint(1, 10000) random.seed(args.manualSeed) torch.manual_seed(args.manualSeed) if cfg.CUDA: torch.cuda.manual_seed_all(args.manualSeed) now = datetime.datetime.now(dateutil.tz.tzlocal()) timestamp = now.strftime('%Y_%m_%d_%H_%M_%S') output_dir = '../output/%s_%s_%s' % \
help='To use new arch or not', dest='new_arch', default=0) parser.add_argument('--use_cap_model', type=int, help='To use (1) Captioning Model or not (0)', dest='cap_flag', default=0) args = parser.parse_args() return args if __name__ == "__main__": opt = parse_args() if opt.cfg_file is not None: cfg_from_file(opt.cfg_file) if opt.gpu_id != -1: cfg.GPU_ID = opt.gpu_id # sys.path.append(os.path.abspath(opt.cap_dir)) # sys.path.append(os.path.abspath(os.path.join(opt.cap_dir,'misc'))) # import opts as opts # import models as models # from dataloader import * # from dataloaderraw import * # import eval_utils as eval_utils # import misc.utils as utils # import resnet # from resnet_utils import myResnet # if torch.cuda.is_available():
from miscc.config import cfg, cfg_from_file from miscc.utils import build_super_images2 from model import RNN_ENCODER, G_NET import tensorflow as tf from ISR.models import RDN if sys.version_info[0] == 2: import cPickle as pickle else: import pickle from cachelib import SimpleCache cache = SimpleCache() cfg_from_file(r'E:\Projects\digital_writer\AttnGAN\code\cfg\eval_coco.yml') # otherwise it allocates all memory and no memory is left for pytorch gpu_devices = tf.config.experimental.list_physical_devices('GPU') tf.config.experimental.set_memory_growth(gpu_devices[0], True) config = tf.compat.v1.ConfigProto() config.gpu_options.allow_growth = True session = tf.compat.v1.Session(config=config) rdn = RDN(weights='psnr-large') # rdn = RDN(weights='psnr-small') def vectorize_caption(wordtoix, caption, copies=2): # create caption vector tokens = caption.split(' ') cap_v = []