def main():
    opt = parse_opt()

    # Setup
    env = gym.make(opt['env'])
    agent = setup_agent(env, opt)
    burn_in_memory(env, agent, opt)

    # Load if specified
    if opt['load_path'] is not None:
        agent.load(opt['load_path'])

    # Main training loop
    for step in tqdm(range(1, opt['max_steps'] + 1)):
        if step == 1:
            state = env.reset()
            done = False
        if opt['render']:
            env.render()

        agent.update_epsilon(step)
        action = agent.step(state)
        next_state, reward, done, _ = env.step(action)

        # Add to replay memory and update the online network
        agent.replaymemory.add(state, action, reward, next_state, done)
        agent.update_network()

        if done:
            state = env.reset()
            done = False
        else:
            state = next_state

        # Periodically sync the frozen target network
        if step % opt['freeze_interval'] == 0:
            agent.copy_qnetwork()

        # Periodic evaluation
        if step % opt['eval_interval'] == 0:
            eval_rewards = eval(env, agent, opt['eval_episodes'])
            print("step", step, "average reward",
                  "{}(+/-{})".format(np.mean(eval_rewards), np.std(eval_rewards)))
            # Reset to continue training
            env.reset()

        # Periodic checkpointing
        if step % opt['save_interval'] == 0:
            agent.save(opt['save_path'], global_step=step)
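# The loop above calls `eval(env, agent, opt['eval_episodes'])`, a project-local
# helper (it shadows the Python builtin) that is not shown in this fragment.
# Below is a minimal sketch of what such a helper might look like, assuming the
# agent exposes a greedy-action method; `greedy_step` is a hypothetical name.
def eval(env, agent, num_episodes):
    """Roll out the current policy greedily and return per-episode rewards."""
    episode_rewards = []
    for _ in range(num_episodes):
        state = env.reset()
        done = False
        total_reward = 0.0
        while not done:
            action = agent.greedy_step(state)  # hypothetical greedy-action API
            state, reward, done, _ = env.step(action)
            total_reward += reward
        episode_rewards.append(total_reward)
    return episode_rewards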
    tb_dir = osp.join(opt['dataset_splitBy'], 'tb_{}'.format(opt['output_postfix']))
    print('TensorFlow summaries will be saved to `{:s}`'.format(tb_dir))

    # also add the validation set, but with no flipping images
    orgflip = cfg.TRAIN.USE_FLIPPED
    cfg.TRAIN.USE_FLIPPED = False
    cfg.TRAIN.USE_FLIPPED = orgflip

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # train_net(net, imdb, roidb, valroidb, output_dir, tb_dir,
    train_net(net, loader, output_dir, tb_dir,
              pretrained_model='pyutils/mask-faster-rcnn/output/vgg16/coco_2014_train+coco_2014_valminusminival/vgg16_faster_rcnn_iter_1190000.pth',
              # pretrained_model='pyutils/mask-faster-rcnn/output/res101/coco_2014_train_minus_refer_valtest+coco_2014_valminusminival/notime/res101_mask_rcnn_iter_1250000.pth',
              max_iters=args.max_iters)


if __name__ == '__main__':
    args = parse_opt()
    main(args)
    for i in test_videos:
        j += 1
        # print(i)
        if i['video_id'] not in video_caption.keys():
            video_caption[i['video_id']] = {'captions': []}
        video_caption[i['video_id']]['captions'].append(i['caption'])

    vocab = build_vocab(video_caption)
    # Indices 0 and 1 are reserved for the <eos> and <sos> tokens
    itow = {i + 2: w for i, w in enumerate(vocab)}
    wtoi = {w: i + 2 for i, w in enumerate(vocab)}  # inverse table
    wtoi['<eos>'] = 0
    itow[0] = '<eos>'
    wtoi['<sos>'] = 1
    itow[1] = '<sos>'

    train(opt, EncoderRNN, DecoderCNN, Convcap, itow)


if __name__ == '__main__':
    opt = opt.parse_opt()
    opt = vars(opt)
    # print(opt)
    os.environ['CUDA_VISIBLE_DEVICES'] = opt["gpu"]
    opt_json = os.path.join(opt["checkpoint_path"], 'opt_info1.json')
    if not os.path.isdir(opt["checkpoint_path"]):
        os.mkdir(opt["checkpoint_path"])
    with open(opt_json, 'w') as f:
        json.dump(opt, f)
        # print('save opt details to %s' % (opt_json))
    main(opt)
import string
import time

from tqdm import tqdm
from keras import backend as K
from keras.layers.normalization import BatchNormalization

import opt

''' parameters '''
characters = string.digits + string.ascii_uppercase + '*'
# print(characters)
n_class = len(characters)
width, height = 247, 107
n_len = 7  # length of a Taiwan license plate number
opts = opt.parse_opt()  # parameters
print("Using model name:", opts.modelname)

''' our loss function '''
def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    # Drop the first two timesteps of the predictions before computing the CTC loss
    y_pred = y_pred[:, 2:, :]
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

''' image generator
import time

import numpy as np
import torch
import torch.optim as optim
import torch.utils.data.distributed
from torch.nn.utils import clip_grad_norm_
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader

from data.anet_dataset import ANetDataset, get_vocab_and_sentences
from model.bmn import BMN
from model.loss_func import bmn_loss_func, get_mask
from opt import parse_opt

args = parse_opt(train=True)
print(args)

torch.manual_seed(args.seed)
np.random.seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed_all(args.seed)


def get_dataset(args):
    # process text
    text_proc, raw_data = get_vocab_and_sentences(args.dataset_file,
                                                  args.max_sentence_len)

    # Create the dataset and data loader instance
import os.path as osp
from datasets.roidb import Roidb
from datasets.refer import Refer
from opt import parse_opt
import json
import io

opt = parse_opt()
opt = vars(opt)


class Refvg(object):
    def __init__(self, split, model_method):
        self._dataset = 'refvg'
        self._imageset = 'vg'
        self._split = split
        self._ref_db = Refer(opt['data_root'], self._dataset, split)
        # Scene graphs are only loaded for the SGMN model
        if model_method == 'sgmn':
            self._ref_sg = self._load_sg()
            self._ref_sg_seq = self._load_sg_seq()
        else:
            self._ref_sg = None
            self._ref_sg_seq = None
        self._sent_ids = self._ref_db.get_sentIds()
        self._image_ids = self._ref_db.get_imgIds(self._sent_ids)
        roidb = Roidb(self._imageset, model_method)
        self._rois_db = {}
        self.max_num_box = 0
        self.min_num_box = 9999
        for img_id in self._image_ids:
            assert img_id in roidb.roidb
        loss_neg = -coef_0 * torch.log(1.0 - pred_score + epsilon) * (1.0 - pmask)
        loss = torch.mean(loss_pos + loss_neg)
        return loss

    npos, nneg = num_pos_neg
    # pos and neg number of start and end should be similar
    assert npos > 0 and nneg > 0
    loss_start = bi_loss(pred_start, gt_start, npos, nneg)
    loss_end = bi_loss(pred_end, gt_end, npos, nneg)
    loss = loss_start + loss_end
    return loss


if __name__ == '__main__':
    import opt

    args = opt.parse_opt()
    tscale, maxdur = args.temporal_scale, args.max_duration

    # Build dummy inputs and run the loss once as a smoke test
    bm_mask = get_mask(tscale, maxdur)
    pred_bm = torch.ones([1, maxdur, tscale]) / 2
    pred_start = torch.ones([1, tscale]) / 2
    pred_end = torch.ones([1, tscale]) / 2
    gt_iou_map, gt_start, gt_end = map(lambda x: torch.ones_like(x) / 4,
                                       [pred_bm, pred_start, pred_end])
    pred_bm, pred_start, pred_end, gt_iou_map, gt_start, gt_end, bm_mask = map(
        lambda x: x.cuda(),
        [pred_bm, pred_start, pred_end, gt_iou_map, gt_start, gt_end, bm_mask])
    total_loss, tem_loss, pem_reg_loss, pem_cls_loss = bmn_loss_func(
        pred_bm, pred_start, pred_end, gt_iou_map, gt_start, gt_end, bm_mask)
    print(total_loss, tem_loss, pem_cls_loss, pem_reg_loss)
import json
import os
import time
import multiprocessing as mp

import numpy as np
import torch
from torch.utils.data import DataLoader

from tools.eval_proposal_anet import ANETproposal
from data.anet_dataset import ANetDataset, get_vocab_and_sentences
from data.utils import iou_with_anchors
from model.bmn import BMN
from opt import parse_opt

args = parse_opt(train=False)
args.batch_size = 1
print(args)


def get_dataset(args):
    # process text
    text_proc, raw_data = get_vocab_and_sentences(args.dataset_file,
                                                  args.max_sentence_len)

    # Create the dataset and data loader instance
    test_dataset = ANetDataset(args, args.test_data_folder, text_proc, raw_data,
                               test=True)
    test_loader = DataLoader(test_dataset,
                             batch_size=1,
                             shuffle=False,
                             num_workers=args.num_workers)
    return test_loader, text_proc
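# A quick, hedged sanity check (not part of the original evaluation flow):
# build the test loader defined above and report how many batches it will serve.
if __name__ == '__main__':
    test_loader, text_proc = get_dataset(args)
    print('number of test batches:', len(test_loader))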