def init_platform():
    config_file = cfg_from_file('config.yml')
    default_file = cfg_from_file('default.yml')
    logger.info(pprint.pformat(default_file))
    logger.info(pprint.pformat(config_file))
    merge_a_into_b(config_file, config)
    merge_a_into_b(default_file, default)
    default.best_model_path = ''
    if default.gpu == '':
        default.gpu = None
    if default.gpu is not None:
        os.environ["CUDA_VISIBLE_DEVICES"] = default.gpu
    default.distributed = default.world_size > 1
    if default.distributed:
        dist.init_process_group(backend=default.dist_backend,
                                init_method=default.dist_url,
                                world_size=default.world_size)
    default.lr_epoch = [int(ep) for ep in default.lr_step.split(',')]
    if default.seed is not None:
        seed = default.seed
        np.random.seed(seed)
        random.seed(seed)
        torch.manual_seed(seed)
        cudnn.deterministic = True
def main(_):
    parser = argparse.ArgumentParser(
        description='Classification model training')
    parser.add_argument('--config_file', type=str, default=None,
                        help='Optional config file for params')
    parser.add_argument('opts', help='see config.py for all options',
                        default=None, nargs=argparse.REMAINDER)
    args = parser.parse_args()
    if args.config_file is not None:
        cfg_from_file(args.config_file)
    if args.opts is not None:
        cfg_from_list(args.opts)
    assert_and_infer_cfg()
    print_cfg()
    os.environ["CUDA_VISIBLE_DEVICES"] = str(cfg.GPU_ID)
    logger = utils.setup_custom_logger('root')
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
    tf_config = tf.ConfigProto(
        device_count=dict(GPU=1),
        gpu_options=tf.GPUOptions(allow_growth=True))
    tf.enable_resource_variables()
    train(tf_config, logger)
    test(tf_config, logger)
def demo_ori():
    model_file = "../test_res18.prototxt"
    weights_file = "../weights_resize_res18.caffemodel"
    data_file = "../test_pad_30_resize_112.txt"
    img_root = "/home_1/data/caffe/DTY_Side"
    cfg_file = "../test_yxx_resize.yml"
    pkl_dir = "../pkl_feature"
    batch_size = 512
    cfg_from_file(cfg_file)
    net = load_net_(model_file, weights_file=weights_file, GPU_index=3,
                    batch_size=batch_size, forward_type="test")
    img_file_list = open(data_file, "r").readlines()
    img_list = img_file_list[0:batch_size]
    input_data, input_label, input_data_name = load_data_batch_with_label(
        net, img_root, img_list)
    feature_v = get_feature_information(net, model_file, input_data)
    # print feature_v["prob"]
    if not os.path.exists(pkl_dir):
        os.makedirs(pkl_dir)
    save_name = os.path.join(pkl_dir, "feature_res_{}.pkl".format(batch_size))
    pickle.dump(feature_v, open(save_name, 'wb'))
def test_net_cls(args):
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    net = load_net_(args.deploy_file, args.weights_file, args.gpu_id,
                    batch_size=-1, forward_type="test")
    img_list = open(args.img_list_file, "r").readlines()
    input_layer_name = net._layer_names[net._inputs[0]]
    batch_size, c, h, w = net.blobs[input_layer_name].shape
    epoch_num = int(len(img_list) / batch_size)
    predict_res = []
    for epoch_i in range(epoch_num):
        if epoch_i % 10 == 0:
            print("Progress: {:.1f}%".format(100.0 * epoch_i / epoch_num))
        batch_img_list = img_list[epoch_i * batch_size:
                                  (epoch_i + 1) * batch_size]
        input_data, input_label, input_data_name = load_data_batch_with_label(
            net, args.img_root_path, batch_img_list)
        single_batch_res = predict_with_label(net, input_data, input_label,
                                              input_data_name)
        predict_res.extend(single_batch_res)
    pkl_dir = os.path.join(args.save_root_path, "res_pkl")
    if not os.path.exists(pkl_dir):
        os.makedirs(pkl_dir)
    save_name = os.path.join(
        pkl_dir,
        "{}_res.pkl".format(
            os.path.splitext(os.path.basename(args.weights_file))[0]))
    pickle.dump(predict_res, open(save_name, 'wb'))
def main():
    global args
    args = parser.parse_args()
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    files = os.listdir(args.model_dir)
    cp_files = [f for f in files if f.endswith('.pth.tar')]
    if args.file_ptn is not None:
        ptn = re.compile(args.file_ptn)
        cp_files = [f for f in files if ptn.search(f)]
    model_info = []
    for cp_file in cp_files:
        file_name = cp_file.rsplit('.', 2)[0]
        model_group_name, model_name, _ = file_name.split('-')
        cp_path = os.path.join(args.model_dir, cp_file)
        model_info.append((model_group_name, model_name, cp_path))
    att_query = AttQuery(model_info, args.split, args.model_dir)
    visualizer = Visualizer()
    while True:
        att_query.print_query()
        command = input('input command: ')
        if ' ' in command:
            opt, arg = command.split(' ', 1)
        else:
            opt, arg = command, None
        if opt in ('q', 'question'):
            att_query.set_question(arg)
        elif opt in ('a', 'answer'):
            att_query.set_answer(arg)
        elif opt in ('c', 'condition'):
            att_query.set_condition(arg)
        elif opt in ('x', 'clean'):
            att_query.set_question(None)
            att_query.set_answer(None)
            att_query.set_condition(None)
        elif opt in ('s', 'save'):
            if arg is None:
                inds = range(att_query.get_res_cnt())
            else:
                inds = map(int, arg.split(','))
            att_query.save(inds)
        elif opt in ('r', 'run'):
            if arg is None:
                arg = 1
            result = att_query.run(int(arg))
            visualizer.visualize(result, att_query.model_info)
        elif opt in ('e', 'exit'):
            break
        else:
            print('wrong command!')
def main():
    global args
    args = parser.parse_args()
    args_str = json.dumps(vars(args), indent=2)
    print('[Info] called with: ' + args_str)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # checkpoint directory
    cfg.LOG_DIR = os.path.join(cfg.LOG_DIR, args.checkpoint)

    # select device
    torch.cuda.set_device(args.gpu_id)
    print('[Info] use gpu: {}'.format(torch.cuda.current_device()))

    # data
    print('[Info] init dataset')
    model_group_name, model_name = args.model.split('/')
    val_set = VQADataset('test', model_group_name)
    val_loader = torch.utils.data.DataLoader(
        val_set, batch_size=args.bs, shuffle=False,
        num_workers=args.workers, pin_memory=True)
    print('sample count: {}'.format(len(val_set)))

    # model
    print('[Info] construct model')
    model_group = import_module('models.' + model_group_name)
    model = getattr(model_group, model_name)(num_words=val_set.num_words,
                                             num_ans=val_set.num_ans,
                                             emb_size=get_emb_size())
    model.cuda()
    cudnn.benchmark = True
    print('[Info] model name: ' + args.model)

    # predict
    fnames = [(i, 'checkpoint-{:03}.pth.tar'.format(i))
              for i in range(args.start_epoch, args.end_epoch,
                             args.epoch_freq)]
    cp_files = [(i, os.path.join(cfg.LOG_DIR, fname)) for i, fname in fnames]
    for epoch, cp_file in cp_files:
        if os.path.isfile(cp_file):
            print("[Info] loading checkpoint '{}'".format(cp_file))
            checkpoint = torch.load(cp_file)
            model.load_state_dict(checkpoint['state_dict'])
        else:
            print("[Info] no checkpoint found at '{}'".format(cp_file))
            continue
        results = predict(val_loader, model)
        result_file = os.path.join(cfg.LOG_DIR,
                                   'result-{:03}.json'.format(epoch))
        json.dump(results, open(result_file, 'w'))
def init_config():
    parser = ArgumentParser(description='configs of DLA')
    parser.add_argument('--cfg', type=str, default='./cfg.yml')
    parser.add_argument("--random-train", action="store_true",
                        help="do not fix the random seed.")
    parser.add_argument("--visualDL", action="store_true",
                        help="visualize training loss with VisualDL.")
    args = parser.parse_args()
    print('Called with args:')
    print(args)
    assert args.cfg is not None, 'Missing cfg file'
    cfg_from_file(args.cfg)
    return cfg
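# A minimal usage sketch for init_config() above: it merges the YAML file into
# the module-level cfg and returns it. train(cfg) is a placeholder name for
# the downstream entry point, not taken from the source.
if __name__ == '__main__':
    cfg = init_config()
    # train(cfg)  # hypothetical downstream call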
def main():
    start_time = time.time()
    args = parse_args()
    if args.toy:
        paths = [
            '/mnt/synology/breast/projects/lisa/toy_data/',
            '/input'
        ]
    else:
        paths = [
            '/mnt/synology/breast/archives/screenpoint3/processed_dataset/',
            '/input'
        ]
    if args.no_rsync:
        rsyncing = False
        print('Working with symbolic links', flush=True)
        data_cmd = ['ln', '-s']
    else:
        rsyncing = True
        print('Rsyncing data', flush=True)
        data_cmd = ['/usr/bin/rsync', '-am', '--stats']
    subprocess.call(data_cmd + paths)
    print('Preparing dataset took {:.2f} seconds.'.format(
        time.time() - start_time), flush=True)

    # HERE
    qstar = True

    # Benchmark best convolution algorithm
    from torch.backends import cudnn
    cudnn.benchmark = True

    if args.cfg_file:
        print('Loading config {}.'.format(args.cfg_file))
        cfg_from_file(args.cfg_file)
    else:
        print('No config given, using standard settings.')
    plot_fct(cfg, rsyncing, qstar=qstar, toy=args.toy)
def main():
    args = parse_args()
    # print('Called with args:')
    # print(args)

    # Set main gpu
    theano.sandbox.cuda.use(args.gpu_id)

    if args.cfg_files is not None:
        for cfg_file in args.cfg_files:
            cfg_from_file(cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)
    if not args.randomize:
        np.random.seed(cfg.CONST.RNG_SEED)

    if args.batch_size is not None:
        cfg_from_list(['CONST.BATCH_SIZE', args.batch_size])
    if args.iter is not None:
        cfg_from_list(['TRAIN.NUM_ITERATION', args.iter])
    if args.net_name is not None:
        cfg_from_list(['NET_NAME', args.net_name])
    if args.model_name is not None:
        cfg_from_list(['CONST.NETWORK_CLASS', args.model_name])
    if args.dataset is not None:
        cfg_from_list(['DATASET', args.dataset])
    if args.exp is not None:
        cfg_from_list(['TEST.EXP_NAME', args.exp])
    if args.out_path is not None:
        cfg_from_list(['DIR.OUT_PATH', args.out_path])
    if args.weights is not None:
        cfg_from_list(['CONST.WEIGHTS', args.weights,
                       'TRAIN.RESUME_TRAIN', True,
                       'TRAIN.INITIAL_ITERATION', int(args.init_iter)])

    print('Using config:')
    pprint.pprint(cfg)

    if not args.test:
        train()
def main():
    parser = argparse.ArgumentParser()
    # parser.add_argument('--num_epochs', type=int, default=30, help='training epochs')
    # parser.add_argument('--display_step', type=int, default=20, help='display step')
    # parser.add_argument('--checkpoint_step', type=int, default=5, help='checkpoint step')
    # parser.add_argument('--task', type=str, default="autoencoding", help='autoencoding|classification')
    # parser.add_argument('--batch_size', type=int, default=64, help='batch size')
    # parser.add_argument('--z_dim', type=int, default=10, help='latent variable dimensionality')
    # parser.add_argument('--learning_rate', type=float, default=0.0002, help='learning rate')
    # parser.add_argument('--dataset', type=str, default='svhn', help='mnist|svhn|cifar10')
    # parser.add_argument('--restore', type=int, default=0, help='restore')
    # parser.add_argument('--transfer', type=bool, default=True, help="")
    # parser.add_argument('--source_task', type=str, default="autoencoding")
    # parser.add_argument('--target_task', type=str, default="classification")
    # parser.add_argument('--mode', type=str, default="train", help="train|test|mi")  # test or train or edge-detection
    # parser.add_argument('--remove_dims', type=str, default='')
    # parser.add_argument('--gpu', type=str, default='s0')
    parser.add_argument("--cfg", dest='cfg_file',
                        default='./config/transfer-learning.yml', type=str,
                        help="An optional config file to be loaded")
    args = parser.parse_args()
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    # set gpu
    os.environ['CUDA_VISIBLE_DEVICES'] = cfg.gpu
    if cfg.mode == "train":
        return train(cfg)
    elif cfg.mode == "test":
        return test(cfg)
    elif cfg.mode == "mi":
        return MI(cfg)
def test_main(iGpuId, iGpuIdCaller, strPrototxtPath, strCaffeModel):
    args = parse_args()
    if iGpuId == iGpuIdCaller:
        print("[ERROR] [callee] callee GpuId = %d, caller GpuId = %d, "
              "they are the same!" % (iGpuId, iGpuIdCaller))
        sys.exit(1)
    args.gpu_id = iGpuId
    args.prototxt = strPrototxtPath
    args.caffemodel = strCaffeModel
    print('Called with args:')
    print(args)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    print('Using config:')
    pprint.pprint(cfg)
    print("[callee] callee is called!")
def restore(self):
    if not cfg_from_file():
        return -1
    if not osp.exists(cfg.SRC_DIR) or not osp.exists(cfg.DST_DIR):
        return -2
    image_names = self.getImageNamesFromDisk(cfg.SRC_DIR)
    if len(image_names) == 0:
        return -3
    self.image_names = image_names
    if cfg.IMAGE_NAME == '':
        self.cur_image_no = 0
        self.cur_image_name = self.image_names[self.cur_image_no]
    else:
        try:
            self.cur_image_no = self.image_names.index(cfg.IMAGE_NAME) + 1
        except ValueError:
            return -4
        if self.cur_image_no >= self.getTotalImageNum():
            return -5
        self.cur_image_name = self.image_names[self.cur_image_no]
    return 0
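# A hedged usage sketch for restore() above: the helper below only maps the
# sentinel return codes back to messages restating the branches in restore()
# itself. `browser` is a placeholder for an instance of the defining class,
# not a name taken from the source.
_RESTORE_ERRORS = {
    -1: 'could not load the config file',
    -2: 'cfg.SRC_DIR or cfg.DST_DIR does not exist',
    -3: 'no images found in cfg.SRC_DIR',
    -4: 'cfg.IMAGE_NAME is not in the image list',
    -5: 'the saved image was the last one; nothing left to restore',
}


def restore_or_report(browser):
    code = browser.restore()
    if code != 0:
        print('restore failed: ' + _RESTORE_ERRORS.get(code, 'unknown error'))
    return code == 0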
def main():
    global args
    args = parser.parse_args()
    args_str = json.dumps(vars(args), indent=2)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # use timestamp as log subdirectory
    timestamp = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    cfg.LOG_DIR = os.path.join(cfg.LOG_DIR, timestamp)
    os.mkdir(cfg.LOG_DIR)
    json.dump(cfg, open(cfg.LOG_DIR + '/config.json', 'w'), indent=2)
    model_group_name, model_name = args.model.split('/')
    shutil.copy('models/' + model_group_name + '.py', cfg.LOG_DIR)

    # init ploter
    ploter = Ploter(timestamp)

    # setting log handlers
    fh = logging.FileHandler(os.path.join(cfg.LOG_DIR, 'log'))
    fh.setLevel(logging.DEBUG)
    fhc = logging.FileHandler('current.log')
    fhc.setLevel(logging.DEBUG)
    sh = logging.StreamHandler(sys.stdout)
    sh.setLevel(logging.DEBUG)
    fmt = '[%(asctime)-15s] %(message)s'
    datefmt = '%Y-%m-%d %H:%M:%S'
    formatter = logging.Formatter(fmt, datefmt)
    fh.setFormatter(formatter)
    fhc.setFormatter(formatter)
    logger.addHandler(fh)
    logger.addHandler(fhc)
    logger.addHandler(sh)
    logger.debug('[Info] called with: ' + args_str)
    logger.debug('[Info] timestamp: ' + timestamp)
    logger.debug('[Info] CPU random seed: {}'.format(torch.initial_seed()))
    logger.debug('[Info] GPU random seed: {}'.format(
        torch.cuda.initial_seed()))

    # select device
    torch.cuda.set_device(args.gpu_id)
    logger.debug('[Info] use gpu: {}'.format(torch.cuda.current_device()))

    # data
    logger.debug('[Info] init dataset')
    do_test = (len(cfg.TEST.SPLITS) == 1
               and cfg.TEST.SPLITS[0] in ('train2014', 'val2014'))
    trn_set = VQADataset('train', model_group_name)
    train_loader = torch.utils.data.DataLoader(
        trn_set, batch_size=args.bs, shuffle=True,
        num_workers=args.workers, pin_memory=True)
    if do_test:
        val_set = VQADataset('test', model_group_name)
        val_loader = torch.utils.data.DataLoader(
            val_set, batch_size=args.bs, shuffle=False,
            num_workers=args.workers, pin_memory=True)

    # model
    emb_size = 300
    if cfg.WORD_EMBEDDINGS:
        word_vec = merge_embeddings(cfg.WORD_EMBEDDINGS)
        aword = next(iter(word_vec))
        emb_size = len(word_vec[aword])
        logger.debug('[Info] embedding size: {}'.format(emb_size))
    logger.debug('[Info] construct model, criterion and optimizer')
    model_group = import_module('models.' + model_group_name)
    model = getattr(model_group, model_name)(num_words=trn_set.num_words,
                                             num_ans=trn_set.num_ans,
                                             emb_size=emb_size)
    logger.debug('[Info] model name: ' + args.model)
    total_param = 0
    for param in model.parameters():
        total_param += param.nelement()
    logger.debug('[Info] total parameters: {}M'.format(
        math.ceil(total_param / 2**20)))

    # initialize word embedding with pretrained
    if cfg.WORD_EMBEDDINGS:
        emb = model.we.weight.data.numpy()
        words = trn_set.codebook['itow']
        assert '<PAD>' not in word_vec
        fill_cnt = 0
        for i, w in enumerate(words):
            if w in word_vec:
                emb[i] = word_vec[w]
                fill_cnt += 1
        logger.debug('[debug] word embedding filling count: {}/{}'.format(
            fill_cnt, len(words)))
        model.we.weight = nn.Parameter(torch.from_numpy(emb))

    if model_group_name in ('onehot_label', 'prob_label'):
        # initialize object embedding with pretrained
        obj_emb = model.obj_net[0].weight.data.numpy()
        if model_group_name == 'prob_label':
            obj_emb = obj_emb.T
        fill_cnt = 0
        for i, line in enumerate(trn_set.objects_vocab):
            avail, vec = get_class_embedding(line, word_vec, emb_size)
            if avail:
                obj_emb[i] = vec
                fill_cnt += 1
        logger.debug('[debug] class embedding filling count: {}/{}'.format(
            fill_cnt, len(trn_set.objects_vocab)))
        if model_group_name == 'prob_label':
            obj_emb = obj_emb.T
        model.obj_net[0].weight = nn.Parameter(torch.from_numpy(obj_emb))

    model.cuda()
    if cfg.SOFT_LOSS:
        criterion = nn.BCEWithLogitsLoss().cuda()
    else:
        criterion = nn.CrossEntropyLoss().cuda()
    logger.debug('[Info] criterion name: ' + criterion.__class__.__name__)
    optimizer = torch.optim.Adam(model.parameters(), args.lr,
                                 weight_decay=args.wd)
    cudnn.benchmark = True

    # train
    logger.debug('[Info] start training...')
    best_acc = 0
    best_epoch = -1
    for epoch in range(args.start_epoch, args.epochs):
        is_best = False  # reset per epoch so model-best is only written on a new best
        lr = adjust_learning_rate(optimizer, epoch)
        ploter.append(epoch, lr, 'lr')
        loss = train(train_loader, model, criterion, optimizer, epoch)
        ploter.append(epoch, loss, 'train-loss')
        if do_test:
            acc = validate(val_loader, model, criterion, epoch)
            ploter.append(epoch, acc, 'val-acc')
            if acc > best_acc:
                is_best = True
                best_acc = acc
                best_epoch = epoch
            logger.debug('Evaluate Result:\t'
                         'Acc {0}\t'
                         'Best {1} ({2})'.format(acc, best_acc, best_epoch))

        # save checkpoint
        cp_fname = 'checkpoint-{:03}.pth.tar'.format(epoch)
        cp_path = os.path.join(cfg.LOG_DIR, cp_fname)
        state = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_acc': best_acc,
            'optimizer': optimizer.state_dict()
        }
        if epoch % args.save_freq == 0:
            torch.save(state, cp_path)
        if is_best:
            best_path = os.path.join(cfg.LOG_DIR, 'model-best.pth.tar')
            torch.save(state, best_path)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--cfg', dest='cfg_file', help='optional config file',
                        default='tgif_qa_action.yml', type=str)
    args = parser.parse_args()
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    assert cfg.dataset.name in ['tgif-qa', 'msrvtt-qa', 'msvd-qa', 'svqad-qa']
    assert cfg.dataset.question_type in [
        'frameqa', 'count', 'transition', 'action', 'none'
    ]
    # check if the data folder exists
    assert os.path.exists(cfg.dataset.data_dir)
    # check if k_max is set correctly
    assert cfg.train.k_max_frame_level <= 16
    assert cfg.train.k_max_clip_level <= 8

    if not cfg.multi_gpus:
        torch.cuda.set_device(cfg.gpu_id)

    # make logging.info display into both shell and file
    cfg.dataset.save_dir = os.path.join(cfg.dataset.save_dir, cfg.exp_name)
    if not os.path.exists(cfg.dataset.save_dir):
        os.makedirs(cfg.dataset.save_dir)
    else:
        assert os.path.isdir(cfg.dataset.save_dir)
    log_file = os.path.join(cfg.dataset.save_dir, "log")
    if not cfg.train.restore and not os.path.exists(log_file):
        os.mkdir(log_file)
    else:
        assert os.path.isdir(log_file)
    fileHandler = logging.FileHandler(os.path.join(log_file, 'stdout.log'),
                                      'w+')
    fileHandler.setFormatter(logFormatter)
    rootLogger.addHandler(fileHandler)

    # args display
    for k, v in vars(cfg).items():
        logging.info(k + ':' + str(v))

    # concat absolute path of input files
    if cfg.dataset.name == 'tgif-qa':
        cfg.dataset.train_question_pt = os.path.join(
            cfg.dataset.data_dir,
            cfg.dataset.train_question_pt.format(cfg.dataset.name,
                                                 cfg.dataset.question_type))
        cfg.dataset.val_question_pt = os.path.join(
            cfg.dataset.data_dir,
            cfg.dataset.val_question_pt.format(cfg.dataset.name,
                                               cfg.dataset.question_type))
        cfg.dataset.vocab_json = os.path.join(
            cfg.dataset.data_dir,
            cfg.dataset.vocab_json.format(cfg.dataset.name,
                                          cfg.dataset.question_type))
        cfg.dataset.appearance_feat = os.path.join(
            cfg.dataset.data_dir,
            cfg.dataset.appearance_feat.format(cfg.dataset.name,
                                               cfg.dataset.question_type))
        cfg.dataset.motion_feat = os.path.join(
            cfg.dataset.data_dir,
            cfg.dataset.motion_feat.format(cfg.dataset.name,
                                           cfg.dataset.question_type))
    else:
        cfg.dataset.question_type = 'none'
        cfg.dataset.appearance_feat = '{}_appearance_feat.h5'
        cfg.dataset.motion_feat = '{}_motion_feat.h5'
        cfg.dataset.vocab_json = '{}_vocab.json'
        cfg.dataset.train_question_pt = '{}_train_questions.pt'
        cfg.dataset.val_question_pt = '{}_val_questions.pt'
        cfg.dataset.train_question_pt = os.path.join(
            cfg.dataset.data_dir,
            cfg.dataset.train_question_pt.format(cfg.dataset.name))
        cfg.dataset.val_question_pt = os.path.join(
            cfg.dataset.data_dir,
            cfg.dataset.val_question_pt.format(cfg.dataset.name))
        cfg.dataset.vocab_json = os.path.join(
            cfg.dataset.data_dir,
            cfg.dataset.vocab_json.format(cfg.dataset.name))
        cfg.dataset.appearance_feat = os.path.join(
            cfg.dataset.data_dir,
            cfg.dataset.appearance_feat.format(cfg.dataset.name))
        cfg.dataset.motion_feat = os.path.join(
            cfg.dataset.data_dir,
            cfg.dataset.motion_feat.format(cfg.dataset.name))

    # set random seed
    torch.manual_seed(cfg.seed)
    np.random.seed(cfg.seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(cfg.seed)

    train(cfg)
def main():
    mode_list = ['train', 'test', 'train_pos', 'stats']
    models_list = ['vgg', 'qnet', 'resnet', 'fcresnet', 'simple', 'auto']
    start_time = time.time()
    parser = argparse.ArgumentParser()
    parser.add_argument('--cfg', help='configuration file to use.')
    parser.add_argument('--mode', help='mode (train or test)')
    parser.add_argument('--model', help='model to train')
    parser.add_argument('--no-rsync', dest='no_rsync',
                        help='use symbolic links instead',
                        action='store_true')
    parser.add_argument('--toy', dest='toy', help='use toy data set',
                        action='store_true')
    args = parser.parse_args()

    if args.cfg:
        print('Loading config {}.'.format(args.cfg))
        cfg_from_file(args.cfg)
    else:
        print('No config given, using standard settings.')
    pprint(cfg)

    if args.model not in models_list:
        print('Model does not exist. Choose from {}. Exiting.'.format(
            models_list))
    elif args.mode not in mode_list:
        print('Mode does not exist. Choose from {}. Exiting.'.format(
            mode_list))
    else:
        rsyncing = False if args.no_rsync else True
        if rsyncing:
            print('Rsynced data! (run_script)', flush=True)
        else:
            print('Using symbolic links! (run_script)', flush=True)
        if args.mode == 'train':
            if args.model == 'vgg':
                train_feat(model_string='vgg', rsyncing=rsyncing,
                           toy=args.toy)
            elif args.model == 'qnet':
                exp_name = get_f_save_check_tensorB_expName(cfg)[2]
                train_qnet(feat_model_string=exp_name, rsyncing=rsyncing,
                           toy=args.toy)
            elif args.model == 'simple':
                train_feat(model_string='simple', rsyncing=rsyncing,
                           toy=args.toy)
            elif args.model == 'resnet':
                train_feat(model_string='resnet', rsyncing=rsyncing,
                           toy=args.toy)
            elif args.model == 'resnet_less':
                train_feat(model_string='resnet_less', rsyncing=rsyncing,
                           toy=args.toy)
            elif args.model == 'fcresnet':
                train_feat(model_string='fcresnet', rsyncing=rsyncing,
                           toy=args.toy)
            elif args.model == 'auto':
                train_feat('auto', rsyncing=rsyncing, toy=args.toy)
        elif args.mode == 'train_pos':
            if args.model == 'vgg':
                train_feat_pos(model_string='vgg', rsyncing=rsyncing,
                               toy=args.toy)
            elif args.model == 'simple':
                train_feat(model_string='simple', rsyncing=rsyncing,
                           toy=args.toy)
            elif args.model == 'resnet':
                train_feat_pos(model_string='resnet', rsyncing=rsyncing,
                               toy=args.toy)
            else:
                print('Use pos only for feature net training')
        elif args.mode == 'test':
            if args.model == 'vgg':
                test_vgg()  # Not yet implemented
            elif args.model == 'qnet':
                exp_name = get_f_save_check_tensorB_expName(cfg)[2]
                test_qnet(feat_model_string=exp_name, rsyncing=rsyncing,
                          toy=args.toy)
            elif args.model == 'resnet' or args.model == 'simple':
                exp_name = get_f_save_check_tensorB_expName(cfg)[2]
                test_feat(feat_model_string=exp_name, rsyncing=rsyncing,
                          toy=args.toy)  # Not yet implemented
        elif args.mode == 'stats':
            exp_name = get_f_save_check_tensorB_expName(cfg)[2]
            get_stats(feat_model_string=exp_name, rsyncing=rsyncing,
                      toy=args.toy)

    total_time = time.time() - start_time
    print('Main program executed in {:.0f} seconds.'.format(total_time))
def main():
    args = parser.parse_args()
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # select device
    torch.cuda.set_device(args.gpu_id)
    print('[Info] use gpu: {}'.format(torch.cuda.current_device()))

    # get parameters
    sys.path.insert(0, args.model_dir)
    from params import params
    assert len(params) > 1
    last_cfg = params[0][-1]
    last_cfg()
    get_data.main()
    dataset = VQADataset('test', params[0][1])
    itoa = dataset.codebook['itoa']
    vote_buff = [{} for i in range(len(dataset))]
    conf_buff = np.zeros((len(dataset), len(itoa)))
    sm_conf_buff = np.zeros((len(dataset), len(itoa)))
    l2_conf_buff = np.zeros((len(dataset), len(itoa)))
    que_ids = dataset.que_id
    for fpath, mgrp, mname, acc, cfg_func in params:
        # data
        if cfg_func != last_cfg:
            cfg_func()
            get_data.main()
            last_cfg = cfg_func
            dataset = VQADataset('test', mgrp)
            itoa = dataset.codebook['itoa']
        dataset.reload_obj(mgrp)
        dataloader = torch.utils.data.DataLoader(
            dataset, batch_size=args.bs, shuffle=False,
            num_workers=2, pin_memory=True)
        # model
        model_group = import_module('models.' + mgrp)
        model = getattr(model_group, mname)(num_words=dataset.num_words,
                                            num_ans=dataset.num_ans,
                                            emb_size=get_emb_size())
        cp_file = os.path.join(args.model_dir, fpath)
        checkpoint = torch.load(cp_file, map_location=lambda s, l: s.cuda(0))
        model.load_state_dict(checkpoint['state_dict'])
        model.cuda()
        model.eval()
        # predict
        bar = progressbar.ProgressBar()
        start = 0
        # sample: (que_id, img, que, [obj])
        for sample in bar(dataloader):
            sample_var = [Variable(d).cuda() for d in list(sample)[1:]]
            score = model(*sample_var)
            sm_score = torch.nn.functional.softmax(score)
            l2_score = torch.nn.functional.normalize(score)
            bs = score.size(0)
            conf_buff[start:start + bs] += score.data.cpu().numpy()
            sm_conf_buff[start:start + bs] += sm_score.data.cpu().numpy()
            l2_conf_buff[start:start + bs] += l2_score.data.cpu().numpy()
            _, ans_ids = torch.max(score.data, dim=1)
            for i, ans_id in enumerate(ans_ids):
                ans = itoa[ans_id]
                ans_score = acc + vote_buff[start + i].get(ans, 0)
                vote_buff[start + i][ans] = ans_score
            start += bs
def main():
    # ------------- parse arguments -------------#
    args = _parse_args()
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)  # read args.cfg_file and merge it into cfg
    pprint.pprint(cfg)

    # ------------- task-related configuration -------------#
    # os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    # os.environ['CUDA_VISIBLE_DEVICES'] = cfg.GPUS
    # os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    tf.logging.set_verbosity(tf.logging.INFO)  # set the log level

    # ------------- build the graph -------------#
    with tf.device('/cpu:0'):  # keep op-intensive work on the CPU
        query = tf.placeholder(dtype=tf.float32, shape=[None, 2])
        num_gpus = len(cfg.GPUS.split(','))
        # build the dataset and get an iterator
        ite_val = reader.get_dataset_iter_valid(cfg)
        mem_val, mem_adj_val, gt_val = ite_val.get_next()
        # run prediction on the GPUs
        with tf.variable_scope(tf.get_variable_scope()) as vscope:
            # see https://github.com/tensorflow/tensorflow/issues/6220
            for i in range(num_gpus):
                with tf.device('/gpu:%d' % i), \
                        tf.name_scope('GPU_%d' % i) as scope:
                    # build the network and run the forward pass
                    graph_mem_net = GraphMemNet(cfg)
                    logits = graph_mem_net.inference(mem_val, mem_adj_val,
                                                     query)
                    tf.get_variable_scope().reuse_variables()

        # saver
        model_variables_map_save = {}
        for variable in tf.trainable_variables():
            model_variables_map_save[variable.name.replace(':0', '')] = \
                variable
        print('#####################################################')
        for save_item in model_variables_map_save.keys():
            print(save_item)
        print('#####################################################')
        saver_save = tf.train.Saver(
            var_list=model_variables_map_save,
            max_to_keep=cfg.TRAIN.MAX_MODELS_TO_KEEP)

    # ------------- start the session -------------#
    # (predict on the validation set and compute accuracy)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    config = tf.ConfigProto(gpu_options=gpu_options,
                            allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        # load pretrained models
        tf.global_variables_initializer().run()
        saver_save.restore(
            sess, '/data/yinzhiyu/results/Graph-Memory-Networks/models-1')
        sess.graph.finalize()
        start_time = time.time()
        query_ = np.array([[1.0, 0.0]] * cfg.TRAIN.BATCH_SIZE, dtype=np.float)
        num_pred = 0
        T = 0
        P = 0
        TP = 0
        for i in range(cfg.TRAIN.MAX_ITE):
            try:
                print('predicting %dth mol' % i)
                output, gt = sess.run([logits, gt_val],
                                      feed_dict={query: query_},
                                      options=run_options)
                ind_out = np.argmax(output, axis=1)[0]
                ind_gt = np.argmax(gt, axis=1)[0]
                num_pred += 1
                if ind_gt == 0:
                    P += 1
                    if ind_out == ind_gt:
                        T += 1
                        TP += 1
                else:
                    if ind_out == ind_gt:
                        T += 1
            except tf.errors.OutOfRangeError:
                # iterator exhausted: report the final scores
                pre = float(T) / float(num_pred)
                recall = float(TP) / float(P)
                print('finished!')
                print('F1 score is %.2f' %
                      (100.0 * 2.0 * (pre * recall / (pre + recall))))
                break
def main():
    args = _parse_args()
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)  # read args.cfg_file and merge it into cfg
    pprint.pprint(cfg)

    # read the index file
    with open(osp.join(cfg.INPUT.DATA_DIR, cfg.INPUT.INDEX), 'r') as f:
        index = f.readlines()

    # The largest molecule has 146 atoms, so 150 is a safe upper bound and
    # mem_size = 150. The 167 dataset covers 86 atom types in total, so the
    # one-hot vector is 90-dimensional. A single molecule has at most 162
    # bonds.
    suppl = Chem.SDMolSupplier(osp.join(cfg.INPUT.DATA_DIR, cfg.INPUT.SDF))
    for i in range(len(suppl)):
        m = suppl[i]
        if m is None:
            continue
        # per-atom one-hot encoding, degree, attached H
        bond_feature = [np.array([], dtype=np.float)]
        for bond in m.GetBonds():
            if bond.GetBondType() == rdkit.Chem.rdchem.BondType.SINGLE:
                bond_feature.append(
                    np.array([1.0, 0.0, 0.0, 0.0], dtype=np.float))
            elif bond.GetBondType() == rdkit.Chem.rdchem.BondType.DOUBLE:
                bond_feature.append(
                    np.array([0.0, 1.0, 0.0, 0.0], dtype=np.float))
            elif bond.GetBondType() == rdkit.Chem.rdchem.BondType.TRIPLE:
                bond_feature.append(
                    np.array([0.0, 0.0, 1.0, 0.0], dtype=np.float))
            else:
                bond_feature.append(
                    np.array([0.0, 0.0, 0.0, 1.0], dtype=np.float))
            # is the bond in a ring?
            if bond.IsInRing():
                bond_feature.append(np.array([1.0, 0.0], dtype=float))
            else:
                bond_feature.append(np.array([0.0, 1.0], dtype=float))
        bond_feature = np.concatenate(bond_feature)

        feature_mol = []
        for atom in m.GetAtoms():
            feature_atom = []
            one_hot = np.zeros(cfg.NETWORK.ONE_HOT_DIM, dtype=np.float)
            one_hot[atom.GetAtomicNum()] = 1.0
            feature_atom.append(one_hot)
            degree_onehot = np.zeros(cfg.NETWORK.MAX_NUM_DEGREE,
                                     dtype=np.float)
            degree = len([x.GetAtomicNum() for x in atom.GetNeighbors()])
            degree_onehot[degree - 1] = 1.0
            feature_atom.append(degree_onehot)
            feature_atom.append(bond_feature)
            # assemble feature_atom and add it to the memory
            feature_atom = np.concatenate(feature_atom)
            feature_mol.append(feature_atom)
        # NOTE: no padding is applied here!
        feature_mol = np.array(feature_mol)

        # build the adjacency matrices (one per bond type)
        adj = np.zeros((cfg.NETWORK.NUM_BOND_TYPE, cfg.NETWORK.MEM_SIZE,
                        cfg.NETWORK.MEM_SIZE), dtype=np.float)
        for ii in range(len(m.GetAtoms())):
            for jj in range(len(m.GetAtoms())):
                bond = m.GetBondBetweenAtoms(ii, jj)
                if bond is None:
                    continue
                if bond.GetBondType() == rdkit.Chem.rdchem.BondType.SINGLE:
                    adj[0][ii][jj] = 1.0
                elif bond.GetBondType() == rdkit.Chem.rdchem.BondType.DOUBLE:
                    adj[1][ii][jj] = 1.0
                elif bond.GetBondType() == rdkit.Chem.rdchem.BondType.TRIPLE:
                    adj[2][ii][jj] = 1.0
                else:
                    adj[3][ii][jj] = 1.0

        # save
        file_name = index[i].split()[0]
        mol_fea_adj = np.array([
            feature_mol, adj,
            np.array([1.0, 0.0], dtype=np.float)
            if index[i].split()[1] == 'Active'
            else np.array([0.0, 1.0], dtype=np.float)
        ])
        np.save(
            osp.join(cfg.INPUT.DATA_DIR, cfg.INPUT.FEATURE,
                     file_name + '.npy'), mol_fea_adj)
        print('%d processed' % i)
import os

import pymysql

from config import cfg_from_file
from config import cfg

# Whether to import external configuration
if os.path.exists("resource/config.yaml"):
    cfg_from_file("resource/config.yaml")

# MySQL Configuration
host = cfg.database.mysql.host
user = cfg.database.mysql.user
password = cfg.database.mysql.passwd
charset = cfg.database.mysql.charset
db = cfg.database.mysql.db  # Database name used

"""
database: database connection layer
"""


def get_conn():
    # open a connection
    conn = pymysql.connect(host=host, user=user, password=password,
                           db=db, charset=charset)
    # create a cursor
    cursor = conn.cursor()
    return conn, cursor
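# A minimal usage sketch for get_conn() above: since it returns the raw
# (connection, cursor) pair, the caller is responsible for closing both.
# The helper name and the example SQL are placeholders, not from the source.
def query_one(sql, params=None):
    conn, cursor = get_conn()
    try:
        cursor.execute(sql, params or ())
        return cursor.fetchone()
    finally:
        cursor.close()
        conn.close()

# e.g. row = query_one('SELECT 1')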
def parse_args():
    """
    Parse input arguments
    """
    parser = argparse.ArgumentParser(
        description='Train a keypoint regressor.')
    parser.add_argument('--cfg', dest='cfg_file', help='optional config file',
                        default=None, type=str)
    parser.add_argument('--gpu', dest='gpu',
                        help='GPU to use for running this.',
                        default='0', type=str)
    parser.add_argument('--save', dest='save', action='store_const',
                        const=True, default=False,
                        help='Set to save the features. Works only in mAP '
                             'mode. (Set in cfg).')
    parser.add_argument('--outfpath', default=None,
                        help='(Optional) Give a custom path to save the '
                             'features. By def. picks a path in ckpt '
                             'directory.')
    parser.add_argument('--preprocs', default=[], nargs='*',
                        help='Set additional preprocs to do when testing. '
                             'Eg. can put \'flips\'. This will flip images '
                             'before pushing through the network. Can be '
                             'useful for late fusion of multiple features.')
    parser.add_argument('--ept', dest='ept', nargs='+', type=str, default=[],
                        help='Optional end point to store. By def store the '
                             'softmax logits.')
    parser.add_argument('--split_name', default=None, type=str,
                        help='Set to change the dataset split to run on. '
                             'Eg, \'train\' or \'test\'.')
    parser.add_argument('--frames_per_video', default=None, type=int,
                        help='Set to change the '
                             'cfg.TRAIN.VIDEO_FRAMES_PER_VIDEO.')
    parser.add_argument('--dataset_list_dir', default=None, type=str,
                        help='Set to change the train_test_lists dir.')
    args = parser.parse_args()

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    # Change config for some options
    if args.split_name is not None:
        cfg.TEST.DATASET_SPLIT_NAME = args.split_name
    if args.frames_per_video is not None:
        cfg.TEST.VIDEO_FRAMES_PER_VIDEO = args.frames_per_video
    if args.outfpath is not None:
        args.save = True
    return args, cfg
def test(para, iter, mode="Test"): model = AVGRunner(para, mode) model.run_benchmark(iter, mode=mode) if __name__ == '__main__': FLAGS = tf.flags.FLAGS device = FLAGS.device config = FLAGS.config paras = FLAGS.restore mode = FLAGS.mode step = FLAGS.iter os.environ['CUDA_VISIBLE_DEVICES'] = device cfg_from_file(config) print(c.SAVE_PATH) print(device, config, paras) runner = AVGRunner(paras, mode) try: if mode == 'train': runner.train() else: runner.run_benchmark(step, mode=mode) except Exception as e: runner.notifier.send("Something wrong\n" + str(e)) runner.logger.error(str(e)) else: runner.notifier.send("Done")
    volume_index = [x.strip() for x in f.readlines()]
    img_list = []
    for v in volume_index:
        fd_path = os.path.join(data_dir, v)
        if os.path.isfile(fd_path) and fd_path.endswith(IMG_SUFFIX):
            img_list += [v]
        else:
            img_list1 = [f for f in os.listdir(fd_path)
                         if f.endswith(IMG_SUFFIX)]
            img_list1.sort(key=lambda x: int(x[:-4]))
            img_list1 = [os.path.join(v, f) for f in img_list1]
            img_list += img_list1
    return img_list


if __name__ == '__main__':
    cfg_from_file(default_cfg)
    caffe.set_device(GPU_ID)
    caffe.set_mode_gpu()
    net = caffe.Net(default_prototxt, default_model, caffe.TEST)
    print('Processing img ...')
    image_index = get_image_index(image_set_file)
    im_num = len(image_index)
    vals = np.empty((im_num,))
    for j in range(im_num):
        fn = os.path.join(data_dir, image_index[j])
        print(fn, '\t', end='')
        im = load_img(fn)
        im -= cfg.PIXEL_MEANS
        data = np.zeros((1, 3, im.shape[0], im.shape[1]), dtype=np.float32)
def get_args():
    """Get arguments from stdin."""
    parser = argparse.ArgumentParser(description='Pytorch acoustic model.')
    parser.add_argument('--encoder', type=str, default='gru',
                        help='Encoder type (default: gru).')
    parser.add_argument('--num-anchor', type=int, default=10, metavar='N',
                        help='Num anchors per frame (default: 10).')
    parser.add_argument('--lambda-factor', type=float, default=5.0,
                        metavar='HF',
                        help='Balance factor between classification and '
                             'regression loss (default: 5.0).')
    parser.add_argument('--input-dim', type=int, default=40, metavar='N',
                        help='Input feature dimension without context '
                             '(default: 40).')
    parser.add_argument('--kernel-size', type=int, default=3, metavar='N',
                        help='Kernel size of Wavenet or CNN (default: 3).')
    parser.add_argument('--hidden-dim', type=int, default=128, metavar='N',
                        help='Hidden dimension of feature extractor '
                             '(default: 128).')
    parser.add_argument('--num-layers', type=int, default=2, metavar='N',
                        help='Number of hidden layers of feature extractor '
                             '(default: 2).')
    parser.add_argument('--output-dim', type=int, default=2000, metavar='N',
                        help='Output dimension, number of classes '
                             '(default: 2000).')
    parser.add_argument('--dropout', type=float, default=0.0001, metavar='DR',
                        help='Dropout of feature extractor '
                             '(default: 0.0001).')
    parser.add_argument('--left-context', type=int, default=5, metavar='N',
                        help='Left context length for splicing feature '
                             '(default: 5).')
    parser.add_argument('--right-context', type=int, default=5, metavar='N',
                        help='Right context length for splicing feature '
                             '(default: 5).')
    parser.add_argument('--max-epochs', type=int, default=20, metavar='N',
                        help='Maximum epochs to train (default: 20).')
    parser.add_argument('--min-epochs', type=int, default=0, metavar='N',
                        help='Minimum epochs to train (default: 0).')
    parser.add_argument('--batch-size', type=int, default=8, metavar='N',
                        help='Batch size for training (default: 8).')
    parser.add_argument('--learning-rate', type=float, default=0.001,
                        metavar='LR',
                        help='Initial learning rate (default: 0.001).')
    parser.add_argument('--halving-factor', type=float, default=0.5,
                        metavar='HF',
                        help='Halving factor for the learning rate '
                             '(default: 0.5).')
    parser.add_argument('--start-halving-impr', type=float, default=0.01,
                        metavar='S',
                        help='Improvement threshold to halve the learning '
                             'rate (default: 0.01).')
    parser.add_argument('--end-halving-impr', type=float, default=0.001,
                        metavar='E',
                        help='Improvement threshold to stop halving the '
                             'learning rate (default: 0.001).')
    parser.add_argument('--init-weight-decay', type=float, default=1e-5,
                        metavar='E',
                        help='Weight decay of L2 normalization '
                             '(default: 1e-5).')
    parser.add_argument('--seed', type=int, default=1234, metavar='S',
                        help='Random seed (default: 1234).')
    parser.add_argument('--use-cuda', type=int, default=1, metavar='C',
                        help='Use cuda (1) or cpu (0).')
    parser.add_argument('--multi-gpu', type=int, default=0, metavar='G',
                        help='Use multi gpu (1) or not (0).')
    parser.add_argument('--train', type=int, default=1,
                        help='Executing mode, train (1) or test (0).')
    parser.add_argument('--train-scp', type=str, default='',
                        help='Training data file.')
    parser.add_argument('--dev-scp', type=str, default='',
                        help='Development data file.')
    parser.add_argument('--save-dir', type=str, default='',
                        help='Directory to output the model.')
    parser.add_argument('--load-model', type=str, default='',
                        help='Previous model to load.')
    parser.add_argument('--test', type=int, default=0,
                        help='Executing mode, 1 for test, 0 for no test.')
    parser.add_argument('--test-scp', type=str, default='',
                        help='Test data file.')
    parser.add_argument('--output-file', type=str, default='',
                        help='Test output file.')
    parser.add_argument('--region-output-file', type=str, default='',
                        help='Region output file.')
    parser.add_argument('--log-interval', type=int, default=1000, metavar='N',
                        help='How many batches to wait before logging '
                             'training status.')
    parser.add_argument('--num-workers', type=int, default=1, metavar='N',
                        help='How many workers are used to load data.')
    parser.add_argument('--config-file', type=str, default='',
                        help='Config file in YAML format.')
    args = parser.parse_args()
    if args.config_file != '':
        cfg_from_file(args.config_file)
    return args
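# Usage note (an assumption, not from the source): get_args() returns only the
# parsed args; when --config-file is given, cfg_from_file() mutates the global
# cfg as a side effect, so config-file values are read from cfg, not from
# args. run_training(args) is a placeholder name.
if __name__ == '__main__':
    args = get_args()
    # run_training(args)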
                os.chdir(main_wd)
                im.save(fullpath)
                os.chdir(model_wd)
            for i in range(dataloader.batch_size):
                R_count += 1
                if R_count >= 2000:
                    cont = False
                    break
        ii += 1


if __name__ == "__main__":
    main_wd = os.getcwd()
    cfg_from_file('experiments_bird.yml')
    print('Using config:')
    pprint.pprint(cfg)
    now = datetime.datetime.now(dateutil.tz.tzlocal())
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S')
    output_dir = '%s/%s_%s_%s_%s' % \
        (cfg.OUTPUT_PATH, cfg.DATASET_NAME, cfg.CONFIG_NAME, cfg.RUN,
         timestamp)
    output_global_dir = '%s/global' % cfg.OUTPUT_PATH
    mkdir_p(output_dir)
    mkdir_p(output_global_dir)
    os.chdir(main_wd)
    model_info = cfg.MODELS[cfg.RUN]
def main():
    args = parse_args()
    # merge args.cfg_file into cfg
    from config import cfg, cfg_from_file
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    tf.logging.info('Using Config:')
    pprint.pprint(cfg)

    # pick (or create) the output directory
    from config import get_output_dir
    train_dir = get_output_dir('default' if args.cfg_file is None
                               else args.cfg_file)

    # select which GPUs to use
    os.environ['CUDA_VISIBLE_DEVICES'] = cfg.GPUS
    num_clones = len(cfg.GPUS.split(','))

    # set the log verbosity
    tf.logging.set_verbosity(tf.logging.INFO)

    # build the graph and make it the default graph
    with tf.Graph().as_default():
        tf.set_random_seed(cfg.RNG_SEED)
        # deployment settings for the model about to be built
        deploy_config = model_deploy.DeploymentConfig(
            num_clones=num_clones, clone_on_cpu=False,
            replica_id=0, num_replicas=1, num_ps_tasks=0)

        # create the global step
        with tf.device(deploy_config.variables_device()):
            global_step = slim.create_global_step()

        # ------------------------------ dataset ------------------------------#
        kwargs = {}  # hyper-parameters describing how the videos are used
        if cfg.TRAIN.VIDEO_FRAMES_PER_VIDEO > 1:
            kwargs['num_samples'] = cfg.TRAIN.VIDEO_FRAMES_PER_VIDEO
            kwargs['randomFromSegmentStyle'] = cfg.TRAIN.READ_SEGMENT_STYLE
        kwargs['modality'] = cfg.INPUT.VIDEO.MODALITY  # input modality, rgb by default
        kwargs['split_id'] = cfg.INPUT.SPLIT_ID
        # (two more kwargs exist in the original whose meaning is unclear)

        # choose the preprocessing function (also modified by the author)
        from preprocessing import preprocessing_factory
        image_preprocessing_fn = preprocessing_factory.get_preprocessing(
            preprocessing_name, is_training=True)

        # read the data: get the Dataset object. This `datasets` package in
        # the current directory is the author's own; get_dataset() itself is
        # unchanged but dispatches to custom loaders, and returns a Dataset
        # object plus an integer.
        from datasets import dataset_factory
        dataset, num_pose_keypoints = dataset_factory.get_dataset(
            cfg.DATASET_NAME, cfg.DATASET_SPLIT_NAME, cfg.DATASET_DIR,
            **kwargs)

        # read the data: create the provider (read + preprocess), pack
        # batches, and build the prefetch queue
        with tf.device(deploy_config.inputs_device()):
            provider = slim.dataset_data_provider.DatasetDataProvider(
                dataset,
                num_readers=cfg.NUM_READERS,
                common_queue_capacity=20 * cfg.TRAIN.BATCH_SIZE,
                common_queue_min=10 * cfg.TRAIN.BATCH_SIZE)
            # reads samples from the provider and preprocesses them with
            # image_preprocessing_fn
            from preprocess_pipeline import train_preprocess_pipeline
            [image, pose_label_hmap, pose_label_valid, action_label] = \
                train_preprocess_pipeline(provider, cfg,
                                          image_preprocessing_fn)
            # pack a batch
            images, pose_labels_hmap, pose_labels_valid, action_labels = \
                tf.train.batch(
                    [image, pose_label_hmap, pose_label_valid, action_label],
                    batch_size=cfg.TRAIN.BATCH_SIZE,
                    num_threads=cfg.NUM_PREPROCESSING_THREADS,
                    capacity=5 * cfg.TRAIN.BATCH_SIZE)
            # build the prefetch queue
            batch_queue = slim.prefetch_queue.prefetch_queue(
                [images, pose_labels_hmap, pose_labels_valid, action_labels],
                capacity=5 * deploy_config.num_clones * cfg.TRAIN.ITER_SIZE)

        # ------------------------------ network ------------------------------#
        from nets import nets_factory
        # get_network_fn() was also re-written by the author
        network_fn = nets_factory.get_network_fn(
            cfg.MODEL_NAME, num_classes=dataset.num_classes)

        def clone_fn(batch_queue):
            # dequeue one batch
            images, labels_pose, labels_pose_valid, labels_action = \
                batch_queue.dequeue()
            labels_pose = tf.concat(tf.unstack(labels_pose), axis=0)
            labels_pose_valid = tf.concat(tf.unstack(labels_pose_valid),
                                          axis=0)
            # forward pass: the network outputs classification logits, and
            # the pose output is recorded in end_points
            logits, end_points = network_fn(images)
            pose_logits = end_points['PoseLogits']
            # the author stores everything in end_points
            end_points['Images'] = images
            end_points['PoseLabels'] = labels_pose
            end_points['ActionLabels'] = labels_action
            end_points['ActionLogits'] = logits
            # pick the loss function and compute the loss; gen_loss() lives
            # in loss.py in the current directory
            gen_loss(labels_action, logits, cfg.TRAIN.LOSS_FN_ACTION,
                     dataset.num_classes, cfg.TRAIN.LOSS_FN_ACTION_WT,
                     labels_pose, pose_logits, cfg.TRAIN.LOSS_FN_POSE,
                     labels_pose_valid, cfg.TRAIN.LOSS_FN_POSE_WT,
                     end_points, cfg)
            return end_points

        # gather summaries
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # a clone wraps the outputs and the name scope
        clones = model_deploy.create_clones(deploy_config, clone_fn,
                                            [batch_queue])
        first_clone_scope = deploy_config.clone_scope(0)
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                       first_clone_scope)
        # (inferred from the slim template: monitor the first clone's outputs)
        end_points = clones[0].outputs

        # add a monitor for every end point
        for end_point in end_points:
            x = end_points[end_point]
            summaries.add(tf.summary.histogram('activations/' + end_point, x))

        # add image summaries: unstack splits the batch into individual
        # frames and concat stitches them together spatially, which makes
        # every frame visible
        sum_img = tf.concat(tf.unstack(end_points['Images']), axis=0)
        if sum_img.get_shape().as_list()[-1] not in [1, 3, 4]:
            # some extra processing here (unclear in the source)
            pass
        summaries.add(tf.summary.image('images', sum_img))

        # add the end points that exist only because pose was added, e.g.
        # OTHER_IMG_SUMMARIES_TO_ADD = ['PosePrelogitsBasedAttention']
        for epname in cfg.TRAIN.OTHER_IMG_SUMMARY_TO_ADD:
            if epname in end_points:
                summaries.add(tf.summary.image('image_vis/' + epname,
                                               end_points[epname]))
        # merge in the summaries collected in the first clone's scope
        summaries = summaries.union(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # add summaries for the losses
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar(tensor=loss,
                                            name='losses/%s' % loss.op.name))
        # add summaries for the variables
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        # configure the moving average
        # configure the optimizer
        with tf.device(deploy_config.optimizer_device()):
            # set the learning rate
            learning_rate = _configure_learning_rate(dataset.num_samples,
                                                     num_clones, global_step)
            optimizer = _configure_optimizer(learning_rate)
            summaries.add(tf.summary.scalar(tensor=learning_rate,
                                            name='learning_rate'))
        # choose which variables take part in training
        variables_to_train = _get_variables_to_train()
def set_global_vars(use_arg_parser=True):
    global globalvars
    global image_width
    global image_height
    global dims_input_const
    global img_pad_value
    global normalization_const
    global map_file_path
    global epoch_size
    global num_test_images
    global model_folder
    global base_model_file
    global feature_node_name
    global last_conv_node_name
    global start_train_conv_node_name
    global pool_node_name
    global last_hidden_node_name
    global roi_dim
    global prediction
    global prediction_in
    global prediction_out

    if use_arg_parser:
        parser = argparse.ArgumentParser()
        parser.add_argument('-c', '--config',
                            help='Configuration file in YAML format',
                            required=False, default=None)
        parser.add_argument('-t', '--device_type', type=str,
                            help="The type of the device (cpu|gpu)",
                            required=False, default="cpu")
        parser.add_argument('-d', '--device', type=int,
                            help="Force to run the script on a specified "
                                 "device",
                            required=False, default=None)
        parser.add_argument('-l', '--list_devices', action='store_true',
                            help="Lists the available devices and exits",
                            required=False, default=False)
        parser.add_argument('--prediction', action='store_true',
                            help="Switches to prediction mode",
                            required=False, default=False)
        parser.add_argument('--prediction_in', action='append', type=str,
                            help="The input directory for images in "
                                 "prediction mode. Can be supplied multiple "
                                 "times.",
                            required=False, default=list())
        parser.add_argument('--prediction_out', action='append', type=str,
                            help="The output directory for processed images "
                                 "and predictions in prediction mode. Can be "
                                 "supplied multiple times.",
                            required=False, default=list())
        parser.add_argument('--no_headers', action='store_true',
                            help="Whether to suppress the header row in the "
                                 "ROI CSV files",
                            required=False, default=False)
        parser.add_argument('--output_width_height', action='store_true',
                            help="Whether to output width/height instead of "
                                 "second x/y in the ROI CSV files",
                            required=False, default=False)
        parser.add_argument('--suppressed_labels', type=str,
                            help="Comma-separated list of labels to suppress "
                                 "from being output in ROI CSV files.",
                            required=False, default="")
        args = vars(parser.parse_args())

        # prediction mode?
        prediction = args['prediction']
        if prediction:
            prediction_in = args['prediction_in']
            if len(prediction_in) == 0:
                raise RuntimeError("No prediction input directory provided!")
            for p in prediction_in:
                if not os.path.exists(p):
                    raise RuntimeError(
                        "Prediction input directory '%s' does not exist" % p)
            prediction_out = args['prediction_out']
            if len(prediction_out) == 0:
                raise RuntimeError("No prediction output directory provided!")
            for p in prediction_out:
                if not os.path.exists(p):
                    raise RuntimeError(
                        "Prediction output directory '%s' does not exist" % p)
            if len(prediction_in) != len(prediction_out):
                raise RuntimeError(
                    "Number of input and output directories don't match: "
                    "%i != %i" % (len(prediction_in), len(prediction_out)))
            for i in range(len(prediction_in)):
                if prediction_in[i] == prediction_out[i]:
                    raise RuntimeError(
                        "Input and output directories #%i for prediction "
                        "are the same: %s" % ((i + 1), prediction_in[i]))

        if args['list_devices']:
            print("Available devices (Type - ID - description)")
            for d in cntk.device.all_devices():
                if d.type() == 0:
                    type = "cpu"
                elif d.type() == 1:
                    type = "gpu"
                else:
                    type = "<unknown:" + str(d.type()) + ">"
                print(type + " - " + str(d.id()) + " - " + str(d))
            sys.exit(0)

        if args['config'] is not None:
            cfg_from_file(args['config'])
        if args['device'] is not None:
            if args['device_type'] == 'gpu':
                cntk.device.try_set_default_device(
                    cntk.device.gpu(args['device']))
            else:
                cntk.device.try_set_default_device(cntk.device.cpu())

    image_width = cfg["CNTK"].IMAGE_WIDTH
    image_height = cfg["CNTK"].IMAGE_HEIGHT

    # dims_input -- (pad_width, pad_height, scaled_image_width,
    # scaled_image_height, orig_img_width, orig_img_height)
    dims_input_const = MinibatchData(
        Value(batch=np.asarray([
            image_width, image_height, image_width, image_height,
            image_width, image_height
        ], dtype=np.float32)), 1, 1, False)

    # Color used for padding and normalization
    # (Caffe model uses [102.98010, 115.94650, 122.77170])
    img_pad_value = [103, 116, 123] if cfg["CNTK"].BASE_MODEL == "VGG16" \
        else [114, 114, 114]
    normalization_const = Constant([[[103]], [[116]], [[123]]]) \
        if cfg["CNTK"].BASE_MODEL == "VGG16" \
        else Constant([[[114]], [[114]], [[114]]])

    # dataset specific parameters
    map_file_path = os.path.join(abs_path, cfg["CNTK"].MAP_FILE_PATH)
    globalvars['class_map_file'] = cfg["CNTK"].CLASS_MAP_FILE
    globalvars['train_map_file'] = cfg["CNTK"].TRAIN_MAP_FILE
    globalvars['test_map_file'] = cfg["CNTK"].TEST_MAP_FILE
    globalvars['train_roi_file'] = cfg["CNTK"].TRAIN_ROI_FILE
    globalvars['test_roi_file'] = cfg["CNTK"].TEST_ROI_FILE
    globalvars['output_path'] = cfg["CNTK"].OUTPUT_PATH
    epoch_size = cfg["CNTK"].NUM_TRAIN_IMAGES
    num_test_images = cfg["CNTK"].NUM_TEST_IMAGES

    # model specific parameters
    if cfg["CNTK"].PRETRAINED_MODELS.startswith(".."):
        model_folder = os.path.join(abs_path, cfg["CNTK"].PRETRAINED_MODELS)
    else:
        model_folder = cfg["CNTK"].PRETRAINED_MODELS
    base_model_file = os.path.join(model_folder, cfg["CNTK"].BASE_MODEL_FILE)
    feature_node_name = cfg["CNTK"].FEATURE_NODE_NAME
    last_conv_node_name = cfg["CNTK"].LAST_CONV_NODE_NAME
    start_train_conv_node_name = cfg["CNTK"].START_TRAIN_CONV_NODE_NAME
    pool_node_name = cfg["CNTK"].POOL_NODE_NAME
    last_hidden_node_name = cfg["CNTK"].LAST_HIDDEN_NODE_NAME
    roi_dim = cfg["CNTK"].ROI_DIM
    data_path = map_file_path

    # set and overwrite learning parameters
    globalvars['rpn_lr_factor'] = cfg["CNTK"].RPN_LR_FACTOR
    globalvars['frcn_lr_factor'] = cfg["CNTK"].FRCN_LR_FACTOR
    globalvars['e2e_lr_factor'] = cfg["CNTK"].E2E_LR_FACTOR
    globalvars['momentum_per_mb'] = cfg["CNTK"].MOMENTUM_PER_MB
    globalvars['e2e_epochs'] = 1 if cfg["CNTK"].FAST_MODE \
        else cfg["CNTK"].E2E_MAX_EPOCHS
    globalvars['rpn_epochs'] = 1 if cfg["CNTK"].FAST_MODE \
        else cfg["CNTK"].RPN_EPOCHS
    globalvars['frcn_epochs'] = 1 if cfg["CNTK"].FAST_MODE \
        else cfg["CNTK"].FRCN_EPOCHS
    globalvars['rnd_seed'] = cfg.RNG_SEED
    globalvars['train_conv'] = cfg["CNTK"].TRAIN_CONV_LAYERS
    globalvars['train_e2e'] = cfg["CNTK"].TRAIN_E2E

    if not os.path.isdir(data_path):
        raise RuntimeError("Directory %s does not exist" % data_path)
    globalvars['class_map_file'] = os.path.join(data_path,
                                                globalvars['class_map_file'])
    globalvars['train_map_file'] = os.path.join(data_path,
                                                globalvars['train_map_file'])
    globalvars['test_map_file'] = os.path.join(data_path,
                                               globalvars['test_map_file'])
    globalvars['train_roi_file'] = os.path.join(data_path,
                                                globalvars['train_roi_file'])
    globalvars['test_roi_file'] = os.path.join(data_path,
                                               globalvars['test_roi_file'])
    globalvars['headers'] = not args['no_headers']
    globalvars['output_width_height'] = args['output_width_height']
    suppressed_labels = []
    if len(args['suppressed_labels']) > 0:
        suppressed_labels = args['suppressed_labels'].split(",")
    globalvars['suppressed_labels'] = suppressed_labels

    if cfg["CNTK"].FORCE_DETERMINISTIC:
        force_deterministic_algorithms()
    np.random.seed(seed=globalvars['rnd_seed'])
    globalvars['classes'] = parse_class_map_file(globalvars['class_map_file'])
    globalvars['num_classes'] = len(globalvars['classes'])

    if cfg["CNTK"].DEBUG_OUTPUT:
        # report args
        print("Using the following parameters:")
        print("Flip image       : {}".format(cfg["TRAIN"].USE_FLIPPED))
        print("Train conv layers: {}".format(globalvars['train_conv']))
        print("Random seed      : {}".format(globalvars['rnd_seed']))
        print("Momentum per MB  : {}".format(globalvars['momentum_per_mb']))
        if globalvars['train_e2e']:
            print("E2E epochs       : {}".format(globalvars['e2e_epochs']))
        else:
            print("RPN lr factor    : {}".format(globalvars['rpn_lr_factor']))
            print("RPN epochs       : {}".format(globalvars['rpn_epochs']))
            print("FRCN lr factor   : {}".format(
                globalvars['frcn_lr_factor']))
            print("FRCN epochs      : {}".format(globalvars['frcn_epochs']))
def main():
    global args
    args = parser.parse_args()
    args_str = json.dumps(vars(args), indent=2)
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    # use timestamp as log subdirectory
    timestamp = args.timestamp
    cfg.LOG_DIR = os.path.join(cfg.LOG_DIR, timestamp)
    model_group_name, model_name = args.model.split('/')

    # setting log handlers
    sh = logging.StreamHandler(sys.stdout)
    sh.setLevel(logging.DEBUG)
    logger.addHandler(sh)
    logger.debug('[Info] called with: ' + args_str)
    logger.debug('[Info] timestamp: ' + timestamp)

    # select device
    torch.cuda.set_device(args.gpu_id)
    logger.debug('[Info] use gpu: {}'.format(torch.cuda.current_device()))

    # data
    assert (len(cfg.TEST.SPLITS) == 1
            and cfg.TEST.SPLITS[0] == 'val2014')
    logger.debug('[Info] init dataset')
    val_set = VQADataset('test', model_group_name)
    RES_DIR = '/home/lyt/code/bert-as-service-test/result'
    queIds, queFea, _ = load_data(split_name='val2014', RES_DIR=RES_DIR)
    assert queIds.tolist() == val_set.que_id.tolist()

    logger.debug('[Info] Clustering using {}, {} clusters'.format(
        args.cluster_alg, args.n_clusters))
    clusfilename = '{}/{}/{}_{}_n{}.pkl'.format(
        RES_DIR, 'v2', 'train2014', args.cluster_alg, args.n_clusters)
    logger.debug('[Info] cluster file: {}'.format(clusfilename))
    val_qTypeLabels = clustering(queFea, clu_num=args.n_clusters,
                                 clu_alg=args.cluster_alg,
                                 savefname=clusfilename)

    # model
    logger.debug('[Info] construct model')
    model_group = import_module('models.' + model_group_name)
    model = getattr(model_group, model_name)(num_words=val_set.num_words,
                                             num_ans=val_set.num_ans,
                                             emb_size=get_emb_size())
    logger.debug('[Info] model name: ' + args.model)
    total_param = 0
    for param in model.parameters():
        total_param += param.nelement()
    logger.debug('[Info] total parameters: {}M'.format(
        math.ceil(total_param / 2**20)))
    model.cuda()
    cudnn.benchmark = True

    # load best model, predict
    logger.debug('[Info] load model ...')
    best_path = os.path.join(cfg.LOG_DIR, 'model-best.pth.tar')
    # if os.path.isfile(best_path):
    assert os.path.isfile(best_path)
    logger.debug("[Info] loading checkpoint '{}'".format(best_path))
    cp_state = torch.load(best_path)
    best_acc = cp_state['best_acc']
    logger.debug('[Info] best model with best acc {}'.format(best_acc))
    model.load_state_dict(cp_state['state_dict'])
    # else:
    #     logger.debug("[Info] no checkpoint found at '{}'".format(best_path))

    for i in range(args.n_clusters):
        logger.debug('[Info] choose cluster ID: {}'.format(i))
        # sel = val_qTypeLabels == args.clus_id
        sel = val_qTypeLabels == i
        val_quesIds = queIds[sel].tolist()
        logger.debug(
            '[Info] #Val set before/after clustering and choosing {}/{}'.
            format(queIds.shape[0], len(val_quesIds)))
        val_set_sub = select_subset(val_set, sel)
        val_loader = torch.utils.data.DataLoader(
            val_set_sub, batch_size=args.bs, shuffle=False,
            num_workers=args.workers, pin_memory=True)
        logger.debug('sample count: {}'.format(len(val_set_sub)))
        acc = validate(val_loader, model, None, None, quesIds=val_quesIds)
        logger.debug('Evaluate Result:\tAcc {0}'.format(acc))
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()

    print('Called with args:')
    pprint.pprint(args)

    # set the global cfg variable from the file
    if args.cfg_file is not None:
        for cfg_file in args.cfg_file:
            cfg_from_file(cfg_file)
    if args.batch_size is not None:
        cfg_from_list(['BATCH_SIZE', args.batch_size])
    if args.no_prefetch:
        cfg_from_list(['USE_PREFETCH', False])

    print('Using config:')
    pprint.pprint(cfg)

    if not args.randomize:
        # fix the random seeds (numpy and caffe) for reproducibility
        np.random.seed(cfg.RNG_SEED)
        caffe.set_random_seed(cfg.RNG_SEED)

    # set up caffe
def main():
    # ------------- parse arguments -------------#
    args = _parse_args()
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)  # read args.cfg_file and merge it into cfg
    pprint.pprint(cfg)

    # ------------- task-related configuration -------------#
    # os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    # os.environ['CUDA_VISIBLE_DEVICES'] = cfg.GPUS
    tf.logging.set_verbosity(tf.logging.INFO)  # set the log level

    # ------------- build the graph -------------#
    with tf.device('/cpu:0'):  # keep op-intensive work on the CPU
        query = tf.placeholder(dtype=tf.float32, shape=[None, 2])
        global_step = tf.get_variable(
            'global_step', [], dtype=None,
            initializer=tf.constant_initializer(0), trainable=False)
        # learning rate
        lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE_BASE,
                                        global_step, cfg.TRAIN.DECAY_STEP,
                                        cfg.TRAIN.DECAY_RATE, staircase=True)
        tf.summary.scalar('learnrate', lr)
        opt = tf.train.MomentumOptimizer(lr, cfg.TRAIN.MOMENTUM)  # optimizer
        # opt = tf.train.GradientDescentOptimizer(lr)  # optimizer
        num_gpus = len(cfg.GPUS.split(','))
        # build the dataset and get an iterator
        ite_train = reader.get_dataset_iter(cfg)
        mem, mem_adj, gt = ite_train.get_next()

        # run training on the GPUs
        # tower_grads = []
        with tf.variable_scope(tf.get_variable_scope()) as vscope:
            # see https://github.com/tensorflow/tensorflow/issues/6220
            for i in range(num_gpus):
                with tf.device('/gpu:%d' % i), \
                        tf.name_scope('GPU_%d' % i) as scope:
                    # query = np.array([[1.0, 0.0]] * cfg.TRAIN.BATCH_SIZE, dtype=np.float)
                    # query = tf.cast(tf.convert_to_tensor(query), tf.float32)
                    # build the network and run the forward pass
                    # with tf.Graph().as_default():
                    graph_mem_net = GraphMemNet(cfg)
                    logits = graph_mem_net.inference(mem, mem_adj, query)
                    tf.get_variable_scope().reuse_variables()

                    # per-batch accuracy
                    prediction = tf.nn.softmax(logits)
                    acc_batch = tf.reduce_mean(
                        tf.cast(
                            tf.equal(tf.argmax(prediction, 1),
                                     tf.argmax(gt, 1)), tf.float32))
                    tf.summary.scalar('acc_on_batch', acc_batch)

                    # compute the loss; add every weight w to the collection
                    # tf.GraphKeys.WEIGHTS so regularization can find it
                    # (this must happen before the regularization itself)
                    for variable in tf.global_variables():
                        if variable.name.find('weights') > 0:
                            tf.add_to_collection(tf.GraphKeys.WEIGHTS,
                                                 variable)
                    loss = loss_func(cfg, logits, gt, regularization=True)
                    tf.summary.scalar('loss', loss)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_step = opt.minimize(loss, global_step=global_step,
                                      var_list=tf.trainable_variables())
        merged = tf.summary.merge_all()

        # saver
        model_variables_map_save = {}
        for variable in tf.trainable_variables():
            model_variables_map_save[variable.name.replace(':0', '')] = \
                variable
        print('#####################################################')
        for save_item in model_variables_map_save.keys():
            print(save_item)
        print('#####################################################')
        saver_save = tf.train.Saver(
            var_list=model_variables_map_save,
            max_to_keep=cfg.TRAIN.MAX_MODELS_TO_KEEP)

    # ------------- start the session -------------#
    # (predict on the validation set and compute accuracy)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    config = tf.ConfigProto(gpu_options=gpu_options,
                            allow_soft_placement=True)
    with tf.Session(config=config) as sess:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        joint_writer = tf.summary.FileWriter(cfg.SUMMARY_DIR, sess.graph)
        summary_writer = tf.summary.FileWriter(cfg.SUMMARY_DIR, sess.graph)

        # initialize variables (or load pretrained models)
        tf.global_variables_initializer().run()
        # saver_save.restore(sess, '/data/yinzhiyu/results/Graph-Memory-Networks/models-4001')
        sess.graph.finalize()
        start_time = time.time()
        query_ = np.array([[1.0, 0.0]] * cfg.TRAIN.BATCH_SIZE, dtype=np.float)
        # query = tf.cast(tf.convert_to_tensor(query), tf.float32)
        for i in range(cfg.TRAIN.MAX_ITE):
            _, learnrate, loss_value, step, summary = sess.run(
                [train_step, lr, loss, global_step, merged],
                feed_dict={query: query_},
                options=run_options, run_metadata=run_metadata)
            if i == 0:
                start_time = time.time()
            if i % 10 == 0:
                if i >= 1:
                    end_time = time.time()
                    avg_time = (end_time - start_time) / float(i + 1)
                    print("Average time consumed per step is %0.2f secs." %
                          avg_time)
                print("After %d training step(s), learning rate is %g, "
                      "loss on training batch is %g." %
                      (step, learnrate, loss_value))
            # validate once per epoch and save the model
            if i % 2000 == 0:
                print('#############################################')
                print('saving model...')
                saver_save.save(sess, cfg.TRAIN.SAVED_MODEL_PATTERN,
                                global_step=global_step)
                print('successfully saved!')
                print('#############################################')
            if i % 200 == 0:
                joint_writer.add_run_metadata(run_metadata, 'step%03d' % i)
                summary_writer.add_summary(summary, i)
                end_time = time.time()
                # print('%dth time step, consuming %f secs' % (i, start_time - end_time))
        summary_writer.close()
                        choices=['TH14', 'AN'])
    parser.add_argument('--expname', type=str, required=True)
    parser.add_argument('--rsltname', type=str, default='rslt')
    # This argument is only used when testing.
    parser.add_argument('--pretrained', type=str,
                        default='default.caffemodel')
    parser.add_argument('--num_workers', type=int, default=16)
    args = parser.parse_args()

    config_path = osp.join(cfg.EXP_DIR, args.dataset, args.expname,
                           'config.yml')
    # Init
    cfg_from_file(config_path)
    args.phase = args.phase.upper()
    setup(args.phase, args.dataset, args.expname, args.rsltname)

    # Check whether <pretrained> was specified when testing:
    # a work-around for a conditionally required argument.
    if args.phase == 'TEST':
        assert args.pretrained != 'default.caffemodel', \
            'Specify a pretrained model when testing.'
        cfg.SNAPSHOT_PATH = osp.join(cfg.LOCAL_SNAPSHOT_PATH, args.pretrained)
    caffe.init_glog(
        osp.join(cfg.LOG_PATH, '{}{}.'.format(args.phase, cfg.INFIX)))
    pred_rslts = proc_prll(args.phase, args.num_workers)
                        action='store_true')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    print('Doing training...')
    args = parse_args_train()

    print('Called with args:')
    print(args)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    cfg.GPU_ID = args.gpu_id
    cfg.train_imdb = args.train_imdb
    cfg.val_prototxt = args.test_prototxt
    cfg.test_prototxt = args.test_prototxt
    cfg.TRAIN.VALIDATION_ITERATION = eval(cfg.TRAIN.VALIDATION_ITERATION)

    print('Using config:')
    pprint.pprint(cfg)

    # if not args.randomize:
    # fix the random seeds (numpy and caffe) for reproducibility
    np.random.seed(cfg.RNG_SEED)
    caffe.set_random_seed(cfg.RNG_SEED)