def main(run_id):
    """Execute one training (or test-only) run.

    Sets up per-run dump/checkpoint directories on the global ``args``,
    builds the model/optimizer/trainer, optionally restores a checkpoint,
    then trains and tests.

    Returns a ``(graduated, test_meters)`` tuple; ``test_meters`` is None
    when training did not graduate and ``--test-not-graduated`` is unset.
    """
    if args.dump_dir is not None:
        if args.runs > 1:
            # Multiple runs: give each its own run_<id> sub-directory.
            args.current_dump_dir = os.path.join(
                args.dump_dir, 'run_{}'.format(run_id))
            io.mkdir(args.current_dump_dir)
        else:
            args.current_dump_dir = args.dump_dir
        args.checkpoints_dir = os.path.join(args.current_dump_dir, 'checkpoints')
        io.mkdir(args.checkpoints_dir)
        args.summary_file = os.path.join(args.current_dump_dir, 'summary.json')
    logger.info(format_args(args))

    net = Model()
    opt = get_optimizer(args.optimizer, net, args.lr)
    if args.accum_grad > 1:
        # Wrap the optimizer to accumulate gradients over several batches.
        opt = AccumGrad(opt, args.accum_grad)
    trainer = MyTrainer.from_args(net, opt, args)

    if args.load_checkpoint is not None:
        trainer.load_checkpoint(args.load_checkpoint)
    if args.test_only:
        trainer.current_epoch = 0
        return None, trainer.test()

    graduated = trainer.train()
    trainer.save_checkpoint('last')
    if graduated or args.test_not_graduated:
        test_meters = trainer.test()
    else:
        test_meters = None
    return graduated, test_meters
def begin_html(self):
    """Prepare the visualization directory and emit the HTML preamble.

    Refuses to proceed if ``self.visdir`` is a regular file; if it is an
    existing directory, asks the user before wiping it.
    """
    if osp.isfile(self.visdir):
        raise FileExistsError('Visualization dir "{}" is a file.'.format(
            self.visdir))
    elif osp.isdir(self.visdir):
        # NOTE(review): only existence is checked here, not emptiness,
        # although the prompt text says "is not empty" — confirm intent.
        if yes_or_no(
                'Visualization dir "{}" is not empty. Do you want to overwrite?'
                .format(self.visdir)):
            shutil.rmtree(self.visdir)
        else:
            raise FileExistsError(
                'Visualization dir "{}" already exists.'.format(
                    self.visdir))
    io.mkdir(self.visdir)
    io.mkdir(osp.join(self.visdir, 'assets'))
    self._index_file = open(self.get_index_filename(), 'w')
    # Write the fixed document header, in order.
    for markup in (
        '<html>',
        '<head>',
        '<title>{}</title>'.format(self.title),
        '<style>',
        'td {vertical-align:top;padding:5px}',
        '</style>',
        '</head>',
        '<body>',
        '<h1>{}</h1>'.format(self.title),
    ):
        self._print(markup)
def main(run_id):
    """Execute one training (or test-only) run.

    Sets up per-run dump/checkpoint directories on the global ``args``,
    builds the model (moved to GPU when requested), optimizer and trainer,
    optionally restores a checkpoint, then trains and tests.

    Returns ``(early_stopped, test_meters)``.
    """
    if args.dump_dir is not None:
        if args.runs > 1:
            # Multiple runs: give each its own run_<id> sub-directory.
            args.current_dump_dir = os.path.join(
                args.dump_dir, 'run_{}'.format(run_id))
            io.mkdir(args.current_dump_dir)
        else:
            args.current_dump_dir = args.dump_dir
        args.summary_file = os.path.join(args.current_dump_dir, 'summary.json')
        args.checkpoints_dir = os.path.join(args.current_dump_dir, 'checkpoints')
        io.mkdir(args.checkpoints_dir)
    logger.info(format_args(args))

    net = Model()
    if args.use_gpu:
        net.cuda()
    opt = get_optimizer(args.optimizer, net, args.lr)
    if args.accum_grad > 1:
        # Wrap the optimizer to accumulate gradients over several batches.
        opt = AccumGrad(opt, args.accum_grad)
    trainer = MyTrainer.from_args(net, opt, args)

    if args.load_checkpoint is not None:
        trainer.load_checkpoint(args.load_checkpoint)
    if args.test_only:
        return None, trainer.test()

    final_meters = trainer.train()
    trainer.save_checkpoint('last')
    # Read the early-stop flag before running the final test
    # (preserves the original left-to-right evaluation order).
    stopped = trainer.early_stopped
    return stopped, trainer.test()
def dump(self, save_dir):
    """Dump episode state into ``save_dir``.

    Writes, in order: the gym simulator states (pickle), the object
    names/poses (json), and the recorded logs (json).
    """
    io.mkdir(save_dir)
    objects = dict(
        names=self.object_names,
        poses=self.object_poses
    )
    for filename, payload in (
        ('mj_states.pkl', self.gym_states),
        ('objects.json', objects),
        ('logs.json', self.logs),
    ):
        io.dump(osp.join(save_dir, filename), payload)
def maybe_mkdir(dirname):
    """Interactively create ``dirname`` if it does not already exist.

    Does nothing when the path is an existing directory — or, notably,
    an existing *file* (that case is silently ignored, matching the
    original behavior).
    """
    if osp.isdir(dirname) or osp.isfile(dirname):
        return
    if yes_or_no('Creating directory "{}"?'.format(dirname)):
        io.mkdir(dirname)
def init_database(root):
    """Initialize the mldash database under ``<root>/jacmldash``.

    Creates the directory, points the peewee-style database at
    ``mldash.db``, binds every registered table to it, and creates
    the tables.
    """
    base_dir = osp.join(root, 'jacmldash')
    io.mkdir(base_dir)
    db = get_database()
    db.init(osp.join(base_dir, 'mldash.db'))
    db.connect()
    # Bind each model class to this database before creating its table.
    for table in _tables:
        table._meta.set_database(db)
    db.create_tables(_tables)
def __init__(self, name, dump_dir=None, force_dump=False, state_mode='DEFAULT'):
    """Wrap a gym environment.

    Args:
        name: gym environment id passed to ``self._make_env``.
        dump_dir: when truthy, record episodes there via ``gym.wrappers.Monitor``.
        force_dump: forwarded as ``force=`` to the Monitor wrapper.
        state_mode: one of 'DEFAULT', 'RENDER', 'BOTH'.
    """
    super().__init__()
    # Environment construction is serialized through a global lock.
    with get_env_lock():
        self._gym = self._make_env(name)

    if dump_dir:
        jacio.mkdir(dump_dir)
        self._gym = gym.wrappers.Monitor(self._gym, dump_dir, force=force_dump)

    # Validated only after env creation (same order as before).
    assert state_mode in {'DEFAULT', 'RENDER', 'BOTH'}
    self._state_mode = state_mode
def get_asset_filename(self, row_identifier, col_identifier, ext):
    """Return an asset path for the current table, creating its directory.

    The path has the form
    ``<visdir>/assets/table<counter>/<row>_<col>.<ext>``.
    """
    asset_dir = osp.join(
        self.visdir, 'assets', 'table{}'.format(self._table_counter))
    io.mkdir(asset_dir)
    basename = '{}_{}.{}'.format(row_identifier, col_identifier, ext)
    return osp.join(asset_dir, basename)
def _type_ensured_dir(string): if not osp.isdir(string): # TODO:: Change to Y/N question. import jacinle.io as io io.mkdir(string) return string
type=int, default=None, metavar='N', help='the interval(number of epochs) to do test') schedule_group.add_argument('--test-only', action='store_true', help='test-only mode') logger = get_logger(__file__) args = parser.parse_args() args.use_gpu = args.use_gpu and torch.cuda.is_available() if args.dump_dir is not None: io.mkdir(args.dump_dir) args.log_file = os.path.join(args.dump_dir, 'log.log') set_output_file(args.log_file) else: args.checkpoints_dir = None args.summary_file = None if args.seed is not None: import jacinle.random as random random.reset_global_seed(args.seed) args.task_is_outdegree = args.task in ['outdegree'] args.task_is_connectivity = args.task in ['connectivity'] args.task_is_adjacent = args.task in ['adjacent', 'adjacent-mnist'] args.task_is_family_tree = args.task in [ 'has-father', 'has-sister', 'grandparents', 'uncle', 'maternal-great-uncle'
def _type_ensured_dir(string): if not osp.isdir(string): # TODO(Jiayuan Mao @ 05/08): change to a Y/N question. import jacinle.io as io io.mkdir(string) return string
def main():
    """Drive the full warmup -> hot (latent-model) training pipeline.

    Flow (as visible in this function): set up dump dirs and logging,
    build model/optimizer/trainer, optionally run test-only mode,
    run a warmup phase, then configure latent/static models and an
    LR scheduler for the "hot" phase, train, and report the best
    checkpoint's test accuracy.

    NOTE(review): this source was recovered from collapsed one-line
    formatting; the indentation below is a faithful reconstruction but
    a few dedent points (marked) should be confirmed against history.
    """
    if args.dump_dir is not None:
        args.current_dump_dir = args.dump_dir
        args.summary_file = os.path.join(args.current_dump_dir, 'summary.json')
        args.checkpoints_dir = os.path.join(args.current_dump_dir, 'checkpoints')
        io.mkdir(args.checkpoints_dir)
        # Append the exact command line to exp.sh for reproducibility.
        exp_fh = open(os.path.join(args.current_dump_dir, 'exp.sh'), 'a')
        print('jac-run {}'.format(' '.join(sys.argv)), file=exp_fh)
        exp_fh.close()
    logger.info('jac-run {}'.format(' '.join(sys.argv)))
    logger.info(format_args(args))
    print(args.solution_count)

    model = models.get_model(args)
    if args.use_gpu:
        model.cuda()
    optimizer = get_optimizer(args.optimizer, model, args.lr,
                              weight_decay=args.wt_decay)
    trainer = MyTrainer.from_args(model, optimizer, args)
    # Trainer bookkeeping for the warmup phase.
    trainer.num_iters = 0
    trainer.num_bad_updates = 0
    trainer.test_batch_size = args.test_batch_size
    trainer.mode = 'warmup'
    trainer.checkpoint_mode = "warmup"
    trainer._latent_model = None
    trainer._static_model = None

    skip_warmup = False
    if args.load_checkpoint is not None:
        extra = trainer.load_checkpoint(args.load_checkpoint)
        #skip_warmup = extra is not None and (extra['name'] == 'last_warmup')
        skip_warmup = args.skip_warmup

    # Plateau scheduler for the warmup phase; patience is expressed in
    # epochs and scaled by how often we test.
    my_lr_scheduler = scheduler.CustomReduceLROnPlateau(
        trainer._optimizer, {
            'mode': 'min',
            'factor': 0.2,
            'patience': math.ceil(7 / args.test_interval),
            'verbose': True,
            'threshold': 0.0001,
            'threshold_mode': 'rel',
            'cooldown': 0,
            'min_lr': 0.01 * args.lr,
            'eps': 0.0000001
        },
        maxPatienceToStopTraining=math.ceil(20 / args.test_interval))
    trainer.my_lr_scheduler = my_lr_scheduler

    if args.test_only:
        #
        # trainer.load_latent_samples(os.path.join(
        #     args.current_dump_dir, "latent_z_samples.pkl"))
        trainer.pred_dump = []
        trainer.reset_test()
        rv = trainer.test()
        #with open(os.path.join(args.current_dump_dir, "pred_dump.pkl"), "wb") as f:
        #    pickle.dump(trainer.pred_dump, f)
        trainer.dump_errors(force=True)
        with open(os.path.join(args.current_dump_dir, 'results.out'), "w") as f:
            print(rv[0].avg['corrected accuracy'], file=f)
        test_at_end(trainer)
        return None, rv

    if not skip_warmup:
        warmup_meters, warmup_test_meters = trainer.train(
            1, args.warmup_epochs)
        trainer.save_checkpoint('last_warmup')
    else:
        logger.info("Skipping warmup")

    if args.epochs > 0:
        # define latent model
        # clone the main model
        # set the optimizer
        if skip_warmup:
            # Warmup normally prepares the train dataset; do it here instead.
            trainer._prepare_dataset(args.epoch_size, 'train')
        # trainer.checkpoint_mode = "hot"
        trainer.best_accuracy = -1
        args.min_loss = 0
        trainer._latent_model = models.get_latent_model(args, trainer.model)
        trainer._latent_model.train()
        if not args.no_static:
            # Frozen copy of the current model, used as the static reference.
            trainer._static_model = copy.deepcopy(trainer._model)
        trainer._latent_optimizer = get_optimizer(
            args.optimizer, trainer._latent_model, args.lr_latent,
            weight_decay=args.latent_wt_decay)
        trainer.mode = "hot"
        # switch off training mode only after pretraining phi
        # since pretraining phi requires training statistics
        if not args.no_static:
            trainer._static_model.eval()
        #trainer._static_model.training = True
        #
        # if skip_warmup:
        #     extra = trainer.load_checkpoint(args.load_checkpoint)
        trainer.datasets['train'].reset_sampler(args.hot_data_sampling)
        #trainer.datasets["train"].data_sampling = args.hot_data_sampling
        if not args.no_static:
            trainer._static_model.train()

        if args.pretrain_phi > 0:
            # Pretrain phi alone: freeze the main model's LR at 0 and
            # effectively disable the early-stop patience.
            my_lr_scheduler.maxPatienceToStopTraining = 10000
            for x in trainer._optimizer.param_groups:
                x['lr'] = 0.0
            _ = trainer.train(args.warmup_epochs + 1, args.pretrain_phi)
            trainer.best_accuracy = -1

        # Fresh optimizer + scheduler for the hot phase (lr_hot).
        # NOTE(review): reconstructed as running regardless of pretrain_phi —
        # confirm this dedent against the original file.
        trainer._optimizer = get_optimizer(args.optimizer, trainer.model,
                                           args.lr_hot,
                                           weight_decay=args.wt_decay)
        my_lr_scheduler = scheduler.CustomReduceLROnPlateau(
            trainer._optimizer, {
                'mode': 'min',
                'factor': 0.2,
                'patience': math.ceil(7 / args.test_interval),
                'verbose': True,
                'threshold': 0.01,
                'threshold_mode': 'rel',
                'cooldown': 0,
                'min_lr': 0.01 * args.lr_hot,
                'eps': 0.0000001
            },
            maxPatienceToStopTraining=math.ceil(25 / args.test_interval))
        trainer.my_lr_scheduler = my_lr_scheduler
        final_meters = trainer.train(
            args.warmup_epochs + args.pretrain_phi + 1, args.epochs)

    trainer.save_checkpoint('last')
    # Reload the best checkpoint and report its test accuracy.
    trainer.load_checkpoint(
        os.path.join(args.checkpoints_dir, 'checkpoint_best.pth'))
    logger.info("Best Dev Accuracy: {}".format(trainer.best_accuracy))
    trainer.reset_test()
    ret = trainer.test()
    trainer.dump_errors(force=True)
    with open(os.path.join(args.current_dump_dir, 'results.out'), "w") as f:
        print(trainer.best_accuracy, ret[0].avg['corrected accuracy'], file=f)
    test_at_end(trainer)
    return ret
def get_build_dir(self, func):
    """Return (and create) a per-function build directory under the system
    temp dir, namespaced as ``<tmp>/jacinle_cython/<name>``."""
    build_dir = osp.join(
        tempfile.gettempdir(), 'jacinle_cython', self.get_name(func))
    io.mkdir(build_dir)
    return build_dir