def _parts_id(hyper, parts, short=False, hashed=False):
    id_parts = []
    for key, value in parts.items():
        if value is None:
            continue
        clsname, params = value
        type_str = clsname.split('.')[-1]
        id_parts.append(type_str)

        # Precedence of specifications (from lowest to highest)
        # SF=single flag, EF=explicit flag
        # SF-short, SF-hash, EF-short, EF-hash
        request_short = short is True
        request_hash = hashed is True
        if (ub.iterable(short) and key in short):
            request_hash = False
            request_short = True
        if (ub.iterable(hashed) and key in hashed):
            request_hash = True
            request_short = False

        if request_hash:
            param_str = util.make_idstr(params)
            param_str = _hash_data(param_str)[0:6]
        elif request_short:
            param_str = util.make_short_idstr(params)
        else:
            param_str = util.make_idstr(params)

        if param_str:
            id_parts.append(param_str)
    idstr = ','.join(id_parts)
    return idstr
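# A minimal sketch of the precedence rules above (the `parts` values here are
# hypothetical; the real mapping comes from HyperParams internals):
#
#     parts = {
#         'optimizer': ('torch.optim.SGD', {'lr': 0.001}),
#         'criterion': ('torch.nn.CrossEntropyLoss', {}),
#     }
#     _parts_id(hyper, parts)                      # full idstr for every key
#     _parts_id(hyper, parts, short=True)          # SF-short: shorten every key
#     _parts_id(hyper, parts, hashed=True)         # SF-hash: hash every key
#     _parts_id(hyper, parts, short=True, hashed=['optimizer'])
#     # EF-hash beats SF-short: the 'optimizer' params are hashed while the
#     # 'criterion' params are shortened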
def train_info(hyper, train_dpath=None):
    """
    Create json metadata that details enough information such that it would
    be possible for a human to reproduce the experiment.

    Example:
        >>> import netharn as nh
        >>> datasets = {
        >>>     'train': nh.data.ToyData2d(size=3, border=1, n=256, rng=0),
        >>>     'vali': nh.data.ToyData2d(size=3, border=1, n=128, rng=1),
        >>> }
        >>> hyper = nh.hyperparams.HyperParams(**{
        >>>     # --- Data First
        >>>     'datasets' : datasets,
        >>>     'name' : 'demo',
        >>>     'workdir' : ub.ensure_app_cache_dir('netharn/demo'),
        >>>     'loaders' : {'batch_size': 64},
        >>>     'xpu' : nh.XPU.coerce('auto'),
        >>>     # --- Algorithm Second
        >>>     'model' : (nh.models.ToyNet2d, {}),
        >>>     'optimizer' : (nh.optimizers.SGD, {
        >>>         'lr': 0.001
        >>>     }),
        >>>     'criterion' : (nh.criterions.CrossEntropyLoss, {}),
        >>>     #'criterion' : (nh.criterions.FocalLoss, {}),
        >>>     'initializer' : (nh.initializers.KaimingNormal, {
        >>>         'param': 0,
        >>>     }),
        >>>     'scheduler' : (nh.schedulers.ListedLR, {
        >>>         'step_points': {0: .001, 2: .01, 5: .015, 6: .005, 9: .001},
        >>>         'interpolate': True,
        >>>     }),
        >>>     'monitor' : (nh.Monitor, {
        >>>         'max_epoch': 10
        >>>     }),
        >>> })
        >>> info = hyper.train_info()
        >>> print(ub.repr2(info))
    """
    given_explicit_train_dpath = train_dpath is not None
    # TODO: needs MASSIVE cleanup and organization
    # TODO: if pretrained is another netharn model, then we should read that
    # train_info if it exists and append it to a running list of train_info

    if hyper.model_cls is None:
        # import utool
        # utool.embed()
        raise ValueError('model_cls is None')
    # arch = hyper.model_cls.__name__

    train_dset = hyper.datasets.get('train', None)
    if train_dset is not None and hasattr(train_dset, 'input_id'):
        input_id = train_dset.input_id
        if callable(input_id):
            input_id = input_id()
    else:
        warnings.warn(
            'FitHarn cannot track the training dataset state because '
            'harn.datasets["train"] is missing the "input_id" attribute.')
        input_id = 'none'

    def _hash_data(data):
        return ub.hash_data(data, hasher='sha512', base='abc', types=True)

    train_hyper_id_long = hyper.hyper_id()
    train_hyper_id_brief = hyper.hyper_id(short=False, hashed=True)
    train_hyper_hashid = _hash_data(train_hyper_id_long)[:8]

    # TODO: hash this to some degree
    other_id = hyper.other_id()

    augment_json = hyper.augment_json()
    aug_brief = 'AU' + _hash_data(augment_json)[0:6]
    # extra_hash = _hash_data([hyper.centering])[0:6]

    train_id = '{}_{}_{}_{}'.format(
        _hash_data(input_id)[:6], train_hyper_id_brief, aug_brief, other_id)

    # Gather all information about this run into a single hash
    """
    NOTE: On choosing the length to truncate the hash.

    If we have an alphabet of size A=26 and we truncate to M=8 characters,
    then the number of possible hash values is N = A ** M. The probability of
    a collision (assuming an ideal hash function where all outputs are
    equally likely) among r different inputs is given by the following
    function. Note this is the birthday paradox problem [1].

    ```python
    from numpy import exp, log
    from scipy.special import gammaln

    def prob_unique(N, r):
        return exp(gammaln(N + 1) - gammaln(N - r + 1) - r * log(N))

    A = 26      # size of the alphabet for _hash_data
    M = 8       # number of characters we truncate at
    N = A ** M  # number of possible hash values
    r = 1000
    prob_collision = 1 - prob_unique(N, r)
    print('prob_collision = {!r}'.format(prob_collision))
    ```

    Take the printed number with a grain of salt: at these magnitudes the
    gammaln difference loses nearly all of its significant digits to float64
    cancellation, which is also why the computation falls apart entirely
    around r = 10000. The standard birthday approximation (see the sketch
    below) puts the probability at roughly 2.4e-6 (about 1 in 400,000) for
    r = 1000, and still only about 2.4e-4 for r = 10000.
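    A numerically stable way to check those figures (a sketch; it uses the
    standard birthday approximation 1 - exp(-r*(r - 1) / (2*N)), which is
    accurate whenever r is much smaller than N):

    ```python
    from numpy import expm1

    def prob_collision_approx(N, r):
        # -expm1(x) computes 1 - exp(x) without losing precision for tiny |x|
        return -expm1(-r * (r - 1) / (2 * N))

    N = 26 ** 8
    print(prob_collision_approx(N, 1000))    # ~2.4e-06
    print(prob_collision_approx(N, 10000))   # ~2.4e-04
    ```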
    I doubt we will ever run training in the same work directory (and with
    the same nice "name") 10,000 different times, so using an 8 character
    hash seems safe and user friendly for this purpose. Perhaps we may move
    to 12, 16, or 32+ characters in the future, but for pre-1.0 netharn, 8
    seems fine.

    References:
        ..[1] https://www.johndcook.com/blog/2016/01/30/general-birthday-problem/
    """
    train_hashid = _hash_data(train_id)[0:8]

    name = hyper.name

    nice_dpath = None
    name_dpath = None
    if not given_explicit_train_dpath:
        # setup a canonical and a linked symlink dir
        train_dpath = normpath(
            join(hyper.workdir, 'fit', 'runs', name, train_hashid))
        # also setup a custom "name", which may conflict. This will overwrite
        # an existing "name" symlink, but the real runs directory is based on
        # a hash, so it won't be overwritten with astronomically high
        # probability.
        if name:
            try:
                name_dpath = normpath(
                    join(hyper.workdir, 'fit', 'name', name))
                nice_dpath = normpath(
                    join(hyper.workdir, 'fit', 'nice', name))
            except Exception:
                print('hyper.workdir = {!r}'.format(hyper.workdir))
                print('hyper.name = {!r}'.format(hyper.name))
                raise

    # make a temporary initializer so we can infer the history
    temp_initializer = hyper.make_initializer()
    init_history = temp_initializer.history()

    train_info = ub.odict([
        ('train_hashid', train_hashid),
        ('train_id', train_id),
        ('workdir', hyper.workdir),
        ('aug_brief', aug_brief),
        ('input_id', input_id),
        ('other_id', other_id),
        ('hyper', hyper.get_initkw()),
        ('train_hyper_id_long', train_hyper_id_long),
        ('train_hyper_id_brief', train_hyper_id_brief),
        ('train_hyper_hashid', train_hyper_hashid),
        ('init_history', init_history),
        ('init_history_hashid', _hash_data(util.make_idstr(init_history))),
        ('name', hyper.name),
        ('nice', hyper.name),
        ('old_train_dpath', normpath(
            join(hyper.workdir, 'fit', 'runs', train_hashid))),
        ('train_dpath', train_dpath),
        # ('link_dpath', link_dpath),
        # "nice_dpath" will be deprecated in favor of "name_dpath"
        ('nice_dpath', nice_dpath),
        ('name_dpath', name_dpath),
        ('given_explicit_train_dpath', given_explicit_train_dpath),
        # TODO, add in classes if applicable
        # TODO, add in centering if applicable
        # ('centering', hyper.centering),
        ('other', hyper.other),
        # HACKED IN
        ('augment', hyper.augment_json()),
        ('extra', hyper.extra),
        ('argv', sys.argv),
        ('hostname', platform.node()),
    ])
    return train_info
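# For reference, a sketch of the on-disk layout the function above produces
# (angle-bracket names are placeholders):
#
#     <workdir>/fit/runs/<name>/<train_hashid>/  # canonical hash-addressed run dir
#     <workdir>/fit/name/<name>                  # symlink; may be clobbered by reruns
#     <workdir>/fit/nice/<name>                  # legacy symlink kept for compatibility
#     <workdir>/fit/runs/<train_hashid>          # "old_train_dpath" from earlier layouts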
def train_info(self, short=True, hashed=True):
    """
    CommandLine:
        python ~/code/netharn/netharn/folders.py Folders.train_info

    Example:
        >>> import netharn as nh
        >>> datasets = {
        >>>     'train': nh.data.ToyData2d(size=3, border=1, n=256, rng=0),
        >>>     'vali': nh.data.ToyData2d(size=3, border=1, n=128, rng=1),
        >>> }
        >>> hyper = nh.hyperparams.HyperParams(**{
        >>>     # --- Data First
        >>>     'datasets' : datasets,
        >>>     'nice' : 'demo',
        >>>     'workdir' : ub.ensure_app_cache_dir('netharn/demo'),
        >>>     'loaders' : {'batch_size': 64},
        >>>     'xpu' : nh.XPU.cast('auto'),
        >>>     # --- Algorithm Second
        >>>     'model' : (nh.models.ToyNet2d, {}),
        >>>     'optimizer' : (nh.optimizers.SGD, {
        >>>         'lr': 0.001
        >>>     }),
        >>>     'criterion' : (nh.criterions.CrossEntropyLoss, {}),
        >>>     #'criterion' : (nh.criterions.FocalLoss, {}),
        >>>     'initializer' : (nh.initializers.KaimingNormal, {
        >>>         'param': 0,
        >>>     }),
        >>>     'scheduler' : (nh.schedulers.ListedLR, {
        >>>         'step_points': {0: .001, 2: .01, 5: .015, 6: .005, 9: .001},
        >>>         'interpolate': True,
        >>>     }),
        >>>     'monitor' : (nh.Monitor, {
        >>>         'max_epoch': 10
        >>>     }),
        >>> })
        >>> folders = Folders(hyper)
        >>> info = folders.train_info()
        >>> print(ub.repr2(info))
    """
    # TODO: needs MASSIVE cleanup and organization
    # TODO: if pretrained is another netharn model, then we should read that
    # train_info if it exists and append it to a running list of train_info
    hyper = self.hyper

    if hyper.model_cls is None:
        # import utool
        # utool.embed()
        raise ValueError('model_cls is None')
    # arch = hyper.model_cls.__name__

    train_dset = hyper.datasets['train']
    if hasattr(train_dset, 'input_id'):
        input_id = train_dset.input_id
        if callable(input_id):
            input_id = input_id()
    else:
        input_id = 'none'

    train_hyper_id_long = hyper.hyper_id()
    train_hyper_id_brief = hyper.hyper_id(short=short, hashed=hashed)
    train_hyper_hashid = ub.hash_data(train_hyper_id_long)[:8]

    # TODO: hash this to some degree
    other_id = hyper.other_id()

    augment_json = hyper.augment_json()
    aug_brief = 'AU' + ub.hash_data(augment_json)[0:6]
    # extra_hash = ub.hash_data([hyper.centering])[0:6]

    train_id = '{}_{}_{}_{}'.format(
        ub.hash_data(input_id)[:6], train_hyper_id_brief, aug_brief, other_id)

    # Gather all information about this run into a single hash
    train_hashid = ub.hash_data(train_id)[0:8]

    # input_dname = 'input_' + input_id
    # verbose_dpath = join(self.hyper.workdir, 'fit', 'link', 'arch', arch,
    #                      input_dname, train_id)
    hashed_dpath = join(self.hyper.workdir, 'fit', 'runs', train_hashid)

    # setup a canonical and a linked symlink dir
    train_dpath = hashed_dpath
    # link_dpath = verbose_dpath

    # also setup a "nice" custom name, which may conflict, but oh well
    if hyper.nice:
        nice_dpath = join(self.hyper.workdir, 'fit', 'nice', hyper.nice)
    else:
        nice_dpath = None

    # make a temporary initializer so we can infer the history
    temp_initializer = hyper.make_initializer()
    init_history = temp_initializer.history()

    train_info = ub.odict([
        ('train_hashid', train_hashid),
        ('train_id', train_id),
        ('workdir', self.hyper.workdir),
        ('aug_brief', aug_brief),
        ('input_id', input_id),
        ('other_id', other_id),
        ('hyper', hyper.get_initkw()),
        ('train_hyper_id_long', train_hyper_id_long),
        ('train_hyper_id_brief', train_hyper_id_brief),
        ('train_hyper_hashid', train_hyper_hashid),
        ('init_history', init_history),
        ('init_history_hashid', ub.hash_data(util.make_idstr(init_history))),
        ('nice', hyper.nice),
        ('train_dpath', train_dpath),
        # ('link_dpath', link_dpath),
        ('nice_dpath', nice_dpath),
        # TODO, add in n_classes if applicable
        # TODO, add in centering if applicable
        # ('centering', hyper.centering),
        # HACKED IN
        ('augment', hyper.augment_json()),
    ])
    return train_info
def train_info(self, train_dpath=None, short=True, hashed=True):
    """
    TODO: maybe this doesn't belong in folders?

    CommandLine:
        python ~/code/netharn/netharn/folders.py Folders.train_info

    Example:
        >>> import netharn as nh
        >>> datasets = {
        >>>     'train': nh.data.ToyData2d(size=3, border=1, n=256, rng=0),
        >>>     'vali': nh.data.ToyData2d(size=3, border=1, n=128, rng=1),
        >>> }
        >>> hyper = nh.hyperparams.HyperParams(**{
        >>>     # --- Data First
        >>>     'datasets' : datasets,
        >>>     'nice' : 'demo',
        >>>     'workdir' : ub.ensure_app_cache_dir('netharn/demo'),
        >>>     'loaders' : {'batch_size': 64},
        >>>     'xpu' : nh.XPU.cast('auto'),
        >>>     # --- Algorithm Second
        >>>     'model' : (nh.models.ToyNet2d, {}),
        >>>     'optimizer' : (nh.optimizers.SGD, {
        >>>         'lr': 0.001
        >>>     }),
        >>>     'criterion' : (nh.criterions.CrossEntropyLoss, {}),
        >>>     #'criterion' : (nh.criterions.FocalLoss, {}),
        >>>     'initializer' : (nh.initializers.KaimingNormal, {
        >>>         'param': 0,
        >>>     }),
        >>>     'scheduler' : (nh.schedulers.ListedLR, {
        >>>         'step_points': {0: .001, 2: .01, 5: .015, 6: .005, 9: .001},
        >>>         'interpolate': True,
        >>>     }),
        >>>     'monitor' : (nh.Monitor, {
        >>>         'max_epoch': 10
        >>>     }),
        >>> })
        >>> folders = Folders(hyper)
        >>> info = folders.train_info()
        >>> print(ub.repr2(info))
    """
    given_explicit_train_dpath = train_dpath is not None
    # TODO: needs MASSIVE cleanup and organization
    # TODO: if pretrained is another netharn model, then we should read that
    # train_info if it exists and append it to a running list of train_info
    hyper = self.hyper

    if hyper.model_cls is None:
        # import utool
        # utool.embed()
        raise ValueError('model_cls is None')
    # arch = hyper.model_cls.__name__

    train_dset = hyper.datasets.get('train', None)
    if train_dset is not None and hasattr(train_dset, 'input_id'):
        input_id = train_dset.input_id
        if callable(input_id):
            input_id = input_id()
    else:
        warnings.warn(
            'FitHarn cannot track the training dataset state because '
            'harn.datasets["train"] is missing the "input_id" attribute.')
        input_id = 'none'

    def _hash_data(data):
        return ub.hash_data(data, hasher='sha512', base='abc', types=True)

    train_hyper_id_long = hyper.hyper_id()
    train_hyper_id_brief = hyper.hyper_id(short=short, hashed=hashed)
    train_hyper_hashid = _hash_data(train_hyper_id_long)[:8]

    # TODO: hash this to some degree
    other_id = hyper.other_id()

    augment_json = hyper.augment_json()
    aug_brief = 'AU' + _hash_data(augment_json)[0:6]
    # extra_hash = _hash_data([hyper.centering])[0:6]

    train_id = '{}_{}_{}_{}'.format(
        _hash_data(input_id)[:6], train_hyper_id_brief, aug_brief, other_id)

    # Gather all information about this run into a single hash
    train_hashid = _hash_data(train_id)[0:8]

    nice = hyper.nice

    nice_dpath = None
    if not given_explicit_train_dpath:
        # setup a canonical and a linked symlink dir
        train_dpath = normpath(
            join(self.hyper.workdir, 'fit', 'runs', nice, train_hashid))
        # also setup a "nice" custom name, which may conflict, but oh well
        if nice:
            try:
                nice_dpath = normpath(
                    join(self.hyper.workdir, 'fit', 'nice', nice))
            except Exception:
                print('self.hyper.workdir = {!r}'.format(
                    self.hyper.workdir))
                print('hyper.nice = {!r}'.format(hyper.nice))
                raise

    # make a temporary initializer so we can infer the history
    temp_initializer = hyper.make_initializer()
    init_history = temp_initializer.history()

    train_info = ub.odict([
        ('train_hashid', train_hashid),
        ('train_id', train_id),
        ('workdir', self.hyper.workdir),
        ('aug_brief', aug_brief),
        ('input_id', input_id),
        ('other_id', other_id),
        ('hyper', hyper.get_initkw()),
        ('train_hyper_id_long', train_hyper_id_long),
        ('train_hyper_id_brief', train_hyper_id_brief),
        ('train_hyper_hashid', train_hyper_hashid),
        ('init_history', init_history),
        ('init_history_hashid', _hash_data(util.make_idstr(init_history))),
        ('nice', hyper.nice),
        ('old_train_dpath', normpath(
            join(self.hyper.workdir, 'fit', 'runs', train_hashid))),
        ('train_dpath', train_dpath),
        # ('link_dpath', link_dpath),
        ('nice_dpath', nice_dpath),
        ('given_explicit_train_dpath', given_explicit_train_dpath),
        # TODO, add in n_classes if applicable
        # TODO, add in centering if applicable
        # ('centering', hyper.centering),
        ('other', hyper.other),
        # HACKED IN
        ('augment', hyper.augment_json()),
        ('extra', hyper.extra),
        ('argv', sys.argv),
        ('hostname', platform.node()),
    ])
    return train_info
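# Behavior sketch for the explicit-dpath escape hatch above (the path is
# hypothetical; `folders` is a Folders instance):
#
#     info = folders.train_info(train_dpath='/tmp/my_run')
#     assert info['given_explicit_train_dpath'] is True
#     assert info['train_dpath'] == '/tmp/my_run'
#     assert info['nice_dpath'] is None   # derived symlink dirs are skipped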