def parse_yaml(yaml_path, model_id):
    from tensorflow.contrib.training import HParams
    from ruamel.yaml import YAML

    hparams = HParams()
    hparams.add_hparam('model_id', model_id)
    with open(yaml_path) as fp:
        customized = YAML().load(fp)
        for k, v in customized.items():
            if k in hparams:
                hparams.set_hparam(k, v)
            else:
                hparams.add_hparam(k, v)
    return hparams
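
# A minimal usage sketch for parse_yaml above; the file name and the keys inside
# it are illustrative assumptions, not taken from the original project.
def _demo_parse_yaml():
    # Suppose config.yaml contains:
    #   batch_size: 64
    #   learning_rate: 0.001
    hparams = parse_yaml('config.yaml', model_id='run-001')
    # every hyperparameter is exposed as an attribute on the HParams object
    print(hparams.model_id)    # -> 'run-001'
    print(hparams.batch_size)  # -> 64, added from the YAML file
    return hparams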

def _load_hparams(path):
    with open(os.path.join(path, 'hparams.json'), 'rb') as json_file:
        hparams_dict = {
            k.encode('utf-8'): v.encode('utf-8') if type(v) == unicode else v
            for k, v in json.load(json_file).iteritems()
        }
    hparams = HParams(**hparams_dict)
    hparams.set_hparam('data_dir', path)
    trainer_lib.add_problem_hparams(hparams, 'translate_mmt')
    # Remove dropout from HParams even in TRAIN mode
    for key in hparams.values():
        if key.endswith("dropout"):
            setattr(hparams, key, 0.0)
    return hparams
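
# A usage sketch for _load_hparams (Python 2, matching the snippet above). The
# directory path and JSON keys are assumptions for illustration: the loader only
# requires an 'hparams.json' file inside the given directory, e.g.
#   {"batch_size": 4096, "hidden_size": 512, "layer_prepostprocess_dropout": 0.1}
# hparams = _load_hparams('/path/to/trained_model')  # all *dropout keys become 0.0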

def build_network(self, build_encoder=True):
    if build_encoder:
        self.encoder = Encoder(self._hp)
    self.decoder = DecoderModule(self._hp,
                                 # infer actions in decoder if not using SH-Pred model
                                 regress_actions=self._hp.regress_actions and
                                                 self._hp.one_step_planner != 'sh_pred')
    self.build_inference()

    if self._hp.regress_length:
        self.length_pred = LengthPredictorModule(self._hp)
        self.build_inference_encoder()

    if self._hp.attach_inv_mdl:
        self.inv_mdl = InverseModel(self._hp.inv_mdl_params, self._logger)

    if self._hp.attach_cost_mdl:
        self.cost_mdl = CostModel(self._hp.cost_mdl_params, self._logger)

    if self._hp.attach_state_regressor:
        self.state_regressor = BaseProcessingNet(self._hp.nz_enc, self._hp.nz_mid, self._hp.state_dim,
                                                 self._hp.n_processing_layers, self._hp.fc_builder)

    if self._hp.separate_cnn_start_goal_encoder:
        from blox.torch.layers import LayerBuilderParams
        from tensorflow.contrib.training import HParams

        # copy the current hyperparameters into a fresh HParams object ...
        with_conv_hp = HParams()
        for k, v in self._hp.values().items():
            with_conv_hp.add_hparam(k, v)
        # with_conv_hp = self._hp
        # ... and switch it to a convolutional encoder with 3 input channels
        with_conv_hp.set_hparam('use_convs', True)
        with_conv_hp.set_hparam('input_nc', 3)
        with_conv_hp.set_hparam('builder', LayerBuilderParams(
            True, self._hp.use_batchnorm, self._hp.normalization, self._hp.predictor_normalization))
        self.start_goal_enc = Encoder(with_conv_hp)

# update tensorflow's hparams object using values from
# python's ArgumentParser.
parser = argparse.ArgumentParser()
params, pargs = update_params_from_parser(params, parser)

logger.info('Load echonest dataset')
dataset = fetch_echonest(data_home='data/echonest',
                         min_playcount=params.min_playcount,
                         min_interactions=params.min_interactions)
n_users = dataset['interactions'].shape[0]
n_items = dataset['interactions'].shape[1]

# update n_users, n_items parameters
if hasattr(params, 'n_users'):
    params.set_hparam('n_users', n_users)
else:
    params.add_hparam('n_users', n_users)
if hasattr(params, 'n_items'):
    params.set_hparam('n_items', n_items)
else:
    params.add_hparam('n_items', n_items)

model_path = os.path.join(
    'echonest_pltcnt{}_minint{}'.format(params.min_playcount, params.min_interactions),
    'batch_{}'.format(params.batch_size),
    'lr{}_dim{}_nepochs{}_sampler-{}_batchsize{}_negs{}'.format(
        params.learning_rate, params.embedding_dim, params.n_epochs,
        params.sampler, params.batch_size, params.n_negatives))
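
# A sketch of what a helper like update_params_from_parser might do; the real
# helper is not shown above, so the name and behavior here are assumptions. It
# exposes every existing hparam as a CLI flag and writes the parsed values back.
def _update_params_from_parser_sketch(params, parser):
    for name, value in params.values().items():
        # NOTE: booleans would need special handling; argparse's type=bool
        # treats any non-empty string as True.
        parser.add_argument('--' + name, type=type(value), default=value)
    args, pargs = parser.parse_known_args()
    for name in params.values():
        params.set_hparam(name, getattr(args, name))
    return params, pargs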

def parse_args(yaml_path, model_id, default_set, followup=None):
    logger = logging.getLogger(APP_NAME)

    hparams = HParams()
    hparams.add_hparam('model_id', model_id)

    with open('default.yaml') as fp:
        configs = YAML().load(fp)
        default_cfg = configs[default_set]
        add_param_recur(hparams, default_cfg)

    if yaml_path:
        logger.info('loading parameters...')
        with open(yaml_path) as fp:
            customized = YAML().load(fp)
            for k, v in customized.items():
                if k in hparams and hparams.get(k) != v:
                    logger.info('%20s: %20s -> %20s' % (k, hparams.get(k), v))
                    hparams.set_hparam(k, v)
                elif k not in hparams:  # add new parameter
                    hparams.add_hparam(k, v)
                    logger.info('%30s %20s: %20s' %
                                ("[add from %s]" % yaml_path, k, hparams.get(k)))

    if followup:  # useful when changing args for prediction
        logger.info('override args with follow-up args...')
        for k, v in followup.items():
            if k in hparams and hparams.get(k) != v:
                logger.info('%20s: %20s -> %20s' % (k, hparams.get(k), v))
                hparams.set_hparam(k, v)
            elif k not in hparams:
                logger.warning('%s is not a valid attribute! ignore!' % k)

    if 'save_dir' not in hparams:
        hparams.add_hparam('save_dir',
                           os.path.join(hparams.get('model_dir'), hparams.get('model_id')))
    if 'code_dir' not in hparams:
        hparams.add_hparam('code_dir', os.path.join(hparams.get('save_dir'), 'code'))
    hparams.set_hparam('summary_dir', os.path.join(hparams.get('save_dir'), 'summary'))

    # reset logger model id
    logger = set_logger(model_id='%s:%s' % (DEVICE_ID, hparams.get('model_id')))

    try:
        shutil.copytree('./', hparams.get('code_dir'),
                        ignore=shutil.ignore_patterns(*IGNORE_PATTERNS))
        logger.info('current code base is copied to %s' % hparams.get('save_dir'))
    except FileExistsError:
        logger.info('code base exist, no need to copy!')

    # if hparams.get('model_id') != model_id:
    #     logger.warning('model id is changed %s -> %s! '
    #                    'This happens when you train a pretrained model' % (
    #                        hparams.get('model_id'), model_id))
    #     hparams.set_hparam('model_id', model_id)

    if 'loss_csv_file' not in hparams:
        hparams.add_hparam('loss_csv_file', os.path.join(hparams.get('save_dir'), 'loss.csv'))
    if 'is_serving' not in hparams:
        hparams.add_hparam('is_serving', False)

    logger.info('current parameters')
    for k, v in sorted(vars(hparams).items()):
        if not k.startswith('_'):
            logger.info('%20s = %-20s' % (k, v))

    return hparams
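
# An illustrative call of parse_args; the file name, model id, and default-set
# key are assumptions. Note that `followup` can only override keys that already
# exist in the merged configuration; unknown keys are logged and ignored.
def _demo_parse_args():
    # e.g. reuse the training config but shrink the batch size for prediction
    return parse_args('my_model.yaml', model_id='exp-001', default_set='base',
                      followup={'batch_size': 1})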

    src_max_len=200,
    tgt_max_len=200,
    infer_src_max_len=200,
    infer_tgt_max_len=200,
    optimizer='adam',
    learning_rate=0.0001,
    final_learning_rate=0.00002,
    warmup_steps=4000,
    bucket=0,
    use_bpe=False,
    attention_mechanism='multi_head',
    beam_width=1,
    encoder_type='bi',
    unit_type='gru',
    dropout=0.2,
    num_residual_layers=0,
    sos='<s>',
    eos='</s>',
    pad='<pad>',
    unk='<unk>',
)

if params_scale == 'medium':
    hparams.set_hparam('batch_size', 32)
    hparams.set_hparam('bpe_num_symbols', 24000)
    hparams.set_hparam('num_units', 256)
elif params_scale == 'large':
    hparams.set_hparam('batch_size', 64)
    hparams.set_hparam('bpe_num_symbols', 32000)
    hparams.set_hparam('num_units', 512)
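
# Illustrative effect of the scale switch above, assuming the fragment lives in a
# factory such as create_hparams(params_scale) (the wrapper name is an assumption,
# and batch_size / bpe_num_symbols / num_units are defined earlier in the defaults):
#   hparams = create_hparams(params_scale='medium')
#   hparams.num_units         # -> 256
#   hparams.bpe_num_symbols   # -> 24000
#   hparams.batch_size        # -> 32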