def train(train_fn, test_fn, reporthook=None, final_reporthook=None, **cfg_kwargs):  # pragma: no cover
    """Build a network from the configuration and run ``numerical_check`` on it.

    Parameters
    ----------
    train_fn:
        Training data location; resolved with ``Configuration.var2val``
        before being handed to ``etl``.
    test_fn:
        Test data location; resolved the same way as ``train_fn``.
    reporthook, final_reporthook:
        Optional hooks. They are passed through ``prepare_hyper_search``
        (which may replace them) and then forwarded to ``numerical_check``.
    cfg_kwargs:
        Keyword arguments used to construct ``Configuration`` after they
        have been processed by ``prepare_hyper_search``.
    """
    from longling.ML.toolkit.hyper_search import prepare_hyper_search

    prepared = prepare_hyper_search(
        cfg_kwargs, Configuration, reporthook, final_reporthook,
        primary_key="macro_avg:f1",
    )
    cfg_kwargs, reporthook, final_reporthook, tag = prepared

    config = Configuration(**cfg_kwargs)
    print(config)

    model = get_net(**config.hyper_params)

    train_path = config.var2val(train_fn)
    train_data = etl(train_path, params=config)
    test_path = config.var2val(test_fn)
    test_data = etl(test_path, params=config)

    # Results are dumped only when prepare_hyper_search returned a falsy tag.
    check_options = {
        "dump_result": not tag,
        "reporthook": reporthook,
        "final_reporthook": final_reporthook,
    }
    numerical_check(model, config, train_data, test_data, **check_options)
def train(train_fn, test_fn, vec_files, reporthook=None, final_reporthook=None, **cfg_kwargs):  # pragma: no cover
    """Train a network whose embedding layer is seeded from pre-trained vectors.

    Parameters
    ----------
    train_fn, test_fn:
        Training / test data locations; each is resolved with
        ``Configuration.var2val`` and loaded through ``etl`` together with
        the loaded embeddings.
    vec_files:
        Embedding file specification. It is converted by ``parse_vec_files``
        into a mapping whose values are resolved with ``var2val`` and passed
        to ``load_embedding``.
    reporthook, final_reporthook:
        Optional hooks, passed through ``prepare_hyper_search`` and then
        forwarded to ``numerical_check``.
    cfg_kwargs:
        Keyword arguments used to construct ``Configuration`` after they
        have been processed by ``prepare_hyper_search``.
    """
    from longling import print_time
    from longling.ML.toolkit.hyper_search import prepare_hyper_search

    prepared = prepare_hyper_search(
        cfg_kwargs, Configuration, reporthook, final_reporthook,
        final_key="prf:avg:f1",
    )
    cfg_kwargs, reporthook, final_reporthook, tag = prepared

    config = Configuration(**cfg_kwargs)
    config.logger.info(config)

    vec_files = parse_vec_files(vec_files)
    with print_time(tips='loading embedding', logger=config.logger):
        # Resolve every embedding path inside the timed section, as before.
        vec_paths = {}
        for name, location in vec_files.items():
            vec_paths[name] = config.var2val(location)
        embeddings = load_embedding(vec_paths, config.logger)

    train_path = config.var2val(train_fn)
    train_data = etl(train_path, embeddings, params=config)
    test_path = config.var2val(test_fn)
    test_data = etl(test_path, embeddings, params=config)

    embedding_size = get_embedding_size(embeddings)
    model = get_net(embedding_size=embedding_size, **config.hyper_params)
    model.initialize(ctx=config.ctx)

    # Overwrite the freshly initialized embedding weights with the loaded ones.
    embedding_array = get_embedding_array(embeddings)
    model.embedding.set_weight(embedding_array)

    # Results are dumped only when prepare_hyper_search returned a falsy tag.
    check_options = {
        "dump_result": not tag,
        "reporthook": reporthook,
        "final_reporthook": final_reporthook,
    }
    numerical_check(model, config, train_data, test_data, **check_options)
def train(train_fn, test_fn, **cfg_kwargs):
    """Build a network from ``cfg_kwargs`` and run ``numerical_check`` on it.

    Parameters
    ----------
    train_fn, test_fn:
        Training / test data locations; each is resolved with
        ``Configuration.var2val`` before being loaded through ``etl``.
    cfg_kwargs:
        Keyword arguments forwarded unchanged to ``Configuration``.
    """
    config = Configuration(**cfg_kwargs)
    model = get_net(**config.hyper_params)

    train_path = config.var2val(train_fn)
    train_data = etl(train_path, params=config)
    test_path = config.var2val(test_fn)
    test_data = etl(test_path, params=config)

    # This variant always asks numerical_check to dump its result.
    numerical_check(model, config, train_data, test_data, dump_result=True)
def sym_run(stage: (int, str) = "viz"):  # pragma: no cover
    """Run one development stage of this module.

    Parameters
    ----------
    stage:
        Stage name (``"viz"``, ``"pseudo"``, ``"real"``, ``"cli"``) or the
        matching integer id (0-3). Any numeric value ``<= 1`` other than 0
        runs the pseudo numerical check.

    Raises
    ------
    KeyError
        If ``stage`` is a string that is not a known stage name.
    TypeError
        If ``stage`` is a number greater than 1 that is neither 2 nor 3.
    """
    stage_ids = {"viz": 0, "pseudo": 1, "real": 2, "cli": 3}
    if isinstance(stage, str):
        stage = stage_ids[stage]

    if stage <= 1:
        cfg = Configuration(
            hyper_params=dict(ku_num=835, latent_dim=600, hidden_num=900),
            nettype="EmbedDKT",
        )
        net = get_net(**cfg.hyper_params)

        if stage != 0:
            # Pseudo test
            pseudo_numerical_check(net, cfg)
        else:
            # Net visualization
            net_viz(net, cfg, False)
        return

    if stage == 2:
        # Simple train
        import mxnet as mx

        train_options = dict(
            dataset="assistment_2009_2010",
            ctx=mx.gpu(2),
            optimizer_params=dict(learning_rate=0.001),
            hyper_params=dict(ku_num=146, hidden_num=200, dropout=0.5),
            batch_size=16,
        )
        train("$data_dir/train.json", "$data_dir/test.json", **train_options)
        return

    if stage == 3:
        # CLI
        cfg_parser = ConfigurationParser(Configuration, commands=[train])
        cfg_kwargs = cfg_parser()
        assert "subcommand" in cfg_kwargs
        subcommand = cfg_kwargs.pop("subcommand")
        print(cfg_kwargs)
        # NOTE(review): eval() turns the parsed sub-command string into a
        # callable; it will evaluate any expression the parser lets through.
        # Consider an explicit name-to-function mapping.
        command = eval("%s" % subcommand)
        command(**cfg_kwargs)
        return

    raise TypeError
def sym_run(stage: (int, str) = "viz"):  # pragma: no cover
    """Run one development stage of this module.

    Parameters
    ----------
    stage:
        Stage name (``"viz"``, ``"pseudo"``, ``"real"``, ``"cli"``) or the
        matching integer id (0-3). Any numeric value ``<= 1`` other than 0
        runs the pseudo numerical check.

    Raises
    ------
    KeyError
        If ``stage`` is a string that is not a known stage name.
    TypeError
        If ``stage`` is a number greater than 1 that is neither 2 nor 3.
    """
    stage_ids = {"viz": 0, "pseudo": 1, "real": 2, "cli": 3}
    if isinstance(stage, str):
        stage = stage_ids[stage]

    if stage <= 1:
        # Default hyper-parameters are used for visualization / pseudo test.
        cfg = Configuration(hyper_params=dict())
        net = get_net(**cfg.hyper_params)
        net.initialize()

        if stage != 0:
            # Pseudo test
            pseudo_numerical_check(net, cfg)
        else:
            # Net visualization
            net_viz(net, cfg, False)
        return

    if stage == 2:
        # Simple train
        import mxnet as mx

        # Alternatives that were left disabled in the original call:
        #   init_params_update={"initial_user_item": False},
        #   workspace="random", params_save=True
        train_options = dict(
            ctx=mx.cpu(),
            optimizer_params=dict(learning_rate=0.01, clip_gradient=1),
            dataset="a0910",
            workspace="init",
        )
        train("$data_dir/train.csv", "$data_dir/test.csv", **train_options)
        return

    if stage == 3:
        # CLI
        cfg_parser = ConfigurationParser(Configuration, commands=[train])
        cfg_kwargs = cfg_parser()
        assert "subcommand" in cfg_kwargs
        subcommand = cfg_kwargs.pop("subcommand")
        print(cfg_kwargs)
        # NOTE(review): eval() turns the parsed sub-command string into a
        # callable; it will evaluate any expression the parser lets through.
        # Consider an explicit name-to-function mapping.
        command = eval("%s" % subcommand)
        command(**cfg_kwargs)
        return

    raise TypeError
def sym_run(stage: (int, str) = "pseudo"):  # pragma: no cover
    """Run one development stage of this module.

    The default used to be the misspelled ``"pesudo"``, which is not a key
    of the stage table, so calling ``sym_run()`` without arguments always
    raised ``KeyError``. The default is now the valid name ``"pseudo"``.

    Parameters
    ----------
    stage:
        Stage name (``"pseudo"``, ``"real"``, ``"cli"``) or the matching
        integer id (0-2).

    Raises
    ------
    KeyError
        If ``stage`` is a string that is not a known stage name.
    TypeError
        If ``stage`` is not a string and does not equal 0, 1 or 2.
    """
    if isinstance(stage, str):
        stage = {
            "pseudo": 0,
            "real": 1,
            "cli": 2,
        }[stage]

    if stage == 0:
        # ############################## Pseudo Test #################################
        cfg = Configuration(
            hyper_params={
                "ku_num": 835,
                "hidden_num": 835,
            },
            ctx="cuda:0,1,2",
        )
        net = get_net(**cfg.hyper_params)
        pseudo_numerical_check(net, cfg)

    elif stage == 1:
        # ################################# Simple Train ###############################
        train(
            "$data_dir/train",
            "$data_dir/test",
            dataset="junyi",
            ctx="cuda:0",
            optimizer_params={"lr": 0.001},
            hyper_params={
                "ku_num": 835,
                "hidden_num": 835
            },
            batch_size=16,
        )

    elif stage == 2:
        # ################################# CLI ###########################
        cfg_parser = ConfigurationParser(Configuration, commands=[train])
        cfg_kwargs = cfg_parser()
        assert "subcommand" in cfg_kwargs
        subcommand = cfg_kwargs.pop("subcommand")
        print(cfg_kwargs)
        # NOTE(review): eval() turns the parsed sub-command string into a
        # callable; it will evaluate any expression the parser lets through.
        # Consider an explicit name-to-function mapping.
        eval("%s" % subcommand)(**cfg_kwargs)

    else:
        raise TypeError("unsupported stage: %r" % (stage,))
def train(train_fn, test_fn, reporthook=None, final_reporthook=None, primary_key="macro_auc",
          params_save=False, **cfg_kwargs):  # pragma: no cover
    """Load the data, build and initialize the network, then run ``numerical_check``.

    Parameters
    ----------
    train_fn, test_fn:
        Training / test data locations; each is resolved with
        ``Configuration.var2val`` before being loaded through ``etl``.
    reporthook, final_reporthook:
        Optional hooks, passed through ``prepare_hyper_search`` and then
        forwarded to ``numerical_check``.
    primary_key:
        Forwarded to ``prepare_hyper_search`` as ``primary_key``.
    params_save:
        Forwarded to ``numerical_check`` as ``params_save``.
    cfg_kwargs:
        Keyword arguments used to construct ``Configuration`` after they
        have been processed by ``prepare_hyper_search``.
    """
    from longling.ML.toolkit.hyper_search import prepare_hyper_search

    prepared = prepare_hyper_search(
        cfg_kwargs, Configuration, reporthook, final_reporthook,
        primary_key=primary_key,
    )
    cfg_kwargs, reporthook, final_reporthook, tag = prepared

    config = Configuration(**cfg_kwargs)
    print(config)

    # etl returns a pair here; only the training side's second element is used.
    train_path = config.var2val(train_fn)
    train_data, train_df = etl(train_path, params=config)
    test_path = config.var2val(test_fn)
    test_data, _test_df = etl(test_path, params=config)

    model = get_net(**config.hyper_params)
    net_init(
        model,
        cfg=config,
        **config.init_params,
        int_df=train_df,
        user_num=config.hyper_params["user_num"],
        item_num=config.hyper_params["item_num"],
        logger=config.logger
    )

    # Results are dumped only when prepare_hyper_search returned a falsy tag.
    check_options = {
        "dump_result": not tag,
        "reporthook": reporthook,
        "final_reporthook": final_reporthook,
        "params_save": params_save,
    }
    numerical_check(model, config, train_data, test_data, **check_options)
def sym_run(stage: (int, str) = "viz"):  # pragma: no cover
    """Run one development stage of this module.

    Parameters
    ----------
    stage:
        Stage name (``"viz"``, ``"pseudo"``, ``"real"``, ``"cli"``) or the
        matching integer id (0-3). Any numeric value ``<= 1`` other than 0
        runs the pseudo numerical check.

    Raises
    ------
    KeyError
        If ``stage`` is a string that is not a known stage name.
    TypeError
        If ``stage`` is a number greater than 1 that is neither 2 nor 3.
    """
    stage_ids = {"viz": 0, "pseudo": 1, "real": 2, "cli": 3}
    if isinstance(stage, str):
        stage = stage_ids[stage]

    if stage <= 1:
        demo_hyper_params = {
            "ku_num": 835,
            "key_embedding_dim": 50,
            "value_embedding_dim": 200,
            "hidden_num": 900,
            "key_memory_size": 20,
            "value_memory_size": 20,
            "key_memory_state_dim": 50,
            "value_memory_state_dim": 200,
        }
        cfg = Configuration(hyper_params=demo_hyper_params)
        net = get_net(**cfg.hyper_params)

        if stage != 0:
            # Pseudo test
            pseudo_numerical_check(net, cfg)
        else:
            # Net visualization
            net_viz(net, cfg, False)
        return

    if stage == 2:
        # Simple train
        import mxnet as mx

        train_options = {
            "dataset": "junyi",
            "ctx": mx.gpu(),
            "optimizer_params": {"learning_rate": 0.001},
            "hyper_params": {
                "ku_num": 835,
                "key_embedding_dim": 200,
                "value_embedding_dim": 200,
                "hidden_num": 835,
                "key_memory_size": 40,
                "dropout": 0.5,
            },
            "batch_size": 16,
            "root": "../../../",
            "data_dir": "$root_data_dir",
            "end_epoch": 10,
        }
        train("$data_dir/train.json", "$data_dir/test.json", **train_options)
        return

    if stage == 3:
        # CLI
        cfg_parser = ConfigurationParser(Configuration, commands=[train])
        cfg_kwargs = cfg_parser()
        assert "subcommand" in cfg_kwargs
        subcommand = cfg_kwargs.pop("subcommand")
        print(cfg_kwargs)
        # NOTE(review): eval() turns the parsed sub-command string into a
        # callable; it will evaluate any expression the parser lets through.
        # Consider an explicit name-to-function mapping.
        command = eval("%s" % subcommand)
        command(**cfg_kwargs)
        return

    raise TypeError
def sym_run(stage: (int, str) = "viz"):  # pragma: no cover
    """Run one development stage of this module.

    Parameters
    ----------
    stage:
        Stage name (``"viz"``, ``"pseudo"``, ``"real"``, ``"cli"``) or the
        matching integer id (0-3). Any numeric value ``<= 1`` other than 0
        runs the pseudo numerical check.

    Raises
    ------
    KeyError
        If ``stage`` is a string that is not a known stage name.
    TypeError
        If ``stage`` is a number greater than 1 that is neither 2 nor 3.
    """
    stage_ids = {"viz": 0, "pseudo": 1, "real": 2, "cli": 3}
    if isinstance(stage, str):
        stage = stage_ids[stage]

    if stage <= 1:
        cfg = Configuration(
            hyper_params=dict(model_type="wclstm", class_num=32, embedding_dim=256)
        )
        # Fixed embedding sizes stand in for the ones normally derived from
        # loaded embedding files.
        embedding_size = dict(w=100, c=100)
        net = get_net(embedding_size=embedding_size, **cfg.hyper_params)

        if stage != 0:
            # Pseudo test
            pseudo_numerical_check(net, cfg)
        else:
            # Net visualization
            net_viz(net, cfg, False)
        return

    if stage == 2:
        # Simple train
        import mxnet as mx

        train_options = dict(
            ctx=mx.gpu(),
            hyper_params=dict(model_type="wclstm", class_num=32, embedding_dim=256),
            root="../../../..",
            dataset="ctc32",
            data_dir="$root_data_dir",
            end_epoch=1,
        )
        train(
            "$data_dir/train.json",
            "$data_dir/test.json",
            "w:$vec_dir/word.vec.dat,c:$vec_dir/char.vec.dat",
            **train_options
        )
        return

    if stage == 3:
        # CLI
        cfg_parser = ConfigurationParser(Configuration, commands=[train])
        cfg_kwargs = cfg_parser()
        assert "subcommand" in cfg_kwargs
        subcommand = cfg_kwargs.pop("subcommand")
        print(cfg_kwargs)
        # NOTE(review): eval() turns the parsed sub-command string into a
        # callable; it will evaluate any expression the parser lets through.
        # Consider an explicit name-to-function mapping.
        command = eval("%s" % subcommand)
        command(**cfg_kwargs)
        return

    raise TypeError
def sym_run(stage: (int, str) = "viz"):  # pragma: no cover
    """Run one development stage of this module.

    Parameters
    ----------
    stage:
        Stage name (``"viz"``, ``"pseudo"``, ``"real"``, ``"cli"``) or the
        matching integer id (0-3). Any numeric value ``<= 1`` other than 0
        runs the pseudo numerical check.

    Raises
    ------
    KeyError
        If ``stage`` is a string that is not a known stage name.
    TypeError
        If ``stage`` is a number greater than 1 that is neither 2 nor 3.
    """
    stage_ids = {"viz": 0, "pseudo": 1, "real": 2, "cli": 3}
    if isinstance(stage, str):
        stage = stage_ids[stage]

    if stage <= 1:
        cfg = Configuration(hyper_params={"ku_num": 835})
        net = get_net(**cfg.hyper_params)

        if stage != 0:
            # Pseudo test
            pseudo_numerical_check(net, cfg)
        else:
            # Net visualization
            net_viz(net, cfg, False)
        return

    if stage == 2:
        # Simple train
        import mxnet as mx

        # A disabled alternative in the original call targeted the
        # "assistment_2009_2010" dataset with end_epoch=20, root="../../../"
        # and hyper_params ku_num=124, hidden_num=32, latent_dim=32, dropout=0.5.
        train_options = {
            "ctx": mx.gpu(),
            "optimizer_params": {"learning_rate": 0.001},
            "dump": True,
            "data_dir": "$root_data_dir",
            "end_epoch": 10,
            "hyper_params": {
                "ku_num": 835,
                "hidden_num": 32,
                "latent_dim": 32,
                "dropout": 0.5,
            },
            "dataset": "junyi",
            "root": "/data/tongshiwei/KT",
            "root_data_dir": "$root/$dataset",
        }
        train("$data_dir/train.json", "$data_dir/test.json", **train_options)
        return

    if stage == 3:
        # CLI
        cfg_parser = ConfigurationParser(Configuration, commands=[train])
        cfg_kwargs = cfg_parser()
        assert "subcommand" in cfg_kwargs
        subcommand = cfg_kwargs.pop("subcommand")
        print(cfg_kwargs)
        # NOTE(review): eval() turns the parsed sub-command string into a
        # callable; it will evaluate any expression the parser lets through.
        # Consider an explicit name-to-function mapping.
        command = eval("%s" % subcommand)
        command(**cfg_kwargs)
        return

    raise TypeError
def sym_run(stage: (int, str) = "viz"):  # pragma: no cover
    """Run one development stage of this module.

    Parameters
    ----------
    stage:
        Stage name (``"viz"``, ``"pseudo"``, ``"real"``, ``"cli"``) or the
        matching integer id (0-3). Any numeric value ``<= 1`` other than 0
        runs the pseudo numerical check.

    Raises
    ------
    KeyError
        If ``stage`` is a string that is not a known stage name.
    TypeError
        If ``stage`` is a number greater than 1 that is neither 2 nor 3.
    """
    stage_ids = {"viz": 0, "pseudo": 1, "real": 2, "cli": 3}
    if isinstance(stage, str):
        stage = stage_ids[stage]

    if stage <= 1:
        cfg = Configuration(
            hyper_params=dict(user_num=1000, item_num=100, vec_dim=100, op="mlp"),
            eval_params=dict(unlabeled_value=0, k=[1, 3], pointwise=True),
        )
        net = get_net(**cfg.hyper_params)

        if stage != 0:
            # Pseudo test
            pseudo_numerical_check(net, cfg)
        else:
            # Net visualization
            net_viz(net, cfg, False)
        return

    if stage == 2:
        # Simple train
        import mxnet as mx

        # NOTE(review): the viz/pseudo configuration above names the sizes
        # "user_num"/"item_num" while this call passes "num_a"/"num_b" -
        # confirm which keys get_net expects.
        # Disabled alternatives in the original call: ctx=[mx.gpu(3)], or
        # learning_rate=0.01 on mx.gpu(5)..mx.gpu(8) with batch_size=256.
        train_options = dict(
            hyper_params=dict(num_a=6040, num_b=3900, vec_dim=128, op="linear"),
            root_data_dir="../../../../",
            optimizer_params=dict(learning_rate=0.001),
            ctx=[mx.gpu(5)],
            batch_size=16,
        )
        train(
            "$data_dir/ml-1m/train.jsonl",
            "$data_dir/ml-1m/test.jsonl",
            **train_options
        )
        return

    if stage == 3:
        # CLI
        cfg_parser = ConfigurationParser(Configuration, commands=[train])
        cfg_kwargs = cfg_parser()
        assert "subcommand" in cfg_kwargs
        subcommand = cfg_kwargs.pop("subcommand")
        print(cfg_kwargs)
        # NOTE(review): eval() turns the parsed sub-command string into a
        # callable; it will evaluate any expression the parser lets through.
        # Consider an explicit name-to-function mapping.
        command = eval("%s" % subcommand)
        command(**cfg_kwargs)
        return

    raise TypeError