Ejemplo n.º 1
0
Archivo: run.py Proyecto: tswsxk/XKT
def train(train_fn,
          test_fn,
          reporthook=None,
          final_reporthook=None,
          **cfg_kwargs):  # pragma: no cover
    """Build a network from the configuration and run ``numerical_check`` on it.

    Parameters
    ----------
    train_fn, test_fn:
        Training / test data locations. Each one is expanded with
        ``Configuration.var2val`` and then loaded through ``etl``.
    reporthook, final_reporthook:
        Optional hooks. They are passed through ``prepare_hyper_search``
        (which returns the hooks that are actually used) and then forwarded
        to ``numerical_check``.
    **cfg_kwargs:
        Keyword overrides used to construct ``Configuration``.
    """
    from longling.ML.toolkit.hyper_search import prepare_hyper_search

    # prepare_hyper_search returns the kwargs and hooks to use, plus a tag.
    prepared = prepare_hyper_search(
        cfg_kwargs, Configuration, reporthook, final_reporthook,
        primary_key="macro_avg:f1",
    )
    cfg_kwargs, reporthook, final_reporthook, tag = prepared

    _cfg = Configuration(**cfg_kwargs)
    print(_cfg)
    _net = get_net(**_cfg.hyper_params)

    # Load the training split first, then the test split.
    train_data, test_data = [
        etl(_cfg.var2val(path), params=_cfg) for path in (train_fn, test_fn)
    ]

    check_options = {
        # Results are dumped only when the returned tag is falsy
        # (presumably: not inside a hyper-parameter search -- confirm).
        "dump_result": not tag,
        "reporthook": reporthook,
        "final_reporthook": final_reporthook,
    }
    numerical_check(_net, _cfg, train_data, test_data, **check_options)
Ejemplo n.º 2
0
def train(train_fn, test_fn, vec_files, reporthook=None, final_reporthook=None, **cfg_kwargs):  # pragma: no cover
    """Load embeddings and data, build the network and run ``numerical_check``.

    Parameters
    ----------
    train_fn, test_fn:
        Training / test data locations, expanded with ``Configuration.var2val``
        before being loaded by ``etl``.
    vec_files:
        Embedding file specification; it is parsed by ``parse_vec_files`` into
        a mapping whose values are expanded with ``Configuration.var2val``.
    reporthook, final_reporthook:
        Optional hooks, passed through ``prepare_hyper_search`` and then
        forwarded to ``numerical_check``.
    **cfg_kwargs:
        Keyword overrides used to construct ``Configuration``.
    """
    from longling import print_time
    from longling.ML.toolkit.hyper_search import prepare_hyper_search

    prepared = prepare_hyper_search(
        cfg_kwargs, Configuration, reporthook, final_reporthook, final_key="prf:avg:f1"
    )
    cfg_kwargs, reporthook, final_reporthook, tag = prepared

    _cfg = Configuration(**cfg_kwargs)
    _cfg.logger.info(_cfg)
    vec_files = parse_vec_files(vec_files)

    # Path expansion and loading both happen inside the timed section.
    with print_time(tips='loading embedding', logger=_cfg.logger):
        resolved_vec_files = {}
        for name, location in vec_files.items():
            resolved_vec_files[name] = _cfg.var2val(location)
        embeddings = load_embedding(resolved_vec_files, _cfg.logger)

    train_data = etl(_cfg.var2val(train_fn), embeddings, params=_cfg)
    test_data = etl(_cfg.var2val(test_fn), embeddings, params=_cfg)

    # The network is sized from the loaded embeddings, initialised on the
    # configured context, and its embedding layer is given the loaded weights.
    _net = get_net(embedding_size=get_embedding_size(embeddings), **_cfg.hyper_params)
    _net.initialize(ctx=_cfg.ctx)
    _net.embedding.set_weight(get_embedding_array(embeddings))

    numerical_check(
        _net,
        _cfg,
        train_data,
        test_data,
        dump_result=not tag,
        reporthook=reporthook,
        final_reporthook=final_reporthook,
    )
Ejemplo n.º 3
0
Archivo: run.py Proyecto: TicOnaLT/XKT
def train(train_fn, test_fn, **cfg_kwargs):
    """Build a network from ``cfg_kwargs`` and run ``numerical_check`` on it.

    ``train_fn`` and ``test_fn`` are expanded with ``Configuration.var2val``
    and loaded through ``etl``; results are always dumped
    (``dump_result=True``).
    """
    config = Configuration(**cfg_kwargs)
    network = get_net(**config.hyper_params)

    # Training split is loaded first, then the test split.
    train_data, test_data = [
        etl(config.var2val(path), params=config) for path in (train_fn, test_fn)
    ]

    numerical_check(network, config, train_data, test_data, dump_result=True)
Ejemplo n.º 4
0
Archivo: run.py Proyecto: tswsxk/XKT
def sym_run(stage: (int, str) = "viz"):  # pragma: no cover
    """Run one development stage, selected by name or by index.

    Stage names map to indices as ``viz`` -> 0, ``pseudo`` -> 1,
    ``real`` -> 2 and ``cli`` -> 3.

    Raises
    ------
    KeyError
        If ``stage`` is a string that is not one of the names above.
    TypeError
        If the resolved stage is greater than 1 and is neither 2 nor 3.
    """
    if isinstance(stage, str):
        stage_ids = dict(viz=0, pseudo=1, real=2, cli=3)
        stage = stage_ids[stage]

    if stage <= 1:
        # Visualisation and the pseudo test share the same small network.
        cfg = Configuration(
            hyper_params=dict(ku_num=835, latent_dim=600, hidden_num=900),
            nettype="EmbedDKT",
        )
        net = get_net(**cfg.hyper_params)

        if stage == 0:
            # Net visualization
            net_viz(net, cfg, False)
        else:
            # Pseudo test
            pseudo_numerical_check(net, cfg)
        return

    if stage == 2:
        # Simple train
        import mxnet as mx

        overrides = dict(
            dataset="assistment_2009_2010",
            ctx=mx.gpu(2),
            optimizer_params={"learning_rate": 0.001},
            hyper_params={"ku_num": 146, "hidden_num": 200, "dropout": 0.5},
            batch_size=16,
        )
        train("$data_dir/train.json", "$data_dir/test.json", **overrides)
        return

    if stage != 3:
        raise TypeError

    # CLI: parse the arguments, then dispatch to the requested sub-command.
    cfg_parser = ConfigurationParser(Configuration, commands=[train])
    cfg_kwargs = cfg_parser()
    assert "subcommand" in cfg_kwargs
    subcommand = cfg_kwargs.pop("subcommand")
    print(cfg_kwargs)
    # NOTE(review): eval() turns the parsed sub-command name into a callable;
    # evaluating command-line input is unsafe -- consider an explicit lookup.
    handler = eval("%s" % subcommand)
    handler(**cfg_kwargs)
Ejemplo n.º 5
0
def sym_run(stage: (int, str) = "viz"):  # pragma: no cover
    """Run one development stage, selected by name or by index.

    Stage names map to indices as ``viz`` -> 0, ``pseudo`` -> 1,
    ``real`` -> 2 and ``cli`` -> 3.

    Raises
    ------
    KeyError
        If ``stage`` is a string that is not one of the names above.
    TypeError
        If the resolved stage is greater than 1 and is neither 2 nor 3.
    """
    if isinstance(stage, str):
        stage_ids = dict(viz=0, pseudo=1, real=2, cli=3)
        stage = stage_ids[stage]

    if stage <= 1:
        # Both stages use a network built from empty hyper-parameter
        # overrides and initialised before use.
        cfg = Configuration(hyper_params=dict())
        net = get_net(**cfg.hyper_params)
        net.initialize()

        if stage == 0:
            # Net visualization
            net_viz(net, cfg, False)
        else:
            # Pseudo test
            pseudo_numerical_check(net, cfg)
        return

    if stage == 2:
        # Simple train
        import mxnet as mx

        overrides = dict(
            ctx=mx.cpu(),
            optimizer_params={"learning_rate": 0.01, "clip_gradient": 1},
            dataset="a0910",
            workspace="init",
        )
        train("$data_dir/train.csv", "$data_dir/test.csv", **overrides)
        return

    if stage != 3:
        raise TypeError

    # CLI: parse the arguments, then dispatch to the requested sub-command.
    cfg_parser = ConfigurationParser(Configuration, commands=[train])
    cfg_kwargs = cfg_parser()
    assert "subcommand" in cfg_kwargs
    subcommand = cfg_kwargs.pop("subcommand")
    print(cfg_kwargs)
    # NOTE(review): eval() turns the parsed sub-command name into a callable;
    # evaluating command-line input is unsafe -- consider an explicit lookup.
    handler = eval("%s" % subcommand)
    handler(**cfg_kwargs)
Ejemplo n.º 6
0
def sym_run(stage: (int, str) = "pseudo"):  # pragma: no cover
    """Run one development stage, selected by name or by index.

    Stage names map to indices as ``pseudo`` -> 0, ``real`` -> 1 and
    ``cli`` -> 2.

    The default used to be the misspelled ``"pesudo"``, which is not a key of
    the stage table, so calling ``sym_run()`` without arguments always raised
    ``KeyError``. The default now names the pseudo-test stage.

    Raises
    ------
    KeyError
        If ``stage`` is a string that is not one of the names above.
    TypeError
        If the resolved stage is not 0, 1 or 2.
    """
    if isinstance(stage, str):
        stage = {
            "pseudo": 0,
            "real": 1,
            "cli": 2,
        }[stage]

    if stage == 0:
        # ############################## Pseudo Test #################################
        cfg = Configuration(
            hyper_params={
                "ku_num": 835,
                "hidden_num": 835,
            },
            ctx="cuda:0,1,2",
        )
        net = get_net(**cfg.hyper_params)
        pseudo_numerical_check(net, cfg)

    elif stage == 1:
        # ################################# Simple Train ###############################
        train(
            "$data_dir/train",
            "$data_dir/test",
            dataset="junyi",
            ctx="cuda:0",
            optimizer_params={"lr": 0.001},
            hyper_params={
                "ku_num": 835,
                "hidden_num": 835
            },
            batch_size=16,
        )

    elif stage == 2:
        # ################################# CLI ###########################
        cfg_parser = ConfigurationParser(Configuration, commands=[train])
        cfg_kwargs = cfg_parser()
        assert "subcommand" in cfg_kwargs
        subcommand = cfg_kwargs["subcommand"]
        del cfg_kwargs["subcommand"]
        print(cfg_kwargs)
        # NOTE(review): eval() turns the parsed sub-command name into a
        # callable; evaluating command-line input is unsafe -- consider an
        # explicit lookup table instead.
        eval("%s" % subcommand)(**cfg_kwargs)

    else:
        raise TypeError
Ejemplo n.º 7
0
def train(train_fn,
          test_fn,
          reporthook=None,
          final_reporthook=None,
          primary_key="macro_auc",
          params_save=False,
          **cfg_kwargs):  # pragma: no cover
    """Load data, build and initialise the network, then run ``numerical_check``.

    Parameters
    ----------
    train_fn, test_fn:
        Training / test data locations, expanded with ``Configuration.var2val``
        before being loaded by ``etl``. ``etl`` is unpacked as a pair here;
        only the first element of the test pair is used.
    reporthook, final_reporthook:
        Optional hooks, passed through ``prepare_hyper_search`` and then
        forwarded to ``numerical_check``.
    primary_key:
        Forwarded to ``prepare_hyper_search`` as ``primary_key``.
    params_save:
        Forwarded unchanged to ``numerical_check``.
    **cfg_kwargs:
        Keyword overrides used to construct ``Configuration``.
    """
    from longling.ML.toolkit.hyper_search import prepare_hyper_search

    prepared = prepare_hyper_search(
        cfg_kwargs, Configuration, reporthook, final_reporthook,
        primary_key=primary_key,
    )
    cfg_kwargs, reporthook, final_reporthook, tag = prepared

    _cfg = Configuration(**cfg_kwargs)
    print(_cfg)

    train_data, train_df = etl(_cfg.var2val(train_fn), params=_cfg)
    test_data, _unused_df = etl(_cfg.var2val(test_fn), params=_cfg)

    _net = get_net(**_cfg.hyper_params)
    # Initialisation receives the second element of the training pair plus
    # the user/item counts taken from the hyper-parameters.
    net_init(
        _net,
        cfg=_cfg,
        **_cfg.init_params,
        int_df=train_df,
        user_num=_cfg.hyper_params["user_num"],
        item_num=_cfg.hyper_params["item_num"],
        logger=_cfg.logger,
    )

    check_options = {
        "dump_result": not tag,
        "reporthook": reporthook,
        "final_reporthook": final_reporthook,
        "params_save": params_save,
    }
    numerical_check(_net, _cfg, train_data, test_data, **check_options)
Ejemplo n.º 8
0
def sym_run(stage: (int, str) = "viz"):  # pragma: no cover
    """Run one development stage, selected by name or by index.

    Stage names map to indices as ``viz`` -> 0, ``pseudo`` -> 1,
    ``real`` -> 2 and ``cli`` -> 3.

    Raises
    ------
    KeyError
        If ``stage`` is a string that is not one of the names above.
    TypeError
        If the resolved stage is greater than 1 and is neither 2 nor 3.
    """
    if isinstance(stage, str):
        stage_ids = {"viz": 0, "pseudo": 1, "real": 2, "cli": 3}
        stage = stage_ids[stage]

    if stage <= 1:
        # Visualisation and the pseudo test share the same network settings.
        cfg = Configuration(
            hyper_params={
                "ku_num": 835,
                "key_embedding_dim": 50,
                "value_embedding_dim": 200,
                "hidden_num": 900,
                "key_memory_size": 20,
                "value_memory_size": 20,
                "key_memory_state_dim": 50,
                "value_memory_state_dim": 200,
            }
        )
        net = get_net(**cfg.hyper_params)

        if stage == 0:
            # Net visualization
            net_viz(net, cfg, False)
        else:
            # Pseudo test
            pseudo_numerical_check(net, cfg)
        return

    if stage == 2:
        # Simple train
        import mxnet as mx

        overrides = {
            "dataset": "junyi",
            "ctx": mx.gpu(),
            "optimizer_params": {"learning_rate": 0.001},
            "hyper_params": {
                "ku_num": 835,
                "key_embedding_dim": 200,
                "value_embedding_dim": 200,
                "hidden_num": 835,
                "key_memory_size": 40,
                "dropout": 0.5,
            },
            "batch_size": 16,
            "root": "../../../",
            "data_dir": "$root_data_dir",
            "end_epoch": 10,
        }
        train("$data_dir/train.json", "$data_dir/test.json", **overrides)
        return

    if stage != 3:
        raise TypeError

    # CLI: parse the arguments, then dispatch to the requested sub-command.
    cfg_parser = ConfigurationParser(Configuration, commands=[train])
    cfg_kwargs = cfg_parser()
    assert "subcommand" in cfg_kwargs
    subcommand = cfg_kwargs.pop("subcommand")
    print(cfg_kwargs)
    # NOTE(review): eval() turns the parsed sub-command name into a callable;
    # evaluating command-line input is unsafe -- consider an explicit lookup.
    handler = eval("%s" % subcommand)
    handler(**cfg_kwargs)
Ejemplo n.º 9
0
def sym_run(stage: (int, str) = "viz"):  # pragma: no cover
    """Run one development stage, selected by name or by index.

    Stage names map to indices as ``viz`` -> 0, ``pseudo`` -> 1,
    ``real`` -> 2 and ``cli`` -> 3.

    Raises
    ------
    KeyError
        If ``stage`` is a string that is not one of the names above.
    TypeError
        If the resolved stage is greater than 1 and is neither 2 nor 3.
    """
    if isinstance(stage, str):
        stage_ids = dict(viz=0, pseudo=1, real=2, cli=3)
        stage = stage_ids[stage]

    if stage <= 1:
        # Visualisation and the pseudo test use fixed embedding sizes instead
        # of sizes read from embedding files.
        cfg = Configuration(
            hyper_params=dict(model_type="wclstm", class_num=32, embedding_dim=256)
        )
        embedding_size = dict(w=100, c=100)
        net = get_net(embedding_size=embedding_size, **cfg.hyper_params)

        if stage == 0:
            # Net visualization
            net_viz(net, cfg, False)
        else:
            # Pseudo test
            pseudo_numerical_check(net, cfg)
        return

    if stage == 2:
        # Simple train
        import mxnet as mx

        overrides = dict(
            ctx=mx.gpu(),
            hyper_params={
                "model_type": "wclstm",
                "class_num": 32,
                "embedding_dim": 256,
            },
            root="../../../..",
            dataset="ctc32",
            data_dir="$root_data_dir",
            end_epoch=1,
        )
        train(
            "$data_dir/train.json",
            "$data_dir/test.json",
            "w:$vec_dir/word.vec.dat,c:$vec_dir/char.vec.dat",
            **overrides,
        )
        return

    if stage != 3:
        raise TypeError

    # CLI: parse the arguments, then dispatch to the requested sub-command.
    cfg_parser = ConfigurationParser(Configuration, commands=[train])
    cfg_kwargs = cfg_parser()
    assert "subcommand" in cfg_kwargs
    subcommand = cfg_kwargs.pop("subcommand")
    print(cfg_kwargs)
    # NOTE(review): eval() turns the parsed sub-command name into a callable;
    # evaluating command-line input is unsafe -- consider an explicit lookup.
    handler = eval("%s" % subcommand)
    handler(**cfg_kwargs)
Ejemplo n.º 10
0
def sym_run(stage: (int, str) = "viz"):  # pragma: no cover
    """Run one development stage, selected by name or by index.

    Stage names map to indices as ``viz`` -> 0, ``pseudo`` -> 1,
    ``real`` -> 2 and ``cli`` -> 3.

    Raises
    ------
    KeyError
        If ``stage`` is a string that is not one of the names above.
    TypeError
        If the resolved stage is greater than 1 and is neither 2 nor 3.
    """
    if isinstance(stage, str):
        stage_ids = {"viz": 0, "pseudo": 1, "real": 2, "cli": 3}
        stage = stage_ids[stage]

    if stage <= 1:
        # Visualisation and the pseudo test share the same small network.
        cfg = Configuration(hyper_params={"ku_num": 835})
        net = get_net(**cfg.hyper_params)

        if stage == 0:
            # Net visualization
            net_viz(net, cfg, False)
        else:
            # Pseudo test
            pseudo_numerical_check(net, cfg)
        return

    if stage == 2:
        # Simple train
        import mxnet as mx

        overrides = {
            "ctx": mx.gpu(),
            "optimizer_params": {"learning_rate": 0.001},
            "dump": True,
            "data_dir": "$root_data_dir",
            "end_epoch": 10,
            "hyper_params": {
                "ku_num": 835,
                "hidden_num": 32,
                "latent_dim": 32,
                "dropout": 0.5,
            },
            "dataset": "junyi",
            "root": "/data/tongshiwei/KT",
            "root_data_dir": "$root/$dataset",
        }
        train("$data_dir/train.json", "$data_dir/test.json", **overrides)
        return

    if stage != 3:
        raise TypeError

    # CLI: parse the arguments, then dispatch to the requested sub-command.
    cfg_parser = ConfigurationParser(Configuration, commands=[train])
    cfg_kwargs = cfg_parser()
    assert "subcommand" in cfg_kwargs
    subcommand = cfg_kwargs.pop("subcommand")
    print(cfg_kwargs)
    # NOTE(review): eval() turns the parsed sub-command name into a callable;
    # evaluating command-line input is unsafe -- consider an explicit lookup.
    handler = eval("%s" % subcommand)
    handler(**cfg_kwargs)
Ejemplo n.º 11
0
Archivo: run.py Proyecto: tswsxk/xrec
def sym_run(stage: (int, str) = "viz"):  # pragma: no cover
    """Run one development stage, selected by name or by index.

    Stage names map to indices as ``viz`` -> 0, ``pseudo`` -> 1,
    ``real`` -> 2 and ``cli`` -> 3.

    Raises
    ------
    KeyError
        If ``stage`` is a string that is not one of the names above.
    TypeError
        If the resolved stage is greater than 1 and is neither 2 nor 3.
    """
    if isinstance(stage, str):
        stage_ids = dict(viz=0, pseudo=1, real=2, cli=3)
        stage = stage_ids[stage]

    if stage <= 1:
        # Visualisation and the pseudo test share the same configuration.
        net_settings = {
            "user_num": 1000,
            "item_num": 100,
            "vec_dim": 100,
            "op": "mlp",
        }
        eval_settings = {
            "unlabeled_value": 0,
            "k": [1, 3],
            "pointwise": True,
        }
        cfg = Configuration(hyper_params=net_settings, eval_params=eval_settings)
        net = get_net(**cfg.hyper_params)

        if stage == 0:
            # Net visualization
            net_viz(net, cfg, False)
        else:
            # Pseudo test
            pseudo_numerical_check(net, cfg)
        return

    if stage == 2:
        # Simple train
        import mxnet as mx

        overrides = dict(
            hyper_params={
                "num_a": 6040,
                "num_b": 3900,
                "vec_dim": 128,
                "op": "linear",
            },
            root_data_dir="../../../../",
            optimizer_params={"learning_rate": 0.001},
            ctx=[mx.gpu(5)],
            batch_size=16,
        )
        train(
            "$data_dir/ml-1m/train.jsonl",
            "$data_dir/ml-1m/test.jsonl",
            **overrides,
        )
        return

    if stage != 3:
        raise TypeError

    # CLI: parse the arguments, then dispatch to the requested sub-command.
    cfg_parser = ConfigurationParser(Configuration, commands=[train])
    cfg_kwargs = cfg_parser()
    assert "subcommand" in cfg_kwargs
    subcommand = cfg_kwargs.pop("subcommand")
    print(cfg_kwargs)
    # NOTE(review): eval() turns the parsed sub-command name into a callable;
    # evaluating command-line input is unsafe -- consider an explicit lookup.
    handler = eval("%s" % subcommand)
    handler(**cfg_kwargs)