Code Example #1
File: structbilty.py  Project: SigridK/bilstm-aux
def main():
    parser = argparse.ArgumentParser(
        description="""Run the bi-LSTM tagger""",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    group_main = parser.add_argument_group('Main', 'main arguments')
    group_main.add_argument("--model",
                            help="path to store/load model [required]",
                            required=True)
    group_main.add_argument(
        "--train",
        nargs='*',
        help="path to train file [if multiple files are given actives MTL]"
    )  # allow multiple train files, each asociated with a task = position in the list
    group_main.add_argument("--dev",
                            nargs='*',
                            help="dev file(s)",
                            required=False)
    group_main.add_argument("--test",
                            nargs='*',
                            help="test file(s) [same order as --train]",
                            required=False)

    group_model = parser.add_argument_group('Model',
                                            'specify model parameters')
    group_model.add_argument("--in_dim",
                             help="input dimension",
                             type=int,
                             default=64)  # default Polyglot size
    group_model.add_argument("--h_dim",
                             help="hidden dimension [default: 100]",
                             type=int,
                             default=100)
    group_model.add_argument("--c_in_dim",
                             help="input dimension for character embeddings",
                             type=int,
                             default=100)
    group_model.add_argument("--c_h_dim",
                             help="hidden dimension for character embeddings",
                             type=int,
                             default=100)
    group_model.add_argument(
        "--h_layers",
        help="number of stacked LSTMs [default: 1 = no stacking]",
        required=False,
        type=int,
        default=1)
    group_model.add_argument(
        "--pred_layer",
        nargs='*',
        help="predict task at this layer [default: last layer]",
        required=False
    )  # for each task the layer on which it is predicted (default 1)
    group_model.add_argument("--embeds",
                             help="word embeddings file",
                             required=False,
                             default=None)
    group_model.add_argument("--crf",
                             help="use CRF instead of local decoding",
                             default=False,
                             action="store_true")
    group_model.add_argument(
        "--viterbi-loss",
        help="Use viterbi loss training (only active if --crf is on)",
        action="store_true",
        default=False)
    group_model.add_argument("--transition-matrix",
                             help="store transition matrix from CRF")

    group_model.add_argument("--builder",
                             help="RNN builder (default: lstmc)",
                             choices=BUILDERS.keys(),
                             default="lstmc")

    group_model.add_argument(
        "--mlp",
        help="add additional MLP layer of this dimension [default 0=disabled]",
        default=0,
        type=int)
    group_model.add_argument(
        "--ac-mlp",
        help=
        "activation function for optional MLP layer [rectify, tanh, ...] (default: tanh)",
        default="tanh",
        choices=ACTIVATION_MAP.keys())
    group_model.add_argument(
        "--ac",
        help="activation function between hidden layers [rectify, tanh, ...]",
        default="tanh",
        choices=ACTIVATION_MAP.keys())

    group_input = parser.add_argument_group('Input', 'specific input options')
    group_input.add_argument(
        "--raw",
        help="expects raw text input (one sentence per line)",
        required=False,
        action="store_true",
        default=False)

    group_output = parser.add_argument_group('Output',
                                             'specific output options')
    group_output.add_argument(
        "--dictionary",
        help=
        "use dictionary as additional features or type constraints (with --type-constraints)",
        default=None)
    group_output.add_argument("--type-constraint",
                              help="use dictionary as type constraints",
                              default=False,
                              action="store_true")
    group_output.add_argument("--embed-lex",
                              help="use dictionary as type constraints",
                              default=False,
                              action="store_true")
    group_output.add_argument("--lex-dim",
                              help="input dimension for lexical features",
                              default=0,
                              type=int)
    group_output.add_argument(
        "--output",
        help="output predictions to file [word|gold|pred]",
        default=None)
    group_output.add_argument("--output-confidences",
                              help="output tag confidences",
                              action="store_true",
                              default=False)
    group_output.add_argument("--save-embeds",
                              help="save word embeddings to file",
                              required=False,
                              default=None)
    group_output.add_argument("--save-lexembeds",
                              help="save lexicon embeddings to file",
                              required=False,
                              default=None)
    group_output.add_argument(
        "--save-cwembeds",
        help="save character-based word-embeddings to file",
        required=False,
        default=None)
    group_output.add_argument(
        "--save-lwembeds",
        help="save lexicon-based word-embeddings to file",
        required=False,
        default=None)
    group_output.add_argument("--mimickx-model",
                              help="use mimickx model for OOVs",
                              required=False,
                              default=None,
                              type=str)

    group_opt = parser.add_argument_group('Optimizer',
                                          'specify training parameters')
    group_opt.add_argument("--iters",
                           help="training iterations",
                           type=int,
                           default=20)
    group_opt.add_argument("--sigma",
                           help="sigma of Gaussian noise",
                           default=0.2,
                           type=float)
    group_opt.add_argument("--trainer",
                           help="trainer [default: sgd]",
                           choices=TRAINER_MAP.keys(),
                           default="sgd")
    group_opt.add_argument(
        "--learning-rate",
        help="learning rate [0: use default]",
        default=0,
        type=float
    )  # see: http://dynet.readthedocs.io/en/latest/optimizers.html
    group_opt.add_argument(
        "--patience",
        help=
        "patience [default: 0=not used], requires specification of --dev and model path --save",
        required=False,
        default=0,
        type=int)
    group_opt.add_argument("--log-losses",
                           help="log loss (for each task if multiple active)",
                           required=False,
                           action="store_true",
                           default=False)
    group_opt.add_argument(
        "--word-dropout-rate",
        help=
        "word dropout rate [default: 0.25], if 0=disabled, recommended: 0.25 (Kiperwasser & Goldberg, 2016)",
        required=False,
        default=0.25,
        type=float)
    group_opt.add_argument("--char-dropout-rate",
                           help="char dropout rate [default: 0=disabled]",
                           required=False,
                           default=0.0,
                           type=float)
    group_opt.add_argument(
        "--disable-backprob-embeds",
        help="disable backprob into embeddings (default is to update)",
        required=False,
        action="store_false",
        default=True)
    group_opt.add_argument(
        "--initializer",
        help="initializer for embeddings (default: constant)",
        choices=INITIALIZER_MAP.keys(),
        default="constant")

    group_dynet = parser.add_argument_group('DyNet', 'DyNet parameters')
    group_dynet.add_argument("--seed",
                             help="random seed (also for DyNet)",
                             required=False,
                             type=int)
    group_dynet.add_argument("--dynet-mem",
                             help="memory for DyNet",
                             required=False,
                             type=int)
    group_dynet.add_argument(
        "--dynet-gpus", help="1 for GPU usage", default=0, type=int
    )  # warning: non-deterministic results on GPU https://github.com/clab/dynet/issues/399
    group_dynet.add_argument("--dynet-autobatch",
                             help="if 1 enable autobatching",
                             default=0,
                             type=int)
    group_dynet.add_argument(
        "--minibatch-size",
        help="size of minibatch for autobatching (1=disabled)",
        default=1,
        type=int)

    try:
        args = parser.parse_args()
    except:
        parser.print_help()
        exit()

    if args.train:
        if len(args.train) > 1:
            if not args.pred_layer:
                print("--pred_layer required!")
                exit()
        elif len(args.train) == 1 and not args.pred_layer:
            args.pred_layer = [args.h_layers]  # assumes h_layers is 1

    if args.c_in_dim == 0:
        print(">>> disable character embeddings <<<")

    if args.minibatch_size > 1:
        print(">>> using minibatch_size {} <<<".format(args.minibatch_size))

    if args.viterbi_loss:
        if not args.crf:
            print(
                "--crf (global decoding) needs to be active when --viterbi is used"
            )
            exit()
    if args.crf:
        if args.viterbi_loss:
            print(">>> using global decoding (Viterbi loss) <<<")
        else:
            print(">>> using global decoding (CRF, neg-log loss) <<<")

    if args.patience:
        if not args.dev or not args.model:
            print(
                "patience requires a dev set and model path (--dev and --model)"
            )
            exit()

    # check if the --model folder exists
    if args.model:
        if os.path.isdir(args.model):
            if not os.path.exists(args.model):
                print("Creating {}..".format(args.model))
                os.makedirs(args.model)
        elif os.path.isdir(os.path.dirname(args.model)) and not os.path.exists(
                os.path.dirname(args.model)):
            print("Creating {}..".format(os.path.dirname(args.model)))
            os.makedirs(os.path.dirname(args.model))

    if args.output:
        if os.path.isdir(os.path.dirname(args.output)) and not os.path.exists(
                os.path.dirname(args.output)):
            os.makedirs(os.path.dirname(args.output))

    if not args.seed:
        ## set seed
        seed = random.randint(1, MAX_SEED)
    else:
        seed = args.seed

    print(">>> using seed: {} <<< ".format(seed))
    np.random.seed(seed)
    random.seed(seed)

    init_dynet(seed)

    if args.mimickx_model:
        from mimickx import Mimickx, load_model  # make sure PYTHONPATH is set
        print(">>> Loading mimickx model {} <<<".format(args.mimickx_model))

    model_path = args.model

    start = time.time()

    if args.train and len(args.train) != 0:

        tagger = NNTagger(args.in_dim,
                          args.h_dim,
                          args.c_in_dim,
                          args.c_h_dim,
                          args.h_layers,
                          args.pred_layer,
                          embeds_file=args.embeds,
                          w_dropout_rate=args.word_dropout_rate,
                          c_dropout_rate=args.char_dropout_rate,
                          activation=ACTIVATION_MAP[args.ac],
                          mlp=args.mlp,
                          activation_mlp=ACTIVATION_MAP[args.ac_mlp],
                          noise_sigma=args.sigma,
                          learning_algo=args.trainer,
                          learning_rate=args.learning_rate,
                          backprob_embeds=args.disable_backprob_embeds,
                          initializer=INITIALIZER_MAP[args.initializer],
                          builder=BUILDERS[args.builder],
                          crf=args.crf,
                          mimickx_model_path=args.mimickx_model,
                          dictionary=args.dictionary,
                          type_constraint=args.type_constraint,
                          lex_dim=args.lex_dim,
                          embed_lex=args.embed_lex)

        dev = None
        train = SeqData(args.train)
        if args.dev:
            dev = SeqData(args.dev)

        tagger.fit(train,
                   args.iters,
                   dev=dev,
                   model_path=model_path,
                   patience=args.patience,
                   minibatch_size=args.minibatch_size,
                   log_losses=args.log_losses)

        if not args.dev and not args.patience:  # in case patience is active it gets saved in the fit function
            save(tagger, model_path)

    if args.test and len(args.test) != 0:

        tagger = load(args.model, args.dictionary)

        # check if mimickx provided after training
        if args.mimickx_model:
            tagger.mimickx_model_path = args.mimickx_model
            tagger.mimickx_model = load_model(args.mimickx_model)

        stdout = sys.stdout
        # One file per test ...
        if args.test:
            test = SeqData(args.test)  # read in all test data

            for i, test_file in enumerate(
                    args.test):  # expect them in same order
                if args.output is not None:
                    sys.stdout = codecs.open(args.output + ".task{}".format(i),
                                             'w',
                                             encoding='utf-8')

                start_testing = time.time()

                print('\nTesting task{}'.format(i), file=sys.stderr)
                print('*******\n', file=sys.stderr)
                correct, total = tagger.evaluate(
                    test,
                    "task{}".format(i),
                    output_predictions=args.output,
                    output_confidences=args.output_confidences,
                    raw=args.raw,
                    unk_tag=None)
                if not args.raw:
                    print("\nTask{} test accuracy on {} items: {:.4f}".format(
                        i, i + 1, correct / total),
                          file=sys.stderr)
                print((
                    "Done. Took {0:.2f} seconds in total (testing took {1:.2f} seconds)."
                    .format(time.time() - start,
                            time.time() - start_testing)),
                      file=sys.stderr)
                sys.stdout = stdout
    if args.train:
        print("Info: biLSTM\n\t" + "\n\t".join([
            "{}: {}".format(a, v) for a, v in vars(args).items()
            if a not in ["train", "test", "dev", "pred_layer"]
        ]))
    else:
        # print less when only testing, as not all train params are stored explicitly
        print("Info: biLSTM\n\t" + "\n\t".join([
            "{}: {}".format(a, v) for a, v in vars(args).items() if a not in [
                "train", "test", "dev", "pred_layer", "initializer", "ac",
                "word_dropout_rate", "patience", "sigma",
                "disable_backprob_embed", "trainer", "dynet_seed", "dynet_mem",
                "iters"
            ]
        ]))

    tagger = load(args.model, args.dictionary)

    if args.save_embeds:
        tagger.save_embeds(args.save_embeds)

    if args.save_lexembeds:
        tagger.save_lex_embeds(args.save_lexembeds)

    if args.save_cwembeds:
        tagger.save_cw_embeds(args.save_cwembeds)

    if args.save_lwembeds:
        tagger.save_lw_embeds(args.save_lwembeds)

    if args.transition_matrix:
        tagger.save_transition_matrix(args.transition_matrix)
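A quick way to exercise this entry point is to set sys.argv before calling main(); this is a minimal sketch, assuming it runs inside structbilty.py and that the CoNLL-style data and model paths (placeholders below) actually exist:

# Hypothetical single-task invocation of the main() above; all paths are placeholders.
import sys
sys.argv = ["structbilty.py",
            "--train", "data/train.conllu",   # placeholder training file
            "--dev", "data/dev.conllu",       # placeholder dev file
            "--model", "models/en-tagger",    # where the trained model gets stored
            "--iters", "20"]
main()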
Code Example #2
File: structbilty.py  Project: bplank/bilstm-aux
def main():
    parser = argparse.ArgumentParser(description="""Run the bi-LSTM tagger""", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    group_main = parser.add_argument_group('Main', 'main arguments')
    group_main.add_argument("--model", help="path to store/load model [required]", required=True)
    group_main.add_argument("--train", nargs='*', help="path to train file [if multiple files are given actives MTL]") # allow multiple train files, each asociated with a task = position in the list
    group_main.add_argument("--dev", nargs='*', help="dev file(s)", required=False)
    group_main.add_argument("--test", nargs='*', help="test file(s) [same order as --train]", required=False)

    group_model = parser.add_argument_group('Model', 'specify model parameters')
    group_model.add_argument("--in_dim", help="input dimension", type=int, default=64) # default Polyglot size
    group_model.add_argument("--h_dim", help="hidden dimension [default: 100]", type=int, default=100)
    group_model.add_argument("--c_in_dim", help="input dimension for character embeddings", type=int, default=100)
    group_model.add_argument("--c_h_dim", help="hidden dimension for character embeddings", type=int, default=100)
    group_model.add_argument("--h_layers", help="number of stacked LSTMs [default: 1 = no stacking]", required=False, type=int, default=1)
    group_model.add_argument("--pred_layer", nargs='*', help="predict task at this layer [default: last layer]", required=False) # for each task the layer on which it is predicted (default 1)
    group_model.add_argument("--embeds", help="word embeddings file", required=False, default=None)
    group_model.add_argument("--crf", help="use CRF instead of local decoding", default=False, action="store_true")
    group_model.add_argument("--viterbi-loss", help="Use viterbi loss training (only active if --crf is on)", action="store_true", default=False)
    group_model.add_argument("--transition-matrix", help="store transition matrix from CRF")

    group_model.add_argument("--builder", help="RNN builder (default: lstmc)", choices=BUILDERS.keys(), default="lstmc")

    group_model.add_argument("--mlp", help="add additional MLP layer of this dimension [default 0=disabled]", default=0, type=int)
    group_model.add_argument("--ac-mlp", help="activation function for optional MLP layer [rectify, tanh, ...] (default: tanh)",
                        default="tanh", choices=ACTIVATION_MAP.keys())
    group_model.add_argument("--ac", help="activation function between hidden layers [rectify, tanh, ...]", default="tanh",
                             choices=ACTIVATION_MAP.keys())

    group_input = parser.add_argument_group('Input', 'specific input options')
    group_input.add_argument("--raw", help="expects raw text input (one sentence per line)", required=False, action="store_true", default=False)

    group_output = parser.add_argument_group('Output', 'specific output options')
    group_output.add_argument("--dictionary", help="use dictionary as additional features or type constraints (with --type-constraints)", default=None)
    group_output.add_argument("--type-constraint", help="use dictionary as type constraints", default=False, action="store_true")
    group_output.add_argument("--embed-lex", help="use dictionary as type constraints", default=False, action="store_true")
    group_output.add_argument("--lex-dim", help="input dimension for lexical features", default=0, type=int)
    group_output.add_argument("--output", help="output predictions to file [word|gold|pred]", default=None)
    group_output.add_argument("--output-confidences", help="output tag confidences", action="store_true", default=False)
    group_output.add_argument("--save-embeds", help="save word embeddings to file", required=False, default=None)
    group_output.add_argument("--save-lexembeds", help="save lexicon embeddings to file", required=False, default=None)
    group_output.add_argument("--save-cwembeds", help="save character-based word-embeddings to file", required=False, default=None)
    group_output.add_argument("--save-lwembeds", help="save lexicon-based word-embeddings to file", required=False, default=None)
    group_output.add_argument("--mimickx-model", help="use mimickx model for OOVs", required=False, default=None, type=str)


    group_opt = parser.add_argument_group('Optimizer', 'specify training parameters')
    group_opt.add_argument("--iters", help="training iterations", type=int,default=20)
    group_opt.add_argument("--sigma", help="sigma of Gaussian noise",default=0.2, type=float)
    group_opt.add_argument("--trainer", help="trainer [default: sgd]", choices=TRAINER_MAP.keys(), default="sgd")
    group_opt.add_argument("--learning-rate", help="learning rate [0: use default]", default=0, type=float) # see: http://dynet.readthedocs.io/en/latest/optimizers.html
    group_opt.add_argument("--patience", help="patience [default: 0=not used], requires specification of --dev and model path --save", required=False, default=0, type=int)
    group_opt.add_argument("--log-losses", help="log loss (for each task if multiple active)", required=False, action="store_true", default=False)
    group_opt.add_argument("--word-dropout-rate", help="word dropout rate [default: 0.25], if 0=disabled, recommended: 0.25 (Kiperwasser & Goldberg, 2016)", required=False, default=0.25, type=float)
    group_opt.add_argument("--char-dropout-rate", help="char dropout rate [default: 0=disabled]", required=False, default=0.0, type=float)
    group_opt.add_argument("--disable-backprob-embeds", help="disable backprob into embeddings (default is to update)",
                        required=False, action="store_false", default=True)
    group_opt.add_argument("--initializer", help="initializer for embeddings (default: constant)",
                        choices=INITIALIZER_MAP.keys(), default="constant")


    group_dynet = parser.add_argument_group('DyNet', 'DyNet parameters')
    group_dynet.add_argument("--seed", help="random seed (also for DyNet)", required=False, type=int)
    group_dynet.add_argument("--dynet-mem", help="memory for DyNet", required=False, type=int)
    group_dynet.add_argument("--dynet-gpus", help="1 for GPU usage", default=0, type=int) # warning: non-deterministic results on GPU https://github.com/clab/dynet/issues/399
    group_dynet.add_argument("--dynet-autobatch", help="if 1 enable autobatching", default=0, type=int)
    group_dynet.add_argument("--minibatch-size", help="size of minibatch for autobatching (1=disabled)", default=1, type=int)

    try:
        args = parser.parse_args()
    except:
        parser.print_help()
        exit()

    if args.train:
        if len(args.train) > 1:
            if not args.pred_layer:
                print("--pred_layer required!")
                exit()
        elif len(args.train) == 1 and not args.pred_layer:
            args.pred_layer = [args.h_layers] # assumes h_layers is 1

    if args.c_in_dim == 0:
        print(">>> disable character embeddings <<<")

    if args.minibatch_size > 1:
        print(">>> using minibatch_size {} <<<".format(args.minibatch_size))

    if args.viterbi_loss:
        if not args.crf:
            print("--crf (global decoding) needs to be active when --viterbi is used")
            exit()
    if args.crf:
        if args.viterbi_loss:
            print(">>> using global decoding (Viterbi loss) <<<")
        else:
            print(">>> using global decoding (CRF, neg-log loss) <<<")

    if args.patience:
        if not args.dev or not args.model:
            print("patience requires a dev set and model path (--dev and --model)")
            exit()

    # check if the --model folder exists
    if args.model:
        if os.path.isdir(args.model):
            if not os.path.exists(args.model):
                print("Creating {}..".format(args.model))
                os.makedirs(args.model)
        elif os.path.isdir(os.path.dirname(args.model)) and not os.path.exists(os.path.dirname(args.model)):
            print("Creating {}..".format(os.path.dirname(args.model)))
            os.makedirs(os.path.dirname(args.model))

    if args.output:
        if os.path.isdir(os.path.dirname(args.output)) and not os.path.exists(os.path.dirname(args.output)):
            os.makedirs(os.path.dirname(args.output))

    if not args.seed:
        ## set seed
        seed = random.randint(1, MAX_SEED)
    else:
        seed = args.seed

    print(">>> using seed: {} <<< ".format(seed))
    np.random.seed(seed)
    random.seed(seed)

    init_dynet(seed)

    if args.mimickx_model:
        from mimickx import Mimickx, load_model  # make sure PYTHONPATH is set
        print(">>> Loading mimickx model {} <<<".format(args.mimickx_model))

    model_path = args.model

    start = time.time()

    if args.train and len( args.train ) != 0:

        tagger = NNTagger(args.in_dim,
                          args.h_dim,
                          args.c_in_dim,
                          args.c_h_dim,
                          args.h_layers,
                          args.pred_layer,
                          embeds_file=args.embeds,
                          w_dropout_rate=args.word_dropout_rate,
                          c_dropout_rate=args.char_dropout_rate,
                          activation=ACTIVATION_MAP[args.ac],
                          mlp=args.mlp,
                          activation_mlp=ACTIVATION_MAP[args.ac_mlp],
                          noise_sigma=args.sigma,
                          learning_algo=args.trainer,
                          learning_rate=args.learning_rate,
                          backprob_embeds=args.disable_backprob_embeds,
                          initializer=INITIALIZER_MAP[args.initializer],
                          builder=BUILDERS[args.builder],
                          crf=args.crf,
                          mimickx_model_path=args.mimickx_model,
                          dictionary=args.dictionary, type_constraint=args.type_constraint,
                          lex_dim=args.lex_dim, embed_lex=args.embed_lex)

        dev = None
        train = SeqData(args.train)
        if args.dev:
            dev = SeqData(args.dev)

        tagger.fit(train, args.iters,
                   dev=dev,
                   model_path=model_path, patience=args.patience, minibatch_size=args.minibatch_size, log_losses=args.log_losses)

        if not args.dev and not args.patience:  # in case patience is active it gets saved in the fit function
            save(tagger, model_path)

    if args.test and len( args.test ) != 0:

        tagger = load(args.model, args.dictionary)

        # check if mimickx provided after training
        if args.mimickx_model:
            tagger.mimickx_model_path = args.mimickx_model
            tagger.mimickx_model = load_model(args.mimickx_model)

        stdout = sys.stdout
        # One file per test ...
        if args.test:
            test = SeqData(args.test, raw=args.raw) # read in all test data

            for i, test_file in enumerate(args.test): # expect them in same order
                if args.output is not None:
                    sys.stdout = codecs.open(args.output + ".task{}".format(i), 'w', encoding='utf-8')

                start_testing = time.time()

                print('\nTesting task{}'.format(i),file=sys.stderr)
                print('*******\n',file=sys.stderr)
                correct, total = tagger.evaluate(test, "task{}".format(i),
                                                 output_predictions=args.output,
                                                 output_confidences=args.output_confidences, raw=args.raw,
                                                 unk_tag=None)
                if not args.raw:
                    print("\nTask{} test accuracy on {} items: {:.4f}".format(i, i+1, correct/total),file=sys.stderr)
                print(("Done. Took {0:.2f} seconds in total (testing took {1:.2f} seconds).".format(time.time()-start,
                                                                                                    time.time()-start_testing)),file=sys.stderr)
                sys.stdout = stdout
    if args.train:
        print("Info: biLSTM\n\t"+"\n\t".join(["{}: {}".format(a,v) for a, v in vars(args).items()
                                          if a not in ["train","test","dev","pred_layer"]]))
    else:
        # print less when only testing, as not all train params are stored explicitly
        print("Info: biLSTM\n\t" + "\n\t".join(["{}: {}".format(a, v) for a, v in vars(args).items()
                                                if a not in ["train", "test", "dev", "pred_layer",
                                                             "initializer","ac","word_dropout_rate",
                                                             "patience","sigma","disable_backprob_embed",
                                                             "trainer", "dynet_seed", "dynet_mem","iters"]]))

    tagger = load(args.model, args.dictionary)

    if args.save_embeds:
        tagger.save_embeds(args.save_embeds)

    if args.save_lexembeds:
        tagger.save_lex_embeds(args.save_lexembeds)

    if args.save_cwembeds:
        tagger.save_cw_embeds(args.save_cwembeds)

    if args.save_lwembeds:
        tagger.save_lw_embeds(args.save_lwembeds)
    
    if args.transition_matrix:
        tagger.save_transition_matrix(args.transition_matrix)
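The same entry point also drives the multi-task setup: with several --train files each file becomes its own task, and --pred_layer must then be supplied, as the check right after parse_args() enforces. A minimal sketch under those assumptions, again with placeholder paths:

# Hypothetical multi-task invocation: two placeholder task files, predicted at
# layers 1 and 2 of a two-layer stack (--pred_layer is required for MTL).
import sys
sys.argv = ["structbilty.py",
            "--train", "data/pos.conllu", "data/chunk.conllu",
            "--pred_layer", "1", "2",
            "--h_layers", "2",
            "--model", "models/multitask"]
main()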
Code Example #3
File: bilty.py  Project: sankexin/bilstm-aux
def main():
    parser = argparse.ArgumentParser(description="""Run the NN tagger""")
    parser.add_argument("--train", nargs='*', help="train folder for each task") # allow multiple train files, each asociated with a task = position in the list
    parser.add_argument("--pred_layer", nargs='*', help="layer of predictons for each task", required=True) # for each task the layer on which it is predicted (default 1)
    parser.add_argument("--model", help="load model from file", required=False)
    parser.add_argument("--iters", help="training iterations [default: 30]", required=False,type=int,default=30)
    parser.add_argument("--in_dim", help="input dimension [default: 64] (like Polyglot embeds)", required=False,type=int,default=64)
    parser.add_argument("--c_in_dim", help="input dimension for character embeddings [default: 100]", required=False,type=int,default=100)
    parser.add_argument("--h_dim", help="hidden dimension [default: 100]", required=False,type=int,default=100)
    parser.add_argument("--h_layers", help="number of stacked LSTMs [default: 1 = no stacking]", required=False,type=int,default=1)
    parser.add_argument("--test", nargs='*', help="test file(s)", required=False) # should be in the same order/task as train
    parser.add_argument("--raw", help="if test file is in raw format (one sentence per line)", required=False, action="store_true", default=False)
    parser.add_argument("--dev", help="dev file(s)", required=False) 
    parser.add_argument("--output", help="output predictions to file", required=False,default=None)
    parser.add_argument("--save", help="save model to file (appends .model as well as .pickle)", required=True,default=None)
    parser.add_argument("--embeds", help="word embeddings file", required=False, default=None)
    parser.add_argument("--sigma", help="noise sigma", required=False, default=0.2, type=float)
    parser.add_argument("--ac", help="activation function [rectify, tanh, ...]", default="tanh", choices=ACTIVATION_MAP.keys())
    parser.add_argument("--mlp", help="use MLP layer of this dimension [default 0=disabled]", required=False, default=0, type=int)
    parser.add_argument("--ac-mlp", help="activation function for MLP (if used) [rectify, tanh, ...]", default="rectify", choices=ACTIVATION_MAP.keys())
    parser.add_argument("--trainer", help="trainer [default: sgd]", required=False, choices=TRAINER_MAP.keys(), default="sgd")
    parser.add_argument("--learning-rate", help="learning rate [0: use default]", default=0, type=float) # see: http://dynet.readthedocs.io/en/latest/optimizers.html
    parser.add_argument("--patience", help="patience [default: 0=not used], requires specification of --dev and model path --save", required=False, default=-1, type=int)
    parser.add_argument("--log-losses", help="log loss (for each task if multiple active)", required=False, action="store_true", default=False)
    parser.add_argument("--word-dropout-rate", help="word dropout rate [default: 0.25], if 0=disabled, recommended: 0.25 (Kipperwasser & Goldberg, 2016)", required=False, default=0.25, type=float)

    parser.add_argument("--dynet-seed", help="random seed for dynet (needs to be first argument!)", required=False, type=int)
    parser.add_argument("--dynet-mem", help="memory for dynet (needs to be first argument!)", required=False, type=int)
    parser.add_argument("--dynet-gpus", help="1 for GPU usage", default=0, type=int) # warning: non-deterministic results on GPU https://github.com/clab/dynet/issues/399
    parser.add_argument("--dynet-autobatch", help="if 1 enable autobatching", default=0, type=int)
    parser.add_argument("--minibatch-size", help="size of minibatch for autobatching (1=disabled)", default=1, type=int)

    parser.add_argument("--save-embeds", help="save word embeddings file", required=False, default=None)
    parser.add_argument("--disable-backprob-embeds", help="disable backprob into embeddings (default is to update)", required=False, action="store_false", default=True)
    parser.add_argument("--initializer", help="initializer for embeddings (default: constant)", choices=INITIALIZER_MAP.keys(), default="constant")
    parser.add_argument("--builder", help="RNN builder (default: lstmc)", choices=BUILDERS.keys(), default="lstmc")

    # new parameters
    parser.add_argument('--max-vocab-size', type=int, help='the maximum size '
                                                           'of the vocabulary')

    args = parser.parse_args()

    if args.output is not None:
        assert os.path.exists(os.path.dirname(args.output))

    if args.train:
        if not args.pred_layer:
            print("--pred_layer required!")
            exit()
    
    if args.dynet_seed:
        print(">>> using seed: {} <<< ".format(args.dynet_seed), file=sys.stderr)
        np.random.seed(args.dynet_seed)
        random.seed(args.dynet_seed)

    if args.c_in_dim == 0:
        print(">>> disable character embeddings <<<", file=sys.stderr)

    if args.minibatch_size > 1:
        print(">>> using minibatch_size {} <<<".format(args.minibatch_size))

    if args.patience:
        if not args.dev or not args.save:
            print("patience requires a dev set and model path (--dev and --save)")
            exit()

    if args.save:
        # check if folder exists
        if os.path.isdir(args.save):
            if not os.path.exists(args.save):
                print("Creating {}..".format(args.save))
                os.makedirs(args.save)

    if args.output:
        if os.path.isdir(args.output):
            outdir = os.path.dirname(args.output)
            if not os.path.exists(outdir):
                os.makedirs(outdir)

    start = time.time()

    if args.model:
        print("loading model from file {}".format(args.model), file=sys.stderr)
        tagger = load(args)
    else:
        tagger = NNTagger(args.in_dim,
                          args.h_dim,
                          args.c_in_dim,
                          args.h_layers,
                          args.pred_layer,
                          embeds_file=args.embeds,
                          activation=ACTIVATION_MAP[args.ac],
                          mlp=args.mlp,
                          activation_mlp=ACTIVATION_MAP[args.ac_mlp],
                          noise_sigma=args.sigma,
                          learning_algo=args.trainer,
                          learning_rate=args.learning_rate,
                          backprob_embeds=args.disable_backprob_embeds,
                          initializer=INITIALIZER_MAP[args.initializer],
                          builder=BUILDERS[args.builder],
                          max_vocab_size=args.max_vocab_size
                          )

    if args.train and len( args.train ) != 0:
        tagger.fit(args.train, args.iters,
                   dev=args.dev, word_dropout_rate=args.word_dropout_rate,
                   model_path=args.save, patience=args.patience, minibatch_size=args.minibatch_size, log_losses=args.log_losses)

        if args.save and not args.patience:  # in case patience is active it gets saved in the fit function
            save(tagger, args.save)

        if args.patience:
            # reload patience 2 model
            tagger = load(args.save)

    if args.test and len( args.test ) != 0:
        if not args.model:
            if not args.train:
                print("specify a model!")
                sys.exit()

        stdout = sys.stdout
        # One file per test ... 
        for i, test in enumerate(args.test):

            if args.output is not None:
                file_pred = args.output+".task"+str(i)
                sys.stdout = codecs.open(file_pred, 'w', encoding='utf-8')

            sys.stderr.write('\nTesting Task'+str(i)+'\n')
            sys.stderr.write('*******\n')
            test_X, test_Y, org_X, org_Y, task_labels = tagger.get_data_as_indices(test, "task"+str(i), raw=args.raw)
            correct, total = tagger.evaluate(test_X, test_Y, org_X, org_Y, task_labels,
                                             output_predictions=args.output, raw=args.raw)

            if not args.raw:
                print("\nTask%s test accuracy on %s items: %.4f" % (i, i+1, correct/total), file=sys.stderr)
            print(("Done. Took {0:.2f} seconds.".format(time.time()-start)),file=sys.stderr)
            sys.stdout = stdout
    if args.train:
        print("Info: biLSTM\n\t"+"\n\t".join(["{}: {}".format(a,v) for a, v in vars(args).items()
                                          if a not in ["train","test","dev","pred_layer"]]))
    else:
        # print less when only testing, as not all train params are stored explicitly
        print("Info: biLSTM\n\t" + "\n\t".join(["{}: {}".format(a, v) for a, v in vars(args).items()
                                                if a not in ["train", "test", "dev", "pred_layer",
                                                             "initializer","ac","word_dropout_rate",
                                                             "patience","sigma","disable_backprob_embed",
                                                             "trainer", "dynet_seed", "dynet_mem","iters"]]))

    if args.save_embeds:
        tagger.save_embeds(args.save_embeds)
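This older bilty.py variant takes the model path through --save rather than --model, and its help strings ask for the DyNet flags to be passed first. A minimal sketch with placeholder paths, assuming main() is called from within the module:

# Hypothetical bilty.py invocation; the DyNet option leads, all paths are placeholders.
import sys
sys.argv = ["bilty.py",
            "--dynet-mem", "1500",
            "--train", "data/train.conllu",
            "--dev", "data/dev.conllu",
            "--pred_layer", "1",
            "--save", "models/bilty-en"]
main()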