def main():
    parser = argparse.ArgumentParser(description="""Run the bi-LSTM tagger""",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    group_main = parser.add_argument_group('Main', 'main arguments')
    group_main.add_argument("--model", help="path to store/load model [required]", required=True)
    group_main.add_argument("--train", nargs='*',
                            help="path to train file [if multiple files are given activates MTL]")  # allow multiple train files, each associated with a task = position in the list
    group_main.add_argument("--dev", nargs='*', help="dev file(s)", required=False)
    group_main.add_argument("--test", nargs='*', help="test file(s) [same order as --train]", required=False)

    group_model = parser.add_argument_group('Model', 'specify model parameters')
    group_model.add_argument("--in_dim", help="input dimension", type=int, default=64)  # default Polyglot size
    group_model.add_argument("--h_dim", help="hidden dimension [default: 100]", type=int, default=100)
    group_model.add_argument("--c_in_dim", help="input dimension for character embeddings", type=int, default=100)
    group_model.add_argument("--c_h_dim", help="hidden dimension for character embeddings", type=int, default=100)
    group_model.add_argument("--h_layers", help="number of stacked LSTMs [default: 1 = no stacking]",
                             required=False, type=int, default=1)
    group_model.add_argument("--pred_layer", nargs='*', help="predict task at this layer [default: last layer]",
                             required=False)  # for each task the layer on which it is predicted (default 1)
    group_model.add_argument("--embeds", help="word embeddings file", required=False, default=None)
    group_model.add_argument("--crf", help="use CRF instead of local decoding", default=False, action="store_true")
    group_model.add_argument("--viterbi-loss", help="use Viterbi loss training (only active if --crf is on)",
                             action="store_true", default=False)
    group_model.add_argument("--transition-matrix", help="store transition matrix from CRF")
    group_model.add_argument("--builder", help="RNN builder (default: lstmc)",
                             choices=BUILDERS.keys(), default="lstmc")
    group_model.add_argument("--mlp", help="add additional MLP layer of this dimension [default 0=disabled]",
                             default=0, type=int)
    group_model.add_argument("--ac-mlp",
                             help="activation function for optional MLP layer [rectify, tanh, ...] (default: tanh)",
                             default="tanh", choices=ACTIVATION_MAP.keys())
    group_model.add_argument("--ac", help="activation function between hidden layers [rectify, tanh, ...]",
                             default="tanh", choices=ACTIVATION_MAP.keys())

    group_input = parser.add_argument_group('Input', 'specific input options')
    group_input.add_argument("--raw", help="expects raw text input (one sentence per line)",
                             required=False, action="store_true", default=False)

    group_output = parser.add_argument_group('Output', 'specific output options')
    group_output.add_argument("--dictionary",
                              help="use dictionary as additional features or type constraints (with --type-constraint)",
                              default=None)
    group_output.add_argument("--type-constraint", help="use dictionary as type constraints",
                              default=False, action="store_true")
    group_output.add_argument("--embed-lex", help="use dictionary as lexicon embeddings",
                              default=False, action="store_true")
    group_output.add_argument("--lex-dim", help="input dimension for lexical features", default=0, type=int)
    group_output.add_argument("--output", help="output predictions to file [word|gold|pred]", default=None)
    group_output.add_argument("--output-confidences", help="output tag confidences",
                              action="store_true", default=False)
    group_output.add_argument("--save-embeds", help="save word embeddings to file", required=False, default=None)
    group_output.add_argument("--save-lexembeds", help="save lexicon embeddings to file", required=False, default=None)
    group_output.add_argument("--save-cwembeds", help="save character-based word embeddings to file",
                              required=False, default=None)
    group_output.add_argument("--save-lwembeds", help="save lexicon-based word embeddings to file",
                              required=False, default=None)
    group_output.add_argument("--mimickx-model", help="use mimickx model for OOVs",
                              required=False, default=None, type=str)

    group_opt = parser.add_argument_group('Optimizer', 'specify training parameters')
    group_opt.add_argument("--iters", help="training iterations", type=int, default=20)
    group_opt.add_argument("--sigma", help="sigma of Gaussian noise", default=0.2, type=float)
    group_opt.add_argument("--trainer", help="trainer [default: sgd]", choices=TRAINER_MAP.keys(), default="sgd")
    group_opt.add_argument("--learning-rate", help="learning rate [0: use default]", default=0,
                           type=float)  # see: http://dynet.readthedocs.io/en/latest/optimizers.html
    group_opt.add_argument("--patience",
                           help="patience [default: 0=not used], requires specification of --dev and model path --save",
                           required=False, default=0, type=int)
    group_opt.add_argument("--log-losses", help="log loss (for each task if multiple active)",
                           required=False, action="store_true", default=False)
    group_opt.add_argument("--word-dropout-rate",
                           help="word dropout rate [default: 0.25], if 0=disabled, recommended: 0.25 (Kiperwasser & Goldberg, 2016)",
                           required=False, default=0.25, type=float)
    group_opt.add_argument("--char-dropout-rate", help="char dropout rate [default: 0=disabled]",
                           required=False, default=0.0, type=float)
    group_opt.add_argument("--disable-backprob-embeds",
                           help="disable backprop into embeddings (default is to update)",
                           required=False, action="store_false", default=True)
    group_opt.add_argument("--initializer", help="initializer for embeddings (default: constant)",
                           choices=INITIALIZER_MAP.keys(), default="constant")

    group_dynet = parser.add_argument_group('DyNet', 'DyNet parameters')
    group_dynet.add_argument("--seed", help="random seed (also for DyNet)", required=False, type=int)
    group_dynet.add_argument("--dynet-mem", help="memory for DyNet", required=False, type=int)
    group_dynet.add_argument("--dynet-gpus", help="1 for GPU usage", default=0,
                             type=int)  # warning: non-deterministic results on GPU https://github.com/clab/dynet/issues/399
    group_dynet.add_argument("--dynet-autobatch", help="if 1 enable autobatching", default=0, type=int)
    group_dynet.add_argument("--minibatch-size", help="size of minibatch for autobatching (1=disabled)",
                             default=1, type=int)

    try:
        args = parser.parse_args()
    except:
        parser.print_help()
        exit()

    if args.train:
        if len(args.train) > 1:
            if not args.pred_layer:
                print("--pred_layer required!")
                exit()
        elif len(args.train) == 1 and not args.pred_layer:
            args.pred_layer = [args.h_layers]  # assumes h_layers is 1

    if args.c_in_dim == 0:
        print(">>> disable character embeddings <<<")

    if args.minibatch_size > 1:
        print(">>> using minibatch_size {} <<<".format(args.minibatch_size))

    if args.viterbi_loss:
        if not args.crf:
            print("--crf (global decoding) needs to be active when --viterbi is used")
            exit()
    if args.crf:
        if args.viterbi_loss:
            print(">>> using global decoding (Viterbi loss) <<<")
        else:
            print(">>> using global decoding (CRF, neg-log loss) <<<")

    if args.patience:
        if not args.dev or not args.model:
            print("patience requires a dev set and model path (--dev and --model)")
            exit()

    # check if --model folder exists
    if args.model:
        if os.path.isdir(args.model):
            if not os.path.exists(args.model):
                print("Creating {}..".format(args.model))
                os.makedirs(args.model)
        elif os.path.isdir(os.path.dirname(args.model)) and not os.path.exists(os.path.dirname(args.model)):
            print("Creating {}..".format(os.path.dirname(args.model)))
            os.makedirs(os.path.dirname(args.model))

    if args.output:
        if os.path.isdir(os.path.dirname(args.output)) and not os.path.exists(os.path.dirname(args.output)):
            os.makedirs(os.path.dirname(args.output))

    if not args.seed:  ## set seed
        seed = random.randint(1, MAX_SEED)
    else:
        seed = args.seed

    print(">>> using seed: {} <<< ".format(seed))
    np.random.seed(seed)
    random.seed(seed)
    init_dynet(seed)

    if args.mimickx_model:
        from mimickx import Mimickx, load_model  # make sure PYTHONPATH is set
        print(">>> Loading mimickx model {} <<<".format(args.mimickx_model))

    model_path = args.model

    start = time.time()

    if args.train and len(args.train) != 0:
        tagger = NNTagger(args.in_dim,
                          args.h_dim,
                          args.c_in_dim,
                          args.c_h_dim,
                          args.h_layers,
                          args.pred_layer,
                          embeds_file=args.embeds,
                          w_dropout_rate=args.word_dropout_rate,
                          c_dropout_rate=args.char_dropout_rate,
                          activation=ACTIVATION_MAP[args.ac],
                          mlp=args.mlp,
                          activation_mlp=ACTIVATION_MAP[args.ac_mlp],
                          noise_sigma=args.sigma,
                          learning_algo=args.trainer,
                          learning_rate=args.learning_rate,
                          backprob_embeds=args.disable_backprob_embeds,
                          initializer=INITIALIZER_MAP[args.initializer],
                          builder=BUILDERS[args.builder],
                          crf=args.crf,
                          mimickx_model_path=args.mimickx_model,
                          dictionary=args.dictionary,
                          type_constraint=args.type_constraint,
                          lex_dim=args.lex_dim,
                          embed_lex=args.embed_lex)

        dev = None
        train = SeqData(args.train)
        if args.dev:
            dev = SeqData(args.dev)

        tagger.fit(train, args.iters,
                   dev=dev,
                   model_path=model_path,
                   patience=args.patience,
                   minibatch_size=args.minibatch_size,
                   log_losses=args.log_losses)

        if not args.dev and not args.patience:
            # in case patience is active the model gets saved in the fit function
            save(tagger, model_path)

    if args.test and len(args.test) != 0:
        tagger = load(args.model, args.dictionary)

        # check if a mimickx model was provided after training
        if args.mimickx_model:
            tagger.mimickx_model_path = args.mimickx_model
            tagger.mimickx_model = load_model(args.mimickx_model)

        stdout = sys.stdout
        # one output file per test file ...
        if args.test:
            test = SeqData(args.test, raw=args.raw)  # read in all test data
            for i, test_file in enumerate(args.test):  # expect them in same order as --train
                if args.output is not None:
                    sys.stdout = codecs.open(args.output + ".task{}".format(i), 'w', encoding='utf-8')

                start_testing = time.time()

                print('\nTesting task{}'.format(i), file=sys.stderr)
                print('*******\n', file=sys.stderr)
                correct, total = tagger.evaluate(test, "task{}".format(i),
                                                 output_predictions=args.output,
                                                 output_confidences=args.output_confidences,
                                                 raw=args.raw,
                                                 unk_tag=None)
                if not args.raw:
                    print("\nTask{} test accuracy on {} items: {:.4f}".format(i, total, correct / total),
                          file=sys.stderr)
                print("Done. Took {0:.2f} seconds in total (testing took {1:.2f} seconds).".format(
                    time.time() - start, time.time() - start_testing), file=sys.stderr)
                sys.stdout = stdout

    if args.train:
        print("Info: biLSTM\n\t" + "\n\t".join(
            ["{}: {}".format(a, v) for a, v in vars(args).items()
             if a not in ["train", "test", "dev", "pred_layer"]]))
    else:
        # print less when only testing, as not all train params are stored explicitly
        print("Info: biLSTM\n\t" + "\n\t".join(
            ["{}: {}".format(a, v) for a, v in vars(args).items()
             if a not in ["train", "test", "dev", "pred_layer", "initializer", "ac",
                          "word_dropout_rate", "patience", "sigma", "disable_backprob_embeds",
                          "trainer", "dynet_seed", "dynet_mem", "iters"]]))
        tagger = load(args.model, args.dictionary)

    if args.save_embeds:
        tagger.save_embeds(args.save_embeds)

    if args.save_lexembeds:
        tagger.save_lex_embeds(args.save_lexembeds)

    if args.save_cwembeds:
        tagger.save_cw_embeds(args.save_cwembeds)

    if args.save_lwembeds:
        tagger.save_lw_embeds(args.save_lwembeds)

    if args.transition_matrix:
        tagger.save_transition_matrix(args.transition_matrix)
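# Example invocation (illustrative sketch only; the script name, data paths and
# embeddings file below are placeholders, not part of the code above):
#
#   python bilty.py --model /tmp/en-pos-model \
#          --train data/en-train.conll --dev data/en-dev.conll --test data/en-test.conll \
#          --iters 20 --pred_layer 1 --embeds embeds/en.polyglot.txt
#
# With a single --train file, --pred_layer may be omitted; it then defaults to
# [--h_layers], i.e. prediction happens at the topmost LSTM layer.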
def main():
    parser = argparse.ArgumentParser(description="""Run the NN tagger""")
    parser.add_argument("--train", nargs='*',
                        help="train folder for each task")  # allow multiple train files, each associated with a task = position in the list
    parser.add_argument("--pred_layer", nargs='*', help="layer of predictions for each task",
                        required=True)  # for each task the layer on which it is predicted (default 1)
    parser.add_argument("--model", help="load model from file", required=False)
    parser.add_argument("--iters", help="training iterations [default: 30]", required=False, type=int, default=30)
    parser.add_argument("--in_dim", help="input dimension [default: 64] (like Polyglot embeds)",
                        required=False, type=int, default=64)
    parser.add_argument("--c_in_dim", help="input dimension for character embeddings [default: 100]",
                        required=False, type=int, default=100)
    parser.add_argument("--h_dim", help="hidden dimension [default: 100]", required=False, type=int, default=100)
    parser.add_argument("--h_layers", help="number of stacked LSTMs [default: 1 = no stacking]",
                        required=False, type=int, default=1)
    parser.add_argument("--test", nargs='*', help="test file(s)",
                        required=False)  # should be in the same order/task as train
    parser.add_argument("--raw", help="if test file is in raw format (one sentence per line)",
                        required=False, action="store_true", default=False)
    parser.add_argument("--dev", help="dev file(s)", required=False)
    parser.add_argument("--output", help="output predictions to file", required=False, default=None)
    parser.add_argument("--save", help="save model to file (appends .model as well as .pickle)",
                        required=True, default=None)
    parser.add_argument("--embeds", help="word embeddings file", required=False, default=None)
    parser.add_argument("--sigma", help="noise sigma", required=False, default=0.2, type=float)
    parser.add_argument("--ac", help="activation function [rectify, tanh, ...]",
                        default="tanh", choices=ACTIVATION_MAP.keys())
    parser.add_argument("--mlp", help="use MLP layer of this dimension [default 0=disabled]",
                        required=False, default=0, type=int)
    parser.add_argument("--ac-mlp", help="activation function for MLP (if used) [rectify, tanh, ...]",
                        default="rectify", choices=ACTIVATION_MAP.keys())
    parser.add_argument("--trainer", help="trainer [default: sgd]", required=False,
                        choices=TRAINER_MAP.keys(), default="sgd")
    parser.add_argument("--learning-rate", help="learning rate [0: use default]", default=0,
                        type=float)  # see: http://dynet.readthedocs.io/en/latest/optimizers.html
    parser.add_argument("--patience",
                        help="patience [default: 0=not used], requires specification of --dev and model path --save",
                        required=False, default=-1, type=int)
    parser.add_argument("--log-losses", help="log loss (for each task if multiple active)",
                        required=False, action="store_true", default=False)
    parser.add_argument("--word-dropout-rate",
                        help="word dropout rate [default: 0.25], if 0=disabled, recommended: 0.25 (Kiperwasser & Goldberg, 2016)",
                        required=False, default=0.25, type=float)
    parser.add_argument("--dynet-seed", help="random seed for dynet (needs to be first argument!)",
                        required=False, type=int)
    parser.add_argument("--dynet-mem", help="memory for dynet (needs to be first argument!)",
                        required=False, type=int)
    parser.add_argument("--dynet-gpus", help="1 for GPU usage", default=0,
                        type=int)  # warning: non-deterministic results on GPU https://github.com/clab/dynet/issues/399
    parser.add_argument("--dynet-autobatch", help="if 1 enable autobatching", default=0, type=int)
    parser.add_argument("--minibatch-size", help="size of minibatch for autobatching (1=disabled)",
                        default=1, type=int)
    parser.add_argument("--save-embeds", help="save word embeddings file", required=False, default=None)
    parser.add_argument("--disable-backprob-embeds",
                        help="disable backprop into embeddings (default is to update)",
                        required=False, action="store_false", default=True)
    parser.add_argument("--initializer", help="initializer for embeddings (default: constant)",
                        choices=INITIALIZER_MAP.keys(), default="constant")
    parser.add_argument("--builder", help="RNN builder (default: lstmc)",
                        choices=BUILDERS.keys(), default="lstmc")
    # new parameters
    parser.add_argument('--max-vocab-size', type=int, help='the maximum size of the vocabulary')

    args = parser.parse_args()

    if args.output is not None:
        assert os.path.exists(os.path.dirname(args.output))

    if args.train:
        if not args.pred_layer:
            print("--pred_layer required!")
            exit()

    if args.dynet_seed:
        print(">>> using seed: {} <<< ".format(args.dynet_seed), file=sys.stderr)
        np.random.seed(args.dynet_seed)
        random.seed(args.dynet_seed)

    if args.c_in_dim == 0:
        print(">>> disable character embeddings <<<", file=sys.stderr)

    if args.minibatch_size > 1:
        print(">>> using minibatch_size {} <<<".format(args.minibatch_size))

    if args.patience:
        if not args.dev or not args.save:
            print("patience requires a dev set and model path (--dev and --save)")
            exit()

    if args.save:
        # check if folder exists
        if os.path.isdir(args.save):
            if not os.path.exists(args.save):
                print("Creating {}..".format(args.save))
                os.makedirs(args.save)

    if args.output:
        if os.path.isdir(args.output):
            outdir = os.path.dirname(args.output)
            if not os.path.exists(outdir):
                os.makedirs(outdir)

    start = time.time()

    if args.model:
        print("loading model from file {}".format(args.model), file=sys.stderr)
        tagger = load(args)
    else:
        tagger = NNTagger(args.in_dim,
                          args.h_dim,
                          args.c_in_dim,
                          args.h_layers,
                          args.pred_layer,
                          embeds_file=args.embeds,
                          activation=ACTIVATION_MAP[args.ac],
                          mlp=args.mlp,
                          activation_mlp=ACTIVATION_MAP[args.ac_mlp],
                          noise_sigma=args.sigma,
                          learning_algo=args.trainer,
                          learning_rate=args.learning_rate,
                          backprob_embeds=args.disable_backprob_embeds,
                          initializer=INITIALIZER_MAP[args.initializer],
                          builder=BUILDERS[args.builder],
                          max_vocab_size=args.max_vocab_size)

    if args.train and len(args.train) != 0:
        tagger.fit(args.train, args.iters,
                   dev=args.dev,
                   word_dropout_rate=args.word_dropout_rate,
                   model_path=args.save,
                   patience=args.patience,
                   minibatch_size=args.minibatch_size,
                   log_losses=args.log_losses)
        if args.save and not args.patience:
            # in case patience is active the model gets saved in the fit function
            save(tagger, args.save)
        if args.patience:
            # reload the model saved by patience-based early stopping
            tagger = load(args.save)

    if args.test and len(args.test) != 0:
        if not args.model:
            if not args.train:
                print("specify a model!")
                sys.exit()

        stdout = sys.stdout
        # one output file per test file ...
        for i, test in enumerate(args.test):
            if args.output is not None:
                file_pred = args.output + ".task" + str(i)
                sys.stdout = codecs.open(file_pred, 'w', encoding='utf-8')

            sys.stderr.write('\nTesting Task' + str(i) + '\n')
            sys.stderr.write('*******\n')
            test_X, test_Y, org_X, org_Y, task_labels = tagger.get_data_as_indices(test, "task" + str(i),
                                                                                   raw=args.raw)
            correct, total = tagger.evaluate(test_X, test_Y, org_X, org_Y, task_labels,
                                             output_predictions=args.output, raw=args.raw)
            if not args.raw:
                print("\nTask%s test accuracy on %s items: %.4f" % (i, total, correct / total), file=sys.stderr)
            print(("Done. Took {0:.2f} seconds.".format(time.time() - start)), file=sys.stderr)
            sys.stdout = stdout

    if args.train:
        print("Info: biLSTM\n\t" + "\n\t".join(
            ["{}: {}".format(a, v) for a, v in vars(args).items()
             if a not in ["train", "test", "dev", "pred_layer"]]))
    else:
        # print less when only testing, as not all train params are stored explicitly
        print("Info: biLSTM\n\t" + "\n\t".join(
            ["{}: {}".format(a, v) for a, v in vars(args).items()
             if a not in ["train", "test", "dev", "pred_layer", "initializer", "ac",
                          "word_dropout_rate", "patience", "sigma", "disable_backprob_embeds",
                          "trainer", "dynet_seed", "dynet_mem", "iters"]]))

    if args.save_embeds:
        tagger.save_embeds(args.save_embeds)
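# Example invocation for this older variant (illustrative sketch only; script name and
# data paths are placeholders). Here --save and --pred_layer are required, and per the
# help strings --dynet-seed / --dynet-mem need to be passed as the first arguments so
# DyNet picks them up at startup:
#
#   python bilty.py --dynet-mem 1500 --dynet-seed 113 \
#          --train data/en-train.conll --dev data/en-dev.conll --test data/en-test.conll \
#          --pred_layer 1 --iters 30 --save /tmp/en-pos-model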