def __init__(self, in_dim, h_dim, c_in_dim, c_h_dim, h_layers, pred_layer,
             learning_algo="sgd", learning_rate=0, embeds_file=None,
             activation=ACTIVATION_MAP["tanh"], mlp=0,
             activation_mlp=ACTIVATION_MAP["rectify"],
             backprob_embeds=True, noise_sigma=0.1,
             w_dropout_rate=0.25, c_dropout_rate=0.25,
             initializer=INITIALIZER_MAP["glorot"], builder=BUILDERS["lstmc"],
             crf=False, viterbi_loss=False,
             mimickx_model_path=None, dictionary=None, type_constraint=False,
             lex_dim=0, embed_lex=False):
    self.w2i = {}  # word to index mapping
    self.c2i = {}  # char to index mapping
    self.w2c_cache = {}  # word to char index cache for frequent words
    self.wcount = None  # word count
    self.ccount = None  # char count
    self.task2tag2idx = {}  # need one dictionary per task
    self.pred_layer = [int(layer) for layer in pred_layer]  # at which layer to predict each task
    self.model = dynet.ParameterCollection()  # init model
    self.in_dim = in_dim
    self.h_dim = h_dim
    self.c_in_dim = c_in_dim
    self.c_h_dim = c_h_dim
    self.w_dropout_rate = w_dropout_rate
    self.c_dropout_rate = c_dropout_rate
    self.activation = activation
    self.mlp = mlp
    self.activation_mlp = activation_mlp
    self.noise_sigma = noise_sigma
    self.h_layers = h_layers
    self.predictors = {"inner": [], "output_layers_dict": {}, "task_expected_at": {}}  # the inner layers and predictors
    self.wembeds = None  # lookup: embeddings for words
    self.cembeds = None  # lookup: embeddings for characters
    self.lembeds = None  # lookup: embeddings for lexical features (optional)
    self.embeds_file = embeds_file

    trainer_algo = TRAINER_MAP[learning_algo]
    if learning_rate > 0:
        ### TODO: better handling of additional learning-specific parameters
        self.trainer = trainer_algo(self.model, learning_rate=learning_rate)
    else:
        # using default learning rate
        self.trainer = trainer_algo(self.model)

    self.backprob_embeds = backprob_embeds
    self.initializer = initializer
    self.char_rnn = None  # biRNN for character input
    self.builder = builder  # default biRNN is an LSTM
    self.crf = crf
    self.viterbi_loss = viterbi_loss

    self.mimickx_model_path = mimickx_model_path
    if mimickx_model_path:  # load
        self.mimickx_model = load_model(mimickx_model_path)

    self.dictionary = None
    self.type_constraint = type_constraint
    self.embed_lex = False
    self.l2i = {UNK: 0}  # lex feature to index mapping
    if dictionary:
        self.dictionary, self.dictionary_values = load_dict(dictionary)
        self.path_to_dictionary = dictionary
        if type_constraint:
            self.lex_dim = 0
        else:
            if embed_lex:
                self.lex_dim = lex_dim
                self.embed_lex = True
                print("Embed lexical features")
                # register property indices
                for prop in self.dictionary_values:
                    self.l2i[prop] = len(self.l2i)
            else:
                self.lex_dim = len(self.dictionary_values)  # n-hot encoding
            print("Lex_dim: {}".format(self.lex_dim), file=sys.stderr)
    else:
        self.dictionary = None
        self.path_to_dictionary = None
        self.lex_dim = 0
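# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original source): constructing a
# single-task tagger directly. The dimensions mirror the CLI defaults used in
# main() below (--in_dim 64, --h_dim 100, --c_in_dim 100, --c_h_dim 100,
# --h_layers 1); pred_layer=[1] predicts the (only) task at layer 1.
#
#   tagger = NNTagger(in_dim=64, h_dim=100, c_in_dim=100, c_h_dim=100,
#                     h_layers=1, pred_layer=[1],
#                     learning_algo="sgd", builder=BUILDERS["lstmc"])
# ---------------------------------------------------------------------------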
def main():
    parser = argparse.ArgumentParser(description="""Run the bi-LSTM tagger""",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    group_main = parser.add_argument_group('Main', 'main arguments')
    group_main.add_argument("--model", help="path to store/load model [required]", required=True)
    group_main.add_argument("--train", nargs='*', help="path to train file [if multiple files are given, activates MTL]")  # allow multiple train files, each associated with a task = position in the list
    group_main.add_argument("--dev", nargs='*', help="dev file(s)", required=False)
    group_main.add_argument("--test", nargs='*', help="test file(s) [same order as --train]", required=False)

    group_model = parser.add_argument_group('Model', 'specify model parameters')
    group_model.add_argument("--in_dim", help="input dimension", type=int, default=64)  # default Polyglot size
    group_model.add_argument("--h_dim", help="hidden dimension [default: 100]", type=int, default=100)
    group_model.add_argument("--c_in_dim", help="input dimension for character embeddings", type=int, default=100)
    group_model.add_argument("--c_h_dim", help="hidden dimension for character embeddings", type=int, default=100)
    group_model.add_argument("--h_layers", help="number of stacked LSTMs [default: 1 = no stacking]", required=False, type=int, default=1)
    group_model.add_argument("--pred_layer", nargs='*', help="predict task at this layer [default: last layer]", required=False)  # for each task the layer on which it is predicted (default 1)
    group_model.add_argument("--embeds", help="word embeddings file", required=False, default=None)
    group_model.add_argument("--crf", help="use CRF instead of local decoding", default=False, action="store_true")
    group_model.add_argument("--viterbi-loss", help="use Viterbi loss training (only active if --crf is on)", action="store_true", default=False)
    group_model.add_argument("--transition-matrix", help="store transition matrix from CRF")
    group_model.add_argument("--builder", help="RNN builder (default: lstmc)", choices=BUILDERS.keys(), default="lstmc")
    group_model.add_argument("--mlp", help="add additional MLP layer of this dimension [default 0=disabled]", default=0, type=int)
    group_model.add_argument("--ac-mlp", help="activation function for optional MLP layer [rectify, tanh, ...] (default: tanh)", default="tanh", choices=ACTIVATION_MAP.keys())
    group_model.add_argument("--ac", help="activation function between hidden layers [rectify, tanh, ...]", default="tanh", choices=ACTIVATION_MAP.keys())

    group_input = parser.add_argument_group('Input', 'specific input options')
    group_input.add_argument("--raw", help="expects raw text input (one sentence per line)", required=False, action="store_true", default=False)

    group_output = parser.add_argument_group('Output', 'specific output options')
    group_output.add_argument("--dictionary", help="use dictionary as additional features or as type constraints (with --type-constraint)", default=None)
    group_output.add_argument("--type-constraint", help="use dictionary as type constraints", default=False, action="store_true")
    group_output.add_argument("--embed-lex", help="embed lexicon (dictionary) features", default=False, action="store_true")
    group_output.add_argument("--lex-dim", help="input dimension for lexical features", default=0, type=int)
    group_output.add_argument("--output", help="output predictions to file [word|gold|pred]", default=None)
    group_output.add_argument("--output-confidences", help="output tag confidences", action="store_true", default=False)
    group_output.add_argument("--save-embeds", help="save word embeddings to file", required=False, default=None)
    group_output.add_argument("--save-lexembeds", help="save lexicon embeddings to file", required=False, default=None)
    group_output.add_argument("--save-cwembeds", help="save character-based word-embeddings to file", required=False, default=None)
    group_output.add_argument("--save-lwembeds", help="save lexicon-based word-embeddings to file", required=False, default=None)
    group_output.add_argument("--mimickx-model", help="use mimickx model for OOVs", required=False, default=None, type=str)

    group_opt = parser.add_argument_group('Optimizer', 'specify training parameters')
    group_opt.add_argument("--iters", help="training iterations", type=int, default=20)
    group_opt.add_argument("--sigma", help="sigma of Gaussian noise", default=0.2, type=float)
    group_opt.add_argument("--trainer", help="trainer [default: sgd]", choices=TRAINER_MAP.keys(), default="sgd")
    group_opt.add_argument("--learning-rate", help="learning rate [0: use default]", default=0, type=float)  # see: http://dynet.readthedocs.io/en/latest/optimizers.html
    group_opt.add_argument("--patience", help="patience [default: 0=not used], requires specification of --dev and a model path (--model)", required=False, default=0, type=int)
    group_opt.add_argument("--log-losses", help="log loss (for each task if multiple active)", required=False, action="store_true", default=False)
    group_opt.add_argument("--word-dropout-rate", help="word dropout rate [default: 0.25], 0=disabled, recommended: 0.25 (Kiperwasser & Goldberg, 2016)", required=False, default=0.25, type=float)
    group_opt.add_argument("--char-dropout-rate", help="char dropout rate [default: 0=disabled]", required=False, default=0.0, type=float)
    group_opt.add_argument("--disable-backprob-embeds", help="disable backprop into embeddings (default is to update)", required=False, action="store_false", default=True)
    group_opt.add_argument("--initializer", help="initializer for embeddings (default: constant)", choices=INITIALIZER_MAP.keys(), default="constant")

    group_dynet = parser.add_argument_group('DyNet', 'DyNet parameters')
    group_dynet.add_argument("--seed", help="random seed (also for DyNet)", required=False, type=int)
    group_dynet.add_argument("--dynet-mem", help="memory for DyNet", required=False, type=int)
    group_dynet.add_argument("--dynet-gpus", help="1 for GPU usage", default=0, type=int)  # warning: non-deterministic results on GPU https://github.com/clab/dynet/issues/399
    group_dynet.add_argument("--dynet-autobatch", help="if 1, enable autobatching", default=0, type=int)
    group_dynet.add_argument("--minibatch-size", help="size of minibatch for autobatching (1=disabled)", default=1, type=int)

    try:
        args = parser.parse_args()
    except:
        parser.print_help()
        exit()

    if args.train:
        if len(args.train) > 1:
            if not args.pred_layer:
                print("--pred_layer required!")
                exit()
        elif len(args.train) == 1 and not args.pred_layer:
            args.pred_layer = [args.h_layers]  # assumes h_layers is 1

    if args.c_in_dim == 0:
        print(">>> disable character embeddings <<<")

    if args.minibatch_size > 1:
        print(">>> using minibatch_size {} <<<".format(args.minibatch_size))

    if args.viterbi_loss:
        if not args.crf:
            print("--crf (global decoding) needs to be active when --viterbi-loss is used")
            exit()
    if args.crf:
        if args.viterbi_loss:
            print(">>> using global decoding (Viterbi loss) <<<")
        else:
            print(">>> using global decoding (CRF, neg-log loss) <<<")

    if args.patience:
        if not args.dev or not args.model:
            print("patience requires a dev set and model path (--dev and --model)")
            exit()

    # check if --model folder exists
    if args.model:
        if os.path.isdir(args.model):
            if not os.path.exists(args.model):
                print("Creating {}..".format(args.model))
                os.makedirs(args.model)
        elif os.path.isdir(os.path.dirname(args.model)) and not os.path.exists(os.path.dirname(args.model)):
            print("Creating {}..".format(os.path.dirname(args.model)))
            os.makedirs(os.path.dirname(args.model))

    if args.output:
        if os.path.isdir(os.path.dirname(args.output)) and not os.path.exists(os.path.dirname(args.output)):
            os.makedirs(os.path.dirname(args.output))

    if not args.seed:
        ## set seed
        seed = random.randint(1, MAX_SEED)
    else:
        seed = args.seed

    print(">>> using seed: {} <<<".format(seed))
    np.random.seed(seed)
    random.seed(seed)
    init_dynet(seed)

    if args.mimickx_model:
        from mimickx import Mimickx, load_model  # make sure PYTHONPATH is set
        print(">>> Loading mimickx model {} <<<".format(args.mimickx_model))

    model_path = args.model

    start = time.time()

    if args.train and len(args.train) != 0:
        tagger = NNTagger(args.in_dim,
                          args.h_dim,
                          args.c_in_dim,
                          args.c_h_dim,
                          args.h_layers,
                          args.pred_layer,
                          embeds_file=args.embeds,
                          w_dropout_rate=args.word_dropout_rate,
                          c_dropout_rate=args.char_dropout_rate,
                          activation=ACTIVATION_MAP[args.ac],
                          mlp=args.mlp,
                          activation_mlp=ACTIVATION_MAP[args.ac_mlp],
                          noise_sigma=args.sigma,
                          learning_algo=args.trainer,
                          learning_rate=args.learning_rate,
                          backprob_embeds=args.disable_backprob_embeds,
                          initializer=INITIALIZER_MAP[args.initializer],
                          builder=BUILDERS[args.builder],
                          crf=args.crf,
                          mimickx_model_path=args.mimickx_model,
                          dictionary=args.dictionary,
                          type_constraint=args.type_constraint,
                          lex_dim=args.lex_dim,
                          embed_lex=args.embed_lex)

        dev = None
        train = SeqData(args.train)
        if args.dev:
            dev = SeqData(args.dev)

        tagger.fit(train,
                   args.iters,
                   dev=dev,
                   model_path=model_path,
                   patience=args.patience,
                   minibatch_size=args.minibatch_size,
                   log_losses=args.log_losses)

        if not args.dev and not args.patience:  # in case patience is active, it gets saved in the fit function
            save(tagger, model_path)

    if args.test and len(args.test) != 0:
        tagger = load(args.model, args.dictionary)

        # check if mimickx provided after training
        if args.mimickx_model:
            tagger.mimickx_model_path = args.mimickx_model
            tagger.mimickx_model = load_model(args.mimickx_model)

        stdout = sys.stdout
        # One file per test ...
        if args.test:
            test = SeqData(args.test, raw=args.raw)  # read in all test data
            for i, test_file in enumerate(args.test):  # expect them in same order
                if args.output is not None:
                    sys.stdout = codecs.open(args.output + ".task{}".format(i), 'w', encoding='utf-8')

                start_testing = time.time()

                print('\nTesting task{}'.format(i), file=sys.stderr)
                print('*******\n', file=sys.stderr)
                correct, total = tagger.evaluate(test, "task{}".format(i),
                                                 output_predictions=args.output,
                                                 output_confidences=args.output_confidences,
                                                 raw=args.raw,
                                                 unk_tag=None)
                if not args.raw:
                    print("\nTask{} test accuracy on {} items: {:.4f}".format(i, i + 1, correct / total), file=sys.stderr)
                print("Done. Took {0:.2f} seconds in total (testing took {1:.2f} seconds).".format(
                    time.time() - start, time.time() - start_testing), file=sys.stderr)
            sys.stdout = stdout

    if args.train:
        print("Info: biLSTM\n\t" + "\n\t".join(["{}: {}".format(a, v) for a, v in vars(args).items()
                                                if a not in ["train", "test", "dev", "pred_layer"]]))
    else:
        # print less when only testing, as not all train params are stored explicitly
        print("Info: biLSTM\n\t" + "\n\t".join(["{}: {}".format(a, v) for a, v in vars(args).items()
                                                if a not in ["train", "test", "dev", "pred_layer",
                                                             "initializer", "ac", "word_dropout_rate",
                                                             "patience", "sigma", "disable_backprob_embed",
                                                             "trainer", "dynet_seed", "dynet_mem", "iters"]]))
        tagger = load(args.model, args.dictionary)

    if args.save_embeds:
        tagger.save_embeds(args.save_embeds)

    if args.save_lexembeds:
        tagger.save_lex_embeds(args.save_lexembeds)

    if args.save_cwembeds:
        tagger.save_cw_embeds(args.save_cwembeds)

    if args.save_lwembeds:
        tagger.save_lw_embeds(args.save_lwembeds)

    if args.transition_matrix:
        tagger.save_transition_matrix(args.transition_matrix)
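# ---------------------------------------------------------------------------
# Example invocation (illustrative; the script name and file paths are
# placeholders, not part of the original source):
#
#   python tagger.py --model /tmp/pos_model \
#       --train data/train.conll --dev data/dev.conll --test data/test.conll \
#       --iters 20 --pred_layer 1 --embeds embeddings.vec
#
# Passing several files to --train/--test (with one --pred_layer entry per
# task) activates multi-task learning, as described in the --train help above.
# ---------------------------------------------------------------------------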