def __init__(self,
             context: mx.context.Context,
             inputs: str,
             references: str,
             model: str,
             max_input_len: Optional[int] = None,
             beam_size: int = C.DEFAULT_BEAM_SIZE,
             bucket_width_source: int = 10,
             length_penalty_alpha: float = 1.0,
             length_penalty_beta: float = 0.0,
             softmax_temperature: Optional[float] = None,
             max_output_length_num_stds: int = C.DEFAULT_NUM_STD_MAX_OUTPUT_LENGTH,
             ensemble_mode: str = 'linear',
             sample_size: int = -1,
             random_seed: int = 42) -> None:
    self.context = context
    self.max_input_len = max_input_len
    self.max_output_length_num_stds = max_output_length_num_stds
    self.ensemble_mode = ensemble_mode
    self.beam_size = beam_size
    self.batch_size = 16
    self.bucket_width_source = bucket_width_source
    self.length_penalty_alpha = length_penalty_alpha
    self.length_penalty_beta = length_penalty_beta
    self.softmax_temperature = softmax_temperature
    self.model = model

    with data_io.smart_open(inputs) as inputs_fin, data_io.smart_open(references) as references_fin:
        input_sentences = inputs_fin.readlines()
        target_sentences = references_fin.readlines()
        utils.check_condition(len(input_sentences) == len(target_sentences),
                              "Number of source and reference sentences does not match")
        if sample_size <= 0:
            sample_size = len(input_sentences)
        if sample_size < len(input_sentences):
            # Custom random number generator to guarantee the same samples across runs in order to be able to
            # compare metrics across independent runs.
            random_gen = random.Random(random_seed)
            self.input_sentences, self.target_sentences = zip(
                *random_gen.sample(list(zip(input_sentences, target_sentences)), sample_size))
        else:
            self.input_sentences, self.target_sentences = input_sentences, target_sentences

    logger.info("Created CheckpointDecoder(max_input_len=%d, beam_size=%d, model=%s, num_sentences=%d)",
                max_input_len if max_input_len is not None else -1, beam_size, model,
                len(self.input_sentences))

    with data_io.smart_open(os.path.join(self.model, C.DECODE_REF_NAME), 'w') as trg_out, \
            data_io.smart_open(os.path.join(self.model, C.DECODE_IN_NAME), 'w') as src_out:
        # Write out the (possibly sampled) reference and input sentences for later inspection.
        for sentence in self.target_sentences:
            trg_out.write(sentence)
        for sentence in self.input_sentences:
            src_out.write(sentence)
def load_siamese_cnn(options_dict): model_fn = path.join(options_dict["model_dir"], "model.pkl.gz") # Symbolic variables x1 = T.matrix("x1") x2 = T.matrix("x2") # Random number generators rng = np.random.RandomState(options_dict["rnd_seed"]) # Build model input_shape = (options_dict["batch_size"], 1, 39, 200) model = siamese.SiameseCNN( rng, x1, x2, input_shape, conv_layer_specs=options_dict["conv_layer_specs"], hidden_layer_specs=options_dict["hidden_layer_specs"], dropout_rates=None, # dropout is not performed after training ) # Load saved parameters logger.info("Reading: " + model_fn) f = data_io.smart_open(model_fn) model.load(f) f.close() return model
def load_siamese_triplets_lstm_minibatch(options_dict): model_fn = path.join(options_dict["model_dir"], "model.pkl.gz") # Symbolic variables x1 = tensor.matrix("x1", dtype=THEANOTYPE) x2 = tensor.matrix("x2", dtype=THEANOTYPE) x3 = tensor.matrix("x3", dtype=THEANOTYPE) m1 = tensor.matrix("m1", dtype=THEANOTYPE) m2 = tensor.matrix("m2", dtype=THEANOTYPE) m3 = tensor.matrix("m3", dtype=THEANOTYPE) # Random number generators rng = np.random.RandomState(options_dict["rnd_seed"]) # Build model model = siamese.SiameseTripleBatchLSTM( rng, x1, x2, x3, m1, m2, m3, n_in=39, n_hiddens=options_dict["n_hiddens"], output_type=options_dict["sequence_output_type"]) # Load saved parameters logger.info("Reading: " + model_fn) f = data_io.smart_open(model_fn) model.load(f) f.close() return model
def load_siamese_triplets_lstm_nn_minibatch(options_dict): model_fn = path.join(options_dict["model_dir"], "model.pkl.gz") # Symbolic variables x1 = tensor.matrix("x1", dtype=THEANOTYPE) x2 = tensor.matrix("x2", dtype=THEANOTYPE) x3 = tensor.matrix("x3", dtype=THEANOTYPE) m1 = tensor.matrix("m1", dtype=THEANOTYPE) m2 = tensor.matrix("m2", dtype=THEANOTYPE) m3 = tensor.matrix("m3", dtype=THEANOTYPE) # Random number generators rng = np.random.RandomState(options_dict["rnd_seed"]) # Build model model = siamese.SiameseTripleBatchLSTM(rng, x1, x2, x3, m1, m2, m3, n_in=39, n_hiddens=options_dict["n_hiddens"]) # Load saved parameters logger.info("Reading: " + model_fn) f = data_io.smart_open(model_fn) model.load(f) f.close() return model
def load_siamese_triplets_cnn(options_dict): model_fn = path.join(options_dict["model_dir"], "model.pkl.gz") # Symbolic variables x1 = T.matrix("x1") x2 = T.matrix("x2") x3 = T.matrix("x3") # Random number generators rng = np.random.RandomState(options_dict["rnd_seed"]) # Build model input_shape = (options_dict["batch_size"], 1, 39, 200) model = siamese.SiameseTripletCNN( rng, x1, x2, x3, input_shape, conv_layer_specs=options_dict["conv_layer_specs"], hidden_layer_specs=options_dict["hidden_layer_specs"], dropout_rates=None, # dropout is not performed after training ) # Load saved parameters logger.info("Reading: " + model_fn) f = data_io.smart_open(model_fn) model.load(f) f.close() return model
def load_siamese_triplets_lstm(options_dict): model_fn = path.join(options_dict["model_dir"], "model.pkl.gz") # Symbolic variables x1 = tensor.matrix("x1", dtype=THEANOTYPE) x2 = tensor.matrix("x2", dtype=THEANOTYPE) x3 = tensor.matrix("x3", dtype=THEANOTYPE) # Random number generators rng = np.random.RandomState(options_dict["rnd_seed"]) # Build model input_shape = (options_dict["batch_size"], 1, 39, 200) if options_dict.has_key("sequence_output_type"): sequence_output_type = options_dict["sequence_output_type"] else: sequence_output_type = "last" model = siamese.SiameseTripletLSTM( rng, x1, x2, x3, n_in=39, n_hiddens=options_dict["n_hiddens"], output_type=sequence_output_type) # Load saved parameters logger.info("Reading: " + model_fn) f = data_io.smart_open(model_fn) model.load(f) f.close() return model
def load_cnn(options_dict): model_fn = path.join(options_dict["model_dir"], "model.pkl.gz") # Symbolic variables x = T.matrix("x") # flattened data of shape (n_data, d_in) y = T.ivector("y") # labels # Random number generators rng = np.random.RandomState(options_dict["rnd_seed"]) # Build model input_shape = (options_dict["batch_size"], 1, 39, 200) model = cnn.CNN( rng, x, input_shape, options_dict["conv_layer_specs"], options_dict["hidden_layer_specs"], options_dict["d_out"], dropout_rates=None, # dropout is not performed after training ) # Load saved parameters logger.info("Reading: " + model_fn) f = data_io.smart_open(model_fn) model.load(f) f.close() return model
def load_mlp(options_dict): model_fn = path.join(options_dict["model_dir"], "model.pkl.gz") # Symbolic variables x = T.matrix("x") # flattened data of shape (n_data, d_in) y = T.ivector("y") # labels # Random number generators rng = np.random.RandomState(options_dict["rnd_seed"]) # Build model d_in = 39 * 200 model = mlp.MLP( rng, x, d_in, options_dict["d_out"], options_dict["hidden_layer_specs"], dropout_rates=None # dropout is not performed after training ) # Load saved parameters logger.info("Reading: " + model_fn) f = data_io.smart_open(model_fn) model.load(f) f.close() return model
def load_siamese_triplets_lstm_nn(options_dict): model_fn = path.join(options_dict["model_dir"], "model.pkl.gz") # Symbolic variables x1 = tensor.matrix("x1", dtype=THEANOTYPE) x2 = tensor.matrix("x2", dtype=THEANOTYPE) x3 = tensor.matrix("x3", dtype=THEANOTYPE) # Random number generators rng = np.random.RandomState(options_dict["rnd_seed"]) # Build model model = siamese.SiameseTripleLSTMNN( rng, x1, x2, x3, n_in=39, n_hiddens=options_dict["n_hiddens"], mlp_hidden_specs=options_dict["hidden_layer_specs"]) # Load saved parameters logger.info("Reading: " + model_fn) f = data_io.smart_open(model_fn) model.load(f) f.close() return model
def read_and_translate(translator: inference.Translator,
                       output_handler: output_handler.OutputHandler,
                       chunk_size: Optional[int],
                       source: Optional[str] = None,
                       reference: Optional[str] = None,
                       dictionary: Optional[dict] = None) -> None:
    """
    Reads from either a file or stdin and translates each line, calling the output_handler with the result.

    :param translator: Translator that will translate each line of input.
    :param output_handler: Handler that will write output to a stream.
    :param chunk_size: The size of the portion to read at a time from the input.
    :param source: Path to file which will be translated line-by-line if included; if None, stdin is used.
    :param reference: Path to reference file.
    :param dictionary: Dictionary to constrain translation.
    """
    source_data = sys.stdin if source is None else data_io.smart_open(source)
    reference_data = None if reference is None else data_io.smart_open(reference)

    batch_size = translator.batch_size
    if chunk_size is None:
        if translator.batch_size == 1:
            # No batching, therefore there is no need to read segments in chunks.
            chunk_size = C.CHUNK_SIZE_NO_BATCHING
        else:
            # Get a constant number of batches per call to Translator.translate.
            chunk_size = C.CHUNK_SIZE_PER_BATCH_SEGMENT * translator.batch_size
    else:
        if chunk_size < translator.batch_size:
            logger.warning("You specified a chunk size (%d) smaller than the batch size (%d). This will lead to "
                           "a degradation of translation speed. Consider choosing a larger chunk size.",
                           chunk_size, batch_size)

    logger.info("Translating...")
    total_time, total_lines = 0.0, 0
    for chunk, reference_chunk in itertools.zip_longest(
            grouper(source_data, chunk_size),
            grouper(reference_data, chunk_size) if reference_data is not None else [None]):
        chunk_time = translate(output_handler, chunk, translator, total_lines, reference_chunk)
        total_lines += len(chunk)
        total_time += chunk_time

    if total_lines != 0:
        logger.info("Processed %d lines in %d batches. Total time: %.4f, sec/sent: %.4f, sent/sec: %.4f",
                    total_lines, ceil(total_lines / batch_size), total_time,
                    total_time / total_lines, total_lines / total_time)
    else:
        logger.info("Processed 0 lines.")
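# Usage sketch: a minimal, hypothetical driver for read_and_translate. It builds a Translator the same
# way decode_and_evaluate below does and a plain string handler via get_output_handler below. The model
# directory, source path, beam size and batch size are illustrative assumptions, not values from this codebase.
def _example_translate_file(context, model_dir, source_path):
    models, vocab_source, vocab_target = inference.load_models(context, None, 5, 16,
                                                               [model_dir], [None])
    translator = inference.Translator(context, 'linear', 10,
                                      inference.LengthPenalty(1.0, 0.0),
                                      models, vocab_source, vocab_target)
    handler = get_output_handler(C.OUTPUT_HANDLER_TRANSLATION,
                                 output_fname=None,          # write translations to stdout
                                 sure_align_threshold=0.9)
    # chunk_size=None lets the function pick a chunk size from the translator's batch size.
    read_and_translate(translator, handler, chunk_size=None, source=source_path)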
def main(): args = check_argv() model_fn = path.join(args.model_dir, "model.pkl.gz") options_dict_fn = path.join(args.model_dir, "options_dict.pkl.gz") record_dict_fn = path.join(args.model_dir, "record_dict.pkl.gz") print "Reading:", options_dict_fn f = smart_open(options_dict_fn) options_dict = pickle.load(f) f.close() print "Reading:", record_dict_fn f = smart_open(record_dict_fn) record_dict = pickle.load(f) f.close() plotting.plot_record_dict(record_dict) model = train_mlp.load_mlp(options_dict) # Plot some filters analyze_layer = 0 W = model.layers[analyze_layer].W.get_value(borrow=True).T plot_fn = path.join(args.model_dir, "filters.layer_" + str(analyze_layer) + ".png") image = Image.fromarray( plotting.tile_images(W, image_shape=(39, 200), tile_shape=(5, 6))) print("Saving: " + plot_fn) image.save(plot_fn) plt.figure() plt.imshow(image, cmap=plt.cm.Greys_r, interpolation="nearest") analyze_layer = -1 W = model.layers[analyze_layer].W.get_value(borrow=True) plot_fn = path.join(args.model_dir, "filters.layer_" + str(analyze_layer) + ".png") image = Image.fromarray(plotting.array_to_pixels(W)) image.save(plot_fn) print("Saving: " + plot_fn) plt.figure() plt.imshow(image, cmap=plt.cm.Greys_r, interpolation="nearest") # plt.axis("off") plt.show()
def test_multisaveload(): rng = numpy.random.RandomState(0) x = tensor.matrix("x", dtype=THEANOTYPE) n_in = 3 n_hiddens = [10, 10] multi_lstm = MultiLayerLSTM(rng, x, n_in, n_hiddens, output_type="last") f0 = theano.function(inputs=[x], outputs=multi_lstm.output) save_file = data_io.smart_open("model.pkl.gz", "wb") multi_lstm.save(save_file) save_file.close() n_in1 = 4 n_hiddens1 = [11, 11] multi_lstm1 = MultiLayerLSTM(rng, x, n_in, n_hiddens, output_type="last") load_file = data_io.smart_open("model.pkl.gz", "rb") multi_lstm1.load(load_file) load_file.close() f1 = theano.function(inputs=[x], outputs=multi_lstm1.layers[0].output) n_data = 10 x0 = rng.randn(n_data, n_in).astype(THEANOTYPE) import pdb; pdb.set_trace()
def test_saveload(): rng = numpy.random.RandomState(0) x = tensor.matrix("x", dtype=THEANOTYPE) n_in = 3 n_hidden = 10 lstm = LSTM(rng, x, n_in, n_hidden, output_type="last") n_data = 10 x0 = rng.randn(n_data, n_in).astype(THEANOTYPE) f0 = theano.function(inputs=[x], outputs=lstm.output) h0 = f0(x0) save_file = data_io.smart_open("model.pkl.gz", "wb") lstm.save(save_file) save_file.close() x1 = tensor.matrix("x1", dtype=THEANOTYPE) lstm1 = LSTM(rng, x1, n_in, n_hidden, output_type="last") load_file = data_io.smart_open("model.pkl.gz", "rb") f1 = theano.function(inputs=[x1], outputs=lstm1.output) h1 = f1(x0) lstm1.load(load_file) load_file.close() h2 = f1(x0) numpy.testing.assert_array_almost_equal(h0, h2)
def build_from_paths(paths: List[str], num_words: int = 50000, min_count: int = 1) -> Dict[str, int]: """ Creates vocabulary from paths to a file in sentence-per-line format. A sentence is just a whitespace delimited list of tokens. Note that special symbols like the beginning of sentence (BOS) symbol will be added to the vocabulary. :param paths: List of paths to files with one sentence per line. :param num_words: Maximum number of words in the vocabulary. :param min_count: Minimum occurrences of words to be included in the vocabulary. :return: Word-to-id mapping. """ with ExitStack() as stack: logger.info("Building vocabulary from dataset(s): %s", paths) files = (stack.enter_context(smart_open(path)) for path in paths) return build_vocab(chain(*files), num_words, min_count)
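# Usage sketch: how build_from_paths might be called on parallel training data to build a joint
# vocabulary. The file names and the num_words/min_count values are illustrative assumptions.
def _example_build_vocab():
    # Keep at most 30000 words that occur at least twice, plus the special symbols added by build_vocab.
    vocab = build_from_paths(["train.de", "train.en"], num_words=30000, min_count=2)
    logger.info("Vocabulary size (including special symbols): %d", len(vocab))
    return vocab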
def decode_and_evaluate(self, checkpoint: Optional[int] = None, output_name: str = os.devnull) -> Dict[str, float]: """ Decodes data set and evaluates given a checkpoint. :param checkpoint: Checkpoint to load parameters from. :param output_name: Filename to write translations to. Defaults to /dev/null. :return: Mapping of metric names to scores. """ models, vocab_source, vocab_target = inference.load_models(self.context, self.max_input_len, self.beam_size, self.batch_size, [self.model], [checkpoint], softmax_temperature=self.softmax_temperature, max_output_length_num_stds=self.max_output_length_num_stds) translator = inference.Translator(self.context, self.ensemble_mode, self.bucket_width_source, inference.LengthPenalty(self.length_penalty_alpha, self.length_penalty_beta), models, vocab_source, vocab_target) trans_wall_time = 0.0 translations = [] with data_io.smart_open(output_name, 'w') as output: handler = output_handler.StringOutputHandler(output) tic = time.time() trans_inputs = [translator.make_input(i, line) for i, line in enumerate(self.input_sentences)] trans_outputs = translator.translate(trans_inputs) trans_wall_time = time.time() - tic for trans_input, trans_output in zip(trans_inputs, trans_outputs): handler.handle(trans_input, trans_output) translations.append(trans_output.translation) avg_time = trans_wall_time / len(self.input_sentences) # TODO(fhieber): eventually add more metrics (METEOR etc.) return {C.BLEU_VAL: evaluate.raw_corpus_bleu(hypotheses=translations, references=self.target_sentences, offset=0.01), C.CHRF_VAL: chrf.corpus_chrf(hypotheses=translations, references=self.target_sentences, trim_whitespaces=True), C.AVG_TIME: avg_time}
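# Usage sketch: how the metric dictionary returned by decode_and_evaluate might be consumed during
# training. `checkpoint_decoder` is assumed to be an already-constructed CheckpointDecoder instance.
def _example_checkpoint_evaluation(checkpoint_decoder, checkpoint):
    metrics = checkpoint_decoder.decode_and_evaluate(checkpoint=checkpoint)
    # Keys come from the constants module: C.BLEU_VAL, C.CHRF_VAL and C.AVG_TIME.
    logger.info("Checkpoint %s: BLEU=%.4f chrF=%.4f avg-time=%.4fs",
                checkpoint, metrics[C.BLEU_VAL], metrics[C.CHRF_VAL], metrics[C.AVG_TIME])
    return metrics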
def load_lstm_mlp(options_dict): model_fn = path.join(options_dict["model_dir"], "model.pkl.gz") # Symbolic variables x = T.matrix("x") # flattened data of shape (n_data, d_in) # Random number generators rng = np.random.RandomState(options_dict["rnd_seed"]) # Build model model = lstm.MultiLayerLSTMMLP( rng, x, 39, options_dict["d_out"], options_dict["n_hiddens"], options_dict["hidden_layer_specs"], output_type="last", prefix="lstms") # Load saved parameters logger.info("Reading: " + model_fn) f = data_io.smart_open(model_fn) model.load(f) f.close() return model
def lexicon_iterator(path: str, vocab_source: Dict[str, int], vocab_target: Dict[str, int]) -> Generator[Tuple[int, int, float], None, None]: """ Yields lines from a translation table of format: src, trg, logprob. :param path: Path to lexicon file. :param vocab_source: Source vocabulary. :param vocab_target: Target vocabulary. :return: Generator returning tuples (src_id, trg_id, prob). """ assert C.UNK_SYMBOL in vocab_source assert C.UNK_SYMBOL in vocab_target src_unk_id = vocab_source[C.UNK_SYMBOL] trg_unk_id = vocab_target[C.UNK_SYMBOL] with smart_open(path) as fin: for line in fin: src, trg, logprob = line.rstrip("\n").split("\t") prob = np.exp(float(logprob)) src_id = vocab_source.get(src, src_unk_id) trg_id = vocab_target.get(trg, trg_unk_id) yield src_id, trg_id, prob
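# Usage sketch: one way to materialise the generator above into a dense source-by-target probability
# matrix. The lexicon path and the max-aggregation of colliding entries are illustrative assumptions.
def _example_load_lexicon_matrix(path, vocab_source, vocab_target):
    lexicon = np.zeros((len(vocab_source), len(vocab_target)), dtype=np.float32)
    for src_id, trg_id, prob in lexicon_iterator(path, vocab_source, vocab_target):
        # Several out-of-vocabulary surface forms can map to the same (UNK, UNK) id pair; keep the max.
        lexicon[src_id, trg_id] = max(lexicon[src_id, trg_id], prob)
    return lexicon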
def load_siamese_triplets_convlstm_minibatch(options_dict): model_fn = path.join(options_dict["model_dir"], "model.pkl.gz") # Symbolic variables x1 = tensor.matrix("x1", dtype=THEANOTYPE) x2 = tensor.matrix("x2", dtype=THEANOTYPE) x3 = tensor.matrix("x3", dtype=THEANOTYPE) m1 = tensor.matrix("m1", dtype=THEANOTYPE) m2 = tensor.matrix("m2", dtype=THEANOTYPE) m3 = tensor.matrix("m3", dtype=THEANOTYPE) # Random number generators rng = np.random.RandomState(options_dict["rnd_seed"]) if options_dict["dropout_rates"] is not None: srng = RandomStreams(seed=options_dict["rnd_seed"]) else: srng = None # Build model input_shape = (options_dict["batch_size"], 1, 200, 39) model = siamese.SiameseTripletBatchConvLSTM( rng, x1, x2, x3, m1, m2, m3, input_shape, filter_shape=options_dict["filter_shape"], n_lstm_hiddens=options_dict["n_hiddens"], n_outputs=options_dict["embedding_dim"], output_type=options_dict["sequence_output_type"], srng=srng, dropout=options_dict["dropout_rates"], use_dropout_regularization=options_dict["use_dropout_regularization"], stabilize_activations=options_dict["stabilize_activations"] ) # Load saved parameters logger.info("Reading: " + model_fn) f = data_io.smart_open(model_fn) model.load(f) f.close() return model
def get_output_handler(output_type: str, output_fname: Optional[str], sure_align_threshold: float) -> 'OutputHandler': """ :param output_type: Type of output handler. :param output_fname: Output filename. If none sys.stdout is used. :param sure_align_threshold: Threshold to consider an alignment link as 'sure'. :raises: ValueError for unknown output_type. :return: Output handler. """ output_stream = sys.stdout if output_fname is None else data_io.smart_open(output_fname, mode='w') if output_type == C.OUTPUT_HANDLER_TRANSLATION: return StringOutputHandler(output_stream) elif output_type == C.OUTPUT_HANDLER_TRANSLATION_WITH_SCORE: return StringWithScoreOutputHandler(output_stream) elif output_type == C.OUTPUT_HANDLER_TRANSLATION_WITH_ALIGNMENTS: return StringWithAlignmentsOutputHandler(output_stream, sure_align_threshold) elif output_type == C.OUTPUT_HANDLER_TRANSLATION_WITH_ALIGNMENT_MATRIX: return StringWithAlignmentMatrixOutputHandler(output_stream) elif output_type == C.OUTPUT_HANDLER_BENCHMARK: return BenchmarkOutputHandler(output_stream) elif output_type == C.OUTPUT_HANDLER_ALIGN_PLOT: return AlignPlotHandler(plot_prefix="align" if output_fname is None else output_fname) elif output_type == C.OUTPUT_HANDLER_ALIGN_TEXT: return AlignTextHandler(sure_align_threshold) elif output_type == C.OUTPUT_HANDLER_ALIGNMENT: return AlignmentsOutputHandler(output_stream) elif output_type == C.OUTPUT_HANDLER_JOINT: return JointOutputHandler(output_stream, mode='hard') elif output_type == C.OUTPUT_HANDLER_JOINT_SOFT: return JointOutputHandler(output_stream, mode='soft') elif output_type == C.OUTPUT_HANDLER_ALIGNMENT_ONE_HOT: return StringWithAlignmentOneHotMatrixOutputHandler(output_stream) else: raise ValueError("unknown output type")
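# Usage sketch: two typical ways the factory above might be used. The output file name and the
# alignment threshold are illustrative assumptions.
def _example_output_handlers():
    # Plain translations written to stdout.
    stdout_handler = get_output_handler(C.OUTPUT_HANDLER_TRANSLATION,
                                        output_fname=None,
                                        sure_align_threshold=0.9)
    # Translations plus alignment links written to a file.
    file_handler = get_output_handler(C.OUTPUT_HANDLER_TRANSLATION_WITH_ALIGNMENTS,
                                      output_fname="translations.txt",
                                      sure_align_threshold=0.9)
    return stdout_handler, file_handler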
def train_siamese_cnn(options_dict):

    # Preliminary
    logger.info(datetime.now())

    if not path.isdir(options_dict["model_dir"]):
        os.makedirs(options_dict["model_dir"])

    if "log_to_file" in options_dict and options_dict["log_to_file"] is True:
        log_fn = path.join(options_dict["model_dir"], "log")
        print "Writing:", log_fn
        root_logger = logging.getLogger()
        if len(root_logger.handlers) > 0:
            root_logger.removeHandler(root_logger.handlers[0])  # close open file handler
        logging.basicConfig(filename=log_fn, level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.DEBUG)

    rng = np.random.RandomState(options_dict["rnd_seed"])
    if options_dict["dropout_rates"] is not None:
        srng = RandomStreams(seed=options_dict["rnd_seed"])
    else:
        srng = None

    options_dict_fn = path.join(options_dict["model_dir"], "options_dict.pkl.gz")
    logger.info("Saving options: " + options_dict_fn)
    f = data_io.smart_open(options_dict_fn, "wb")
    pickle.dump(options_dict, f, -1)
    f.close()

    logger.info("Options: " + str(options_dict))

    # Load and format data

    # Load into shared variables
    datasets = data_io.load_swbd_same_diff(rng, options_dict["data_dir"])
    train_x, train_matches_vec, train_labels = datasets[0]
    dev_x, dev_matches_vec, dev_labels = datasets[1]
    test_x, test_matches_vec, test_labels = datasets[2]

    # Flatten data
    d_in = 39*200
    train_x = train_x.reshape((-1, d_in))
    dev_x = dev_x.reshape((-1, d_in))
    test_x = test_x.reshape((-1, d_in))

    # Make batch iterators
    train_batch_iterator = BatchIteratorSameDifferent(
        rng, train_matches_vec, options_dict["batch_size"],
        n_same_pairs=options_dict["n_same_pairs"], sample_diff_every_epoch=True
        )
    validate_batch_iterator = BatchIteratorSameDifferent(
        rng, dev_matches_vec, options_dict["batch_size"],
        n_same_pairs=options_dict["n_same_pairs"], sample_diff_every_epoch=False
        )
    test_batch_iterator = BatchIteratorSameDifferent(
        rng, test_matches_vec, options_dict["batch_size"],
        n_same_pairs=options_dict["n_same_pairs"], sample_diff_every_epoch=False
        )

    # Setup model
    logger.info("Building Siamese CNN")

    # Symbolic variables
    y = T.ivector("y")  # indicates whether x1 and x2 are the same (1) or different (0)
    x1 = T.matrix("x1")
    x2 = T.matrix("x2")
    x1_indices = T.ivector("x1_indices")
    x2_indices = T.ivector("x2_indices")

    # Build model
    input_shape = (options_dict["batch_size"], 1, 39, 200)
    model = siamese.SiameseCNN(
        rng, x1, x2, input_shape,
        conv_layer_specs=options_dict["conv_layer_specs"],
        hidden_layer_specs=options_dict["hidden_layer_specs"],
        srng=srng,
        dropout_rates=options_dict["dropout_rates"],
        )
    if options_dict["loss"] == "cos_cos2":
        if options_dict["dropout_rates"] is not None:
            loss = model.dropout_loss_cos_cos2(y)
        else:
            loss = model.loss_cos_cos2(y)
        error = model.loss_cos_cos2(y)  # doesn't include regularization or dropout
    elif options_dict["loss"] == "cos_cos":
        if options_dict["dropout_rates"] is not None:
            loss = model.dropout_loss_cos_cos(y)
        else:
            loss = model.loss_cos_cos(y)
        error = model.loss_cos_cos(y)
    elif options_dict["loss"] == "cos_cos_margin":
        if options_dict["dropout_rates"] is not None:
            loss = model.dropout_loss_cos_cos_margin(y)
        else:
            loss = model.loss_cos_cos_margin(y)
        error = model.loss_cos_cos_margin(y)
    elif options_dict["loss"] == "euclidean_margin":
        if options_dict["dropout_rates"] is not None:
            loss = model.dropout_loss_euclidean_margin(y)
        else:
            loss = model.loss_euclidean_margin(y)
        error = model.loss_euclidean_margin(y)
    else:
        assert False, "Invalid loss: " + options_dict["loss"]

    # Add regularization
    if options_dict["l1_weight"] > 0. or options_dict["l2_weight"] > 0.:
        loss = loss + options_dict["l1_weight"]*model.l1 + options_dict["l2_weight"]*model.l2

    # Compile test functions
    same_distance = model.cos_same(y)  # track the distances of same and different pairs separately
    diff_distance = model.cos_diff(y)
    outputs = [error, loss, same_distance, diff_distance]
    theano_mode = theano.Mode(linker="cvm")
    test_model = theano.function(
        inputs=[x1_indices, x2_indices, y],
        outputs=outputs,
        givens={
            x1: test_x[x1_indices],
            x2: test_x[x2_indices],
            },
        mode=theano_mode,
        )
    validate_model = theano.function(
        inputs=[x1_indices, x2_indices, y],
        outputs=outputs,
        givens={
            x1: dev_x[x1_indices],
            x2: dev_x[x2_indices],
            },
        mode=theano_mode,
        )

    # Gradients and training updates
    parameters = model.parameters
    gradients = T.grad(loss, parameters)
    learning_rule = options_dict["learning_rule"]
    if learning_rule["type"] == "adadelta":
        updates = training.learning_rule_adadelta(
            parameters, gradients, learning_rule["rho"], learning_rule["epsilon"]
            )
    elif learning_rule["type"] == "momentum":
        updates = training.learning_rule_momentum(
            parameters, gradients, learning_rule["learning_rate"], learning_rule["momentum"]
            )
    else:
        assert False, "Invalid learning rule: " + learning_rule["type"]

    # Compile training function
    train_model = theano.function(
        inputs=[x1_indices, x2_indices, y],
        outputs=outputs,
        updates=updates,
        givens={
            x1: train_x[x1_indices],
            x2: train_x[x2_indices],
            },
        mode=theano_mode,
        )

    # Train model
    logger.info("Training Siamese CNN")
    record_dict_fn = path.join(options_dict["model_dir"], "record_dict.pkl.gz")
    record_dict = training.train_fixed_epochs_with_validation(
        options_dict["n_max_epochs"],
        train_model=train_model,
        train_batch_iterator=train_batch_iterator,
        validate_model=validate_model,
        validate_batch_iterator=validate_batch_iterator,
        test_model=test_model,
        test_batch_iterator=test_batch_iterator,
        save_model_func=model.save,
        save_model_fn=path.join(options_dict["model_dir"], "model.pkl.gz"),
        record_dict_fn=record_dict_fn,
        )

    # Extrinsic evaluation

    # Pass data through the model
    logger.info("Performing same-different evaluation")
    layers_output_dict = apply_layers.apply_layers(
        options_dict["model_dir"], "dev", batch_size=645
        )  # batch size covers 10965 out of 10966 tokens
    utt_ids = sorted(layers_output_dict.keys())
    embeddings = np.array([layers_output_dict[i] for i in utt_ids])
    labels = data_io.swbd_utts_to_labels(utt_ids)

    # Perform same-different
    distances = pdist(embeddings, metric="cosine")
    matches = samediff.generate_matches_array(labels)
    ap, prb = samediff.average_precision(distances[matches == True], distances[matches == False])
    logger.info("Validation average precision: " + str(ap))
    ap_fn = path.join(options_dict["model_dir"], "dev_ap.txt")
    with open(ap_fn, "w") as f:
        f.write(str(ap) + "\n")
def train_fixed_epochs_with_validation(n_epochs, train_model, train_batch_iterator,
        validate_model, validate_batch_iterator, test_model=None,
        test_batch_iterator=None, save_model_func=None, save_model_fn=None,
        record_dict_fn=None):
    """
    Train for a fixed number of epochs, using validation to decide which model to save.

    Parameters
    ----------
    train_model : Theano function
        Should take input from `train_batch_iterator` and output the training
        loss. The function can provide more than one output, which is averaged.
        This is useful for example to output both negative log likelihood (the
        model loss) and zero-one loss (the number of errors).
    train_batch_iterator : generator
        Provides the training batches.
    validate_model : Theano function
        Should take input from `validate_batch_iterator` and output the
        validation loss. The function can provide more than one output (which
        would be averaged), but for the validation only the first output will
        be used (except if `validate_extrinsic` is provided).
    validate_extrinsic : function
        Extrinsic evaluation can be performed using this function. If provided,
        validation is performed on the output of this function instead of using
        the output from `validate_model`.
    save_model_func : function
        If provided, this function is used to save the model to the file
        `save_model_fn` every time a new validation best model is found.
    save_model_fn : str
        The file to which the best model is written.
    record_dict_fn : str
        If provided, the current `record_dict` is saved to this file at the end
        of every epoch.

    Return
    ------
    record_dict : dict
        The dict key describes the statistic being tracked, while the dict
        value is a list of (epoch, statistic) tuples giving the statistic-value
        at a particular epoch.
    """

    record_dict = {}
    record_dict["train_loss"] = []          # each element is (epoch, loss)
    record_dict["validation_loss"] = []     # validation is not necessarily performed every epoch
    if test_model is not None:
        record_dict["test_loss"] = []       # and neither is testing
    # if validate_extrinsic is not None:
    #     record_dict["validation_extrinsic"] = []
    record_dict["epoch_time"] = []

    logger.info(datetime.now())

    # Training epochs
    best_validation_loss0 = np.inf
    test_loss = np.inf
    i_epoch_best = 0
    for i_epoch in xrange(n_epochs):

        # Loop over training batches, calculating the training loss for each
        # batch and updating the parameters
        start_time = timeit.default_timer()
        train_losses = [train_model(*batch) for batch in train_batch_iterator]

        # Validate the model
        validation_losses = [validate_model(*batch) for batch in validate_batch_iterator]
        validation_loss = np.mean(validation_losses, axis=0)
        logger.info(
            "Epoch " + str(i_epoch + 1) + ": "
            "validation loss: " + str(validation_loss)
            )
        record_dict["validation_loss"].append((i_epoch, validation_loss))

        if hasattr(validation_loss, "__len__"):
            validation_loss0 = validation_loss[0]
        else:
            validation_loss0 = validation_loss

        # If this is the best model, test and save
        if validation_loss0 < best_validation_loss0:

            best_validation_loss0 = validation_loss0
            i_epoch_best = i_epoch

            # Test model
            if test_model is not None:
                test_losses = [test_model(*batch) for batch in test_batch_iterator]
                test_loss = np.mean(test_losses, axis=0)
                logger.info(" Test loss: " + str(test_loss))
                record_dict["test_loss"].append((i_epoch, test_loss))

            # Write the best model
            if save_model_func is not None:
                f = smart_open(save_model_fn, "wb")
                save_model_func(f)
                f.close()

        # Training statistics for this epoch
        end_time = timeit.default_timer()
        train_loss = np.mean(train_losses, axis=0)
        epoch_time = end_time - start_time
        logger.info("Time: %f" % epoch_time + " sec, " + "training loss: " + str(train_loss))
        record_dict["epoch_time"].append((i_epoch, epoch_time))
        record_dict["train_loss"].append((i_epoch, train_loss))

        if record_dict_fn is not None:
            f = smart_open(record_dict_fn, "wb")
            pickle.dump(record_dict, f, -1)
            f.close()

    total_time = np.sum([i[1] for i in record_dict["epoch_time"]])
    logger.info("Training complete: %f min" % (total_time / 60.))
    logger.info(
        "Best validation epoch: " + str(i_epoch_best + 1) + ", "
        "best validation loss: " + str(best_validation_loss0)
        )
    if test_model is not None:
        logger.info("Test loss: " + str(test_loss))
    if save_model_func is not None:
        logger.info("Best validation model saved: " + save_model_fn)
    if record_dict_fn is not None:
        logger.info("Saved record: " + record_dict_fn)

    logger.info(datetime.now())

    return record_dict
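# Usage sketch: the expected calling convention for the fixed-epoch trainer above and the
# (epoch, value) structure of the returned record_dict. The Theano functions, iterators, model
# and file names are assumed to come from one of the training scripts in this collection;
# the epoch count is an illustrative value.
def _example_train_with_validation(train_model, train_batch_iterator,
                                   validate_model, validate_batch_iterator, model):
    record_dict = train_fixed_epochs_with_validation(
        20,
        train_model=train_model,
        train_batch_iterator=train_batch_iterator,
        validate_model=validate_model,
        validate_batch_iterator=validate_batch_iterator,
        save_model_func=model.save,
        save_model_fn="models/example/model.pkl.gz",        # placeholder path
        record_dict_fn="models/example/record_dict.pkl.gz", # placeholder path
        )
    # Each record_dict entry is a list of (epoch, statistic) tuples; the validation statistic may
    # itself be a vector of outputs, in which case the first element is the loss used for selection.
    def _first(stat):
        return stat[0] if hasattr(stat, "__len__") else stat
    best_epoch, best_loss = min(
        ((epoch, _first(stat)) for epoch, stat in record_dict["validation_loss"]),
        key=lambda t: t[1])
    return best_epoch, best_loss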
def train_early_stopping(n_train_batches, n_validation_batches, train_model,
        validate_model, test_model=None, n_test_batches=None, n_max_epochs=1000,
        n_batches_validation_frequency=None, n_patience=5000,
        patience_increase_factor=2, improvement_threshold=0.995,
        save_model_func=None, save_model_fn=None, record_dict_fn=None,
        learning_rate_update=None):
    """
    Train model using early stopping, using the provided training function.

    Parameters
    ----------
    n_train_batches : int
        Total number of training batches.
    n_validation_batches : int
        Total number of validation batches.
    train_model : Theano function
        Should take as input a batch index and output the training loss and
        error (e.g. negative log likelihood and zero-one loss).
    validate_model : Theano function
        Should take as input a batch index and output the validation loss and
        error.
    test_model : Theano function
        Should take as input a batch index and output the test loss and error.
        If not provided, testing is not performed over the training iterations.
    n_test_batches : int
        Total number of test batches.
    n_batches_validation_frequency : int
        Number of batches between calculating the validation error; if not
        provided, is set to min(n_train_batches, n_patience / 2), which means
        that at a minimum validation will be performed every epoch (i.e. every
        time after seeing `n_train_batches` batches).
    n_patience : int
        Number of minibatches to consider at a minimum before completing
        training.
    patience_increase_factor : int
        When a new validation minimum is found, the number of seen minibatches
        is multiplied by this factor to give the new minimum number of
        minibatches before stopping.
    improvement_threshold : float
        The minimum relative improvement in validation error required to
        warrant an increase in `n_patience` by `patience_increase_factor`.
    save_model_func : function
        If provided, this function is used to save the model to the file
        `save_model_fn` every time a new validation best model is found.
    save_model_fn : str
        The file to which the current model is written.
    record_dict_fn : str
        If provided, the current `record_dict` is saved to this file at the end
        of every epoch.
    learning_rate_update : Theano function
        If provided, this function is called (without any parameters) at the
        beginning of every epoch to update the learning rate.

    Return
    ------
    record_dict : dict
        The dict key describes the statistic being tracked, while the dict
        value is a list of (epoch, statistic) tuples giving the statistic-value
        at a particular epoch.
    """

    assert (save_model_func is None) or (save_model_fn is not None)
    assert (test_model is None) or (n_test_batches is not None)

    # Set default if not provided
    if n_batches_validation_frequency is None:
        n_batches_validation_frequency = min(n_train_batches, n_patience / 2)

    record_dict = {}
    record_dict["train_loss"] = []          # each element is (epoch, loss)
    record_dict["train_error"] = []
    record_dict["validation_loss"] = []     # validation is not necessarily performed every epoch
    record_dict["validation_error"] = []
    if test_model is not None:
        record_dict["test_loss"] = []       # and neither is testing
        record_dict["test_error"] = []
    record_dict["epoch_time"] = []

    # Training epochs
    i_epoch = 0
    done_looping = False
    best_validation_error = np.inf
    n_batches_best = 0
    i_epoch_best = 0
    while (i_epoch < n_max_epochs) and (not done_looping):

        train_losses = []
        train_errors = []
        start_time = timeit.default_timer()

        if learning_rate_update is not None:
            learning_rate = learning_rate_update(i_epoch)

        # Minibatches
        for i_batch in xrange(n_train_batches):

            # Calculate cost for this minibatch, updating the parameters
            minibatch_train_loss, minibatch_train_errors = train_model(i_batch)
            train_errors.append(minibatch_train_errors)
            train_losses.append(minibatch_train_loss)

            n_seen_batches = i_epoch * n_train_batches + i_batch

            # Use n_seen_batches + 1 to avoid checking very first batch
            if (n_seen_batches + 1) % n_batches_validation_frequency == 0:

                # Validate model
                validation_losses_errors = [validate_model(i) for i in xrange(n_validation_batches)]
                validation_loss = np.mean([i[0] for i in validation_losses_errors])
                validation_error = np.mean([i[1] for i in validation_losses_errors])
                logger.info(
                    "Validation: epoch %i, minibatch %i/%i, loss %f, error %.2f%%" % (
                        i_epoch + 1, i_batch + 1, n_train_batches, validation_loss,
                        validation_error * 100.
                        )
                    )
                record_dict["validation_loss"].append((i_epoch, validation_loss))
                record_dict["validation_error"].append((i_epoch, validation_error))

                # Check validation to see if we have new best model
                if validation_error < best_validation_error:
                    if validation_error < best_validation_error * improvement_threshold:
                        n_patience = max(n_patience, n_seen_batches * patience_increase_factor)
                    best_validation_error = validation_error
                    n_batches_best = n_seen_batches
                    i_epoch_best = i_epoch

                    if test_model is not None:
                        test_losses_errors = [test_model(i) for i in xrange(n_test_batches)]
                        test_loss = np.mean([i[0] for i in test_losses_errors])
                        test_error = np.mean([i[1] for i in test_losses_errors])
                        logger.info("\tTest: loss %f, error %.2f%%" % (test_loss, test_error * 100.))
                        record_dict["test_loss"].append((i_epoch, test_loss))
                        record_dict["test_error"].append((i_epoch, test_error))

                    # Write the best model
                    if save_model_func is not None:
                        f = smart_open(save_model_fn, "wb")
                        save_model_func(f)
                        f.close()

            # Check if training is done
            if n_patience <= n_seen_batches:
                done_looping = True
                break

        end_time = timeit.default_timer()
        epoch_time = end_time - start_time
        record_dict["epoch_time"].append((i_epoch, epoch_time))

        cur_train_loss = np.mean(train_losses)
        cur_train_error = np.mean(train_errors)
        if learning_rate_update is not None:
            logger.info(
                "Train: lr %f, epoch %i, %f sec/epoch, loss %f, error %.2f%%" % (
                    learning_rate, i_epoch + 1, epoch_time, cur_train_loss, cur_train_error*100.
                    )
                )
        else:
            logger.info(
                "Train: epoch %i, %f sec/epoch, loss %f, error %.2f%%" % (
                    i_epoch + 1, epoch_time, cur_train_loss, cur_train_error*100.
                    )
                )
        record_dict["train_loss"].append((i_epoch, cur_train_loss))
        record_dict["train_error"].append((i_epoch, cur_train_error))

        if record_dict_fn is not None:
            f = smart_open(record_dict_fn, "wb")
            pickle.dump(record_dict, f, -1)
            f.close()

        i_epoch += 1

    total_time = np.sum([i[1] for i in record_dict["epoch_time"]])
    logger.info(
        "Training complete: %d epochs, %f sec/epoch, total time %f min" % (
            i_epoch, 1. * total_time / i_epoch, total_time / 60.
            )
        )
    logger.info(
        "Best validation: after seeing %d minibatches in epoch %d, error %.2f%%" %
        (n_batches_best, i_epoch_best + 1, best_validation_error * 100.)
        )
    if test_model is not None:
        logger.info("Test error: %.2f%%" % (test_error * 100.))
    if save_model_func is not None:
        logger.info("Best validation model saved: " + save_model_fn)
    if record_dict_fn is not None:
        logger.info("Saved record: " + record_dict_fn)

    return record_dict
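# Usage sketch: in contrast to the fixed-epoch helper above, the early-stopping trainer expects
# index-based Theano functions (taking a batch index) and explicit batch counts. The model, the
# compiled functions, the batch counts and the file names are assumed to come from one of the
# training scripts in this collection; the epoch and patience values are illustrative.
def _example_train_early_stopping(train_model, validate_model,
                                  n_train_batches, n_validation_batches, model):
    return train_early_stopping(
        n_train_batches, n_validation_batches, train_model, validate_model,
        n_max_epochs=100,
        n_patience=5000,
        save_model_func=model.save,
        save_model_fn="models/example/model.pkl.gz",        # placeholder path
        record_dict_fn="models/example/record_dict.pkl.gz", # placeholder path
        )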
def test_siamese_triplet_batch_save_load(): testdir = "train_siamese_triplets_convlstm_tmp_testdir" options_dict = default_options_dict.copy() rng = np.random.RandomState(options_dict["rnd_seed"]) if options_dict["dropout_rates"] is not None: srng = RandomStreams(seed=options_dict["rnd_seed"]) else: srng = None if not path.isdir(testdir): os.makedirs(testdir) model_fn = path.join(testdir, "model.pkl.gz") # Symbolic variables x1 = tensor.matrix("x1", dtype=THEANOTYPE) x2 = tensor.matrix("x2", dtype=THEANOTYPE) x3 = tensor.matrix("x3", dtype=THEANOTYPE) m1 = tensor.matrix("m1", dtype=THEANOTYPE) m2 = tensor.matrix("m2", dtype=THEANOTYPE) m3 = tensor.matrix("m3", dtype=THEANOTYPE) # Random number generators rng = np.random.RandomState(options_dict["rnd_seed"]) # Build model input_shape = (options_dict["batch_size"], 1, 200, 39) model = siamese.SiameseTripletBatchConvLSTM( rng, x1, x2, x3, m1, m2, m3, input_shape, filter_shape=options_dict["filter_shape"], n_lstm_hiddens=options_dict["n_hiddens"], output_type=options_dict["sequence_output_type"], srng=srng, dropout=options_dict["dropout_rates"]) run_model = theano.function( inputs=[model.input, model.mask], outputs=model.output) x0 = rng.randn(options_dict["batch_size"], 200, 39) m0 = rng.rand(options_dict["batch_size"], 200).T y0 = run_model(x0, m0) f = data_io.smart_open(model_fn, "wb") model.save(f) f.close() model = siamese.SiameseTripletBatchConvLSTM( rng, x1, x2, x3, m1, m2, m3, input_shape, filter_shape=options_dict["filter_shape"], n_lstm_hiddens=options_dict["n_hiddens"], output_type=options_dict["sequence_output_type"], srng=srng, dropout=options_dict["dropout_rates"]) f = data_io.smart_open(model_fn, "rb") model.load(f) f.close() run_model = theano.function( inputs=[model.input, model.mask], outputs=model.output) y1 = run_model(x0, m0) shutil.rmtree(testdir) np.testing.assert_array_almost_equal(y1, y0)
def train_fixed_epochs(n_epochs, train_model, train_batch_iterator, test_model=None, test_batch_iterator=None, save_model_func=None, save_model_fn=None, record_dict_fn=None): """ Train for a fixed number of epochs. Parameters ---------- train_model : Theano function Should take input from `train_batch_iterator` and output the training loss. The function can provide more than one output, which is averaged. This is useful for example to output both negative log likelihood (the model loss) and zero-one loss (the number of errors). train_batch_iterator : generator Provides the training batches. save_model_func : function If provided, this function is used to the save the model to the file `save_model_fn` every time a new validation best model is found. save_model_fn : str The file to which the best model is written. record_dict_fn : str If provided, the current `record_dict` is saved to this file at the end of every epoch. Return ------ record_dict : dict The dict key describes the statistic being tracked, while the dict value is a list of (epoch, statistic) tuples giving the statistic-value at a particular epoch. """ record_dict = {} record_dict["train_loss"] = [] # each element is (epoch, loss) if test_model is not None: record_dict["test_loss"] = [] # testing is not necessarily performed every epoch record_dict["epoch_time"] = [] logger.info(datetime.now()) # Training epochs i_epoch_best = 0 test_loss = np.inf for i_epoch in xrange(n_epochs): logger.info("Epoch " + str(i_epoch + 1) + ":") # Loop over training batches # train_losses = [] start_time = timeit.default_timer() train_losses = [train_model(*batch) for batch in train_batch_iterator] # for i_batch in xrange(n_train_batches): # for batch in train_batch_iterator() # Calculate training loss for this batch and update parameters # train_losses.append(train_model(*batch)) # Test model if test_model is not None: test_losses = [test_model(*batch) for batch in test_batch_iterator] test_loss = np.mean(test_losses, axis=0) logger.info(" Test loss: " + str(test_loss)) record_dict["test_loss"].append((i_epoch, test_loss)) # Write this model if save_model_func is not None: f = smart_open(save_model_fn, "wb") save_model_func(f) f.close() # Training statistics for this epoch end_time = timeit.default_timer() train_loss = np.mean(train_losses, axis=0) epoch_time = end_time - start_time # logger.info("Training loss: " + str(train_loss) # + ", " + # ) logger.info("Time: %f" % (epoch_time) + " sec, " + "training loss: " + str(train_loss) # + ", " + ) record_dict["epoch_time"].append((i_epoch, epoch_time)) record_dict["train_loss"].append((i_epoch, train_loss)) if record_dict_fn is not None: f = smart_open(record_dict_fn, "wb") pickle.dump(record_dict, f, -1) f.close() total_time = np.sum([i[1] for i in record_dict["epoch_time"]]) logger.info("Training complete: %f min" % (total_time / 60.)) if test_model is not None: logger.info("Test loss: " + str(test_loss)) if save_model_func is not None: logger.info("Model saved: " + save_model_fn) if record_dict_fn is not None: logger.info("Saved record: " + record_dict_fn) logger.info(datetime.now()) return record_dict
def apply_layers(model_dir, set, batch_size=None, i_layer=-1): logger.info(datetime.now()) # Load the model options options_dict_fn = path.join(model_dir, "options_dict.pkl.gz") logger.info("Reading: " + options_dict_fn) f = data_io.smart_open(options_dict_fn) options_dict = pickle.load(f) # print options_dict f.close() # Load the dataset npz_fn = path.join(options_dict["data_dir"], "swbd." + set + ".npz") logger.info("Reading: " + npz_fn) npz = numpy.load(npz_fn) logger.info("Loaded " + str(len(npz.keys())) + " segments") model = train_siamese_triplets_convlstm.load_siamese_triplets_convlstm_minibatch(options_dict) # Load data into Theano shared variable utt_ids = sorted(npz.keys()) xs = [npz[i] for i in utt_ids] ls = numpy.asarray([len(x) for x in xs], dtype=int) max_length = 200 batch_size = options_dict["batch_size"] n_batches = (len(ls) - 1)/100 + 1 blocked_size = n_batches * batch_size xs = numpy.zeros((blocked_size, max_length, npz[utt_ids[0]].shape[1]), dtype=theano.config.floatX) mask = numpy.zeros((blocked_size, max_length), dtype=theano.config.floatX) for j, i in enumerate(utt_ids): xs[j][:ls[j]] = npz[i] mask[j][:ls[j]] = 1.0 logger.info("Formatting into Theano shared variable") shared_x = theano.shared(xs, borrow=True) shared_mask = theano.shared(mask, borrow=True) # Compile function for passing segments through CNN layers x = model.input # input to the tied layers x_i = T.lscalar() m = model.mask normalized_output = model.output apply_model = theano.function( inputs=[x_i], outputs=normalized_output, givens={ x: shared_x[x_i*batch_size:batch_size*(x_i+1)], m: shared_mask[batch_size*x_i:batch_size*(x_i+1)].T } ) logger.info(datetime.now()) n_x = len(ls) logger.info("Passing data through in model: " + str(n_x)) embeddings = [] for x_i in range(n_batches): x_embeddings = apply_model(x_i) embeddings.extend(x_embeddings) embeddings = numpy.vstack(embeddings[:len(ls)]) logger.info("Outputs shape: " + str(embeddings.shape)) embeddings_dict = {} for embedding_i, embedding in enumerate(embeddings): utt_id = utt_ids[embedding_i] embeddings_dict[utt_id] = embedding logger.info(datetime.now()) return embeddings_dict
def train_siamese_triplets_lstm_nn(options_dict):
    """Train and save a Siamese triplets LSTM-NN using the specified options."""

    # Preliminary
    logger.info(datetime.now())

    if not path.isdir(options_dict["model_dir"]):
        os.makedirs(options_dict["model_dir"])

    if "log_to_file" in options_dict and options_dict["log_to_file"] is True:
        log_fn = path.join(options_dict["model_dir"], "log")
        print "Writing:", log_fn
        root_logger = logging.getLogger()
        if len(root_logger.handlers) > 0:
            root_logger.removeHandler(root_logger.handlers[0])  # close open file handler
        logging.basicConfig(filename=log_fn, level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.DEBUG)

    rng = np.random.RandomState(options_dict["rnd_seed"])
    if options_dict["dropout_rates"] is not None:
        srng = RandomStreams(seed=options_dict["rnd_seed"])
    else:
        srng = None

    options_dict_fn = path.join(options_dict["model_dir"], "options_dict.pkl.gz")
    logger.info("Saving options: " + options_dict_fn)
    f = data_io.smart_open(options_dict_fn, "wb")
    pickle.dump(options_dict, f, -1)
    f.close()

    logger.info("Options: " + str(options_dict))

    # Load and format data

    # Load into shared variables
    datasets = data_io.load_swbd_same_diff_mask(rng, options_dict["data_dir"])
    train_x, train_mask, train_lengths, train_matches_vec, train_labels = datasets[0]
    dev_x, dev_mask, dev_lengths, dev_matches_vec, dev_labels = datasets[1]
    test_x, test_mask, test_lengths, test_matches_vec, test_labels = datasets[2]

    # Make batch iterators
    train_triplet_iterator = BatchIteratorTriplets(
        rng, train_matches_vec, options_dict["batch_size"],
        n_same_pairs=options_dict["n_same_pairs"], sample_diff_every_epoch=True,
        )
    validate_triplet_iterator = BatchIteratorTriplets(
        rng, dev_matches_vec, options_dict["batch_size"],
        n_same_pairs=options_dict["n_same_pairs"], sample_diff_every_epoch=False,
        )
    test_triplet_iterator = BatchIteratorTriplets(
        rng, test_matches_vec, options_dict["batch_size"],
        n_same_pairs=options_dict["n_same_pairs"], sample_diff_every_epoch=False,
        )

    # Setup model
    logger.info("Building Siamese triplets LSTM")

    # Symbolic variables
    x1 = tensor.tensor3("x1", dtype=THEANOTYPE)
    x2 = tensor.tensor3("x2", dtype=THEANOTYPE)
    x3 = tensor.tensor3("x3", dtype=THEANOTYPE)
    m1 = tensor.matrix("m1", dtype=THEANOTYPE)
    m2 = tensor.matrix("m2", dtype=THEANOTYPE)
    m3 = tensor.matrix("m3", dtype=THEANOTYPE)
    x1_indices = tensor.ivector("x1_indices")
    x2_indices = tensor.ivector("x2_indices")
    x3_indices = tensor.ivector("x3_indices")
    l1 = tensor.iscalar("l1")
    l2 = tensor.iscalar("l2")
    l3 = tensor.iscalar("l3")

    # Build model
    model = siamese.SiameseTripletBatchLSTMNN(
        rng, x1, x2, x3, m1, m2, m3, n_in=39,
        n_lstm_hiddens=options_dict["n_hiddens"],
        mlp_hidden_specs=options_dict["hidden_layer_specs"],
        )
    if options_dict["loss"] == "hinge_cos":
        if options_dict["dropout_rates"] is not None:
            loss = model.dropout_loss_hinge_cos(options_dict["margin"])
        else:
            loss = model.loss_hinge_cos(options_dict["margin"])
        error = model.loss_hinge_cos(options_dict["margin"])  # doesn't include regularization or dropout
    else:
        assert False, "Invalid loss: " + options_dict["loss"]

    # Add regularization
    if options_dict["l2_weight"] > 0.0:
        loss = loss + options_dict["l2_weight"] * model.l2

    # Compile test functions
    same_distance = model.cos_same()  # track the distances of same and different pairs separately
    diff_distance = model.cos_diff()
    outputs = [error, loss, same_distance, diff_distance]
    theano_mode = theano.Mode(linker="cvm")
    validate_model = theano.function(
        inputs=[x1_indices, x2_indices, x3_indices],
        outputs=outputs,
        givens={
            x1: dev_x[x1_indices].swapaxes(0, 1)[:dev_lengths[x1_indices].max()],
            m1: dev_mask[x1_indices].T[:dev_lengths[x1_indices].max()],
            x2: dev_x[x2_indices].swapaxes(0, 1)[:dev_lengths[x2_indices].max()],
            m2: dev_mask[x2_indices].T[:dev_lengths[x2_indices].max()],
            x3: dev_x[x3_indices].swapaxes(0, 1)[:dev_lengths[x3_indices].max()],
            m3: dev_mask[x3_indices].T[:dev_lengths[x3_indices].max()],
            },
        mode=theano_mode,
        )
    test_model = theano.function(
        inputs=[x1_indices, x2_indices, x3_indices],
        outputs=outputs,
        givens={
            x1: test_x[x1_indices].swapaxes(0, 1)[:test_lengths[x1_indices].max()],
            m1: test_mask[x1_indices].T[:test_lengths[x1_indices].max()],
            x2: test_x[x2_indices].swapaxes(0, 1)[:test_lengths[x2_indices].max()],
            m2: test_mask[x2_indices].T[:test_lengths[x2_indices].max()],
            x3: test_x[x3_indices].swapaxes(0, 1)[:test_lengths[x3_indices].max()],
            m3: test_mask[x3_indices].T[:test_lengths[x3_indices].max()],
            },
        mode=theano_mode,
        )

    # Gradients and training updates
    parameters = model.parameters
    gradients = tensor.grad(loss, parameters)
    learning_rule = options_dict["learning_rule"]
    if learning_rule["type"] == "adadelta":
        updates = training.learning_rule_adadelta(
            parameters, gradients, learning_rule["rho"], learning_rule["epsilon"]
            )
    elif learning_rule["type"] == "momentum":
        updates = training.learning_rule_momentum(
            parameters, gradients, learning_rule["learning_rate"], learning_rule["momentum"]
            )
    else:
        assert False, "Invalid learning rule: " + learning_rule["type"]

    # Compile training function
    train_model = theano.function(
        inputs=[x1_indices, x2_indices, x3_indices],
        outputs=outputs,
        updates=updates,
        givens={
            x1: train_x[x1_indices].swapaxes(0, 1)[:train_lengths[x1_indices].max()],
            m1: train_mask[x1_indices].T[:train_lengths[x1_indices].max()],
            x2: train_x[x2_indices].swapaxes(0, 1)[:train_lengths[x2_indices].max()],
            m2: train_mask[x2_indices].T[:train_lengths[x2_indices].max()],
            x3: train_x[x3_indices].swapaxes(0, 1)[:train_lengths[x3_indices].max()],
            m3: train_mask[x3_indices].T[:train_lengths[x3_indices].max()],
            },
        mode=theano_mode,
        )

    # Train model
    logger.info("Training Siamese triplets LSTM-NN")
    record_dict_fn = path.join(options_dict["model_dir"], "record_dict.pkl.gz")
    # The triplet iterators are passed as the generic batch iterators expected by the
    # training utility.
    record_dict = training.train_fixed_epochs_with_validation(
        options_dict["n_max_epochs"],
        train_model=train_model,
        train_batch_iterator=train_triplet_iterator,
        validate_model=validate_model,
        validate_batch_iterator=validate_triplet_iterator,
        test_model=test_model,
        test_batch_iterator=test_triplet_iterator,
        save_model_func=model.save,
        save_model_fn=path.join(options_dict["model_dir"], "model.pkl.gz"),
        record_dict_fn=record_dict_fn,
        )

    # Extrinsic evaluation

    # Pass data through the model
    logger.info("Performing same-different evaluation")
    layers_output_dict = apply_layers.apply_layers(
        options_dict["model_dir"], "dev", batch_size=645
        )  # batch size covers 10965 out of 10966 tokens
    utt_ids = sorted(layers_output_dict.keys())
    embeddings = np.array([layers_output_dict[i] for i in utt_ids])
    labels = data_io.swbd_utts_to_labels(utt_ids)

    # Perform same-different
    distances = pdist(embeddings, metric="cosine")
    matches = samediff.generate_matches_array(labels)
    ap, prb = samediff.average_precision(distances[matches == True], distances[matches == False])
    logger.info("Validation average precision: " + str(ap))
    ap_fn = path.join(options_dict["model_dir"], "dev_ap.txt")
    with open(ap_fn, "w") as f:
        f.write(str(ap) + "\n")
def apply_layers(model_dir, set, batch_size=None, i_layer=-1): logger.info(datetime.now()) # Load the model options options_dict_fn = path.join(model_dir, "options_dict.pkl.gz") logger.info("Reading: " + options_dict_fn) f = data_io.smart_open(options_dict_fn) options_dict = pickle.load(f) # print options_dict f.close() # Load the dataset npz_fn = path.join(options_dict["data_dir"], "swbd." + set + ".npz") logger.info("Reading: " + npz_fn) npz = numpy.load(npz_fn) logger.info("Loaded " + str(len(npz.keys())) + " segments") if "siamese_triplets" in options_dict["model_dir"]: model = siamese_triplets_lstm.load_siamese_triplets_lstm(options_dict) # Load data into Theano shared variable utt_ids = sorted(npz.keys()) xs = [npz[i] for i in utt_ids] ls = numpy.asarray([len(x) for x in xs], dtype=int) base_inds = numpy.cumsum(ls) ends = theano.shared(base_inds, borrow=True) base_begins = base_inds.copy() base_begins[1:] = base_inds[:-1] base_begins[0] = 0 begins = theano.shared(base_begins, borrow=True) logger.info("Formatting into Theano shared variable") shared_x = theano.shared(numpy.asarray( numpy.vstack(xs), dtype=siamese_triplets_lstm.THEANOTYPE), borrow=True) # Compile function for passing segments through CNN layers x = model.input # input to the tied layers x_i = T.lscalar() normalized_output = model.output apply_model = theano.function(inputs=[x_i], outputs=normalized_output, givens={x: shared_x[begins[x_i]:ends[x_i]]}) logger.info(datetime.now()) n_x = len(ls) logger.info("Passing data through in model: " + str(n_x)) embeddings = [] for x_i in range(n_x): x_embedding = apply_model(x_i) embeddings.append(x_embedding) embeddings = numpy.vstack(embeddings) logger.info("Outputs shape: " + str(embeddings.shape)) embeddings_dict = {} for embedding_i, embedding in enumerate(embeddings): utt_id = utt_ids[embedding_i] embeddings_dict[utt_id] = embedding logger.info(datetime.now()) return embeddings_dict
def train_mlp(options_dict):
    """Train and save a word classifier MLP."""

    # Preliminary
    logger.info(datetime.now())

    if not path.isdir(options_dict["model_dir"]):
        os.makedirs(options_dict["model_dir"])

    if "log_to_file" in options_dict and options_dict["log_to_file"] is True:
        log_fn = path.join(options_dict["model_dir"], "log")
        print "Writing:", log_fn
        root_logger = logging.getLogger()
        if len(root_logger.handlers) > 0:
            root_logger.removeHandler(root_logger.handlers[0])  # close open file handler
        logging.basicConfig(filename=log_fn, level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.DEBUG)

    rng = np.random.RandomState(options_dict["rnd_seed"])
    if options_dict["dropout_rates"] is not None:
        srng = RandomStreams(seed=options_dict["rnd_seed"])
    else:
        srng = None

    # Load and format data

    # Load into shared variables
    datasets, word_to_i_map = data_io.load_swbd_labelled(
        rng, options_dict["data_dir"], options_dict["min_count"])
    train_x, train_y = datasets[0]
    dev_x, dev_y = datasets[1]
    test_x, test_y = datasets[2]

    # Get batch sizes and iterators
    class BatchIterator(object):
        def __init__(self, n_batches):
            self.n_batches = n_batches
        def __iter__(self):
            for i_batch in xrange(self.n_batches):
                yield [i_batch]
    n_train_batches = train_x.get_value(borrow=True).shape[0] / options_dict["batch_size"]
    n_dev_batches = dev_x.get_value(borrow=True).shape[0] / options_dict["batch_size"]
    n_test_batches = test_x.get_value(borrow=True).shape[0] / options_dict["batch_size"]
    train_batch_iterator = BatchIterator(n_train_batches)
    validate_batch_iterator = BatchIterator(n_dev_batches)
    test_batch_iterator = BatchIterator(n_test_batches)

    # Flatten data
    d_in = 39 * 200
    train_x = train_x.reshape((-1, d_in))
    dev_x = dev_x.reshape((-1, d_in))
    test_x = test_x.reshape((-1, d_in))

    d_out = len(word_to_i_map)
    options_dict["d_out"] = d_out

    # Save `options_dict`
    options_dict_fn = path.join(options_dict["model_dir"], "options_dict.pkl.gz")
    logger.info("Saving options: " + options_dict_fn)
    f = data_io.smart_open(options_dict_fn, "wb")
    pickle.dump(options_dict, f, -1)
    f.close()
    logger.info("Options: " + str(options_dict))

    # Setup model

    logger.info("Building MLP")

    # Symbolic variables
    i_batch = T.lscalar()   # batch index
    x = T.matrix("x")       # flattened data of shape (n_data, d_in)
    y = T.ivector("y")      # labels

    # Build model
    logger.info("No. of word type targets: " + str(options_dict["d_out"]))
    model = mlp.MLP(
        rng, x, d_in, options_dict["d_out"], options_dict["hidden_layer_specs"],
        srng, options_dict["dropout_rates"])
    if options_dict["dropout_rates"] is not None:
        loss = model.dropout_negative_log_likelihood(y)
    else:
        loss = model.negative_log_likelihood(y)
    error = model.errors(y)

    # Add regularization
    if options_dict["l1_weight"] > 0. or options_dict["l2_weight"] > 0.:
        loss = loss + options_dict["l1_weight"] * model.l1 + options_dict["l2_weight"] * model.l2

    # Compile test functions
    outputs = [error, loss]
    validate_model = theano.function(
        inputs=[i_batch],
        outputs=outputs,
        givens={
            x: dev_x[i_batch * options_dict["batch_size"]:(i_batch + 1) * options_dict["batch_size"]],
            y: dev_y[i_batch * options_dict["batch_size"]:(i_batch + 1) * options_dict["batch_size"]],
            })
    test_model = theano.function(
        inputs=[i_batch],
        outputs=outputs,
        givens={
            x: test_x[i_batch * options_dict["batch_size"]:(i_batch + 1) * options_dict["batch_size"]],
            y: test_y[i_batch * options_dict["batch_size"]:(i_batch + 1) * options_dict["batch_size"]],
            })

    # Gradients and training updates
    parameters = model.parameters
    gradients = T.grad(loss, parameters)
    learning_rule = options_dict["learning_rule"]
    if learning_rule["type"] == "adadelta":
        updates = training.learning_rule_adadelta(
            parameters, gradients, learning_rule["rho"], learning_rule["epsilon"])
    elif learning_rule["type"] == "momentum":
        updates = training.learning_rule_momentum(
            parameters, gradients, learning_rule["learning_rate"], learning_rule["momentum"])
    else:
        assert False, "Invalid learning rule: " + learning_rule["type"]

    # Compile training function
    train_model = theano.function(
        inputs=[i_batch],
        outputs=outputs,
        updates=updates,
        givens={
            x: train_x[i_batch * options_dict["batch_size"]:(i_batch + 1) * options_dict["batch_size"]],
            y: train_y[i_batch * options_dict["batch_size"]:(i_batch + 1) * options_dict["batch_size"]],
            },
        )

    # Train model
    logger.info("Training MLP")
    record_dict_fn = path.join(options_dict["model_dir"], "record_dict.pkl.gz")
    record_dict = training.train_fixed_epochs_with_validation(
        options_dict["n_max_epochs"],
        train_model=train_model,
        train_batch_iterator=train_batch_iterator,
        validate_model=validate_model,
        validate_batch_iterator=validate_batch_iterator,
        test_model=test_model,
        test_batch_iterator=test_batch_iterator,
        save_model_func=model.save,
        save_model_fn=path.join(options_dict["model_dir"], "model.pkl.gz"),
        record_dict_fn=record_dict_fn,
        )

    # Extrinsic evaluation

    # Pass data through model
    logger.info("Performing same-different evaluation")
    layers_output_dict = apply_layers.apply_layers(
        options_dict["model_dir"], "dev", batch_size=645,
        i_layer=options_dict["i_layer_eval"])
    utt_ids = sorted(layers_output_dict.keys())
    embeddings = np.array([layers_output_dict[i] for i in utt_ids])
    labels = data_io.swbd_utts_to_labels(utt_ids)

    # Perform same-different
    distances = pdist(embeddings, metric="cosine")
    matches = samediff.generate_matches_array(labels)
    ap, prb = samediff.average_precision(
        distances[matches == True], distances[matches == False])
    logger.info("Validation average precision: " + str(ap))
    ap_fn = path.join(options_dict["model_dir"], "dev_ap.txt")
    with open(ap_fn, "w") as f:
        f.write(str(ap) + "\n")
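# NOTE: `train_mlp` is driven entirely by `options_dict`. The sketch below is
# illustrative only: the keys are the ones read by `train_mlp` above, but the
# values, paths and the exact per-layer fields expected by `mlp.MLP` (e.g. an
# activation entry) are assumptions, not settings taken from this codebase.
example_mlp_options_dict = {
    "model_dir": "models/mlp.tmp",      # hypothetical output directory
    "data_dir": "data/swbd",            # hypothetical directory containing swbd.<set>.npz
    "min_count": 3,                     # minimum word-type count for load_swbd_labelled
    "rnd_seed": 42,
    "batch_size": 30,
    "n_max_epochs": 20,
    "i_layer_eval": -1,                 # layer passed to apply_layers for same-different eval
    "dropout_rates": None,              # or e.g. [0.5, 0.5] to enable dropout
    "hidden_layer_specs": [{"units": 2048}, {"units": 2048}],
    "learning_rule": {"type": "momentum", "learning_rate": 0.01, "momentum": 0.9},
    "l1_weight": 0.0,
    "l2_weight": 0.0,
    "log_to_file": False,
    }
# train_mlp(example_mlp_options_dict)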
def main():
    args = check_argv()

    if "," in args.model_basedir:
        directory_list = []
        for model_basedir in args.model_basedir.split(","):
            directory_list += glob.glob(path.join(model_basedir, "*"))
        print directory_list
    else:
        directory_list = glob.glob(path.join(args.model_basedir, "*"))

    # Get results from directories
    results = []  # list of (dir, option_value_dict, performance)
    for d in directory_list:
        if path.isdir(d):
            hash = path.split(d)[-1]
            # print d, hash
            options_dict_fn = path.join(d, "options_dict.pkl.gz")
            if not path.isfile(options_dict_fn):
                continue
            print "Reading:", options_dict_fn
            f = smart_open(options_dict_fn)
            options_dict = pickle.load(f)
            f.close()

            # Data filter
            if data_dir_filter is not None:
                if not data_dir_filter in options_dict["data_dir"]:
                    continue

            # Read average precision
            ap_fn = path.join(d, "dev_ap.txt")
            if not path.isfile(ap_fn):
                continue
            with open(ap_fn) as f:
                ap = float(f.readline().strip())

            # Get the options we are interested in
            options = {}
            if "min_count" in options_dict:
                options["min_count"] = options_dict["min_count"]
            else:
                options["min_count"] = None
            if "conv_layer_specs" in options_dict:
                options["n_cnn_units"] = options_dict["conv_layer_specs"][0]["filter_shape"][0]
            else:
                options["n_cnn_units"] = None
            options["n_hidden_units"] = options_dict["hidden_layer_specs"][0]["units"]
            options["n_hidden_layers"] = len(options_dict["hidden_layer_specs"])
            options["n_hidden_units_final_layer"] = options_dict["hidden_layer_specs"][-1]["units"]
            for key in options_monitor:
                if key in options_dict:
                    options[key] = options_dict[key]
                else:
                    options[key] = None

            results.append((d, options, ap))

    # Try to sort the results according to the option_value_dict
    results = sorted(results, key=lambda i: i[1].values())

    # Present results
    options = results[0][1].keys()
    print "Possible options:", options
    print_options = sorted(options)  # or can give a filtered list here
    print print_options
    print
    print "-" * 39
    print "# Directory\t" + "\t".join(print_options) + "\tDev AP"
    for dir, options, ap in results:
        print dir + "\t" + "\t".join([str(options[i]) for i in print_options]) + "\t" + str(ap)
    print "-" * 39
def apply_layers(model_dir, set, batch_size=None, i_layer=-1):

    logger.info(datetime.now())

    # Load the model options
    options_dict_fn = path.join(model_dir, "options_dict.pkl.gz")
    logger.info("Reading: " + options_dict_fn)
    f = data_io.smart_open(options_dict_fn)
    options_dict = pickle.load(f)
    # print options_dict
    f.close()

    # Load the dataset
    npz_fn = path.join(options_dict["data_dir"], "swbd." + set + ".npz")
    logger.info("Reading: " + npz_fn)
    npz = numpy.load(npz_fn)
    logger.info("Loaded " + str(len(npz.keys())) + " segments")

    # Load the model
    model = load_model(options_dict)

    # Load data into Theano shared variable
    utt_ids = sorted(npz.keys())
    xs = [npz[i] for i in utt_ids]
    ls = numpy.asarray([len(x) for x in xs], dtype=int)
    base_inds = numpy.cumsum(ls)
    ends = theano.shared(base_inds, borrow=True)
    base_begins = base_inds.copy()
    base_begins[1:] = base_inds[:-1]
    base_begins[0] = 0
    begins = theano.shared(base_begins, borrow=True)
    logger.info("Formatting into Theano shared variable")
    shared_x = theano.shared(numpy.asarray(
        numpy.vstack(xs), dtype=siamese_triplets_lstm.THEANOTYPE), borrow=True)

    # Compile function for passing segments through the model layers
    x = model.input  # input to the tied layers
    x_i = T.lscalar()
    normalized_output = model.output
    apply_model = theano.function(
        inputs=[x_i],
        outputs=normalized_output,
        givens={x: shared_x[begins[x_i]:ends[x_i]]})

    logger.info(datetime.now())

    n_x = len(ls)
    logger.info("Passing segments through model: " + str(n_x))
    embeddings = []
    for x_i in range(n_x):
        x_embedding = apply_model(x_i)
        embeddings.append(x_embedding)
    embeddings = numpy.vstack(embeddings)
    logger.info("Outputs shape: " + str(embeddings.shape))

    embeddings_dict = {}
    for embedding_i, embedding in enumerate(embeddings):
        utt_id = utt_ids[embedding_i]
        embeddings_dict[utt_id] = embedding

    logger.info(datetime.now())
    return embeddings_dict
def train_cnn(options_dict):
    """Train and save a word classifier CNN."""

    # Preliminary
    logger.info(datetime.now())

    if not path.isdir(options_dict["model_dir"]):
        os.makedirs(options_dict["model_dir"])

    if "log_to_file" in options_dict and options_dict["log_to_file"] is True:
        log_fn = path.join(options_dict["model_dir"], "log")
        print "Writing:", log_fn
        root_logger = logging.getLogger()
        if len(root_logger.handlers) > 0:
            root_logger.removeHandler(root_logger.handlers[0])  # close open file handler
        logging.basicConfig(filename=log_fn, level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.DEBUG)

    rng = np.random.RandomState(options_dict["rnd_seed"])
    if options_dict["dropout_rates"] is not None:
        srng = RandomStreams(seed=options_dict["rnd_seed"])
    else:
        srng = None

    # Load and format data

    # Load into shared variables
    datasets, word_to_i_map = data_io.load_swbd_labelled(
        rng, options_dict["data_dir"], options_dict["min_count"])
    train_x, train_y = datasets[0]
    dev_x, dev_y = datasets[1]
    test_x, test_y = datasets[2]

    # Get batch sizes and iterators
    class BatchIterator(object):
        def __init__(self, n_batches):
            self.n_batches = n_batches
        def __iter__(self):
            for i_batch in xrange(self.n_batches):
                yield [i_batch]
    n_train_batches = train_x.get_value(borrow=True).shape[0] / options_dict["batch_size"]
    n_dev_batches = dev_x.get_value(borrow=True).shape[0] / options_dict["batch_size"]
    n_test_batches = test_x.get_value(borrow=True).shape[0] / options_dict["batch_size"]
    train_batch_iterator = BatchIterator(n_train_batches)
    validate_batch_iterator = BatchIterator(n_dev_batches)
    test_batch_iterator = BatchIterator(n_test_batches)

    # Flatten data
    d_in = 39 * 200
    train_x = train_x.reshape((-1, d_in))
    dev_x = dev_x.reshape((-1, d_in))
    test_x = test_x.reshape((-1, d_in))

    d_out = len(word_to_i_map)
    options_dict["d_out"] = d_out

    # Save `options_dict`
    options_dict_fn = path.join(options_dict["model_dir"], "options_dict.pkl.gz")
    logger.info("Saving options: " + options_dict_fn)
    f = data_io.smart_open(options_dict_fn, "wb")
    pickle.dump(options_dict, f, -1)
    f.close()
    logger.info("Options: " + str(options_dict))

    # Setup model

    logger.info("Building CNN")

    # Symbolic variables
    i_batch = T.lscalar()   # batch index
    x = T.matrix("x")       # flattened data of shape (n_data, d_in)
    y = T.ivector("y")      # labels

    # Build model
    logger.info("No. of word type targets: " + str(options_dict["d_out"]))
    input_shape = (options_dict["batch_size"], 1, 39, 200)
    model = cnn.CNN(
        rng, x, input_shape, options_dict["conv_layer_specs"],
        options_dict["hidden_layer_specs"], options_dict["d_out"],
        srng, options_dict["dropout_rates"],
        )
    if options_dict["dropout_rates"] is not None:
        loss = model.dropout_negative_log_likelihood(y)
    else:
        loss = model.negative_log_likelihood(y)
    error = model.errors(y)

    # Add regularization
    if options_dict["l1_weight"] > 0. or options_dict["l2_weight"] > 0.:
        loss = loss + options_dict["l1_weight"] * model.l1 + options_dict["l2_weight"] * model.l2

    # Compile test functions
    outputs = [error, loss]
    validate_model = theano.function(
        inputs=[i_batch],
        outputs=outputs,
        givens={
            x: dev_x[i_batch * options_dict["batch_size"]:(i_batch + 1) * options_dict["batch_size"]],
            y: dev_y[i_batch * options_dict["batch_size"]:(i_batch + 1) * options_dict["batch_size"]],
            })
    test_model = theano.function(
        inputs=[i_batch],
        outputs=outputs,
        givens={
            x: test_x[i_batch * options_dict["batch_size"]:(i_batch + 1) * options_dict["batch_size"]],
            y: test_y[i_batch * options_dict["batch_size"]:(i_batch + 1) * options_dict["batch_size"]],
            })

    # Gradients and training updates
    parameters = model.parameters
    gradients = T.grad(loss, parameters)
    learning_rule = options_dict["learning_rule"]
    if learning_rule["type"] == "adadelta":
        updates = training.learning_rule_adadelta(
            parameters, gradients, learning_rule["rho"], learning_rule["epsilon"])
    elif learning_rule["type"] == "momentum":
        updates = training.learning_rule_momentum(
            parameters, gradients, learning_rule["learning_rate"], learning_rule["momentum"])
    else:
        assert False, "Invalid learning rule: " + learning_rule["type"]

    # Compile training function
    train_model = theano.function(
        inputs=[i_batch],
        outputs=outputs,
        updates=updates,
        givens={
            x: train_x[i_batch * options_dict["batch_size"]:(i_batch + 1) * options_dict["batch_size"]],
            y: train_y[i_batch * options_dict["batch_size"]:(i_batch + 1) * options_dict["batch_size"]],
            },
        )

    # Train model
    logger.info("Training CNN")
    record_dict_fn = path.join(options_dict["model_dir"], "record_dict.pkl.gz")
    record_dict = training.train_fixed_epochs_with_validation(
        options_dict["n_max_epochs"],
        train_model=train_model,
        train_batch_iterator=train_batch_iterator,
        validate_model=validate_model,
        validate_batch_iterator=validate_batch_iterator,
        test_model=test_model,
        test_batch_iterator=test_batch_iterator,
        save_model_func=model.save,
        save_model_fn=path.join(options_dict["model_dir"], "model.pkl.gz"),
        record_dict_fn=record_dict_fn,
        )

    # Extrinsic evaluation

    # Pass data through model
    logger.info("Performing same-different evaluation")
    layers_output_dict = apply_layers.apply_layers(
        options_dict["model_dir"], "dev", batch_size=645,
        i_layer=options_dict["i_layer_eval"])  # batch size covers 10965 out of 10966 tokens
    utt_ids = sorted(layers_output_dict.keys())
    embeddings = np.array([layers_output_dict[i] for i in utt_ids])
    labels = data_io.swbd_utts_to_labels(utt_ids)

    # Perform same-different
    distances = pdist(embeddings, metric="cosine")
    matches = samediff.generate_matches_array(labels)
    ap, prb = samediff.average_precision(
        distances[matches == True], distances[matches == False])
    logger.info("Validation average precision: " + str(ap))
    ap_fn = path.join(options_dict["model_dir"], "dev_ap.txt")
    with open(ap_fn, "w") as f:
        f.write(str(ap) + "\n")
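# NOTE: Both train_cnn and train_mlp dispatch on options_dict["learning_rule"]["type"],
# which must be either "adadelta" or "momentum"; anything else trips the assert above.
# The two dicts below show the fields each rule requires (the numeric values are
# illustrative, not defaults taken from this codebase).
adadelta_rule = {"type": "adadelta", "rho": 0.9, "epsilon": 1e-6}
momentum_rule = {"type": "momentum", "learning_rate": 0.01, "momentum": 0.9}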
def train_fixed_epochs(n_epochs, train_model, train_batch_iterator,
        test_model=None, test_batch_iterator=None, save_model_func=None,
        save_model_fn=None, record_dict_fn=None):
    """
    Train for a fixed number of epochs.

    Parameters
    ----------
    train_model : Theano function
        Should take input from `train_batch_iterator` and output the training
        loss. The function can provide more than one output, which is
        averaged. This is useful for example to output both negative log
        likelihood (the model loss) and zero-one loss (the number of errors).
    train_batch_iterator : generator
        Provides the training batches.
    save_model_func : function
        If provided, this function is used to save the model to the file
        `save_model_fn` at the end of every epoch.
    save_model_fn : str
        The file to which the model is written.
    record_dict_fn : str
        If provided, the current `record_dict` is saved to this file at the
        end of every epoch.

    Return
    ------
    record_dict : dict
        The dict key describes the statistic being tracked, while the dict
        value is a list of (epoch, statistic) tuples giving the statistic-value
        at a particular epoch.
    """

    record_dict = {}
    record_dict["train_loss"] = []  # each element is (epoch, loss)
    if test_model is not None:
        record_dict["test_loss"] = []  # testing is not necessarily performed every epoch
    record_dict["epoch_time"] = []

    logger.info(datetime.now())

    # Training epochs
    i_epoch_best = 0
    test_loss = np.inf
    for i_epoch in xrange(n_epochs):
        logger.info("Epoch " + str(i_epoch + 1) + ":")

        # Loop over training batches, calculating the loss for each batch and
        # updating the parameters
        start_time = timeit.default_timer()
        train_losses = [train_model(*batch) for batch in train_batch_iterator]

        # Test model
        if test_model is not None:
            test_losses = [test_model(*batch) for batch in test_batch_iterator]
            test_loss = np.mean(test_losses, axis=0)
            logger.info("    Test loss: " + str(test_loss))
            record_dict["test_loss"].append((i_epoch, test_loss))

        # Write this model
        if save_model_func is not None:
            f = smart_open(save_model_fn, "wb")
            save_model_func(f)
            f.close()

        # Training statistics for this epoch
        end_time = timeit.default_timer()
        train_loss = np.mean(train_losses, axis=0)
        epoch_time = end_time - start_time
        logger.info("Time: %f" % (epoch_time) + " sec, training loss: " + str(train_loss))
        record_dict["epoch_time"].append((i_epoch, epoch_time))
        record_dict["train_loss"].append((i_epoch, train_loss))

        if record_dict_fn is not None:
            f = smart_open(record_dict_fn, "wb")
            pickle.dump(record_dict, f, -1)
            f.close()

    total_time = np.sum([i[1] for i in record_dict["epoch_time"]])
    logger.info("Training complete: %f min" % (total_time / 60.))
    if test_model is not None:
        logger.info("Test loss: " + str(test_loss))
    if save_model_func is not None:
        logger.info("Model saved: " + save_model_fn)
    if record_dict_fn is not None:
        logger.info("Saved record: " + record_dict_fn)
    logger.info(datetime.now())

    return record_dict
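# NOTE: A minimal usage sketch for train_fixed_epochs. The toy `train_model`
# and iterator below are stand-ins (the real callers pass compiled Theano
# functions and the BatchIterator/BatchIteratorTriplets classes defined
# elsewhere); they only illustrate the expected interface: the iterator yields
# argument lists and train_model(*batch) returns one or more loss values.
def toy_train_model(i_batch):
    return [0.0, 0.0]  # e.g. (negative log likelihood, zero-one loss)
toy_iterator = [[i_batch] for i_batch in xrange(10)]  # yields one-element batches
record = train_fixed_epochs(
    5, toy_train_model, toy_iterator,
    record_dict_fn="record_dict.pkl.gz",  # hypothetical output filename
    )
# record["train_loss"] is then a list of (epoch, mean_loss) tuples.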
def train_siamese_triplets_cnn(options_dict):
    """Train and save a Siamese triplets CNN using the specified options."""

    # Preliminary
    logger.info(datetime.now())

    if not path.isdir(options_dict["model_dir"]):
        os.makedirs(options_dict["model_dir"])

    if "log_to_file" in options_dict and options_dict["log_to_file"] is True:
        log_fn = path.join(options_dict["model_dir"], "log")
        print "Writing:", log_fn
        root_logger = logging.getLogger()
        if len(root_logger.handlers) > 0:
            root_logger.removeHandler(root_logger.handlers[0])  # close open file handler
        logging.basicConfig(filename=log_fn, level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.DEBUG)

    rng = np.random.RandomState(options_dict["rnd_seed"])
    if options_dict["dropout_rates"] is not None:
        srng = RandomStreams(seed=options_dict["rnd_seed"])
    else:
        srng = None

    # Save `options_dict`
    options_dict_fn = path.join(options_dict["model_dir"], "options_dict.pkl.gz")
    logger.info("Saving options: " + options_dict_fn)
    f = data_io.smart_open(options_dict_fn, "wb")
    pickle.dump(options_dict, f, -1)
    f.close()
    logger.info("Options: " + str(options_dict))

    # Load and format data

    # Load into shared variables
    datasets = data_io.load_swbd_same_diff(rng, options_dict["data_dir"])
    train_x, train_matches_vec, train_labels = datasets[0]
    dev_x, dev_matches_vec, dev_labels = datasets[1]
    test_x, test_matches_vec, test_labels = datasets[2]

    # Flatten data
    d_in = 39 * 200
    train_x = train_x.reshape((-1, d_in))
    dev_x = dev_x.reshape((-1, d_in))
    test_x = test_x.reshape((-1, d_in))

    # Make batch iterators
    train_batch_iterator = BatchIteratorTriplets(
        rng, train_matches_vec, options_dict["batch_size"],
        n_same_pairs=options_dict["n_same_pairs"], sample_diff_every_epoch=True)
    validate_batch_iterator = BatchIteratorTriplets(
        rng, dev_matches_vec, options_dict["batch_size"],
        n_same_pairs=options_dict["n_same_pairs"], sample_diff_every_epoch=False)
    test_batch_iterator = BatchIteratorTriplets(
        rng, test_matches_vec, options_dict["batch_size"],
        n_same_pairs=options_dict["n_same_pairs"], sample_diff_every_epoch=False)

    # Setup model

    logger.info("Building Siamese triplets CNN")

    # Symbolic variables
    x1 = T.matrix("x1")
    x2 = T.matrix("x2")
    x3 = T.matrix("x3")
    x1_indices = T.ivector("x1_indices")
    x2_indices = T.ivector("x2_indices")
    x3_indices = T.ivector("x3_indices")

    # Build model
    input_shape = (options_dict["batch_size"], 1, 39, 200)
    model = siamese.SiameseTripletCNN(
        rng, x1, x2, x3, input_shape,
        conv_layer_specs=options_dict["conv_layer_specs"],
        hidden_layer_specs=options_dict["hidden_layer_specs"],
        srng=srng,
        dropout_rates=options_dict["dropout_rates"],
        )
    if options_dict["loss"] == "hinge_cos":
        if options_dict["dropout_rates"] is not None:
            loss = model.dropout_loss_hinge_cos(options_dict["margin"])
        else:
            loss = model.loss_hinge_cos(options_dict["margin"])
        error = model.loss_hinge_cos(options_dict["margin"])  # doesn't include regularization or dropout
    else:
        assert False, "Invalid loss: " + options_dict["loss"]

    # Add regularization
    if options_dict["l1_weight"] > 0. or options_dict["l2_weight"] > 0.:
        loss = loss + options_dict["l1_weight"] * model.l1 + options_dict["l2_weight"] * model.l2

    # Compile test functions
    same_distance = model.cos_same()  # track the distances of same and different pairs separately
    diff_distance = model.cos_diff()
    outputs = [error, loss, same_distance, diff_distance]
    theano_mode = theano.Mode(linker="cvm")
    validate_model = theano.function(
        inputs=[x1_indices, x2_indices, x3_indices],
        outputs=outputs,
        givens={
            x1: dev_x[x1_indices],
            x2: dev_x[x2_indices],
            x3: dev_x[x3_indices],
            },
        mode=theano_mode,
        )
    test_model = theano.function(
        inputs=[x1_indices, x2_indices, x3_indices],
        outputs=outputs,
        givens={
            x1: test_x[x1_indices],
            x2: test_x[x2_indices],
            x3: test_x[x3_indices],
            },
        mode=theano_mode,
        )

    # Gradients and training updates
    parameters = model.parameters
    gradients = T.grad(loss, parameters)
    learning_rule = options_dict["learning_rule"]
    if learning_rule["type"] == "adadelta":
        updates = training.learning_rule_adadelta(
            parameters, gradients, learning_rule["rho"], learning_rule["epsilon"])
    elif learning_rule["type"] == "momentum":
        updates = training.learning_rule_momentum(
            parameters, gradients, learning_rule["learning_rate"], learning_rule["momentum"])
    else:
        assert False, "Invalid learning rule: " + learning_rule["type"]

    # Compile training function
    train_model = theano.function(
        inputs=[x1_indices, x2_indices, x3_indices],
        outputs=outputs,
        updates=updates,
        givens={
            x1: train_x[x1_indices],
            x2: train_x[x2_indices],
            x3: train_x[x3_indices],
            },
        mode=theano_mode,
        )

    # Train model
    logger.info("Training Siamese triplets CNN")
    record_dict_fn = path.join(options_dict["model_dir"], "record_dict.pkl.gz")
    record_dict = training.train_fixed_epochs_with_validation(
        options_dict["n_max_epochs"],
        train_model=train_model,
        train_batch_iterator=train_batch_iterator,
        validate_model=validate_model,
        validate_batch_iterator=validate_batch_iterator,
        test_model=test_model,
        test_batch_iterator=test_batch_iterator,
        save_model_func=model.save,
        save_model_fn=path.join(options_dict["model_dir"], "model.pkl.gz"),
        record_dict_fn=record_dict_fn,
        )

    # Extrinsic evaluation

    # Pass data through model
    logger.info("Performing same-different evaluation")
    layers_output_dict = apply_layers.apply_layers(
        options_dict["model_dir"], "dev",
        batch_size=645)  # batch size covers 10965 out of 10966 tokens
    utt_ids = sorted(layers_output_dict.keys())
    embeddings = np.array([layers_output_dict[i] for i in utt_ids])
    labels = data_io.swbd_utts_to_labels(utt_ids)

    # Perform same-different
    distances = pdist(embeddings, metric="cosine")
    matches = samediff.generate_matches_array(labels)
    ap, prb = samediff.average_precision(
        distances[matches == True], distances[matches == False])
    logger.info("Validation average precision: " + str(ap))
    ap_fn = path.join(options_dict["model_dir"], "dev_ap.txt")
    with open(ap_fn, "w") as f:
        f.write(str(ap) + "\n")
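# NOTE: The compiled train_model above takes three int32 index vectors and
# gathers the corresponding rows of the shared train_x matrix, so
# BatchIteratorTriplets is expected to yield (x1_indices, x2_indices,
# x3_indices) tuples that are unpacked with train_model(*batch) inside the
# training loop. A hand-rolled batch in the same format would look like the
# sketch below; the indices and the anchor/same/different interpretation of
# the three vectors are illustrative assumptions, not values from the data.
import numpy as np
x1_indices = np.array([0, 1, 2], dtype=np.int32)   # anchor word tokens (assumed)
x2_indices = np.array([5, 7, 9], dtype=np.int32)   # same-word tokens (assumed)
x3_indices = np.array([3, 4, 8], dtype=np.int32)   # different-word tokens (assumed)
# error, loss, same_distance, diff_distance = train_model(x1_indices, x2_indices, x3_indices)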
def apply_layers(model_dir, set, batch_size=None, i_layer=-1):

    logger.info(datetime.now())

    # Load the model options
    options_dict_fn = path.join(model_dir, "options_dict.pkl.gz")
    logger.info("Reading: " + options_dict_fn)
    f = smart_open(options_dict_fn)
    options_dict = pickle.load(f)
    # print options_dict
    f.close()

    # Load the dataset
    npz_fn = path.join(options_dict["data_dir"], "swbd." + set + ".npz")
    logger.info("Reading: " + npz_fn)
    npz = np.load(npz_fn)
    logger.info("Loaded " + str(len(npz.keys())) + " segments")

    # Load the model
    if batch_size is not None:
        options_dict["batch_size"] = batch_size
    else:
        options_dict["batch_size"] = len(npz.keys())
    model = load_model(options_dict)

    # Load data into Theano shared variable
    utt_ids = sorted(npz.keys())
    mats = np.array([npz[i] for i in utt_ids])
    logger.info("Data shape: " + str(mats.shape))
    logger.info("Formatting into Theano shared variable")
    shared_x = theano.shared(np.asarray(mats, dtype=theano.config.floatX), borrow=True)

    # Flatten data
    d_in = 39 * 200
    shared_x = shared_x.reshape((-1, d_in))

    # Compile function for passing segments through the model layers
    x = model.input  # input to the tied layers
    i_batch = T.lscalar()
    layers_output = model.layers[i_layer].output
    apply_model = theano.function(
        inputs=[i_batch],
        outputs=layers_output,
        givens={
            x: shared_x[i_batch * options_dict["batch_size"]:(i_batch + 1) * options_dict["batch_size"]],
            })

    logger.info(datetime.now())

    n_batches = mats.shape[0] / options_dict["batch_size"]
    logger.info("Passing data through in batches: " + str(n_batches))
    layers_outputs = []
    for i_batch in xrange(n_batches):
        batch_layers_outputs = apply_model(i_batch)
        layers_outputs.append(batch_layers_outputs)
    layers_outputs = np.vstack(layers_outputs)
    logger.info("Outputs shape: " + str(layers_outputs.shape))

    layers_output_dict = {}
    for i in xrange(layers_outputs.shape[0]):
        utt_id = utt_ids[i]
        layers_output_dict[utt_id] = layers_outputs[i]

    logger.info(datetime.now())
    return layers_output_dict
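# NOTE: A small usage sketch for the batched apply_layers above. The model
# directory and output filename are hypothetical; the batch size of 645 is the
# value the training scripts use for the dev set.
layers_output_dict = apply_layers("models/cnn.tmp", "dev", batch_size=645, i_layer=-1)
np.savez("models/cnn.tmp/swbd.dev.layers.npz", **layers_output_dict)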
def train_fixed_epochs_with_validation(n_epochs, train_model,
        train_batch_iterator, validate_model, validate_batch_iterator,
        test_model=None, test_batch_iterator=None, save_model_func=None,
        save_model_fn=None, record_dict_fn=None):
    """
    Train for a fixed number of epochs, using validation to decide which model
    to save.

    Parameters
    ----------
    train_model : Theano function
        Should take input from `train_batch_iterator` and output the training
        loss. The function can provide more than one output, which is
        averaged. This is useful for example to output both negative log
        likelihood (the model loss) and zero-one loss (the number of errors).
    train_batch_iterator : generator
        Provides the training batches.
    validate_model : Theano function
        Should take input from `validate_batch_iterator` and output the
        validation loss. The function can provide more than one output (which
        would be averaged), but for validation only the first output will be
        used (except if `validate_extrinsic` is provided).
    validate_extrinsic : function
        Extrinsic evaluation can be performed using this function. If
        provided, validation is performed on the output of this function
        instead of using the output from `validate_model`.
    save_model_func : function
        If provided, this function is used to save the model to the file
        `save_model_fn` every time a new validation best model is found.
    save_model_fn : str
        The file to which the best model is written.
    record_dict_fn : str
        If provided, the current `record_dict` is saved to this file at the
        end of every epoch.

    Return
    ------
    record_dict : dict
        The dict key describes the statistic being tracked, while the dict
        value is a list of (epoch, statistic) tuples giving the statistic-value
        at a particular epoch.
    """

    record_dict = {}
    record_dict["train_loss"] = []          # each element is (epoch, loss)
    record_dict["validation_loss"] = []     # validation is not necessarily performed every epoch
    if test_model is not None:
        record_dict["test_loss"] = []       # and neither is testing
    record_dict["epoch_time"] = []

    logger.info(datetime.now())

    # Training epochs
    best_validation_loss0 = np.inf
    test_loss = np.inf
    i_epoch_best = 0
    for i_epoch in xrange(n_epochs):

        # Loop over training batches, calculating the loss for each batch and
        # updating the parameters
        start_time = timeit.default_timer()
        train_losses = [train_model(*batch) for batch in train_batch_iterator]

        # Validate the model
        validation_losses = [validate_model(*batch) for batch in validate_batch_iterator]
        validation_loss = np.mean(validation_losses, axis=0)
        logger.info(
            "Epoch " + str(i_epoch + 1) + ": validation loss: " + str(validation_loss))
        record_dict["validation_loss"].append((i_epoch, validation_loss))

        if hasattr(validation_loss, "__len__"):
            validation_loss0 = validation_loss[0]
        else:
            validation_loss0 = validation_loss

        # If this is the best model, test and save
        if validation_loss0 < best_validation_loss0:
            best_validation_loss0 = validation_loss0
            i_epoch_best = i_epoch

            # Test model
            if test_model is not None:
                test_losses = [test_model(*batch) for batch in test_batch_iterator]
                test_loss = np.mean(test_losses, axis=0)
                logger.info("    Test loss: " + str(test_loss))
                record_dict["test_loss"].append((i_epoch, test_loss))

            # Write the best model
            if save_model_func is not None:
                f = smart_open(save_model_fn, "wb")
                save_model_func(f)
                f.close()

        # Training statistics for this epoch
        end_time = timeit.default_timer()
        train_loss = np.mean(train_losses, axis=0)
        epoch_time = end_time - start_time
        logger.info("Time: %f" % (epoch_time) + " sec, training loss: " + str(train_loss))
        record_dict["epoch_time"].append((i_epoch, epoch_time))
        record_dict["train_loss"].append((i_epoch, train_loss))

        if record_dict_fn is not None:
            f = smart_open(record_dict_fn, "wb")
            pickle.dump(record_dict, f, -1)
            f.close()

    total_time = np.sum([i[1] for i in record_dict["epoch_time"]])
    logger.info("Training complete: %f min" % (total_time / 60.))
    logger.info(
        "Best validation epoch: " + str(i_epoch_best + 1) + ", "
        "best validation loss: " + str(best_validation_loss0))
    if test_model is not None:
        logger.info("Test loss: " + str(test_loss))
    if save_model_func is not None:
        logger.info("Best validation model saved: " + save_model_fn)
    if record_dict_fn is not None:
        logger.info("Saved record: " + record_dict_fn)
    logger.info(datetime.now())

    return record_dict
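# NOTE: The record_dict written above can be inspected offline. This sketch
# (filename hypothetical) loads it and recovers the best validation epoch,
# using the first element of each stored loss as the training loop does.
import numpy as np
f = smart_open("models/cnn.tmp/record_dict.pkl.gz", "rb")
record_dict = pickle.load(f)
f.close()
val_losses = [(epoch, np.atleast_1d(loss)[0]) for epoch, loss in record_dict["validation_loss"]]
best_epoch, best_loss = min(val_losses, key=lambda x: x[1])
print "Best validation epoch:", best_epoch + 1, "loss:", best_loss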
def train_early_stopping(n_train_batches, n_validation_batches, train_model,
        validate_model, test_model=None, n_test_batches=None,
        n_max_epochs=1000, n_batches_validation_frequency=None,
        n_patience=5000, patience_increase_factor=2,
        improvement_threshold=0.995, save_model_func=None, save_model_fn=None,
        record_dict_fn=None, learning_rate_update=None):
    """
    Train model using early stopping, using the provided training function.

    Parameters
    ----------
    n_train_batches : int
        Total number of training batches.
    n_validation_batches : int
        Total number of validation batches.
    train_model : Theano function
        Should take as input a batch index and output the training loss and
        error (e.g. negative log likelihood and zero-one loss).
    validate_model : Theano function
        Should take as input a batch index and output the validation loss and
        error.
    test_model : Theano function
        Should take as input a batch index and output the test loss and error.
        If not provided, testing is not performed over the training
        iterations.
    n_test_batches : int
        Total number of test batches.
    n_batches_validation_frequency : int
        Number of batches between calculating the validation error; if not
        provided, it is set to min(n_train_batches, n_patience / 2), which
        means that at a minimum validation will be performed every epoch
        (i.e. every time after seeing `n_train_batches` batches).
    n_patience : int
        Number of minibatches to consider at a minimum before completing
        training.
    patience_increase_factor : int
        When a new validation minimum is found, the number of seen minibatches
        is multiplied by this factor to give the new minimum number of
        minibatches before stopping.
    improvement_threshold : float
        The minimum relative improvement in validation error to warrant an
        increase in `n_patience` by `patience_increase_factor`.
    save_model_func : function
        If provided, this function is used to save the model to the file
        `save_model_fn` every time a new validation best model is found.
    save_model_fn : str
        The file to which the current model is written.
    record_dict_fn : str
        If provided, the current `record_dict` is saved to this file at the
        end of every epoch.
    learning_rate_update : Theano function
        If provided, this function is called with the current epoch index at
        the beginning of every epoch to update the learning rate.

    Return
    ------
    record_dict : dict
        The dict key describes the statistic being tracked, while the dict
        value is a list of (epoch, statistic) tuples giving the statistic-value
        at a particular epoch.
    """

    assert (save_model_func is None) or (save_model_fn is not None)
    assert (test_model is None) or (n_test_batches is not None)

    # Set default if not provided
    if n_batches_validation_frequency is None:
        n_batches_validation_frequency = min(n_train_batches, n_patience / 2)

    record_dict = {}
    record_dict["train_loss"] = []          # each element is (epoch, loss)
    record_dict["train_error"] = []
    record_dict["validation_loss"] = []     # validation is not necessarily performed every epoch
    record_dict["validation_error"] = []
    if test_model is not None:
        record_dict["test_loss"] = []       # and neither is testing
        record_dict["test_error"] = []
    record_dict["epoch_time"] = []

    # Training epochs
    i_epoch = 0
    done_looping = False
    best_validation_error = np.inf
    n_batches_best = 0
    i_epoch_best = 0
    while (i_epoch < n_max_epochs) and (not done_looping):

        train_losses = []
        train_errors = []
        start_time = timeit.default_timer()

        if learning_rate_update is not None:
            learning_rate = learning_rate_update(i_epoch)

        # Minibatches
        for i_batch in xrange(n_train_batches):

            # Calculate cost for this minibatch, updating the parameters
            minibatch_train_loss, minibatch_train_errors = train_model(i_batch)
            train_errors.append(minibatch_train_errors)
            train_losses.append(minibatch_train_loss)

            n_seen_batches = i_epoch * n_train_batches + i_batch

            # Use n_seen_batches + 1 to avoid checking very first batch
            if (n_seen_batches + 1) % n_batches_validation_frequency == 0:

                # Validate model
                validation_losses_errors = [
                    validate_model(i) for i in xrange(n_validation_batches)]
                validation_loss = np.mean([i[0] for i in validation_losses_errors])
                validation_error = np.mean([i[1] for i in validation_losses_errors])
                logger.info(
                    "Validation: epoch %i, minibatch %i/%i, loss %f, error %.2f%%" %
                    (i_epoch + 1, i_batch + 1, n_train_batches, validation_loss,
                    validation_error * 100.))
                record_dict["validation_loss"].append((i_epoch, validation_loss))
                record_dict["validation_error"].append((i_epoch, validation_error))

                # Check validation to see if we have new best model
                if validation_error < best_validation_error:
                    if validation_error < best_validation_error * improvement_threshold:
                        n_patience = max(n_patience, n_seen_batches * patience_increase_factor)
                    best_validation_error = validation_error
                    n_batches_best = n_seen_batches
                    i_epoch_best = i_epoch

                    # Test model
                    if test_model is not None:
                        test_losses_errors = [
                            test_model(i) for i in xrange(n_test_batches)]
                        test_loss = np.mean([i[0] for i in test_losses_errors])
                        test_error = np.mean([i[1] for i in test_losses_errors])
                        logger.info(
                            "\tTest: loss %f, error %.2f%%" %
                            (test_loss, test_error * 100.))
                        record_dict["test_loss"].append((i_epoch, test_loss))
                        record_dict["test_error"].append((i_epoch, test_error))

                    # Write the best model
                    if save_model_func is not None:
                        f = smart_open(save_model_fn, "wb")
                        save_model_func(f)
                        f.close()

            # Check if training is done
            if n_patience <= n_seen_batches:
                done_looping = True
                break

        end_time = timeit.default_timer()
        epoch_time = end_time - start_time
        record_dict["epoch_time"].append((i_epoch, epoch_time))

        cur_train_loss = np.mean(train_losses)
        cur_train_error = np.mean(train_errors)
        if learning_rate_update is not None:
            logger.info(
                "Train: lr %f, epoch %i, %f sec/epoch, loss %f, error %.2f%%" %
                (learning_rate, i_epoch + 1, epoch_time, cur_train_loss,
                cur_train_error * 100.))
        else:
            logger.info(
                "Train: epoch %i, %f sec/epoch, loss %f, error %.2f%%" %
                (i_epoch + 1, epoch_time, cur_train_loss, cur_train_error * 100.))
        record_dict["train_loss"].append((i_epoch, cur_train_loss))
        record_dict["train_error"].append((i_epoch, cur_train_error))

        if record_dict_fn is not None:
            f = smart_open(record_dict_fn, "wb")
            pickle.dump(record_dict, f, -1)
            f.close()

        i_epoch += 1

    total_time = np.sum([i[1] for i in record_dict["epoch_time"]])
    logger.info(
        "Training complete: %d epochs, %f sec/epoch, total time %f min" %
        (i_epoch, 1. * total_time / i_epoch, total_time / 60.))
    logger.info(
        "Best validation: after seeing %d minibatches in epoch %d, error %.2f%%" %
        (n_batches_best, i_epoch_best + 1, best_validation_error * 100.))
    if test_model is not None:
        logger.info("Test error: %.2f%%" % (test_error * 100.))
    if save_model_func is not None:
        logger.info("Best validation model saved: " + save_model_fn)
    if record_dict_fn is not None:
        logger.info("Saved record: " + record_dict_fn)

    return record_dict
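# NOTE: Worked example of the patience rule above, using the default settings
# (n_patience=5000, patience_increase_factor=2, improvement_threshold=0.995).
# Suppose a new best validation error arrives after 3200 seen minibatches and
# improves on the previous best by more than 0.5% relative: patience becomes
# max(5000, 3200 * 2) = 6400, so training runs for at least 6400 minibatches.
# If a later best at 3500 seen minibatches again clears the threshold, patience
# is extended to max(6400, 3500 * 2) = 7000; a best that improves by less than
# the threshold updates best_validation_error but leaves patience unchanged.
# Training stops once n_seen_batches reaches the current patience, or after
# n_max_epochs epochs, whichever comes first.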