def train_siamese_cnn(options_dict):
    """Train and save a Siamese CNN triplets network."""

    # PRELIMINARY

    print(datetime.now())

    # Output directory
    hasher = hashlib.md5(repr(sorted(options_dict.items())).encode("ascii"))
    hash_str = hasher.hexdigest()[:10]
    model_dir = path.join(
        "models", options_dict["train_lang"] + "." + options_dict["train_tag"],
        options_dict["script"], hash_str
        )
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Model directory:", model_dir)
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    print("Options:", options_dict)

    # Random seeds
    random.seed(options_dict["rnd_seed"])
    np.random.seed(options_dict["rnd_seed"])
    tf.set_random_seed(options_dict["rnd_seed"])

    # LOAD AND FORMAT DATA

    # Training data
    train_tag = options_dict["train_tag"]
    npz_fn = path.join(
        "data", options_dict["train_lang"], "train." + train_tag + ".npz"
        )
    train_x, train_labels, train_lengths, train_keys, train_speakers = (
        data_io.load_data_from_npz(npz_fn, None)
        )

    # Convert training labels to integers
    train_label_set = list(set(train_labels))
    label_to_id = {}
    for i, label in enumerate(sorted(train_label_set)):
        label_to_id[label] = i
    train_y = []
    for label in train_labels:
        train_y.append(label_to_id[label])
    train_y = np.array(train_y, dtype=NP_ITYPE)

    # Validation data
    if options_dict["val_lang"] is not None:
        npz_fn = path.join("data", options_dict["val_lang"], "val.npz")
        val_x, val_labels, val_lengths, val_keys, val_speakers = (
            data_io.load_data_from_npz(npz_fn)
            )

    # Zero-pad sequences
    max_length = options_dict["max_length"]
    print("Limiting length:", max_length)
    train_x, _ = data_io.pad_sequences(train_x, max_length, True)
    train_x = np.transpose(train_x, (0, 2, 1))
    if options_dict["val_lang"] is not None:
        val_x, _ = data_io.pad_sequences(val_x, max_length, True)
        val_x = np.transpose(val_x, (0, 2, 1))

    # Dimensionalities
    d_in = train_x.shape[1]*train_x.shape[2]
    input_shape = [-1, train_x.shape[1], train_x.shape[2], 1]
    # input_shape is [n_data, height, width, channels]
    options_dict["d_in"] = d_in
    options_dict["input_shape"] = input_shape

    # Flatten data
    train_x = train_x.reshape((-1, d_in))
    if options_dict["val_lang"] is not None:
        val_x = val_x.reshape((-1, d_in))

    # DEFINE MODEL

    print(datetime.now())
    print("Building model")

    # Model filenames
    intermediate_model_fn = path.join(model_dir, "siamese_cnn.tmp.ckpt")
    model_fn = path.join(model_dir, "siamese_cnn.best_val.ckpt")

    # Model graph
    x = tf.placeholder(TF_DTYPE, [None, d_in])
    y = tf.placeholder(TF_ITYPE, [None])
    network_dict = build_siamese_cnn_from_options_dict(x, options_dict)
    output = network_dict["output"]

    # Semi-hard triplets loss
    loss = tf.contrib.losses.metric_learning.triplet_semihard_loss(
        labels=y, embeddings=output, margin=options_dict["margin"]
        )
    optimizer = tf.train.AdamOptimizer(
        learning_rate=options_dict["learning_rate"]
        ).minimize(loss)

    # Save options_dict
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Writing:", options_dict_fn)
    with open(options_dict_fn, "wb") as f:
        pickle.dump(options_dict, f, -1)

    # TRAIN AND VALIDATE

    print(datetime.now())
    print("Training model")

    # Validation function
    def samediff_val(normalise=False):
        # Embed validation
        np.random.seed(options_dict["rnd_seed"])
        val_batch_iterator = batching.LabelledIterator(
            val_x, None, val_x.shape[0], False
            )
        labels = [val_labels[i] for i in val_batch_iterator.indices]
        speakers = [val_speakers[i] for i in val_batch_iterator.indices]
        saver = tf.train.Saver()
        with tf.Session() as session:
            saver.restore(session, val_model_fn)
            for batch_x in val_batch_iterator:
                np_z = session.run(
                    [output], feed_dict={x: batch_x}
                    )[0]
                break  # single batch

        embed_dict = {}
        for i, utt_key in enumerate(
                [val_keys[i] for i in val_batch_iterator.indices]):
            embed_dict[utt_key] = np_z[i]

        # Same-different
        if normalise:
            np_z_normalised = (np_z - np_z.mean(axis=0))/np_z.std(axis=0)
            distances = pdist(np_z_normalised, metric="cosine")
        else:
            distances = pdist(np_z, metric="cosine")
        # matches = samediff.generate_matches_array(labels)
        # ap, prb = samediff.average_precision(
        #     distances[matches == True], distances[matches == False]
        #     )
        word_matches = samediff.generate_matches_array(labels)
        speaker_matches = samediff.generate_matches_array(speakers)
        sw_ap, sw_prb, swdp_ap, swdp_prb = samediff.average_precision_swdp(
            distances[np.logical_and(word_matches, speaker_matches)],
            distances[np.logical_and(word_matches, speaker_matches == False)],
            distances[word_matches == False]
            )
        # return [sw_prb, -sw_ap, swdp_prb, -swdp_ap]
        return [swdp_prb, -swdp_ap]

    # Train Siamese CNN model
    val_model_fn = intermediate_model_fn
    train_batch_iterator = batching.LabelledIterator(
        train_x, train_y, options_dict["batch_size"], shuffle_every_epoch=True
        )
    if options_dict["val_lang"] is None:
        record_dict = training.train_fixed_epochs(
            options_dict["n_epochs"], optimizer, loss, train_batch_iterator,
            [x, y], save_model_fn=intermediate_model_fn,
            )
    else:
        record_dict = training.train_fixed_epochs_external_val(
            options_dict["n_epochs"], optimizer, loss, train_batch_iterator,
            [x, y], samediff_val, save_model_fn=intermediate_model_fn,
            save_best_val_model_fn=model_fn,
            n_val_interval=options_dict["n_val_interval"]
            )

    # Save record
    record_dict_fn = path.join(model_dir, "record_dict.pkl")
    print("Writing:", record_dict_fn)
    with open(record_dict_fn, "wb") as f:
        pickle.dump(record_dict, f, -1)

    # FINAL EXTRINSIC EVALUATION

    if options_dict["val_lang"] is not None:
        print("Performing final validation")
        if options_dict["extrinsic_usefinal"]:
            val_model_fn = intermediate_model_fn
        else:
            val_model_fn = model_fn
        # sw_prb, sw_ap, swdp_prb, swdp_ap = samediff_val(normalise=False)
        swdp_prb, swdp_ap = samediff_val(normalise=False)
        # sw_ap = -sw_ap
        swdp_ap = -swdp_ap
        swdp_prb_normalised, swdp_ap_normalised = samediff_val(normalise=True)
        # sw_ap_normalised = -sw_ap_normalised
        swdp_ap_normalised = -swdp_ap_normalised
        print("Validation SWDP AP:", swdp_ap)
        print("Validation SWDP AP with normalisation:", swdp_ap_normalised)
        ap_fn = path.join(model_dir, "val_ap.txt")
        print("Writing:", ap_fn)
        with open(ap_fn, "w") as f:
            f.write(str(swdp_ap) + "\n")
            f.write(str(swdp_ap_normalised) + "\n")
        print("Validation model:", val_model_fn)

    print(datetime.now())
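# A minimal usage sketch (not from the original code): the option values
# below are hypothetical and only illustrate the keys that
# train_siamese_cnn() itself reads; build_siamese_cnn_from_options_dict()
# will additionally require the architecture-specific keys for the CNN.
#
#     options_dict = {
#         "script": "train_siamese_cnn", "train_lang": "lang1",
#         "train_tag": "gt", "val_lang": "lang1", "rnd_seed": 1,
#         "max_length": 101, "margin": 0.25, "learning_rate": 0.001,
#         "batch_size": 300, "n_epochs": 100, "n_val_interval": 1,
#         "extrinsic_usefinal": False,
#         }
#     train_siamese_cnn(options_dict)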
def apply_model(model_fn, subset, batch_size=None):

    # assert language is None  # to-do

    # Load the model options
    model_dir = path.split(model_fn)[0]
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Reading:", options_dict_fn)
    with open(options_dict_fn, "rb") as f:
        options_dict = pickle.load(f)

    # Load data
    npz_fn = path.join(options_dict["data_dir"], subset + ".npz")
    # if language is not None:
    #     if "buckeye" in npz_fn:
    #         npz_fn = npz_fn.replace("buckeye", language)
    #     elif "xitsonga" in npz_fn:
    #         npz_fn = npz_fn.replace("xitsonga", language)
    x_data, labels, lengths, keys = data_io.load_data_from_npz(npz_fn)

    if "cnn" in options_dict["script"]:

        # Pad and flatten data
        x_data, _ = data_io.pad_sequences(
            x_data, options_dict["max_length"], True
            )
        x_data = np.transpose(x_data, (0, 2, 1))
        x_data = x_data.reshape((-1, options_dict["d_in"]))

        # Build model
        x = tf.placeholder(TF_DTYPE, [None, options_dict["d_in"]])
        model = build_model(x, None, options_dict)

        # Embed data
        if batch_size is None:
            batch_iterator = batching.LabelledIterator(
                x_data, None, x_data.shape[0], False
                )
            saver = tf.train.Saver()
            with tf.Session() as session:
                saver.restore(session, model_fn)
                for batch_x in batch_iterator:
                    np_z = session.run(
                        [model["encoding"]], feed_dict={x: batch_x}
                        )[0]
                    break  # single batch
        else:
            assert False, "need to implement"

    else:  # rnn

        # Truncate and limit dimensionality
        data_io.trunc_and_limit_dim(
            x_data, lengths, options_dict["n_input"],
            options_dict["max_length"]
            )

        # Build model
        x = tf.placeholder(TF_DTYPE, [None, None, options_dict["n_input"]])
        x_lengths = tf.placeholder(TF_ITYPE, [None])
        model = build_model(x, x_lengths, options_dict)

        # Embed data
        if batch_size is None:
            batch_iterator = batching.SimpleIterator(x_data, len(x_data), False)
            saver = tf.train.Saver()
            with tf.Session() as session:
                saver.restore(session, model_fn)
                for batch_x_padded, batch_x_lengths in batch_iterator:
                    np_x = batch_x_padded
                    np_x_lengths = batch_x_lengths
                    np_z = session.run(
                        [model["encoding"]],
                        feed_dict={x: np_x, x_lengths: np_x_lengths}
                        )[0]
                    break  # single batch
        else:
            batch_iterator = batching.SimpleIterator(x_data, batch_size, False)
            saver = tf.train.Saver()
            with tf.Session() as session:
                saver.restore(session, model_fn)
                np_z = []
                for batch_x_padded, batch_x_lengths in batch_iterator:
                    np_x = batch_x_padded
                    np_x_lengths = batch_x_lengths
                    cur_np_z = session.run(
                        [model["encoding"]],
                        feed_dict={x: np_x, x_lengths: np_x_lengths}
                        )[0]
                    print("!", cur_np_z.shape)
                    np_z.append(cur_np_z)
                np_z = np.vstack(np_z)
                print("!", np_z.shape)

    embed_dict = {}
    for i, utt_key in enumerate([keys[i] for i in batch_iterator.indices]):
        embed_dict[utt_key] = np_z[i]

    return embed_dict
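# Usage sketch (hypothetical paths, not part of the original code): embed a
# named subset using the options saved next to the checkpoint, then write
# the utterance-key -> embedding mapping to a NumPy archive.
#
#     embed_dict = apply_model(
#         "models/train_cnn/abc123def0/cnn.best_val.ckpt", "val"
#         )
#     np.savez_compressed("val.embeddings.npz", **embed_dict)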
def apply_model(model_fn, language, subset, segtag):

    # Load the model options
    model_dir = path.split(model_fn)[0]
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Reading:", options_dict_fn)
    with open(options_dict_fn, "rb") as f:
        options_dict = pickle.load(f)

    # Load data and intervals
    npz_fn = path.join("data", language, subset + ".npz")
    x_data, labels, lengths, keys, speakers = data_io.load_data_from_npz(
        npz_fn
        )
    seglist_fn = path.join(
        "data", language, "search.seglist." + segtag + ".pkl"
        )
    print("Reading:", seglist_fn)
    with open(seglist_fn, "rb") as f:
        seglist_dict = pickle.load(f)
    seglists = [seglist_dict[i] for i in keys]
    print("No. utterances:", len(x_data))
    n_intervals = sum([len(i) for i in seglists])
    print("No. intervals:", n_intervals)

    # assert False

    # print("Reading:", input_npz_fn)
    # features_dict = np.load(input_npz_fn)
    # seglist_fn = path.join(
    #     "data", language, "search.seglist." + segtag + ".pkl"
    #     )
    # print("Reading:", seglist_fn)
    # with open(seglist_fn, "rb") as f:
    #     seglist_dict = pickle.load(f)
    # utterances = sorted(features_dict.keys())
    # input_sequences = [features_dict[i] for i in utterances]
    # seglists = [seglist_dict[i] for i in utterances]
    # print("No. utterances:", len(input_sequences))
    # n_intervals = sum([len(i) for i in seglists])
    # print("No. intervals:", n_intervals)

    # if "cnn" in options_dict["script"]:
    #     assert False, "to-do"
    # else:  # rnn
    #     print("No. utterances:", len(input_sequences))
    #     n_intervals = sum([len(i) for i in seglists])
    #     print("No. intervals:", n_intervals)
    #     # Load data
    #     npz_fn = path.join("data", language, subset + ".npz")
    #     x_data, labels, lengths, keys, speakers = data_io.load_data_from_npz(
    #         npz_fn
    #         )

    if "cnn" in options_dict["script"]:

        assert False, "to-do"

        # Pad and flatten data
        x_data, _ = data_io.pad_sequences(
            x_data, options_dict["max_length"], True
            )
        x_data = np.transpose(x_data, (0, 2, 1))
        x_data = x_data.reshape((-1, options_dict["d_in"]))

        # Build model
        x = tf.placeholder(TF_DTYPE, [None, options_dict["d_in"]])
        model = build_model(x, None, options_dict)

        # Embed data
        batch_iterator = batching.LabelledIterator(
            x_data, None, x_data.shape[0], False
            )
        saver = tf.train.Saver()
        with tf.Session() as session:
            saver.restore(session, model_fn)
            for batch_x in batch_iterator:
                np_z = session.run(
                    [model["encoding"]], feed_dict={x: batch_x}
                    )[0]
                break  # single batch

    else:  # rnn

        # Truncate and limit dimensionality
        data_io.trunc_and_limit_dim(
            x_data, lengths, options_dict["n_input"], None
            )

        class DenseBatchFeedIterator(object):

            def __init__(self, input_sequences, seglists):
                self.input_sequences = input_sequences
                self.n_input = self.input_sequences[0].shape[-1]
                self.seglists = seglists

            def __iter__(self):
                for i_utt in range(len(self.input_sequences)):

                    # Get intervals
                    seglist = self.seglists[i_utt]
                    input_sequence = self.input_sequences[i_utt]

                    # Get segments for intervals
                    segments = []
                    for i, j in seglist:
                        segments.append(input_sequence[i:j, :])
                    batch_x_lengths = [i.shape[0] for i in segments]

                    # Pad to maximum length in batch
                    batch_x_padded = np.zeros(
                        (len(batch_x_lengths), np.max(batch_x_lengths),
                        self.n_input), dtype=NP_DTYPE
                        )
                    for i, length in enumerate(batch_x_lengths):
                        seq = segments[i]
                        batch_x_padded[i, :length, :] = seq

                    yield (batch_x_padded, batch_x_lengths)

        batch_iterator = DenseBatchFeedIterator(x_data, seglists)

        # Build model
        x = tf.placeholder(TF_DTYPE, [None, None, options_dict["n_input"]])
        x_lengths = tf.placeholder(TF_ITYPE, [None])
        model = build_model(x, x_lengths, options_dict)

        # Embed data
        # batch_iterator = batching.SimpleIterator(x_data, len(x_data), False)
        saver = tf.train.Saver()
        n_outputs = 0
        embed_dict = {}
        with tf.Session() as session:
            saver.restore(session, model_fn)
            # print(datetime.now())
            print(
                "Applying model to segments ({} iterations):".format(
                len(x_data))
                )
            for i_batch, (batch_x_padded, batch_x_lengths) in \
                    tqdm(enumerate(batch_iterator)):
                cur_output = session.run(
                    [model["encoding"]], feed_dict={
                    x: batch_x_padded, x_lengths: batch_x_lengths}
                    )[0]
                utt_key = keys[i_batch]
                seglist = seglists[i_batch]
                embeddings = []
                for i in range(cur_output.shape[0]):
                    embeddings.append(cur_output[i, :])
                    n_outputs += 1
                embed_dict[utt_key] = np.array(embeddings)
            # print(datetime.now())

            # for batch_x_padded, batch_x_lengths in batch_iterator:
            #     np_x = batch_x_padded
            #     np_x_lengths = batch_x_lengths
            #     np_z = session.run(
            #         [model["encoding"]], feed_dict={x: np_x, x_lengths:
            #         np_x_lengths}
            #         )[0]
            #     break  # single batch

    print("Processed {} out of {} inputs".format(n_outputs, n_intervals))

    return embed_dict
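# Note on the returned structure (restating what the code above produces):
# in this dense variant, embed_dict maps each utterance key to an array of
# shape (n_segments, embedding_dim), with rows ordered as in that
# utterance's entry of the seglist pickle. A hypothetical follow-up step,
# with made-up language and segtag values:
#
#     embed_dict = apply_model(model_fn, "lang1", "eval", "min20_max60")
#     np.savez_compressed("search.embeddings.npz", **embed_dict)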
def apply_model(model_fn, npz_fn):

    # Load the model options
    model_dir = path.split(model_fn)[0]
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Reading:", options_dict_fn)
    with open(options_dict_fn, "rb") as f:
        options_dict = pickle.load(f)

    # Load data
    x_data, labels, lengths, keys, speakers = data_io.load_data_from_npz(
        npz_fn
        )

    if "cnn" in options_dict["script"]:

        # Pad and flatten data
        x_data, _ = data_io.pad_sequences(
            x_data, options_dict["max_length"], True
            )
        x_data = np.transpose(x_data, (0, 2, 1))
        x_data = x_data.reshape((-1, options_dict["d_in"]))

        # Build model
        x = tf.placeholder(TF_DTYPE, [None, options_dict["d_in"]])
        model = build_model(x, None, options_dict)

        # Embed data
        batch_iterator = batching.LabelledIterator(
            x_data, None, x_data.shape[0], False
            )
        saver = tf.train.Saver()
        with tf.Session() as session:
            saver.restore(session, model_fn)
            for batch_x in batch_iterator:
                np_z = session.run(
                    [model["encoding"]], feed_dict={x: batch_x}
                    )[0]
                break  # single batch

    else:  # rnn

        # Truncate and limit dimensionality
        data_io.trunc_and_limit_dim(
            x_data, lengths, options_dict["n_input"],
            options_dict["max_length"]
            )

        # Build model
        x = tf.placeholder(TF_DTYPE, [None, None, options_dict["n_input"]])
        x_lengths = tf.placeholder(TF_ITYPE, [None])
        model = build_model(x, x_lengths, options_dict)

        # Embed data
        batch_iterator = batching.SimpleIterator(x_data, len(x_data), False)
        saver = tf.train.Saver()
        with tf.Session() as session:
            saver.restore(session, model_fn)
            for batch_x_padded, batch_x_lengths in batch_iterator:
                np_x = batch_x_padded
                np_x_lengths = batch_x_lengths
                np_z = session.run(
                    [model["encoding"]],
                    feed_dict={x: np_x, x_lengths: np_x_lengths}
                    )[0]
                break  # single batch

    embed_dict = {}
    for i, utt_key in enumerate([keys[i] for i in batch_iterator.indices]):
        embed_dict[utt_key] = np_z[i]

    return embed_dict
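# Usage sketch (hypothetical filenames, not part of the original code): this
# variant embeds an arbitrary features archive, so it can be pointed at any
# prepared .npz rather than a named subset under the model's data directory.
#
#     embed_dict = apply_model(
#         "models/train_rnn/0123456789/rnn.best_val.ckpt", "queries.npz"
#         )
#     np.savez_compressed("queries.embeddings.npz", **embed_dict)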