def apply_model(model_fn, subset, language):

    # assert language is None  # to-do

    # Load the model options
    model_dir = path.split(model_fn)[0]
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Reading:", options_dict_fn)
    with open(options_dict_fn, "rb") as f:
        options_dict = pickle.load(f)

    # Load data
    npz_fn = path.join(options_dict["data_dir"], subset + ".npz")
    if language is not None:
        if "buckeye" in npz_fn:
            npz_fn = npz_fn.replace("buckeye", language)
        elif "xitsonga" in npz_fn:
            npz_fn = npz_fn.replace("xitsonga", language)
    x_data, labels, lengths, keys, speakers = data_io.load_data_from_npz(
        npz_fn)

    # Truncate and limit dimensionality
    data_io.trunc_and_limit_dim(x_data, lengths, options_dict["n_input"],
                                options_dict["max_length"])

    # Build model
    x = tf.placeholder(TF_DTYPE, [None, None, options_dict["n_input"]])
    x_lengths = tf.placeholder(TF_ITYPE, [None])
    model = build_model(x, x_lengths, options_dict)

    # Embed data
    batch_iterator = batching.SimpleIterator(x_data, len(x_data), False)
    saver = tf.train.Saver()
    with tf.Session() as session:
        saver.restore(session, model_fn)
        for batch_x_padded, batch_x_lengths in batch_iterator:
            np_x = batch_x_padded
            np_x_lengths = batch_x_lengths
            np_z = session.run([model["encoding"]],
                               feed_dict={
                                   x: np_x,
                                   x_lengths: np_x_lengths
                               })[0]
            # np_y = session.run(
            #     [y], feed_dict={a: np_x, a_lengths: np_x_lengths, b_lengths: np_x_lengths}
            #     )[0]
            break  # single batch

    embed_dict = {}
    for i, utt_key in enumerate([keys[i] for i in batch_iterator.indices]):
        embed_dict[utt_key] = np_z[i]

    return embed_dict
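
# A minimal usage sketch (not from the original source): the checkpoint path
# and subset below are hypothetical placeholders. apply_model returns a dict
# mapping utterance keys to fixed-dimensional embeddings, which can then be
# compared with cosine distances as in the validation code further down.
#
#     embed_dict = apply_model(
#         "models/buckeye.utd/ae.best_val.ckpt", "val", language=None
#         )
#     keys = sorted(embed_dict)
#     embeddings = np.array([embed_dict[k] for k in keys])
#     distances = pdist(embeddings, metric="cosine")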
# Example #2

def train_cae(options_dict):
    """Train and save a CAE."""

    # PRELIMINARY
    assert (options_dict["train_tag"] != "rnd") or \
        (options_dict["cae_n_epochs"] == 0), \
        "random segment training only possible with AE (cae_n_epochs=0)"
    print(datetime.now())

    # Output directory
    hasher = hashlib.md5(repr(sorted(options_dict.items())).encode("ascii"))
    # hash_str = (
    #     datetime.now().strftime("%y%m%d.%Hh%M") + "." +
    #     # datetime.now().strftime("%y%m%d.%Hh%Mm%Ss") + "." +
    #     hasher.hexdigest()[:5]
    #     )
    hash_str = hasher.hexdigest()[:10]
    model_dir = path.join(
        "models", path.split(options_dict["data_dir"])[-1] + "." +
        options_dict["train_tag"], "cvae_model", "70"
        )
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Model directory:", model_dir)
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    print("Options:", options_dict)

    # Random seeds
    # random.seed(options_dict["rnd_seed"])
    np.random.seed(options_dict["rnd_seed"])
    tf.set_random_seed(options_dict["rnd_seed"])


    # LOAD AND FORMAT DATA

    # Training data
    train_tag = options_dict["train_tag"]
    min_length = None
    if options_dict["train_tag"] == "rnd":
        min_length = options_dict["min_length"]
        train_tag = "all"
    npz_fn = path.join(
        options_dict["data_dir"], "train." + train_tag + ".npz"
        )
    # Note: unlike the other examples, speakers are not loaded here; the
    # speaker-embedding branch below assumes train_speakers, so it requires
    # the original call:
    # train_x, train_labels, train_lengths, train_keys, train_speakers = (
    #     data_io.load_data_from_npz(npz_fn, min_length)
    #     )
    train_x, train_labels, train_lengths, train_keys = (
        data_io.load_data_from_npz(npz_fn, min_length)
        )

    # Pretraining data (if specified)
    pretrain_tag = options_dict["pretrain_tag"]
    if options_dict["pretrain_tag"] is not None:
        min_length = None
        if options_dict["pretrain_tag"] == "rnd":
            min_length = options_dict["min_length"]
            pretrain_tag = "all"
        npz_fn = path.join(
            options_dict["data_dir"], "train." + pretrain_tag + ".npz"
            )
        (pretrain_x, pretrain_labels, pretrain_lengths,
         pretrain_keys) = data_io.load_data_from_npz(npz_fn, min_length)

    # Validation data
    if options_dict["use_test_for_val"]:
        npz_fn = path.join(options_dict["data_dir"], "test.npz")
    else:
        npz_fn = path.join(options_dict["data_dir"], "val.npz")
    val_x, val_labels, val_lengths, val_keys = (
        data_io.load_data_from_npz(npz_fn)
        )

    # Convert training speakers, if speaker embeddings
    if options_dict["d_speaker_embedding"] is not None:
        train_speaker_set = set(train_speakers)
        speaker_to_id = {}
        id_to_speaker = {}
        for i, speaker in enumerate(sorted(list(train_speaker_set))):
            speaker_to_id[speaker] = i
            id_to_speaker[i] = speaker
        train_speaker_ids = []
        for speaker in train_speakers:
            train_speaker_ids.append(speaker_to_id[speaker])
        train_speaker_ids = np.array(train_speaker_ids, dtype=NP_ITYPE)
        options_dict["n_speakers"] = max(speaker_to_id.values()) + 1

    # Truncate and limit dimensionality
    max_length = options_dict["max_length"]
    # d_frame = 13  # None
    d_frame = 108
    options_dict["n_input"] = d_frame
    print("Limiting dimensionality:", d_frame)
    print("Limiting length:", max_length)
    data_io.trunc_and_limit_dim(train_x, train_lengths, d_frame, max_length)
    if options_dict["pretrain_tag"] is not None:
        data_io.trunc_and_limit_dim(
            pretrain_x, pretrain_lengths, d_frame, max_length
            )
    data_io.trunc_and_limit_dim(val_x, val_lengths, d_frame, max_length)

    # Get pairs
    pair_list = batching.get_pair_list(train_labels)
    # pair_list = batching.get_pair_list(train_labels, both_directions=False)
    print("No. pairs:", int(len(pair_list)/2.0))  # pairs in both directions
    # print("No. pairs:", len(pair_list))

    # DEFINE MODEL

    print(datetime.now())
    print("Building model")

    # Model filenames
    pretrain_intermediate_model_fn = path.join(model_dir, "ae.tmp.ckpt")
    pretrain_model_fn = path.join(model_dir, "ae.best_val.ckpt")
    intermediate_model_fn = path.join(model_dir, "cvae.tmp.ckpt")
    model_fn = path.join(model_dir, "cvae.best_val.ckpt")

    # Model graph
    a = tf.placeholder(TF_DTYPE, [None, None, options_dict["n_input"]])
    a_lengths = tf.placeholder(TF_ITYPE, [None])
    b = tf.placeholder(TF_DTYPE, [None, None, options_dict["n_input"]])
    b_lengths = tf.placeholder(TF_ITYPE, [None])
    network_dict = build_cae_from_options_dict(
        a, a_lengths, b_lengths, options_dict
        )


    #####################################
    z_mean = network_dict["z_mean"]
    z_log_sigma_sq = network_dict["z_log_sigma_sq"]
    z = network_dict["z"]
    ######################################
    mask = network_dict["mask"]
    # z = network_dict["z"]
    y = network_dict["y"]
    ##### y: [n_data, n_sample, maxlength, d_frame]
    if options_dict["d_speaker_embedding"] is not None:
        speaker_id = network_dict["speaker_id"]

    ######################################
    # # Reconstruction loss
    # # tf.reduce_sum(mask, 1) get how the real leangth of a datapoint (length, n_frame)
    # loss = tf.reduce_mean(
    #     tf.reduce_sum(tf.reduce_mean(tf.square(b - y), -1), -1) /
    #     tf.reduce_sum(mask, 1)
    #     )  # https://danijar.com/variable-sequence-lengths-in-tensorflow/
    # # # Temp
    # # alpha = 0.1
    # # loss += alpha*tf.reduce_mean(
    # #     tf.reduce_sum(tf.reduce_mean(tf.square(a - y), -1), -1) /
    # #     tf.reduce_sum(mask, 1)
    # #     )  # temp

    ######################################

    # y: [n_data, n_sample, max_length, d_frame]
    b_cvae = tf.expand_dims(b, 1)
    temp = tf.reduce_sum(
        tf.reduce_mean(tf.square(b_cvae - y), -1), -1
        ) / tf.reduce_sum(mask, -1)
    temp = tf.reduce_min(temp, -1)
    reconstruction_loss = 1./(2*options_dict["sigma_sq"])*tf.reduce_mean(temp)
    # https://danijar.com/variable-sequence-lengths-in-tensorflow/
    regularisation_loss = -0.5*tf.reduce_sum(
        1 + z_log_sigma_sq - tf.square(z_mean) - tf.exp(z_log_sigma_sq), 1
        )
    loss = reconstruction_loss + tf.reduce_mean(regularisation_loss)
    ######################################

    optimizer = tf.train.AdamOptimizer(
        learning_rate=options_dict["learning_rate"]
        ).minimize(loss)
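    # Sketch of the objective implemented above (an interpretation of the
    # code, not from the original comments). With y of shape
    # [n_data, n_sample, max_length, d_frame] and length_n the number of
    # valid frames for item n:
    #
    #   recon = (1/(2*sigma_sq)) * mean_n min_s [
    #               sum_t mean_d (b[n,t,d] - y[n,s,t,d])^2 / length_n ]
    #   KL(n) = -0.5 * sum_k (1 + log sigma_nk^2 - mu_nk^2 - sigma_nk^2)
    #   loss  = recon + mean_n KL(n)
    #
    # i.e. a Gaussian-decoder VAE objective where the best of the n_sample
    # reconstructions is kept per training pair.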


    ######################################
    # loss for autoencoder is different from that of cvae
    y_ae = tf.reduce_mean(y, 1)
    mask_ae = tf.reduce_mean(mask, 1)
    loss_ae = tf.reduce_mean(
        tf.reduce_sum(tf.reduce_mean(tf.square(b - y_ae), -1), -1) /
        tf.reduce_sum(mask_ae, 1)
        )  # https://danijar.com/variable-sequence-lengths-in-tensorflow/
    optimizer_ae = tf.train.AdamOptimizer(
        learning_rate=options_dict["learning_rate"]
        ).minimize(loss_ae)

    # AUTOENCODER PRETRAINING: TRAIN AND VALIDATE

    print(datetime.now())
    print("Pretraining model")

    # Validation function
    def samediff_val(normalise=True):
        # Embed validation
        np.random.seed(options_dict["rnd_seed"])
        val_batch_iterator = batching.SimpleIterator(val_x, len(val_x), False)
        labels = [val_labels[i] for i in val_batch_iterator.indices]
        saver = tf.train.Saver()
        with tf.Session() as session:
            saver.restore(session, val_model_fn)
            for batch_x_padded, batch_x_lengths in val_batch_iterator:
                np_x = batch_x_padded
                np_x_lengths = batch_x_lengths
                # np_z = session.run(
                    # [z], feed_dict={a: np_x, a_lengths: np_x_lengths}
                    # )[0]
                np_z = session.run(
                    [z_mean], feed_dict={a: np_x, a_lengths: np_x_lengths}
                    )[0]
                # print(np_z)
                break  # single batch

        embed_dict = {}
        for i, utt_key in enumerate(
                [val_keys[i] for i in val_batch_iterator.indices]):
            embed_dict[utt_key] = np_z[i]

        # Same-different
        if normalise:
            np_z = (np_z - np_z.mean(axis=0))/np_z.std(axis=0)
        distances = pdist(np_z, metric="cosine")
        matches = samediff.generate_matches_array(labels)
        ap, prb = samediff.average_precision(
            distances[matches == True], distances[matches == False]
            )
        return [prb, -ap]
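    # samediff_val returns [precision-recall breakeven, -average precision];
    # the AP is negated so that the external validation can treat lower values
    # as better (assumption, based on how the best-validation checkpoint is
    # selected).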

    # Train AE
    val_model_fn = pretrain_intermediate_model_fn
    if options_dict["pretrain_tag"] is not None:
        if options_dict["pretrain_tag"] == "rnd":
            train_batch_iterator = batching.RandomSegmentsIterator(
                pretrain_x, options_dict["ae_batch_size"],
                options_dict["ae_n_buckets"], shuffle_every_epoch=True,
                paired=True
                )
        else:
            train_batch_iterator = batching.PairedBucketIterator(
                pretrain_x, [(i, i) for i in range(len(pretrain_x))],
                options_dict["ae_batch_size"], options_dict["ae_n_buckets"],
                shuffle_every_epoch=True, speaker_ids=None if
                options_dict["d_speaker_embedding"] is None else
                train_speaker_ids
                )
    else:
        if options_dict["train_tag"] == "rnd":
            train_batch_iterator = batching.RandomSegmentsIterator(
                train_x, options_dict["ae_batch_size"],
                options_dict["ae_n_buckets"], shuffle_every_epoch=True,
                paired=True
                )
        else:
            train_batch_iterator = batching.PairedBucketIterator(
                train_x, [(i, i) for i in range(len(train_x))],
                options_dict["ae_batch_size"], options_dict["ae_n_buckets"],
                shuffle_every_epoch=True, speaker_ids=None if
                options_dict["d_speaker_embedding"] is None else
                train_speaker_ids
                )
    if options_dict["d_speaker_embedding"] is None:
        ae_record_dict = training.train_fixed_epochs_external_val(
            options_dict["ae_n_epochs"], optimizer_ae, loss_ae, train_batch_iterator,
            [a, a_lengths, b, b_lengths], samediff_val,
            save_model_fn=pretrain_intermediate_model_fn,
            save_best_val_model_fn=pretrain_model_fn,
            n_val_interval=options_dict["ae_n_val_interval"]
            )
    else:
        ae_record_dict = training.train_fixed_epochs_external_val(
            options_dict["ae_n_epochs"], optimizer_ae, loss_ae, train_batch_iterator,
            [a, a_lengths, b, b_lengths, speaker_id], samediff_val,
            save_model_fn=pretrain_intermediate_model_fn,
            save_best_val_model_fn=pretrain_model_fn,
            n_val_interval=options_dict["ae_n_val_interval"]
            )


    # CORRESPONDENCE TRAINING: TRAIN AND VALIDATE

    if options_dict["cae_n_epochs"] > 0:
        print("Training model")

        cae_pretrain_model_fn = pretrain_model_fn
        if options_dict["pretrain_usefinal"]:
            cae_pretrain_model_fn = pretrain_intermediate_model_fn
        if options_dict["ae_n_epochs"] == 0:
            cae_pretrain_model_fn = None

        # Train CAE
        val_model_fn = intermediate_model_fn
        ##########################################################
        train_batch_iterator = batching.PairedBucketIterator(
            train_x, pair_list, batch_size=options_dict["cae_batch_size"],
            n_buckets=options_dict["cae_n_buckets"], shuffle_every_epoch=True,
            speaker_ids=None if options_dict["d_speaker_embedding"] is None
            else train_speaker_ids
            )
        ##########################################################
        # train_batch_iterator = batching.PairedBucketIterator(
        #     train_x, [(i, i) for i in range(len(train_x))], batch_size=options_dict["cae_batch_size"],
        #     n_buckets=options_dict["cae_n_buckets"], shuffle_every_epoch=True,
        #     speaker_ids=None if options_dict["d_speaker_embedding"] is None
        #     else train_speaker_ids
        #     )
        ##########################################################
            
        if options_dict["d_speaker_embedding"] is None:
            cae_record_dict = training.train_fixed_epochs_external_val(
                options_dict["cae_n_epochs"], optimizer, loss,
                train_batch_iterator, [a, a_lengths, b, b_lengths],
                samediff_val, save_model_fn=intermediate_model_fn,
                save_best_val_model_fn=model_fn,
                n_val_interval=options_dict["cae_n_val_interval"],
                # load_model_fn=cae_pretrain_model_fn
                load_model_fn=path.join(
                    "models",
                    path.split(options_dict["data_dir"])[-1] + "." +
                    options_dict["train_tag"], "pretrain_ae", "50",
                    "ae.best_val.ckpt"
                    )
                )
        else:
            cae_record_dict = training.train_fixed_epochs_external_val(
                options_dict["cae_n_epochs"], optimizer, loss,
                train_batch_iterator, [a, a_lengths, b, b_lengths, speaker_id],
                samediff_val, save_model_fn=intermediate_model_fn,
                save_best_val_model_fn=model_fn,
                n_val_interval=options_dict["cae_n_val_interval"],
                load_model_fn=cae_pretrain_model_fn
                )

    # Save record
    record_dict_fn = path.join(model_dir, "record_dict.pkl")
    print("Writing:", record_dict_fn)
    with open(record_dict_fn, "wb") as f:
        pickle.dump(ae_record_dict, f, -1)
        if options_dict["cae_n_epochs"] > 0:
            pickle.dump(cae_record_dict, f, -1)

    # Save options_dict
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Writing:", options_dict_fn)
    with open(options_dict_fn, "wb") as f:
        pickle.dump(options_dict, f, -1)


    # FINAL EXTRINSIC EVALUATION

    print("Performing final validation")
    if options_dict["cae_n_epochs"] == 0:
        if options_dict["extrinsic_usefinal"]:
            val_model_fn = pretrain_intermediate_model_fn
        else:
            val_model_fn = pretrain_model_fn
    else:
        if options_dict["extrinsic_usefinal"]:
            val_model_fn = intermediate_model_fn
        else:
            val_model_fn = model_fn
    prb, ap = samediff_val(normalise=False)
    ap = -ap
    prb_normalised, ap_normalised = samediff_val(normalise=True)
    ap_normalised = -ap_normalised
    print("Validation AP:", ap)
    print("Validation AP with normalisation:", ap_normalised)
    ap_fn = path.join(model_dir, "val_ap.txt")
    print("Writing:", ap_fn)
    with open(ap_fn, "w") as f:
        f.write(str(ap) + "\n")
        f.write(str(ap_normalised) + "\n")
    print("Validation model:", val_model_fn)

    print(datetime.now())
def apply_model(model_fn, language, subset, segtag):

    # Load the model options
    model_dir = path.split(model_fn)[0]
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Reading:", options_dict_fn)
    with open(options_dict_fn, "rb") as f:
        options_dict = pickle.load(f)

    # Load data and intervals
    npz_fn = path.join("data", language, subset + ".npz")
    x_data, labels, lengths, keys, speakers = data_io.load_data_from_npz(
        npz_fn
        )
    seglist_fn = path.join(
        "data", language, "search.seglist." + segtag + ".pkl"
        )
    print("Reading:", seglist_fn)
    with open(seglist_fn, "rb") as f:
        seglist_dict = pickle.load(f)
    seglists = [seglist_dict[i] for i in keys]
    print("No. utterances:", len(x_data))
    n_intervals = sum([len(i) for i in seglists])
    print("No. intervals:", n_intervals)

    # assert False
    # print("Reading:", input_npz_fn)
    # features_dict = np.load(input_npz_fn)
    # seglist_fn = path.join(
    #     "data", language, "search.seglist." + segtag + ".pkl"
    #     )
    # print("Reading:", seglist_fn)
    # with open(seglist_fn, "rb") as f:
    #     seglist_dict = pickle.load(f)
    # utterances = sorted(features_dict.keys())
    # input_sequences = [features_dict[i] for i in utterances]
    # seglists = [seglist_dict[i] for i in utterances]
    # print("No. utterances:", len(input_sequences))
    # n_intervals = sum([len(i) for i in seglists])
    # print("No. intervals:", n_intervals)

    # if "cnn" in options_dict["script"]:
    #     assert False, "to-do"
    # else:  # rnn

    # print("No. utterances:", len(input_sequences))
    # n_intervals = sum([len(i) for i in seglists])
    # print("No. intervals:", n_intervals)


    # # Load data
    # npz_fn = path.join("data", language, subset + ".npz")
    # x_data, labels, lengths, keys, speakers = data_io.load_data_from_npz(
    #     npz_fn
    #     )


    if "cnn" in options_dict["script"]:

        assert False, "to-do"

        # Pad and flatten data
        x_data, _ = data_io.pad_sequences(
            x_data, options_dict["max_length"], True
            )
        x_data = np.transpose(x_data, (0, 2, 1))
        x_data = x_data.reshape((-1, options_dict["d_in"]))

        # Build model
        x = tf.placeholder(TF_DTYPE, [None, options_dict["d_in"]])
        model = build_model(x, None, options_dict)

        # Embed data
        batch_iterator = batching.LabelledIterator(
            x_data, None, x_data.shape[0], False
            )
        saver = tf.train.Saver()
        with tf.Session() as session:
            saver.restore(session, model_fn)
            for batch_x in batch_iterator:
                np_z = session.run(
                    [model["encoding"]], feed_dict={x: batch_x})[0]
                break  # single batch

    else:  # rnn
        
        # Truncate and limit dimensionality
        data_io.trunc_and_limit_dim(
            x_data, lengths, options_dict["n_input"], None
            )

        class DenseBatchFeedIterator(object):

            def __init__(self, input_sequences, seglists):
                self.input_sequences = input_sequences
                self.n_input = self.input_sequences[0].shape[-1]
                self.seglists = seglists

            def __iter__(self):
                for i_utt in range(len(self.input_sequences)):
                    
                    # Get intervals
                    seglist = self.seglists[i_utt]
                    input_sequence = self.input_sequences[i_utt]

                    # Get segments for intervals
                    segments = []
                    for i, j in seglist:
                        segments.append(input_sequence[i:j, :])

                    batch_x_lengths = [i.shape[0] for i in segments]

                    # Pad to maximum length in batch
                    batch_x_padded = np.zeros(
                        (len(batch_x_lengths), np.max(batch_x_lengths),
                        self.n_input), dtype=NP_DTYPE
                        )
                    for i, length in enumerate(batch_x_lengths):
                        seq = segments[i]
                        batch_x_padded[i, :length, :] = seq

                    yield (batch_x_padded, batch_x_lengths)

        batch_iterator = DenseBatchFeedIterator(x_data, seglists)
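        # Each iteration of this iterator yields one utterance: a padded array
        # of shape [n_segments, max_segment_length, n_input] together with the
        # corresponding segment lengths, so a "batch" is all search intervals
        # of a single utterance.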

        # Build model
        x = tf.placeholder(TF_DTYPE, [None, None, options_dict["n_input"]])
        x_lengths = tf.placeholder(TF_ITYPE, [None])
        model = build_model(x, x_lengths, options_dict)

        # Embed data
        # batch_iterator = batching.SimpleIterator(x_data, len(x_data), False)
        saver = tf.train.Saver()
        n_outputs = 0
        embed_dict = {}
        with tf.Session() as session:
            saver.restore(session, model_fn)
            # print(datetime.now())
            print(
                "Applying model to segments ({} iterations):".format(
                len(x_data))
                )
            for i_batch, (batch_x_padded, batch_x_lengths) in \
                    tqdm(enumerate(batch_iterator)):
                cur_output = session.run(
                    [model["encoding"]], feed_dict={x: batch_x_padded,
                    x_lengths: batch_x_lengths}
                    )[0]
                utt_key = keys[i_batch]
                seglist = seglists[i_batch]
                embeddings = []
                for i in range(cur_output.shape[0]):
                    embeddings.append(cur_output[i, :])
                    n_outputs += 1
                embed_dict[utt_key] = np.array(embeddings)
            # print(datetime.now())

            # for batch_x_padded, batch_x_lengths in batch_iterator:
            #     np_x = batch_x_padded
            #     np_x_lengths = batch_x_lengths
            #     np_z = session.run(
            #         [model["encoding"]], feed_dict={x: np_x, x_lengths:
            #         np_x_lengths}
            #         )[0]
            #     break  # single batch

    print("Processed {} out of {} inputs".format(n_outputs, n_intervals))
    
    return embed_dict
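
# In this dense variant, embed_dict maps each utterance key to an array of
# shape [n_segments, d_embedding], one embedding per interval in the
# utterance's seglist (sketch of the output layout, inferred from the loop
# above).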
# Example #4

def train_rnn(options_dict):
    """Train and save a Siamese triplets model."""

    # PRELIMINARY

    print(datetime.now())

    # Output directory
    hasher = hashlib.md5(repr(sorted(options_dict.items())).encode("ascii"))
    hash_str = hasher.hexdigest()[:10]
    model_dir = path.join(
        "models", options_dict["train_lang"] + "." + options_dict["train_tag"],
        options_dict["script"], hash_str)
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Model directory:", model_dir)
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    print("Options:", options_dict)

    # Random seeds
    random.seed(options_dict["rnd_seed"])
    np.random.seed(options_dict["rnd_seed"])
    tf.set_random_seed(options_dict["rnd_seed"])

    # LOAD AND FORMAT DATA

    # Training data
    if "+" in options_dict["train_lang"]:
        train_languages = options_dict["train_lang"].split("+")
        train_x = []
        train_labels = []
        train_lengths = []
        train_keys = []
        train_speakers = []
        for cur_lang in train_languages:
            cur_npz_fn = path.join(
                "data", cur_lang,
                "train." + options_dict["train_tag"] + ".npz")
            (cur_train_x, cur_train_labels, cur_train_lengths, cur_train_keys,
             cur_train_speakers) = data_io.load_data_from_npz(
                 cur_npz_fn, None)
            (cur_train_x, cur_train_labels, cur_train_lengths, cur_train_keys,
             cur_train_speakers) = data_io.filter_data(
                 cur_train_x,
                 cur_train_labels,
                 cur_train_lengths,
                 cur_train_keys,
                 cur_train_speakers,
                 n_min_tokens_per_type=options_dict["n_min_tokens_per_type"],
                 n_max_types=options_dict["n_max_types"],
                 n_max_tokens=options_dict["n_max_tokens"],
                 n_max_tokens_per_type=options_dict["n_max_tokens_per_type"],
             )
            train_x.extend(cur_train_x)
            train_labels.extend(cur_train_labels)
            train_lengths.extend(cur_train_lengths)
            train_keys.extend(cur_train_keys)
            train_speakers.extend(cur_train_speakers)
        print("Total no. items:", len(train_labels))
    else:
        npz_fn = path.join("data", options_dict["train_lang"],
                           "train." + options_dict["train_tag"] + ".npz")
        train_x, train_labels, train_lengths, train_keys, train_speakers = (
            data_io.load_data_from_npz(npz_fn, None))
        train_x, train_labels, train_lengths, train_keys, train_speakers = (
            data_io.filter_data(
                train_x,
                train_labels,
                train_lengths,
                train_keys,
                train_speakers,
                n_min_tokens_per_type=options_dict["n_min_tokens_per_type"],
                n_max_types=options_dict["n_max_types"],
                n_max_tokens=options_dict["n_max_tokens"],
                n_max_tokens_per_type=options_dict["n_max_tokens_per_type"],
            ))

    # Convert training labels to integers
    train_label_set = list(set(train_labels))
    label_to_id = {}
    for i, label in enumerate(sorted(train_label_set)):
        label_to_id[label] = i
    train_y = []
    for label in train_labels:
        train_y.append(label_to_id[label])
    train_y = np.array(train_y, dtype=NP_ITYPE)
    options_dict["n_classes"] = len(label_to_id)
    print("Total no. classes:", options_dict["n_classes"])

    # Validation data
    if options_dict["val_lang"] is not None:
        npz_fn = path.join("data", options_dict["val_lang"], "val.npz")
        val_x, val_labels, val_lengths, val_keys, val_speakers = (
            data_io.load_data_from_npz(npz_fn))

    # Truncate and limit dimensionality
    max_length = options_dict["max_length"]
    d_frame = 13  # None
    options_dict["n_input"] = d_frame
    print("Limiting dimensionality:", d_frame)
    print("Limiting length:", max_length)
    data_io.trunc_and_limit_dim(train_x, train_lengths, d_frame, max_length)
    if options_dict["val_lang"] is not None:
        data_io.trunc_and_limit_dim(val_x, val_lengths, d_frame, max_length)

    # DEFINE MODEL

    print(datetime.now())
    print("Building model")

    # Model filenames
    intermediate_model_fn = path.join(model_dir, "rnn.tmp.ckpt")
    model_fn = path.join(model_dir, "rnn.best_val.ckpt")

    # Model graph
    x = tf.placeholder(TF_DTYPE, [None, None, options_dict["n_input"]])
    x_lengths = tf.placeholder(TF_ITYPE, [None])
    y = tf.placeholder(TF_ITYPE, [None])
    network_dict = build_rnn_from_options_dict(x, x_lengths, options_dict)
    encoding = network_dict["encoding"]
    output = network_dict["output"]

    # Cross entropy loss
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y,
                                                       logits=output))
    optimizer = tf.train.AdamOptimizer(
        learning_rate=options_dict["learning_rate"]).minimize(loss)

    # Save options_dict
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Writing:", options_dict_fn)
    with open(options_dict_fn, "wb") as f:
        pickle.dump(options_dict, f, -1)

    # TRAIN AND VALIDATE

    print(datetime.now())
    print("Training model")

    # Validation function
    def samediff_val(normalise=True):
        # Embed validation
        np.random.seed(options_dict["rnd_seed"])
        val_batch_iterator = batching.SimpleIterator(val_x, len(val_x), False)
        labels = [val_labels[i] for i in val_batch_iterator.indices]
        speakers = [val_speakers[i] for i in val_batch_iterator.indices]
        saver = tf.train.Saver()
        with tf.Session() as session:
            saver.restore(session, val_model_fn)
            for batch_x_padded, batch_x_lengths in val_batch_iterator:
                np_x = batch_x_padded
                np_x_lengths = batch_x_lengths
                np_z = session.run([encoding],
                                   feed_dict={
                                       x: np_x,
                                       x_lengths: np_x_lengths
                                   })[0]
                break  # single batch

        embed_dict = {}
        for i, utt_key in enumerate(
            [val_keys[i] for i in val_batch_iterator.indices]):
            embed_dict[utt_key] = np_z[i]

        # Same-different
        if normalise:
            np_z_normalised = (np_z - np_z.mean(axis=0)) / np_z.std(axis=0)
            distances = pdist(np_z_normalised, metric="cosine")
        else:
            distances = pdist(np_z, metric="cosine")
        # matches = samediff.generate_matches_array(labels)
        # ap, prb = samediff.average_precision(
        #     distances[matches == True], distances[matches == False]
        #     )
        word_matches = samediff.generate_matches_array(labels)
        speaker_matches = samediff.generate_matches_array(speakers)
        sw_ap, sw_prb, swdp_ap, swdp_prb = samediff.average_precision_swdp(
            distances[np.logical_and(word_matches, speaker_matches)],
            distances[np.logical_and(word_matches, speaker_matches == False)],
            distances[word_matches == False])
        # return [sw_prb, -sw_ap, swdp_prb, -swdp_ap]
        return [swdp_prb, -swdp_ap]

    # Train RNN
    val_model_fn = intermediate_model_fn
    train_batch_iterator = batching.LabelledBucketIterator(
        train_x,
        train_y,
        options_dict["batch_size"],
        n_buckets=options_dict["n_buckets"],
        shuffle_every_epoch=True)
    if options_dict["val_lang"] is None:
        record_dict = training.train_fixed_epochs(
            options_dict["n_epochs"],
            optimizer,
            loss,
            train_batch_iterator, [x, x_lengths, y],
            save_model_fn=intermediate_model_fn)
    else:
        record_dict = training.train_fixed_epochs_external_val(
            options_dict["n_epochs"],
            optimizer,
            loss,
            train_batch_iterator, [x, x_lengths, y],
            samediff_val,
            save_model_fn=intermediate_model_fn,
            save_best_val_model_fn=model_fn,
            n_val_interval=options_dict["n_val_interval"])

    # Save record
    record_dict_fn = path.join(model_dir, "record_dict.pkl")
    print("Writing:", record_dict_fn)
    with open(record_dict_fn, "wb") as f:
        pickle.dump(record_dict, f, -1)

    # FINAL EXTRINSIC EVALUATION

    if options_dict["val_lang"] is not None:
        print("Performing final validation")
        if options_dict["extrinsic_usefinal"]:
            val_model_fn = intermediate_model_fn
        else:
            val_model_fn = model_fn
        # sw_prb, sw_ap, swdp_prb, swdp_ap = samediff_val(normalise=False)
        swdp_prb, swdp_ap = samediff_val(normalise=False)
        # sw_ap = -sw_ap
        swdp_ap = -swdp_ap
        # (sw_prb_normalised, sw_ap_normalised, swdp_prb_normalised,
        #     swdp_ap_normalised) = samediff_val(normalise=True)
        swdp_prb_normalised, swdp_ap_normalised = samediff_val(normalise=True)
        # sw_ap_normalised = -sw_ap_normalised
        swdp_ap_normalised = -swdp_ap_normalised
        print("Validation SWDP AP:", swdp_ap)
        print("Validation SWDP AP with normalisation:", swdp_ap_normalised)
        ap_fn = path.join(model_dir, "val_ap.txt")
        print("Writing:", ap_fn)
        with open(ap_fn, "w") as f:
            f.write(str(swdp_ap) + "\n")
            f.write(str(swdp_ap_normalised) + "\n")
        print("Validation model:", val_model_fn)

    print(datetime.now())
# Example #5

def apply_model(model_fn, subset, batch_size=None):

    # assert language is None  # to-do

    # Load the model options
    model_dir = path.split(model_fn)[0]
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Reading:", options_dict_fn)
    with open(options_dict_fn, "rb") as f:
        options_dict = pickle.load(f)

    # Load data
    npz_fn = path.join(options_dict["data_dir"], subset + ".npz")
    # if language is not None:
    #     if "buckeye" in npz_fn:
    #         npz_fn = npz_fn.replace("buckeye", language)
    #     elif "xitsonga" in npz_fn:
    #         npz_fn = npz_fn.replace("xitsonga", language)
    x_data, labels, lengths, keys = data_io.load_data_from_npz(npz_fn)

    if "cnn" in options_dict["script"]:

        # Pad and flatten data
        x_data, _ = data_io.pad_sequences(x_data, options_dict["max_length"],
                                          True)
        x_data = np.transpose(x_data, (0, 2, 1))
        x_data = x_data.reshape((-1, options_dict["d_in"]))

        # Build model
        x = tf.placeholder(TF_DTYPE, [None, options_dict["d_in"]])
        model = build_model(x, None, options_dict)

        # Embed data
        if batch_size is None:
            batch_iterator = batching.LabelledIterator(x_data, None,
                                                       x_data.shape[0], False)
            saver = tf.train.Saver()
            with tf.Session() as session:
                saver.restore(session, model_fn)
                for batch_x in batch_iterator:
                    np_z = session.run([model["encoding"]],
                                       feed_dict={x: batch_x})[0]
                    break  # single batch
        else:
            assert False, "need to implement"

    else:  # rnn

        # Truncate and limit dimensionality
        data_io.trunc_and_limit_dim(x_data, lengths, options_dict["n_input"],
                                    options_dict["max_length"])

        # Build model
        x = tf.placeholder(TF_DTYPE, [None, None, options_dict["n_input"]])
        x_lengths = tf.placeholder(TF_ITYPE, [None])
        model = build_model(x, x_lengths, options_dict)

        # Embed data
        if batch_size is None:
            batch_iterator = batching.SimpleIterator(x_data, len(x_data),
                                                     False)
            saver = tf.train.Saver()
            with tf.Session() as session:
                saver.restore(session, model_fn)
                for batch_x_padded, batch_x_lengths in batch_iterator:
                    np_x = batch_x_padded
                    np_x_lengths = batch_x_lengths
                    np_z = session.run([model["encoding"]],
                                       feed_dict={
                                           x: np_x,
                                           x_lengths: np_x_lengths
                                       })[0]
                    break  # single batch
        else:
            batch_iterator = batching.SimpleIterator(x_data, batch_size, False)
            saver = tf.train.Saver()
            with tf.Session() as session:
                saver.restore(session, model_fn)
                np_z = []
                for batch_x_padded, batch_x_lengths in batch_iterator:
                    np_x = batch_x_padded
                    np_x_lengths = batch_x_lengths
                    cur_np_z = session.run([model["encoding"]],
                                           feed_dict={
                                               x: np_x,
                                               x_lengths: np_x_lengths
                                           })[0]
                    print("!", cur_np_z.shape)
                    np_z.append(cur_np_z)
                np_z = np.vstack(np_z)
                print("!", np_z.shape)

    embed_dict = {}
    for i, utt_key in enumerate([keys[i] for i in batch_iterator.indices]):
        embed_dict[utt_key] = np_z[i]

    return embed_dict
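
# A minimal usage sketch (hypothetical path and batch size): when batch_size
# is given, the data are embedded in chunks and the per-batch encodings are
# stacked, which avoids holding the whole subset in memory at once.
#
#     embed_dict = apply_model(
#         "models/buckeye.utd/cae.best_val.ckpt", "test", batch_size=512
#         )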
def train_vae(options_dict):
    """Train and save a VAE."""

    # PRELIMINARY

    print(datetime.now())

    # Output directory
    hasher = hashlib.md5(repr(sorted(options_dict.items())).encode("ascii"))
    hash_str = hasher.hexdigest()[:10]
    # model_dir = path.join(
    #     "models", path.split(options_dict["data_dir"])[-1] + "." +
    #     options_dict["train_tag"], options_dict["script"], hash_str
    #     )
    model_dir = path.join(
        "models",
        path.split(options_dict["data_dir"])[-1] + "." +
        options_dict["train_tag"], "vae_model", "70")
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Model directory:", model_dir)
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    print("Options:", options_dict)

    # Random seeds
    np.random.seed(options_dict["rnd_seed"])
    tf.set_random_seed(options_dict["rnd_seed"])

    # LOAD AND FORMAT DATA

    # Training data
    train_tag = options_dict["train_tag"]
    min_length = None
    if options_dict["train_tag"] == "rnd":
        min_length = options_dict["min_length"]
        train_tag = "all"
    npz_fn = path.join(options_dict["data_dir"], "train." + train_tag + ".npz")
    train_x, train_labels, train_lengths, train_keys = (
        data_io.load_data_from_npz(npz_fn, min_length))

    # Validation data
    if options_dict["use_test_for_val"]:
        npz_fn = path.join(options_dict["data_dir"], "test.npz")
    else:
        npz_fn = path.join(options_dict["data_dir"], "val.npz")
    val_x, val_labels, val_lengths, val_keys = (
        data_io.load_data_from_npz(npz_fn))

    # Truncate and limit dimensionality
    max_length = options_dict["max_length"]
    d_frame = 108  # None
    options_dict["n_input"] = d_frame
    print("Limiting dimensionality:", d_frame)
    print("Limiting length:", max_length)
    data_io.trunc_and_limit_dim(train_x, train_lengths, d_frame, max_length)
    data_io.trunc_and_limit_dim(val_x, val_lengths, d_frame, max_length)

    # DEFINE MODEL

    print(datetime.now())
    print("Building model")

    # Model filenames
    intermediate_model_fn = path.join(model_dir, "vae.tmp.ckpt")
    model_fn = path.join(model_dir, "vae.best_val.ckpt")

    # Model graph
    x = tf.placeholder(TF_DTYPE, [None, None, options_dict["n_input"]])
    x_lengths = tf.placeholder(TF_ITYPE, [None])
    network_dict = build_vae_from_options_dict(x, x_lengths, options_dict)
    encoder_states = network_dict["encoder_states"]
    vae = network_dict["latent_layer"]
    z_mean = vae["z_mean"]
    z_log_sigma_sq = vae["z_log_sigma_sq"]
    z = vae["z"]
    y = network_dict["decoder_output"]
    mask = network_dict["mask"]

    # VAE loss
    # reconstruction_loss = tf.reduce_mean(
    #     tf.reduce_sum(tf.reduce_mean(tf.square(x - y), -1), -1) /
    #     tf.reduce_sum(mask, 1)
    #     )  # https://danijar.com/variable-sequence-lengths-in-tensorflow/
    # loss = tflego.vae_loss_gaussian(
    #     x, y, options_dict["sigma_sq"], z_mean, z_log_sigma_sq,
    #     reconstruction_loss=reconstruction_loss
    #     )
    reconstruction_loss = 1. / (2 * options_dict["sigma_sq"]) * tf.reduce_mean(
        tf.reduce_sum(tf.reduce_mean(tf.square(x - y), -1), -1) /
        tf.reduce_sum(mask, 1)
    )  # https://danijar.com/variable-sequence-lengths-in-tensorflow/
    regularisation_loss = -0.5 * tf.reduce_sum(
        1 + z_log_sigma_sq - tf.square(z_mean) - tf.exp(z_log_sigma_sq), 1)
    loss = reconstruction_loss + tf.reduce_mean(regularisation_loss)
    # loss = tflego.vae_loss_gaussian(
    #     x, y, options_dict["sigma_sq"], z_mean, z_log_sigma_sq,
    #     reconstruction_loss=reconstruction_loss
    #     )
    optimizer = tf.train.AdamOptimizer(
        learning_rate=options_dict["learning_rate"]).minimize(loss)
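    # Sketch of the objective implemented above: a per-frame spherical
    # Gaussian decoder with fixed variance sigma_sq and a standard normal
    # prior on z, so that
    #
    #   recon = (1/(2*sigma_sq)) * mean_n [ sum_t mean_d (x - y)^2 / length_n ]
    #   KL(n) = -0.5 * sum_k (1 + log sigma_nk^2 - mu_nk^2 - sigma_nk^2)
    #   loss  = recon + mean_n KL(n)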

    # TRAIN AND VALIDATE

    print(datetime.now())
    print("Training model")

    # Validation function
    def samediff_val(normalise=False):
        # Embed validation
        np.random.seed(options_dict["rnd_seed"])
        val_batch_iterator = batching.SimpleIterator(val_x, len(val_x), False)
        labels = [val_labels[i] for i in val_batch_iterator.indices]
        saver = tf.train.Saver()
        with tf.Session() as session:
            saver.restore(session, val_model_fn)
            for batch_x_padded, batch_x_lengths in val_batch_iterator:
                np_x = batch_x_padded
                np_x_lengths = batch_x_lengths
                np_z = session.run([z_mean],
                                   feed_dict={
                                       x: np_x,
                                       x_lengths: np_x_lengths
                                   })[0]
                break  # single batch

        embed_dict = {}
        for i, utt_key in enumerate(
            [val_keys[i] for i in val_batch_iterator.indices]):
            embed_dict[utt_key] = np_z[i]

        # Same-different
        if normalise:
            np_z_normalised = (np_z - np_z.mean(axis=0)) / np_z.std(axis=0)
            distances = pdist(np_z_normalised, metric="cosine")
            matches = samediff.generate_matches_array(labels)
            ap, prb = samediff.average_precision(distances[matches == True],
                                                 distances[matches == False])
        else:
            distances = pdist(np_z, metric="cosine")
            matches = samediff.generate_matches_array(labels)
            ap, prb = samediff.average_precision(distances[matches == True],
                                                 distances[matches == False])
        return [prb, -ap]

    # Train VAE
    val_model_fn = intermediate_model_fn
    if options_dict["train_tag"] == "rnd":
        train_batch_iterator = batching.RandomSegmentsIterator(
            train_x,
            options_dict["batch_size"],
            options_dict["n_buckets"],
            shuffle_every_epoch=True)
    else:
        train_batch_iterator = batching.SimpleBucketIterator(
            train_x,
            options_dict["batch_size"],
            options_dict["n_buckets"],
            shuffle_every_epoch=True)
    record_dict = training.train_fixed_epochs_external_val(
        options_dict["n_epochs"],
        optimizer,
        loss,
        train_batch_iterator, [x, x_lengths],
        samediff_val,
        save_model_fn=intermediate_model_fn,
        save_best_val_model_fn=model_fn,
        n_val_interval=options_dict["n_val_interval"])

    # Save record
    record_dict_fn = path.join(model_dir, "record_dict.pkl")
    print("Writing:", record_dict_fn)
    with open(record_dict_fn, "wb") as f:
        pickle.dump(record_dict, f, -1)

    # Save options_dict
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Writing:" + options_dict_fn)
    with open(options_dict_fn, "wb") as f:
        pickle.dump(options_dict, f, -1)

    # FINAL EXTRINSIC EVALUATION

    print("Performing final validation")
    if options_dict["extrinsic_usefinal"]:
        val_model_fn = intermediate_model_fn
    else:
        val_model_fn = model_fn
    prb, ap = samediff_val(normalise=False)
    ap = -ap
    prb_normalised, ap_normalised = samediff_val(normalise=True)
    ap_normalised = -ap_normalised
    print("Validation AP:", ap)
    print("Validation AP with normalisation:", ap_normalised)
    ap_fn = path.join(model_dir, "val_ap.txt")
    print("Writing:", ap_fn)
    with open(ap_fn, "w") as f:
        f.write(str(ap) + "\n")
        f.write(str(ap_normalised) + "\n")
    print("Validation model:", val_model_fn)

    print(datetime.now())
def apply_model(model_fn, npz_fn):

    # Load the model options
    model_dir = path.split(model_fn)[0]
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Reading:", options_dict_fn)
    with open(options_dict_fn, "rb") as f:
        options_dict = pickle.load(f)

    # Load data
    x_data, labels, lengths, keys, speakers = data_io.load_data_from_npz(
        npz_fn)

    if "cnn" in options_dict["script"]:

        # Pad and flatten data
        x_data, _ = data_io.pad_sequences(x_data, options_dict["max_length"],
                                          True)
        x_data = np.transpose(x_data, (0, 2, 1))
        x_data = x_data.reshape((-1, options_dict["d_in"]))

        # Build model
        x = tf.placeholder(TF_DTYPE, [None, options_dict["d_in"]])
        model = build_model(x, None, options_dict)

        # Embed data
        batch_iterator = batching.LabelledIterator(x_data, None,
                                                   x_data.shape[0], False)
        saver = tf.train.Saver()
        with tf.Session() as session:
            saver.restore(session, model_fn)
            for batch_x in batch_iterator:
                np_z = session.run([model["encoding"]], feed_dict={x:
                                                                   batch_x})[0]
                break  # single batch

    else:  # rnn

        # Truncate and limit dimensionality
        data_io.trunc_and_limit_dim(x_data, lengths, options_dict["n_input"],
                                    options_dict["max_length"])

        # Build model
        x = tf.placeholder(TF_DTYPE, [None, None, options_dict["n_input"]])
        x_lengths = tf.placeholder(TF_ITYPE, [None])
        model = build_model(x, x_lengths, options_dict)

        # Embed data
        batch_iterator = batching.SimpleIterator(x_data, len(x_data), False)
        saver = tf.train.Saver()
        with tf.Session() as session:
            saver.restore(session, model_fn)
            for batch_x_padded, batch_x_lengths in batch_iterator:
                np_x = batch_x_padded
                np_x_lengths = batch_x_lengths
                np_z = session.run([model["encoding"]],
                                   feed_dict={
                                       x: np_x,
                                       x_lengths: np_x_lengths
                                   })[0]
                break  # single batch

    embed_dict = {}
    for i, utt_key in enumerate([keys[i] for i in batch_iterator.indices]):
        embed_dict[utt_key] = np_z[i]

    return embed_dict
# Example #8

def train_cae(options_dict):
    """Train and save a CAE."""

    # PRELIMINARY
    assert (options_dict["train_tag"] != "rnd") or \
        (options_dict["cae_n_epochs"] == 0), \
        "random segment training only possible with AE (cae_n_epochs=0)"
    print(datetime.now())

    # Output directory
    hasher = hashlib.md5(repr(sorted(options_dict.items())).encode("ascii"))
    hash_str = hasher.hexdigest()[:10]
    model_dir = path.join(
        "models", options_dict["train_lang"] + "." + options_dict["train_tag"],
        options_dict["script"], hash_str)
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Model directory:", model_dir)
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    print("Options:", options_dict)

    # Random seeds
    random.seed(options_dict["rnd_seed"])
    np.random.seed(options_dict["rnd_seed"])
    tf.set_random_seed(options_dict["rnd_seed"])

    # LOAD AND FORMAT DATA

    # Training data
    train_tag = options_dict["train_tag"]
    min_length = None
    if options_dict["train_tag"] == "rnd":
        min_length = options_dict["min_length"]
        train_tag = "all"
    if "+" in options_dict["train_lang"]:
        train_x = []
        train_labels = []
        train_lengths = []
        train_keys = []
        train_speakers = []
        train_languages = []
        for cur_lang in options_dict["train_lang"].split("+"):
            cur_npz_fn = path.join("data", cur_lang,
                                   "train." + train_tag + ".npz")
            (cur_train_x, cur_train_labels, cur_train_lengths, cur_train_keys,
             cur_train_speakers) = data_io.load_data_from_npz(
                 cur_npz_fn, min_length)
            (cur_train_x, cur_train_labels, cur_train_lengths, cur_train_keys,
             cur_train_speakers) = data_io.filter_data(
                 cur_train_x,
                 cur_train_labels,
                 cur_train_lengths,
                 cur_train_keys,
                 cur_train_speakers,
                 n_min_tokens_per_type=options_dict["n_min_tokens_per_type"],
                 n_max_types=options_dict["n_max_types"],
                 n_max_tokens=options_dict["n_max_tokens"],
                 n_max_tokens_per_type=options_dict["n_max_tokens_per_type"],
             )
            train_x.extend(cur_train_x)
            train_labels.extend(cur_train_labels)
            train_lengths.extend(cur_train_lengths)
            train_keys.extend(cur_train_keys)
            train_speakers.extend(cur_train_speakers)
            train_languages.extend([cur_lang] * len(cur_train_speakers))
        print("Total no. items:", len(train_labels))
    else:
        npz_fn = path.join("data", options_dict["train_lang"],
                           "train." + train_tag + ".npz")
        train_x, train_labels, train_lengths, train_keys, train_speakers = (
            data_io.load_data_from_npz(npz_fn, min_length))
        train_x, train_labels, train_lengths, train_keys, train_speakers = (
            data_io.filter_data(
                train_x,
                train_labels,
                train_lengths,
                train_keys,
                train_speakers,
                n_min_tokens_per_type=options_dict["n_min_tokens_per_type"],
                n_max_types=options_dict["n_max_types"],
                n_max_tokens=options_dict["n_max_tokens"],
                n_max_tokens_per_type=options_dict["n_max_tokens_per_type"],
            ))

    # Pretraining data (if specified)
    pretrain_tag = options_dict["pretrain_tag"]
    if options_dict["pretrain_tag"] is not None:
        min_length = None
        if options_dict["pretrain_tag"] == "rnd":
            min_length = options_dict["min_length"]
            pretrain_tag = "all"
        npz_fn = path.join("data", options_dict["train_lang"],
                           "train." + pretrain_tag + ".npz")
        (pretrain_x, pretrain_labels, pretrain_lengths, pretrain_keys,
         pretrain_speakers) = data_io.load_data_from_npz(npz_fn, min_length)

    # Validation data
    if options_dict["val_lang"] is not None:
        npz_fn = path.join("data", options_dict["val_lang"], "val.npz")
        val_x, val_labels, val_lengths, val_keys, val_speakers = (
            data_io.load_data_from_npz(npz_fn))

    # # Convert training speakers, if speaker embeddings
    # # To-do: Untested
    # if options_dict["d_speaker_embedding"] is not None:
    #     train_speaker_set = set(train_speakers)
    #     speaker_to_id = {}
    #     id_to_speaker = {}
    #     for i, speaker in enumerate(sorted(list(train_speaker_set))):
    #         speaker_to_id[speaker] = i
    #         id_to_speaker[i] = speaker
    #     train_speaker_ids = []
    #     for speaker in train_speakers:
    #         train_speaker_ids.append(speaker_to_id[speaker])
    #     train_speaker_ids = np.array(train_speaker_ids, dtype=NP_ITYPE)
    #     options_dict["n_speakers"] = max(speaker_to_id.values()) + 1

    # Convert training languages to integers, if language embeddings
    if options_dict["d_language_embedding"] is not None:
        train_language_set = set(train_languages)
        language_to_id = {}
        id_to_lang = {}
        for i, lang in enumerate(sorted(list(train_language_set))):
            language_to_id[lang] = i
            id_to_lang[i] = lang
        train_language_ids = []
        for lang in train_languages:
            train_language_ids.append(language_to_id[lang])
        train_language_ids = np.array(train_language_ids, dtype=NP_ITYPE)
        options_dict["n_languages"] = max(language_to_id.values()) + 1

    # Truncate and limit dimensionality
    max_length = options_dict["max_length"]
    d_frame = 13  # None
    options_dict["n_input"] = d_frame
    print("Limiting dimensionality:", d_frame)
    print("Limiting length:", max_length)
    data_io.trunc_and_limit_dim(train_x, train_lengths, d_frame, max_length)
    if options_dict["pretrain_tag"] is not None:
        data_io.trunc_and_limit_dim(pretrain_x, pretrain_lengths, d_frame,
                                    max_length)
    if options_dict["val_lang"] is not None:
        data_io.trunc_and_limit_dim(val_x, val_lengths, d_frame, max_length)

    # Get pairs
    pair_list = batching.get_pair_list(train_labels,
                                       both_directions=True,
                                       n_max_pairs=options_dict["n_max_pairs"])
    print("No. pairs:", int(len(pair_list) / 2.0))  # pairs in both directions

    # DEFINE MODEL

    print(datetime.now())
    print("Building model")

    # Model filenames
    pretrain_intermediate_model_fn = path.join(model_dir, "ae.tmp.ckpt")
    pretrain_model_fn = path.join(model_dir, "ae.best_val.ckpt")
    intermediate_model_fn = path.join(model_dir, "cae.tmp.ckpt")
    model_fn = path.join(model_dir, "cae.best_val.ckpt")

    # Model graph
    a = tf.placeholder(TF_DTYPE, [None, None, options_dict["n_input"]])
    a_lengths = tf.placeholder(TF_ITYPE, [None])
    b = tf.placeholder(TF_DTYPE, [None, None, options_dict["n_input"]])
    b_lengths = tf.placeholder(TF_ITYPE, [None])
    network_dict = build_cae_from_options_dict(a, a_lengths, b_lengths,
                                               options_dict)
    mask = network_dict["mask"]
    z = network_dict["z"]
    y = network_dict["y"]
    # if options_dict["d_speaker_embedding"] is not None:
    #     speaker_id = network_dict["speaker_id"]
    if options_dict["d_language_embedding"] is not None:
        language_id = network_dict["language_id"]

    # Reconstruction loss
    loss = tf.reduce_mean(
        tf.reduce_sum(tf.reduce_mean(tf.square(b - y), -1), -1) /
        tf.reduce_sum(mask, 1)
    )  # https://danijar.com/variable-sequence-lengths-in-tensorflow/
    optimizer = tf.train.AdamOptimizer(
        learning_rate=options_dict["learning_rate"]).minimize(loss)

    # Save options_dict
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Writing:", options_dict_fn)
    with open(options_dict_fn, "wb") as f:
        pickle.dump(options_dict, f, -1)

    # AUTOENCODER PRETRAINING: TRAIN AND VALIDATE

    print(datetime.now())
    print("Pretraining model")

    # Validation function
    def samediff_val(normalise=True):
        # Embed validation
        np.random.seed(options_dict["rnd_seed"])
        val_batch_iterator = batching.SimpleIterator(val_x, len(val_x), False)
        labels = [val_labels[i] for i in val_batch_iterator.indices]
        speakers = [val_speakers[i] for i in val_batch_iterator.indices]
        saver = tf.train.Saver()
        with tf.Session() as session:
            saver.restore(session, val_model_fn)
            for batch_x_padded, batch_x_lengths in val_batch_iterator:
                np_x = batch_x_padded
                np_x_lengths = batch_x_lengths
                np_z = session.run([z],
                                   feed_dict={
                                       a: np_x,
                                       a_lengths: np_x_lengths
                                   })[0]
                break  # single batch

        embed_dict = {}
        for i, utt_key in enumerate(
            [val_keys[i] for i in val_batch_iterator.indices]):
            embed_dict[utt_key] = np_z[i]

        # Same-different
        if normalise:
            np_z_normalised = (np_z - np_z.mean(axis=0)) / np_z.std(axis=0)
            distances = pdist(np_z_normalised, metric="cosine")
        else:
            distances = pdist(np_z, metric="cosine")
        # matches = samediff.generate_matches_array(labels)
        # ap, prb = samediff.average_precision(
        #     distances[matches == True], distances[matches == False]
        #     )
        word_matches = samediff.generate_matches_array(labels)
        speaker_matches = samediff.generate_matches_array(speakers)
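        # The distances are split into same-word same-speaker, same-word
        # different-speaker, and different-word pairs before computing the
        # same-word (SW) and same-word different-speaker (SWDP) AP.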
        sw_ap, sw_prb, swdp_ap, swdp_prb = samediff.average_precision_swdp(
            distances[np.logical_and(word_matches, speaker_matches)],
            distances[np.logical_and(word_matches, speaker_matches == False)],
            distances[word_matches == False])
        # return [sw_prb, -sw_ap, swdp_prb, -swdp_ap]
        return [swdp_prb, -swdp_ap]

    # Train AE
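    # samediff_val restores whichever checkpoint val_model_fn points to, so
    # during pretraining this is the intermediate AE checkpoint.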
    val_model_fn = pretrain_intermediate_model_fn
    if options_dict["pretrain_tag"] is not None:
        if options_dict["pretrain_tag"] == "rnd":
            train_batch_iterator = batching.RandomSegmentsIterator(
                pretrain_x,
                options_dict["ae_batch_size"],
                options_dict["ae_n_buckets"],
                shuffle_every_epoch=True,
                paired=True)
        else:
            train_batch_iterator = batching.PairedBucketIterator(
                pretrain_x, [(i, i) for i in range(len(pretrain_x))],
                options_dict["ae_batch_size"],
                options_dict["ae_n_buckets"],
                shuffle_every_epoch=True,
                language_ids=None
                if options_dict["d_language_embedding"] is None else
                train_language_ids,
                flip_output=options_dict["flip_output"])
    else:
        if options_dict["train_tag"] == "rnd":
            train_batch_iterator = batching.RandomSegmentsIterator(
                train_x,
                options_dict["ae_batch_size"],
                options_dict["ae_n_buckets"],
                shuffle_every_epoch=True,
                paired=True)
        else:
            train_batch_iterator = batching.PairedBucketIterator(
                train_x, [(i, i) for i in range(len(train_x))],
                options_dict["ae_batch_size"],
                options_dict["ae_n_buckets"],
                shuffle_every_epoch=True,
                language_ids=None
                if options_dict["d_language_embedding"] is None else
                train_language_ids,
                flip_output=options_dict["flip_output"])
    if options_dict["d_language_embedding"] is None:
        if options_dict["val_lang"] is None:
            ae_record_dict = training.train_fixed_epochs(
                options_dict["ae_n_epochs"],
                optimizer,
                loss,
                train_batch_iterator, [a, a_lengths, b, b_lengths],
                save_model_fn=pretrain_intermediate_model_fn)
        else:
            ae_record_dict = training.train_fixed_epochs_external_val(
                options_dict["ae_n_epochs"],
                optimizer,
                loss,
                train_batch_iterator, [a, a_lengths, b, b_lengths],
                samediff_val,
                save_model_fn=pretrain_intermediate_model_fn,
                save_best_val_model_fn=pretrain_model_fn,
                n_val_interval=options_dict["ae_n_val_interval"])
    else:
        if options_dict["val_lang"] is None:
            ae_record_dict = training.train_fixed_epochs(
                options_dict["ae_n_epochs"],
                optimizer,
                loss,
                train_batch_iterator,
                [a, a_lengths, b, b_lengths, language_id],
                save_model_fn=pretrain_intermediate_model_fn)
        else:
            ae_record_dict = training.train_fixed_epochs_external_val(
                options_dict["ae_n_epochs"],
                optimizer,
                loss,
                train_batch_iterator,
                [a, a_lengths, b, b_lengths, language_id],
                samediff_val,
                save_model_fn=pretrain_intermediate_model_fn,
                save_best_val_model_fn=pretrain_model_fn,
                n_val_interval=options_dict["ae_n_val_interval"])

    # CORRESPONDENCE TRAINING: TRAIN AND VALIDATE

    if options_dict["cae_n_epochs"] > 0:
        print("Training model")

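        # Choose the AE checkpoint to initialise the CAE from: the
        # best-validation checkpoint by default, the final intermediate
        # checkpoint if pretrain_usefinal is set or there is no validation
        # language (in which case no best-validation checkpoint was saved),
        # and no checkpoint at all if AE pretraining was skipped.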
        cae_pretrain_model_fn = pretrain_model_fn
        if (options_dict["pretrain_usefinal"]
                or options_dict["val_lang"] is None):
            cae_pretrain_model_fn = pretrain_intermediate_model_fn
        if options_dict["ae_n_epochs"] == 0:
            cae_pretrain_model_fn = None

        # Train CAE
        val_model_fn = intermediate_model_fn
        train_batch_iterator = batching.PairedBucketIterator(
            train_x,
            pair_list,
            batch_size=options_dict["cae_batch_size"],
            n_buckets=options_dict["cae_n_buckets"],
            shuffle_every_epoch=True,
            language_ids=None if options_dict["d_language_embedding"] is None
            else train_language_ids,
            flip_output=options_dict["flip_output"])
        if options_dict["d_language_embedding"] is None:
            if options_dict["val_lang"] is None:
                cae_record_dict = training.train_fixed_epochs(
                    options_dict["cae_n_epochs"],
                    optimizer,
                    loss,
                    train_batch_iterator, [a, a_lengths, b, b_lengths],
                    save_model_fn=intermediate_model_fn,
                    load_model_fn=cae_pretrain_model_fn)
            else:
                cae_record_dict = training.train_fixed_epochs_external_val(
                    options_dict["cae_n_epochs"],
                    optimizer,
                    loss,
                    train_batch_iterator, [a, a_lengths, b, b_lengths],
                    samediff_val,
                    save_model_fn=intermediate_model_fn,
                    save_best_val_model_fn=model_fn,
                    n_val_interval=options_dict["cae_n_val_interval"],
                    load_model_fn=cae_pretrain_model_fn)
        else:
            if options_dict["val_lang"] is None:
                cae_record_dict = training.train_fixed_epochs(
                    options_dict["cae_n_epochs"],
                    optimizer,
                    loss,
                    train_batch_iterator,
                    [a, a_lengths, b, b_lengths, language_id],
                    save_model_fn=intermediate_model_fn,
                    load_model_fn=cae_pretrain_model_fn)
            else:
                cae_record_dict = training.train_fixed_epochs_external_val(
                    options_dict["cae_n_epochs"],
                    optimizer,
                    loss,
                    train_batch_iterator,
                    [a, a_lengths, b, b_lengths, language_id],
                    samediff_val,
                    save_model_fn=intermediate_model_fn,
                    save_best_val_model_fn=model_fn,
                    n_val_interval=options_dict["cae_n_val_interval"],
                    load_model_fn=cae_pretrain_model_fn)

    # Save record
    record_dict_fn = path.join(model_dir, "record_dict.pkl")
    print("Writing:", record_dict_fn)
    with open(record_dict_fn, "wb") as f:
        pickle.dump(ae_record_dict, f, -1)
        if options_dict["cae_n_epochs"] > 0:
            pickle.dump(cae_record_dict, f, -1)

    # FINAL EXTRINSIC EVALUATION

    if options_dict["val_lang"] is not None:
        print("Performing final validation")
        if options_dict["cae_n_epochs"] == 0:
            if options_dict["extrinsic_usefinal"]:
                val_model_fn = pretrain_intermediate_model_fn
            else:
                val_model_fn = pretrain_model_fn
        else:
            if options_dict["extrinsic_usefinal"]:
                val_model_fn = intermediate_model_fn
            else:
                val_model_fn = model_fn
        # sw_prb, sw_ap, swdp_prb, swdp_ap = samediff_val(normalise=False)
        swdp_prb, swdp_ap = samediff_val(normalise=False)
        # sw_ap = -sw_ap
        swdp_ap = -swdp_ap
        swdp_prb_normalised, swdp_ap_normalised = samediff_val(normalise=True)
        # sw_ap_normalised = -sw_ap_normalised
        swdp_ap_normalised = -swdp_ap_normalised
        print("Validation SWDP AP:", swdp_ap)
        print("Validation SWDP AP with normalisation:", swdp_ap_normalised)
        ap_fn = path.join(model_dir, "val_ap.txt")
        print("Writing:", ap_fn)
        with open(ap_fn, "w") as f:
            f.write(str(swdp_ap) + "\n")
            f.write(str(swdp_ap_normalised) + "\n")
        print("Validation model:", val_model_fn)

    print(datetime.now())
Example #9
def train_siamese(options_dict):
    """Train and save a Siamese triplets model."""

    # PRELIMINARY

    print(datetime.now())

    # Output directory
    hasher = hashlib.md5(repr(sorted(options_dict.items())).encode("ascii"))
    hash_str = hasher.hexdigest()[:10]
    model_dir = path.join(
        "models",
        path.split(options_dict["data_dir"])[-1] + "." +
        options_dict["train_tag"], options_dict["script"], hash_str)
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Model directory:", model_dir)
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)
    print("Options:", options_dict)

    # Random seeds
    np.random.seed(options_dict["rnd_seed"])
    tf.set_random_seed(options_dict["rnd_seed"])

    # LOAD AND FORMAT DATA

    # Training data
    train_tag = options_dict["train_tag"]
    npz_fn = path.join(options_dict["data_dir"], "train." + train_tag + ".npz")
    train_x, train_labels, train_lengths, train_keys = (
        data_io.load_data_from_npz(npz_fn, None))

    # Convert training labels to integers
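    # The semi-hard triplet loss below expects integer class labels, so each
    # unique label is mapped to an index.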
    train_label_set = list(set(train_labels))
    label_to_id = {}
    for i, label in enumerate(sorted(train_label_set)):
        label_to_id[label] = i
    train_y = []
    for label in train_labels:
        train_y.append(label_to_id[label])
    train_y = np.array(train_y, dtype=NP_ITYPE)

    # Validation data
    if options_dict["use_test_for_val"]:
        npz_fn = path.join(options_dict["data_dir"], "test.npz")
    else:
        npz_fn = path.join(options_dict["data_dir"], "val.npz")
    val_x, val_labels, val_lengths, val_keys = data_io.load_data_from_npz(
        npz_fn)

    # Truncate and limit dimensionality
    max_length = options_dict["max_length"]
    d_frame = 13  # None
    options_dict["n_input"] = d_frame
    print("Limiting dimensionality:", d_frame)
    print("Limiting length:", max_length)
    data_io.trunc_and_limit_dim(train_x, train_lengths, d_frame, max_length)
    data_io.trunc_and_limit_dim(val_x, val_lengths, d_frame, max_length)

    # DEFINE MODEL

    print(datetime.now())
    print("Building model")

    # Model filenames
    intermediate_model_fn = path.join(model_dir, "siamese.tmp.ckpt")
    model_fn = path.join(model_dir, "siamese.best_val.ckpt")

    # Model graph
    x = tf.placeholder(TF_DTYPE, [None, None, options_dict["n_input"]])
    x_lengths = tf.placeholder(TF_ITYPE, [None])
    y = tf.placeholder(TF_ITYPE, [None])
    network_dict = build_siamese_from_options_dict(x, x_lengths, options_dict)
    output = network_dict["output"]

    # Semi-hard triplets loss
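    # Semi-hard negatives are those further from the anchor than the positive
    # but still within the margin; the loss pushes them out beyond the margin.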
    loss = tf.contrib.losses.metric_learning.triplet_semihard_loss(
        labels=y, embeddings=output, margin=options_dict["margin"])
    optimizer = tf.train.AdamOptimizer(
        learning_rate=options_dict["learning_rate"]).minimize(loss)

    # TRAIN AND VALIDATE

    print(datetime.now())
    print("Training model")

    # Validation function
    def samediff_val(normalise=False):
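        """Embed the validation set and return [prb, -ap].

        The AP is negated so that smaller values indicate better models,
        which appears to be the convention expected by the validation-tracking
        training function.
        """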
        # Embed validation
        np.random.seed(options_dict["rnd_seed"])
        val_batch_iterator = batching.SimpleIterator(val_x, len(val_x), False)
        labels = [val_labels[i] for i in val_batch_iterator.indices]
        saver = tf.train.Saver()
        with tf.Session() as session:
            saver.restore(session, val_model_fn)
            for batch_x_padded, batch_x_lengths in val_batch_iterator:
                np_x = batch_x_padded
                np_x_lengths = batch_x_lengths
                np_z = session.run([output],
                                   feed_dict={
                                       x: np_x,
                                       x_lengths: np_x_lengths
                                   })[0]
                break  # single batch

        embed_dict = {}
        for i, utt_key in enumerate(
            [val_keys[i] for i in val_batch_iterator.indices]):
            embed_dict[utt_key] = np_z[i]

        # Same-different
        if normalise:
            np_z_normalised = (np_z - np_z.mean(axis=0)) / np_z.std(axis=0)
            distances = pdist(np_z_normalised, metric="cosine")
        else:
            distances = pdist(np_z, metric="cosine")
        matches = samediff.generate_matches_array(labels)
        ap, prb = samediff.average_precision(distances[matches == True],
                                             distances[matches == False])
        return [prb, -ap]

    # Train Siamese model
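    # During training samediff_val evaluates the most recently saved
    # intermediate checkpoint.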
    val_model_fn = intermediate_model_fn
    train_batch_iterator = batching.LabelledBucketIterator(
        train_x,
        train_y,
        options_dict["batch_size"],
        n_buckets=options_dict["n_buckets"],
        shuffle_every_epoch=True)
    record_dict = training.train_fixed_epochs_external_val(
        options_dict["n_epochs"],
        optimizer,
        loss,
        train_batch_iterator, [x, x_lengths, y],
        samediff_val,
        save_model_fn=intermediate_model_fn,
        save_best_val_model_fn=model_fn,
        n_val_interval=options_dict["n_val_interval"])

    # Save record
    record_dict_fn = path.join(model_dir, "record_dict.pkl")
    print("Writing:", record_dict_fn)
    with open(record_dict_fn, "wb") as f:
        pickle.dump(record_dict, f, -1)

    # Save options_dict
    options_dict_fn = path.join(model_dir, "options_dict.pkl")
    print("Writing:" + options_dict_fn)
    with open(options_dict_fn, "wb") as f:
        pickle.dump(options_dict, f, -1)

    # FINAL EXTRINSIC EVALUATION

    print("Performing final validation")
    if options_dict["extrinsic_usefinal"]:
        val_model_fn = intermediate_model_fn
    else:
        val_model_fn = model_fn
    prb, ap = samediff_val(normalise=False)
    ap = -ap
    prb_normalised, ap_normalised = samediff_val(normalise=True)
    ap_normalised = -ap_normalised
    print("Validation AP:", ap)
    print("Validation AP with normalisation:", ap_normalised)
    ap_fn = path.join(model_dir, "val_ap.txt")
    print("Writing:", ap_fn)
    with open(ap_fn, "w") as f:
        f.write(str(ap) + "\n")
        f.write(str(ap_normalised) + "\n")
    print("Validation model:", val_model_fn)

    print(datetime.now())