예제 #1
0
def main():
    args = check_argv()

    print datetime.datetime.now()

    print "Reading:", args.npz_fn
    npz = np.load(args.npz_fn)

    print datetime.datetime.now()

    # if args.normalize:
    #     print "Normalizing embeddings"
    # else:
    print "Ordering embeddings"
    n_embeds = 0
    X = []
    ids = []
    for label in sorted(npz):
        ids.append(label)
        X.append(npz[label])
        n_embeds += 1
    X = np.array(X)
    print "No. embeddings:", n_embeds
    print "Embedding dimensionality:", X.shape[1]

    print datetime.datetime.now()

    print "Calculating distances"
    distances = pdist(X, metric=args.metric)

    print datetime.datetime.now()

    print "Getting labels"
    labels = []
    for utt_id in ids:
        word = "_".join(utt_id.split("_")[:-2])
        labels.append(word)

    if args.mean_ap:
        print datetime.datetime.now()
        print "Calculating mean average precision"
        mean_ap, mean_prb, ap_dict = samediff.mean_average_precision(distances, labels)
        print "Mean average precision:", mean_ap
        print "Mean precision-recall breakeven:", mean_prb

    print datetime.datetime.now()

    print "Calculating average precision"
    matches = samediff.generate_matches_array(labels)

    ap, prb = samediff.average_precision(distances[matches == True], distances[matches == False])
    print "Average precision:", ap
    print "Precision-recall breakeven:", prb

    print datetime.datetime.now()
예제 #2
0
def main():
    args = check_argv()

    print(datetime.now())

    print("Reading:", args.npz_fn)
    npz = np.load(args.npz_fn)

    print(datetime.now())

    print("Ordering embeddings")
    n_embeds = 0
    X = []
    ids = []
    for label in sorted(npz):
        ids.append(label)
        X.append(npz[label])
        n_embeds += 1
    X = np.array(X)
    print("No. embeddings:", n_embeds)
    print("Embedding dimensionality:", X.shape[1])

    if args.mvn:
        normed = (X - X.mean(axis=0)) / X.std(axis=0)
        X = normed

    print(datetime.now())

    print("Calculating distances")
    metric = args.metric
    if metric == "kl":
        import scipy.stats
        metric = scipy.stats.entropy
    distances = pdist(X, metric=metric)

    print(datetime.now())

    print("Getting labels and speakers")
    labels = []
    speakers = []
    for utt_id in ids:
        utt_id = utt_id.split("_")
        word = utt_id[0]
        speaker = utt_id[1]
        labels.append(word)
        speakers.append(speaker)

    if args.mean_ap:
        print(datetime.now())
        print("Calculating mean average precision")
        mean_ap, mean_prb, ap_dict = samediff.mean_average_precision(
            distances, labels)
        print("Mean average precision:", mean_ap)
        print("Mean precision-recall breakeven:", mean_prb)

    print(datetime.now())

    print("Calculating average precision")
    # matches = samediff.generate_matches_array(labels)  # Temp
    word_matches = samediff.generate_matches_array(labels)
    speaker_matches = samediff.generate_matches_array(speakers)
    print("No. same-word pairs:", sum(word_matches))
    print("No. same-speaker pairs:", sum(speaker_matches))

    sw_ap, sw_prb, swdp_ap, swdp_prb = samediff.average_precision_swdp(
        distances[np.logical_and(word_matches, speaker_matches)],
        distances[np.logical_and(word_matches, speaker_matches == False)],
        distances[word_matches == False])
    print("-" * 79)
    print("Average precision: {:.8f}".format(sw_ap))
    print("Precision-recall breakeven: {:.8f}".format(sw_prb))
    print("SWDP average precision: {:.8f}".format(swdp_ap))
    print("SWDP precision-recall breakeven: {:.8f}".format(swdp_prb))
    print("-" * 79)

    print(datetime.now())
def main():
    args = check_argv()

    print(datetime.now())

    print("Reading:", args.npz_fn)
    npz = np.load(args.npz_fn)

    print(datetime.now())

    # if args.normalize:
    #     print("Normalizing embeddings")
    # else:
    print("Ordering embeddings")
    n_embeds = 0
    X = []
    ids = []
    for label in sorted(npz):
        ids.append(label)
        X.append(npz[label])
        n_embeds += 1
    X = np.array(X)
    print("No. embeddings:", n_embeds)
    print("Embedding dimensionality:", X.shape[1])

    if args.mvn:
        normed = (X - X.mean(axis=0)) / X.std(axis=0)
        X = normed

    print(datetime.now())

    print("Calculating distances")
    distances = pdist(X, metric=args.metric)

    print(datetime.now())

    print("Getting labels")
    labels = []
    for utt_id in ids:
        word = utt_id.split("_")[0]
        labels.append(word)

    if args.mean_ap:
        print(datetime.now())
        print("Calculating mean average precision")
        mean_ap, mean_prb, ap_dict = samediff.mean_average_precision(
            distances, labels)
        print("Mean average precision:", mean_ap)
        print("Mean precision-recall breakeven:", mean_prb)

    print(datetime.now())

    print("Calculating average precision")
    matches = samediff.generate_matches_array(labels)

    ap, prb = samediff.average_precision(distances[matches == True],
                                         distances[matches == False])
    print("Average precision:", ap)
    print("Precision-recall breakeven:", prb)

    print(datetime.now())