Example #1
def compare_to_human_correctness(ratings, ratings_net, leeway_start=0.3, leeway_end=0.3):
    net_words = []
    for rating, rating_net in zip(ratings, ratings_net):
        net_start = rating_net[0] if rating_net is not None else None
        net_duration = rating_net[1] if rating_net is not None else None

        correct = False
        if net_start is not None:
            # The net detected a word: count it as correct only if the human rater
            # also heard the target word and both boundaries lie within the leeway.
            if not response_missing(rating) and not response_with_synonym(rating):
                if abs(rating.p_delay - net_start) <= leeway_start and abs(
                        rating.p_delay + rating.duration - (net_start + net_duration)) <= leeway_end:
                    correct = True
        else:
            # The net detected nothing: count it as correct only if the human also
            # marked the response as missing or as a synonym.
            if response_missing(rating) or response_with_synonym(rating):
                correct = True

        net_words.append(NetRatedWord(rating.word, correct, rating.vp, rating.date, rating.wav_path))

    return net_words
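
A minimal usage sketch, not part of the original module: it assumes `ratings` and `ratings_net` are already loaded and that `NetRatedWord` exposes the correctness flag passed to its constructor under the name `correct`.

net_words = compare_to_human_correctness(ratings, ratings_net,
                                         leeway_start=0.3, leeway_end=0.3)
# `w.correct` assumes NetRatedWord stores its second constructor argument under that name.
n_correct = sum(1 for w in net_words if w.correct)
print('Net agrees with the human rater on {0}/{1} recordings ({2:.1%})'.format(
    n_correct, len(net_words), n_correct / len(net_words)))
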
Example #2
def correlate_to_human_scores(human_ratings_file,
                              ratings_net,
                              leeway_start=0.3,
                              leeway_end=0.3):
    ratings: List[SnodgrassWordRating] = load_pickled(human_ratings_file)

    dists = []
    human_scores = []
    for rating, rating_net in zip(ratings, ratings_net):
        net_start = rating_net[0] if rating_net is not None else None
        net_duration = rating_net[1] if rating_net is not None else None
        net_dist = rating_net[2] if rating_net is not None else None

        if net_start is None:
            continue

        # Only keep recordings where the word was actually produced and the net's
        # segmentation agrees with the human annotation within the leeway.
        if not response_missing(rating) and not response_with_synonym(rating):
            if abs(rating.p_delay - net_start) <= leeway_start and abs(
                    rating.p_delay + rating.duration -
                    (net_start + net_duration)) <= leeway_end:
                dists.append(net_dist)
                human_scores.append(rating.p_score)

    dists = np.array(dists)
    human_scores = np.array(human_scores)
    # A smaller distance indicates a closer match to the references, so the
    # inverse distance is correlated with the human score.
    return pearsonr(1 / dists, human_scores)
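
A hedged sketch of reporting the result, assuming `pearsonr` is `scipy.stats.pearsonr` (which yields a correlation coefficient and a two-sided p-value); the ratings file name is a placeholder, not a file from the original project.

# 'snodgrass_ratings.pkl' is a made-up path, used here only for illustration.
r, p_value = correlate_to_human_scores('snodgrass_ratings.pkl', ratings_net)
print('Pearson r between 1/distance and human p_score: {0:.3f} (p = {1:.3g})'.format(
    r, p_value))
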
Example #3
def compare_to_human(ratings, ratings_net, leeway_start=0.3, leeway_end=0.3):
    true_positive = 0
    false_positive_wrong_time = 0
    false_positive_wasnt_there = 0
    true_negative = 0
    false_negative = 0
    real_positive = 0
    real_negative = 0
    segment_counts = []
    distances = []
    # Each entry is (outcome_code, frames_before_rise) with 0 = true positive,
    # 1 = false positive (wrong time), 2 = false positive (word wasn't there),
    # 3 = true negative, 4 = false negative.
    frames_before_rise = []
    for rating, rating_net in zip(ratings, ratings_net):
        net_start = rating_net[0] if rating_net is not None else None
        net_duration = rating_net[1] if rating_net is not None else None
        net_dist = rating_net[2] if rating_net is not None else None
        net_n_segments = rating_net[3] + 1 if rating_net is not None else 0
        net_frames_before_rise = rating_net[4] + 1 if rating_net is not None and len(rating_net) > 4 else None

        segment_counts.append(net_n_segments)
        distances.append(net_dist)

        # Ground truth from the human rating: the word counts as present only if
        # the response is neither missing nor a synonym.
        word_present = not response_missing(rating) and not response_with_synonym(rating)
        if word_present:
            real_positive += 1
        else:
            real_negative += 1

        if net_start is not None:
            if word_present:
                if abs(rating.p_delay - net_start) <= leeway_start and abs(
                        rating.p_delay + rating.duration - (net_start + net_duration)) <= leeway_end:
                    true_positive += 1
                    frames_before_rise.append((0, net_frames_before_rise))
                else:
                    false_positive_wrong_time += 1
                    frames_before_rise.append((1, net_frames_before_rise))
            else:
                false_positive_wasnt_there += 1
                frames_before_rise.append((2, net_frames_before_rise))
        else:
            if word_present:
                false_negative += 1
                frames_before_rise.append((4, net_frames_before_rise))
            else:
                true_negative += 1
                frames_before_rise.append((3, net_frames_before_rise))
    return false_negative, false_positive_wasnt_there, false_positive_wrong_time, real_negative, real_positive, \
           true_negative, true_positive, np.array(segment_counts), np.array(distances), np.array(frames_before_rise)
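
The counts returned above form a full confusion matrix, so standard detection metrics can be derived from them. The sketch below is illustrative only and assumes `ratings` and `ratings_net` are already loaded; pooling the two kinds of false positives into a single number is an assumption, not something the original code prescribes.

(fn, fp_wasnt_there, fp_wrong_time, real_neg, real_pos,
 tn, tp, segment_counts, distances, frames_before_rise) = \
    compare_to_human(ratings, ratings_net)
fp = fp_wasnt_there + fp_wrong_time
precision = tp / (tp + fp) if tp + fp > 0 else 0.0
recall = tp / (tp + fn) if tp + fn > 0 else 0.0
print('precision={0:.3f}, recall={1:.3f} on {2} positive / {3} negative recordings'
      .format(precision, recall, real_pos, real_neg))
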
Example #4
def print_synonym_stats(ratings):
    synonyms = 0
    util.ensure_exists('test')
    for i, rating in enumerate(ratings):
        if not response_missing(rating) and response_with_synonym(rating):
            synonyms += 1
            print("{i:04d}: {0} -> {1}".format(rating.word,
                                               rating.synonym,
                                               i=i))
            # Cut the rated response out of the recording and save it to 'test'
            # for manual inspection.
            data, rate = soundfile.read(rating.wav_path, always_2d=True)
            segment = data[time2sample(rating.p_delay, rate):time2sample(
                rating.p_delay + rating.duration, rate), 0]
            sound_util.write_array_to_wav(
                os.path.join('test', "{i:04d}".format(i=i)), segment, rate)
    # Absolute count and fraction of ratings answered with a synonym.
    print(synonyms, synonyms / len(ratings))
Example #5
            # Nested helper: uses rating, starts, stacked_dists, best_start,
            # skip_starting and rating_idx from the enclosing function's scope.
            def plot_dists(savefig=False):
                import matplotlib.pyplot as plt
                from matplotlib import rc

                rc('text', usetex=True)
                rc('font', size=12)
                rc('legend', fontsize=12)
                font = {'family': 'serif', 'serif': ['cmr10']}
                rc('font', **font)

                if not response_missing(rating) and not response_with_synonym(
                        rating):
                    p_delay_adjusted = rating.p_delay
                    plt.axvline(p_delay_adjusted,
                                color='xkcd:bright lavender',
                                dashes=[5, 5],
                                zorder=2,
                                label='Word start')
                    plt.axvline(p_delay_adjusted + rating.duration,
                                color='xkcd:light grass green',
                                dashes=[1, 1],
                                zorder=2,
                                label='Word end')

                plt.plot([frames2time(x) + skip_starting for x in starts],
                         stacked_dists[:, -1],
                         zorder=1,
                         color='xkcd:cobalt blue')
                plt.axvline(best_start,
                            color='xkcd:lightish red',
                            dashes=[1, 0],
                            zorder=2,
                            label='Word start guess')
                plt.xlabel('Time (s)')
                plt.ylabel('Avg distance to reference examples')
                plt.legend()
                if savefig:
                    plt.savefig(
                        'plots_output/recording_dists_{0:04}.pdf'.format(
                            rating_idx),
                        dpi=300,
                        bbox_inches='tight',
                        pad_inches=0)
                else:
                    plt.show()
                plt.clf()
Example #6
def print_p_delay_stats(ratings):
    p_delays = []
    for rating in ratings:
        if not response_missing(rating):
            if rating.p_delay > 50:
                # Delays above 50 are printed for manual inspection and left out
                # of the statistics.
                key = '{0}_{1}_snodgrass_{2}_{3}'.format(
                    rating.word, rating.order, rating.vp, rating.date)
                print(key, rating.p_delay, rating.duration, rating.comment)
            else:
                p_delays.append(rating.p_delay)

    p_delays = np.array(p_delays)

    print("min={min:.3f}, median={med:.3f}, mean={mean:.3f}, max={max:.3f}".
          format(min=p_delays.min(),
                 med=np.median(p_delays),
                 mean=p_delays.mean(),
                 max=p_delays.max()))
Example #7
def ratings_stats(ratings_file):
    with open(os.path.join(processed_data_dir, ratings_file), 'rb') as f:
        ratings: List[SnodgrassWordRating] = pickle.load(f)

    bad_files = 0
    very_bad_files = 0
    p_scores = np.zeros(4, dtype=np.int32)
    for rating in ratings:
        if 'abgeschnitten' in rating.comment:
            bad_files += 1
        if rating.wav_path is None:
            very_bad_files += 1
        if not response_missing(rating):
            # p_score runs from 1 to 4; counts are stored zero-indexed.
            idx = rating.p_score - 1
            if idx > 3 or idx < 0:
                raise RuntimeError('Invalid p_score: {0}'.format(rating.p_score))
            p_scores[idx] += 1

    print('Bad files {0}/{1}'.format(bad_files, len(ratings)))
    print('Very bad files {0}/{1}'.format(very_bad_files, len(ratings)))
    return p_scores
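
A short, purely illustrative follow-up: printing the histogram returned by ratings_stats, assuming p_score values run from 1 to 4 as the bounds check above enforces. The file name is a placeholder inside processed_data_dir, not a file from the original project.

# 'snodgrass_ratings.pkl' is a placeholder file name.
p_scores = ratings_stats('snodgrass_ratings.pkl')
for score, count in enumerate(p_scores, start=1):
    print('p_score {0}: {1} recordings'.format(score, count))
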
Example #8
def net_annotate_sliding_window_framewise(run_dir,
                                          run_epoch,
                                          ratings_file_or_object,
                                          skip_starting=0,
                                          reference_vecs_override=None,
                                          reference_word_idxs_override=None,
                                          save=True,
                                          ratings_name=None,
                                          output_dir=None,
                                          plot_mode=False):
    time_start = time.time()

    if save:
        if ratings_name is None:
            ratings_name = os.path.basename(ratings_file_or_object)

        if output_dir is None:
            output_dir = 'output'
        util.ensure_exists(output_dir)

        output_file = '{0}_epoch_{1}_{2}_full{3}_skip{4:.3f}.netrating_faster' \
            .format(os.path.basename(run_dir), run_epoch, ratings_name,
                    'own_segmentation', skip_starting)
        output_file = os.path.join(output_dir, output_file)

    net, config, _, _, _, _, train_scp, feature_mean, feature_std, word2id, mean_sub, var_norm = \
        load_net(run_dir, epoch=run_epoch, logger=None, train=False)
    ratings, vecs_train, word_idxs_train = load_common_rating_data(
        ratings_file_or_object, run_dir, run_epoch)
    word_lengths = load_pickled(scp2word_lengths_file(train_scp))

    reference_vecs = reference_vecs_override if reference_vecs_override is not None else vecs_train
    reference_word_idxs = reference_word_idxs_override if reference_word_idxs_override is not None else word_idxs_train

    output: List[List[NetAnnotatedSegment]] = [[] for _ in range(len(ratings))]
    for rating, rating_idx, start_sec, end_sec, audio, sample_rate in \
            plain_audio_generator(ratings, skip_starting=skip_starting):
        if audio.shape[0] == 0:
            # Nothing is left after skipping the start of the recording, so store
            # a placeholder segment with large sentinel distances.
            output[rating_idx].append(
                NetAnnotatedSegment(0, 0, 0, np.array([1000]),
                                    np.array([1000]), rating.word, rating.vp,
                                    rating.date, rating.wav_path))
        else:
            mean_length, max_length = word_lengths[rating.word]
            spacing_frames = 5
            # TODO: half of mean duration may not be the best choice for every word
            duration_frames = time2frames(mean_length / 2)

            full_features = new_features(audio, sample_rate, feature_mean,
                                         feature_std, mean_sub, var_norm)
            starts = subsegment_starts(full_features.shape[0], duration_frames,
                                       spacing_frames)

            # much faster than segmenting first and then getting the features of each small segment
            features = [(full_features[s:s + duration_frames]) for s in starts]
            num_segments = len(features)

            # In addition to the full window, also evaluate sub-segment portions
            # of it (75%, 50% and 25%).
            subsegment_portions = [0.75, 0.5, 0.25]
            size_multiplier = len(subsegment_portions) + 1

            features_plus_extra, lengths = stack_features(
                features, num_segments, duration_frames, subsegment_portions,
                size_multiplier)

            stacked_dists = evaluate_stacked_features(
                net, config, features_plus_extra, lengths, rating,
                reference_vecs, reference_word_idxs, num_segments,
                size_multiplier)

            bs_idx, best_start, best_end, best_duration = \
                select_best_segmentation(stacked_dists, starts, end_sec, max_length, skip_starting)
            best_duration_frames = time2frames(best_duration)

            def plot_dists(savefig=False):
                import matplotlib.pyplot as plt
                from matplotlib import rc

                rc('text', usetex=True)
                rc('font', size=12)
                rc('legend', fontsize=12)
                font = {'family': 'serif', 'serif': ['cmr10']}
                rc('font', **font)

                if not response_missing(rating) and not response_with_synonym(
                        rating):
                    p_delay_adjusted = rating.p_delay
                    plt.axvline(p_delay_adjusted,
                                color='xkcd:bright lavender',
                                dashes=[5, 5],
                                zorder=2,
                                label='Word start')
                    plt.axvline(p_delay_adjusted + rating.duration,
                                color='xkcd:light grass green',
                                dashes=[1, 1],
                                zorder=2,
                                label='Word end')

                plt.plot([frames2time(x) + skip_starting for x in starts],
                         stacked_dists[:, -1],
                         zorder=1,
                         color='xkcd:cobalt blue')
                plt.axvline(best_start,
                            color='xkcd:lightish red',
                            dashes=[1, 0],
                            zorder=2,
                            label='Word start guess')
                plt.xlabel('Time (s)')
                plt.ylabel('Avg distance to reference examples')
                plt.legend()
                if savefig:
                    plt.savefig(
                        'plots_output/recording_dists_{0:04}.pdf'.format(
                            rating_idx),
                        dpi=300,
                        bbox_inches='tight',
                        pad_inches=0)
                else:
                    plt.show()
                plt.clf()

            if plot_mode:
                if not response_missing(rating) and not response_with_synonym(
                        rating):
                    plot_dists(savefig=True)
                if rating_idx >= 10:
                    break

            dists_best_guess, features_best_guess = \
                evaluate_stepwise(net, config, full_features[starts[bs_idx]:starts[bs_idx] + best_duration_frames],
                                  rating, reference_vecs, reference_word_idxs)

            output[rating_idx].append(
                NetAnnotatedSegment(best_start, best_end, 0, dists_best_guess,
                                    features_best_guess[:, 0, :].mean(axis=1),
                                    rating.word, rating.vp, rating.date,
                                    rating.wav_path))
        print('Finished rating number {0}'.format(rating_idx + 1))

    if save and not plot_mode:
        save_pickled(output, output_file)
        if hasattr(net, 'beta'):
            beta = net.beta.detach().cpu().numpy()
            beta_out_file = os.path.join(
                output_dir,
                "{0}_epoch_{1}.beta".format(os.path.basename(run_dir),
                                            run_epoch))
            with open(beta_out_file, 'wb') as f:
                np.save(f, beta)

    print('Elapsed sec: {0:.3f}'.format(time.time() - time_start))
    return output, (net.beta.detach().cpu().numpy()
                    if hasattr(net, 'beta') else None)
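
A hedged invocation sketch: the run directory, epoch and ratings file below are made-up values, and the keyword arguments simply spell out options the function already exposes; this is not a prescribed pipeline from the original project.

# 'runs/example_run', epoch 40 and 'snodgrass_ratings.pkl' are placeholders.
segments, beta = net_annotate_sliding_window_framewise(
    'runs/example_run',
    run_epoch=40,
    ratings_file_or_object='snodgrass_ratings.pkl',
    skip_starting=0.5,
    output_dir='output',
    plot_mode=False)
# `segments` holds one list of NetAnnotatedSegment objects per rating;
# `beta` is the net's learned beta parameter, or None if the net has none.
print('{0} ratings annotated'.format(len(segments)))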