コード例 #1
0
def generate_nonlin_stats_plots(exp_dir,
                                output_dir,
                                plot,
                                comparison_dir=None,
                                start_iter=1,
                                latex_report=None):
    """Write nonlinearity statistics tables and, optionally, percentile plots.

    The statistics of every directory are obtained from
    ``log_parse.parse_progress_logs_for_nonlinearity_stats``. For each
    component of ``exp_dir`` a tab-separated table is written to
    ``<output_dir>/nonlinstats_<component>.log``. When ``plot`` is true, one
    PDF per component (one per gate for components whose type is
    ``'LstmNonlinearity'``) is drawn by ``plot_a_nonlin_component`` and saved
    in ``output_dir``.

    Args:
        exp_dir: Main experiment directory; only its tables are written out.
        output_dir: Directory receiving the ``.log`` tables and ``.pdf``
            figures.
        plot: If true, generate the figures in addition to the tables.
        comparison_dir: Optional list of further experiment directories that
            are plotted alongside ``exp_dir``.
        start_iter: First iteration to plot (must be >= 1); forwarded to
            ``plot_a_nonlin_component``.
        latex_report: Optional report object; every saved figure is
            registered through its ``add_figure(path, caption)`` method.
    """
    assert start_iter >= 1

    comparison_dir = [] if comparison_dir is None else comparison_dir
    dirs = [exp_dir] + comparison_dir
    stats_per_dir = {}

    for dir_name in dirs:
        stats_per_component_per_iter = (
            log_parse.parse_progress_logs_for_nonlinearity_stats(dir_name))
        for key in stats_per_component_per_iter:
            if len(stats_per_component_per_iter[key]['stats']) == 0:
                # Only a warning: processing of the component continues.
                logger.warning("Couldn't find any rows for the "
                               "nonlin stats plot, not generating it")
        stats_per_dir[dir_name] = stats_per_component_per_iter

    # Convert the nonlin stats into tables: one row per iteration, with the
    # iteration number as first column, ordered by iteration.
    stat_tables_per_component_per_dir = {}
    for dir_name in dirs:
        stat_tables_per_component = {}
        for component_name, comp_data in stats_per_dir[dir_name].items():
            comp_stats = comp_data['stats']
            # sorted() instead of keys().sort(): dict views cannot be sorted
            # in place on Python 3.
            stat_tables_per_component[component_name] = [
                [iter_num] + comp_stats[iter_num]
                for iter_num in sorted(comp_stats)]
        stat_tables_per_component_per_dir[dir_name] = stat_tables_per_component

    # Only the main experiment directory gets its tables written to disk.
    main_stat_tables = stat_tables_per_component_per_dir[exp_dir]
    for component_name, iter_stats in main_stat_tables.items():
        with open(
                "{dir}/nonlinstats_{comp_name}.log".format(
                    dir=output_dir, comp_name=component_name), "w") as f:
            f.write(
                "Iteration\tValueMean\tValueStddev\tDerivMean\tDerivStddev\t"
                "Value_5th\tValue_50th\tValue_95th\t"
                "Deriv_5th\tDeriv_50th\tDeriv_95th\n")
            f.write("\n".join(
                "\t".join(str(x) for x in row) for row in iter_stats))

    if plot:
        main_component_names = sorted(main_stat_tables)

        # Components that are present in every directory.
        plot_component_names = set(main_component_names)
        for dir_name in dirs:
            plot_component_names = plot_component_names.intersection(
                stats_per_dir[dir_name])
        plot_component_names = sorted(plot_component_names)
        if plot_component_names != main_component_names:
            logger.warning("""The components in all the neural networks in the
            given experiment dirs are not the same, so comparison plots are
            provided only for common component names. Make sure that these are
            comparable experiments before analyzing these plots.""")

        fig = plt.figure()

        # Trim the common prefix back to its last '/' so that it ends on a
        # path-component boundary.
        # NOTE(review): if the common prefix contains no '/', rfind() returns
        # -1 and that value is forwarded unchanged -- confirm that
        # plot_a_nonlin_component copes with it.
        common_prefix = os.path.commonprefix(dirs)
        prefix_length = common_prefix.rfind('/')
        common_prefix = common_prefix[0:prefix_length]

        for component_name in main_component_names:
            component_type = stats_per_dir[exp_dir][component_name]['type']
            comp_name = latex_compliant_name(component_name)
            if component_type == 'LstmNonlinearity':
                # One figure per entry of g_lstm_gate (indices 0..4).
                for i in range(5):
                    gate = g_lstm_gate[i]
                    lgd = plot_a_nonlin_component(
                        fig, dirs, stat_tables_per_component_per_dir,
                        component_name, common_prefix, prefix_length,
                        'Lstm-' + gate, start_iter, i)
                    caption = (
                        "Per-dimension average-(value, derivative) "
                        "percentiles for "
                        "{0}-{1}".format(component_name, gate))
                    fig.suptitle(caption)
                    figfile_name = (
                        '{dir}/nonlinstats_{comp_name}_{gate}.pdf'.format(
                            dir=output_dir, comp_name=comp_name, gate=gate))
                    fig.savefig(figfile_name,
                                bbox_extra_artists=(lgd, ),
                                bbox_inches='tight')
                    if latex_report is not None:
                        latex_report.add_figure(figfile_name, caption)
            else:
                lgd = plot_a_nonlin_component(
                    fig, dirs, stat_tables_per_component_per_dir,
                    component_name, common_prefix, prefix_length,
                    component_type, start_iter, 0)
                caption = (
                    "Per-dimension average-(value, derivative) "
                    "percentiles for "
                    "{0}".format(component_name))
                fig.suptitle(caption)
                figfile_name = '{dir}/nonlinstats_{comp_name}.pdf'.format(
                    dir=output_dir, comp_name=comp_name)
                fig.savefig(figfile_name,
                            bbox_extra_artists=(lgd, ),
                            bbox_inches='tight')
                if latex_report is not None:
                    latex_report.add_figure(figfile_name, caption)
コード例 #2
0
ファイル: generate_plots.py プロジェクト: yetigeti/kaldi
def generate_nonlin_stats_plots(exp_dir, output_dir, plot, comparison_dir=None,
                                start_iter=1, latex_report=None):
    """Write nonlinearity statistics tables and, optionally, mean/stddev plots.

    The statistics of every directory are obtained from
    ``log_parse.parse_progress_logs_for_nonlinearity_stats``. For each
    component of ``exp_dir`` a tab-separated table is written to
    ``<output_dir>/nonlinstats_<component>.log``. When ``plot`` is true, one
    PDF per component is saved in ``output_dir``; it shows mean and
    mean +/- stddev of the value (upper subplot) and of the derivative
    (lower subplot) against the iteration, one colour per directory.

    Args:
        exp_dir: Main experiment directory; only its tables are written out.
        output_dir: Directory receiving the ``.log`` tables and ``.pdf``
            figures.
        plot: If true, generate the figures in addition to the tables.
        comparison_dir: Optional list of further experiment directories that
            are plotted alongside ``exp_dir``.
        start_iter: First iteration to plot (must be >= 1); rows with a
            smaller iteration number are left out of the figures.
        latex_report: Optional report object; every saved figure is
            registered through its ``add_figure(path, caption)`` method.
    """
    assert start_iter >= 1

    comparison_dir = [] if comparison_dir is None else comparison_dir
    dirs = [exp_dir] + comparison_dir

    stats_per_dir = {}
    for dir_name in dirs:
        stats_per_dir[dir_name] = (
            log_parse.parse_progress_logs_for_nonlinearity_stats(dir_name))

    # Convert the nonlin stats into tables: one row per iteration, with the
    # iteration number as first column, ordered by iteration.
    stat_tables_per_component_per_dir = {}
    for dir_name in dirs:
        stat_tables_per_component = {}
        for component_name, comp_data in stats_per_dir[dir_name].items():
            comp_stats = comp_data['stats']
            # sorted() instead of keys().sort(): dict views cannot be sorted
            # in place on Python 3.
            stat_tables_per_component[component_name] = [
                [iter_num] + comp_stats[iter_num]
                for iter_num in sorted(comp_stats)]
        stat_tables_per_component_per_dir[dir_name] = stat_tables_per_component

    # Only the main experiment directory gets its tables written to disk.
    main_stat_tables = stat_tables_per_component_per_dir[exp_dir]
    for component_name, iter_stats in main_stat_tables.items():
        with open("{dir}/nonlinstats_{comp_name}.log".format(
                    dir=output_dir, comp_name=component_name), "w") as f:
            f.write(
                "Iteration\tValueMean\tValueStddev\tDerivMean\tDerivStddev\n")
            f.write("\n".join(
                "\t".join(str(x) for x in row) for row in iter_stats))

    if plot:
        main_component_names = sorted(main_stat_tables)

        # Components that are present in every directory.
        plot_component_names = set(main_component_names)
        for dir_name in dirs:
            plot_component_names = plot_component_names.intersection(
                stats_per_dir[dir_name])
        plot_component_names = sorted(plot_component_names)
        if plot_component_names != main_component_names:
            logger.warning("""The components in all the neural networks in the
            given experiment dirs are not the same, so comparison plots are
            provided only for common component names. Make sure that these are
            comparable experiments before analyzing these plots.""")

        fig = plt.figure()
        for component_name in main_component_names:
            fig.clf()
            plots = []
            # The colour is tied to the position of the directory in `dirs`,
            # so a directory keeps its colour even when others are skipped.
            for index, dir_name in enumerate(dirs):
                color_val = g_plot_colors[index]
                try:
                    iter_stats = (
                        stat_tables_per_component_per_dir[dir_name][
                            component_name])
                except KeyError:
                    # this component is not available in this network so
                    # there is nothing to plot for it
                    continue

                if not iter_stats:
                    # An empty table would become a 1-D array and break the
                    # column indexing below.
                    logger.warning(
                        "No nonlinearity stats rows for component {0} in "
                        "{1}, leaving it out of the plot".format(
                            component_name, dir_name))
                    continue

                # Label the axes with the type of the component actually
                # being plotted, not with a value left over from the
                # table-building loop.
                comp_type = stats_per_dir[dir_name][component_name]['type']

                data = np.array(iter_stats)
                data = data[data[:, 0] >= start_iter, :]

                # Upper subplot: value mean (column 1) +/- stddev (column 2).
                ax = plt.subplot(211)
                mp, = ax.plot(data[:, 0], data[:, 1], color=color_val,
                              label="Mean {0}".format(dir_name))
                msph, = ax.plot(data[:, 0], data[:, 1] + data[:, 2],
                                color=color_val, linestyle='--',
                                label="Mean+-Stddev {0}".format(dir_name))
                ax.plot(data[:, 0], data[:, 1] - data[:, 2],
                        color=color_val, linestyle='--')
                plots.append(mp)
                plots.append(msph)
                ax.set_ylabel('Value-{0}'.format(comp_type))
                ax.grid(True)

                # Lower subplot: derivative mean (column 3) +/- stddev
                # (column 4).
                ax = plt.subplot(212)
                ax.plot(data[:, 0], data[:, 3], color=color_val)
                ax.plot(data[:, 0], data[:, 3] + data[:, 4],
                        color=color_val, linestyle='--')
                ax.plot(data[:, 0], data[:, 3] - data[:, 4],
                        color=color_val, linestyle='--')
                ax.set_xlabel('Iteration')
                ax.set_ylabel('Derivative-{0}'.format(comp_type))
                ax.grid(True)

            # The legend is anchored below the axes; the offset grows with
            # the number of directories.
            lgd = plt.legend(handles=plots, loc='lower center',
                             bbox_to_anchor=(0.5, -0.5 + len(dirs) * -0.2),
                             ncol=1, borderaxespad=0.)
            plt.grid(True)
            fig.suptitle("Mean and stddev of the value and derivative at "
                         "{comp_name}".format(comp_name=component_name))
            comp_name = latex_compliant_name(component_name)
            figfile_name = '{dir}/nonlinstats_{comp_name}.pdf'.format(
                dir=output_dir, comp_name=comp_name)
            fig.savefig(figfile_name, bbox_extra_artists=(lgd,),
                        bbox_inches='tight')
            if latex_report is not None:
                latex_report.add_figure(
                    figfile_name,
                    "Mean and stddev of the value and derivative "
                    "at {0}".format(component_name))
コード例 #3
0
ファイル: generate_plots.py プロジェクト: jcsilva/kaldi
def generate_nonlin_stats_plots(exp_dir, output_dir, plot, comparison_dir=None,
                                start_iter=1, latex_report=None):
    """Write nonlinearity statistics tables and, optionally, percentile plots.

    The statistics of every directory are obtained from
    ``log_parse.parse_progress_logs_for_nonlinearity_stats``. For each
    component of ``exp_dir`` a tab-separated table is written to
    ``<output_dir>/nonlinstats_<component>.log``. When ``plot`` is true, one
    PDF per component (one per gate for components whose type is
    ``'LstmNonlinearity'``) is drawn by ``plot_a_nonlin_component`` and saved
    in ``output_dir``.

    Args:
        exp_dir: Main experiment directory; only its tables are written out.
        output_dir: Directory receiving the ``.log`` tables and ``.pdf``
            figures.
        plot: If true, generate the figures in addition to the tables.
        comparison_dir: Optional list of further experiment directories that
            are plotted alongside ``exp_dir``.
        start_iter: First iteration to plot (must be >= 1); forwarded to
            ``plot_a_nonlin_component``.
        latex_report: Optional report object; every saved figure is
            registered through its ``add_figure(path, caption)`` method.
    """
    assert start_iter >= 1

    comparison_dir = [] if comparison_dir is None else comparison_dir
    dirs = [exp_dir] + comparison_dir
    stats_per_dir = {}

    for dir_name in dirs:
        parsed_stats = (
            log_parse.parse_progress_logs_for_nonlinearity_stats(dir_name))
        for key in parsed_stats:
            if len(parsed_stats[key]['stats']) == 0:
                # Only a warning: processing of the component continues.
                logger.warning("Couldn't find any rows for the "
                               "nonlin stats plot, not generating it")
        stats_per_dir[dir_name] = parsed_stats

    # Convert the nonlin stats into tables: one row per iteration, with the
    # iteration number as first column, ordered by iteration.
    stat_tables_per_component_per_dir = {}
    for dir_name in dirs:
        tables = {}
        for component_name, comp_data in stats_per_dir[dir_name].items():
            comp_stats = comp_data['stats']
            # sorted() instead of keys().sort(): dict views cannot be sorted
            # in place on Python 3.
            tables[component_name] = [
                [iter_num] + comp_stats[iter_num]
                for iter_num in sorted(comp_stats)]
        stat_tables_per_component_per_dir[dir_name] = tables

    # Only the main experiment directory gets its tables written to disk.
    main_stat_tables = stat_tables_per_component_per_dir[exp_dir]
    for component_name, iter_stats in main_stat_tables.items():
        with open("{dir}/nonlinstats_{comp_name}.log".format(
                    dir=output_dir, comp_name=component_name), "w") as f:
            f.write("Iteration\tValueMean\tValueStddev\tDerivMean\tDerivStddev\t"
                    "Value_5th\tValue_50th\tValue_95th\t"
                    "Deriv_5th\tDeriv_50th\tDeriv_95th\n")
            f.write("\n".join(
                "\t".join(str(x) for x in row) for row in iter_stats))

    if plot:
        main_component_names = sorted(main_stat_tables)

        # Components that are present in every directory.
        plot_component_names = set(main_component_names)
        for dir_name in dirs:
            plot_component_names = plot_component_names.intersection(
                stats_per_dir[dir_name])
        plot_component_names = sorted(plot_component_names)
        if plot_component_names != main_component_names:
            logger.warning("""The components in all the neural networks in the
            given experiment dirs are not the same, so comparison plots are
            provided only for common component names. Make sure that these are
            comparable experiments before analyzing these plots.""")

        fig = plt.figure()

        # Trim the common prefix back to its last '/' so that it ends on a
        # path-component boundary.
        # NOTE(review): if the common prefix contains no '/', rfind() returns
        # -1 and that value is forwarded unchanged -- confirm that
        # plot_a_nonlin_component copes with it.
        common_prefix = os.path.commonprefix(dirs)
        prefix_length = common_prefix.rfind('/')
        common_prefix = common_prefix[0:prefix_length]

        for component_name in main_component_names:
            component_type = stats_per_dir[exp_dir][component_name]['type']
            comp_name = latex_compliant_name(component_name)
            if component_type == 'LstmNonlinearity':
                # One figure per entry of g_lstm_gate (indices 0..4).
                for i in range(5):
                    gate = g_lstm_gate[i]
                    lgd = plot_a_nonlin_component(
                        fig, dirs, stat_tables_per_component_per_dir,
                        component_name, common_prefix, prefix_length,
                        'Lstm-' + gate, start_iter, i)
                    caption = (
                        "Per-dimension average-(value, derivative) "
                        "percentiles for "
                        "{0}-{1}".format(component_name, gate))
                    fig.suptitle(caption)
                    figfile_name = (
                        '{dir}/nonlinstats_{comp_name}_{gate}.pdf'.format(
                            dir=output_dir, comp_name=comp_name, gate=gate))
                    fig.savefig(figfile_name, bbox_extra_artists=(lgd,),
                                bbox_inches='tight')
                    if latex_report is not None:
                        latex_report.add_figure(figfile_name, caption)
            else:
                lgd = plot_a_nonlin_component(
                    fig, dirs, stat_tables_per_component_per_dir,
                    component_name, common_prefix, prefix_length,
                    component_type, start_iter, 0)
                caption = (
                    "Per-dimension average-(value, derivative) "
                    "percentiles for "
                    "{0}".format(component_name))
                fig.suptitle(caption)
                figfile_name = '{dir}/nonlinstats_{comp_name}.pdf'.format(
                    dir=output_dir, comp_name=comp_name)
                fig.savefig(figfile_name, bbox_extra_artists=(lgd,),
                            bbox_inches='tight')
                if latex_report is not None:
                    latex_report.add_figure(figfile_name, caption)
コード例 #4
0
ファイル: generate_plots.py プロジェクト: bezhvin/Kaldi
def generate_nonlin_stats_plots(exp_dir, output_dir, plot, comparison_dir=None,
                                start_iter=1, latex_report=None):
    """Write nonlinearity statistics tables and, optionally, mean/stddev plots.

    The statistics of every directory are obtained from
    ``log_parse.parse_progress_logs_for_nonlinearity_stats``. For each
    component of ``exp_dir`` a tab-separated table is written to
    ``<output_dir>/nonlinstats_<component>.log``. When ``plot`` is true, one
    PDF per component is saved in ``output_dir``; it shows mean and
    mean +/- stddev of the value (upper subplot) and of the derivative
    (lower subplot) against the iteration, one colour per directory.

    Args:
        exp_dir: Main experiment directory; only its tables are written out.
        output_dir: Directory receiving the ``.log`` tables and ``.pdf``
            figures.
        plot: If true, generate the figures in addition to the tables.
        comparison_dir: Optional list of further experiment directories that
            are plotted alongside ``exp_dir``.
        start_iter: First iteration to plot (must be >= 1); rows with a
            smaller iteration number are left out of the figures.
        latex_report: Optional report object; every saved figure is
            registered through its ``add_figure(path, caption)`` method.
    """
    assert start_iter >= 1

    comparison_dir = [] if comparison_dir is None else comparison_dir
    dirs = [exp_dir] + comparison_dir

    stats_per_dir = {}
    for dir_name in dirs:
        stats_per_dir[dir_name] = (
            log_parse.parse_progress_logs_for_nonlinearity_stats(dir_name))

    # Convert the nonlin stats into tables: one row per iteration, with the
    # iteration number as first column, ordered by iteration.
    stat_tables_per_component_per_dir = {}
    for dir_name in dirs:
        tables = {}
        for component_name, comp_data in stats_per_dir[dir_name].items():
            comp_stats = comp_data['stats']
            # sorted() instead of keys().sort(): dict views cannot be sorted
            # in place on Python 3.
            tables[component_name] = [
                [iter_num] + comp_stats[iter_num]
                for iter_num in sorted(comp_stats)]
        stat_tables_per_component_per_dir[dir_name] = tables

    # Only the main experiment directory gets its tables written to disk.
    main_stat_tables = stat_tables_per_component_per_dir[exp_dir]
    for component_name, iter_stats in main_stat_tables.items():
        with open("{dir}/nonlinstats_{comp_name}.log".format(
                    dir=output_dir, comp_name=component_name), "w") as f:
            f.write(
                "Iteration\tValueMean\tValueStddev\tDerivMean\tDerivStddev\n")
            f.write("\n".join(
                "\t".join(str(x) for x in row) for row in iter_stats))

    if plot:
        main_component_names = sorted(main_stat_tables)

        # Components that are present in every directory.
        plot_component_names = set(main_component_names)
        for dir_name in dirs:
            plot_component_names = plot_component_names.intersection(
                stats_per_dir[dir_name])
        plot_component_names = sorted(plot_component_names)
        if plot_component_names != main_component_names:
            logger.warning("""The components in all the neural networks in the
            given experiment dirs are not the same, so comparison plots are
            provided only for common component names. Make sure that these are
            comparable experiments before analyzing these plots.""")

        fig = plt.figure()
        for component_name in main_component_names:
            fig.clf()
            legend_handles = []
            # The colour is tied to the position of the directory in `dirs`,
            # so a directory keeps its colour even when others are skipped.
            for index, dir_name in enumerate(dirs):
                color_val = g_plot_colors[index]
                try:
                    iter_stats = (
                        stat_tables_per_component_per_dir[dir_name][
                            component_name])
                except KeyError:
                    # this component is not available in this network so
                    # there is nothing to plot for it
                    continue

                if not iter_stats:
                    # An empty table would become a 1-D array and break the
                    # column indexing below.
                    logger.warning(
                        "No nonlinearity stats rows for component {0} in "
                        "{1}, leaving it out of the plot".format(
                            component_name, dir_name))
                    continue

                # Label the axes with the type of the component actually
                # being plotted, not with a value left over from the
                # table-building loop.
                comp_type = stats_per_dir[dir_name][component_name]['type']

                data = np.array(iter_stats)
                data = data[data[:, 0] >= start_iter, :]
                iterations = data[:, 0]

                # Upper subplot: value mean (column 1) +/- stddev (column 2).
                ax = plt.subplot(211)
                mean_line, = ax.plot(iterations, data[:, 1], color=color_val,
                                     label="Mean {0}".format(dir_name))
                upper_line, = ax.plot(
                    iterations, data[:, 1] + data[:, 2],
                    color=color_val, linestyle='--',
                    label="Mean+-Stddev {0}".format(dir_name))
                ax.plot(iterations, data[:, 1] - data[:, 2],
                        color=color_val, linestyle='--')
                legend_handles.append(mean_line)
                legend_handles.append(upper_line)
                ax.set_ylabel('Value-{0}'.format(comp_type))
                ax.grid(True)

                # Lower subplot: derivative mean (column 3) +/- stddev
                # (column 4).
                ax = plt.subplot(212)
                ax.plot(iterations, data[:, 3], color=color_val)
                ax.plot(iterations, data[:, 3] + data[:, 4],
                        color=color_val, linestyle='--')
                ax.plot(iterations, data[:, 3] - data[:, 4],
                        color=color_val, linestyle='--')
                ax.set_xlabel('Iteration')
                ax.set_ylabel('Derivative-{0}'.format(comp_type))
                ax.grid(True)

            # The legend is anchored below the axes; the offset grows with
            # the number of directories.
            lgd = plt.legend(handles=legend_handles, loc='lower center',
                             bbox_to_anchor=(0.5, -0.5 + len(dirs) * -0.2),
                             ncol=1, borderaxespad=0.)
            plt.grid(True)
            fig.suptitle("Mean and stddev of the value and derivative at "
                         "{comp_name}".format(comp_name=component_name))
            comp_name = latex_compliant_name(component_name)
            figfile_name = '{dir}/nonlinstats_{comp_name}.pdf'.format(
                dir=output_dir, comp_name=comp_name)
            fig.savefig(figfile_name, bbox_extra_artists=(lgd,),
                        bbox_inches='tight')
            if latex_report is not None:
                latex_report.add_figure(
                    figfile_name,
                    "Mean and stddev of the value and derivative "
                    "at {0}".format(component_name))