Ejemplo n.º 1
0
def process_stock(stock_no, days):
    """Process a single stock.

    Loads the converted history CSV for ``stock_no`` from the directory
    ``conf.HISTORY_CONVERTED_PATH`` and prints ``rolling(rows, i)`` for
    every row index ``i``.

    Args:
        stock_no: Stock identifier interpolated into the CSV file name.
        days: Not used by this function.
    """
    csv_file = "%s/convert_%s.csv" % (conf.HISTORY_CONVERTED_PATH, stock_no)
    # Materialise once: the whole sequence is handed to rolling() on
    # every iteration.
    rows = list(common.load_csv(csv_file, fields))
    for i, _ in enumerate(rows):
        # A parenthesised single-argument print behaves identically under
        # Python 2 and Python 3, unlike the bare print statement.
        print(rolling(rows, i))
Ejemplo n.º 2
0
            def plot_timeseries(v):
                """Draw the time series of one variable and optionally save it.

                The figure is written next to ``v.csv_file`` (same path with a
                ``.png`` extension). Nothing is done when that PNG already
                exists. ``o`` and ``name`` are taken from the enclosing scope.

                Args:
                    v: Object exposing ``csv_file``, ``simulation_set`` and
                        ``units`` attributes.
                """
                png_path = v.csv_file.replace('.csv', '.png')
                if os.path.exists(png_path):
                    return

                # Figure number 2 is reused and cleared on every call.
                figure = plt.figure(num=2)
                plt.clf()
                axes = figure.add_subplot(1, 1, 1)

                series = load_csv(v.csv_file)
                # convert units for easier reading of graphs
                series.index = convert_time_units_series(series.index,
                                                         years=True)

                main_line, = axes.plot(series.index,
                                       series.values,
                                       label=v.simulation_set)
                axes.legend()
                axes.set_xlabel(series.index.name)
                axes.set_ylabel(v.units)
                axes.set_title(name)

                # Optional overlay: 12-sample rolling mean, one point out of
                # every 12, drawn in the same colour as the main curve.
                if o.yearly_mean:
                    smoothed = series.rolling(12).mean()
                    axes.plot(series.index[::12],
                              smoothed[::12],
                              alpha=1,
                              linewidth=2,
                              color=main_line.get_color())

                if o.png_timeseries:
                    figure.savefig(png_path, dpi=o.dpi)
Ejemplo n.º 3
0
def get_freqstats_per_scenario_per_test(sc_list, freq_metrics):
    """Collect summary statistics of frequency metrics per scenario and test.

    For every scenario in ``sc_list`` and every ``"<mif>-<int>"`` key found
    in ``CUSTOM_CROPPING_PARAMS_ALL[scenario]``, the matching
    ``data_mem-*.csv`` / ``data_cpugpu-*.csv`` pair is loaded and each metric
    in ``freq_metrics`` is summarised.

    Args:
        sc_list: Iterable of scenario names (also used as sub-directory
            names under ``BASE_DATA_DIR``).
        freq_metrics: Iterable of keys to read from the loaded perf data.

    Returns:
        ``OrderedDict`` mapping scenario -> ``OrderedDict`` mapping
        frequency-pair string -> dict mapping metric -> dict with the keys
        ``sum``, ``mode``, ``mean``, ``min``, ``max``, ``counter`` and
        ``transitions``.
    """
    # Parenthesised single-argument prints work on both Python 2 and 3.
    print("get_freqstats_per_scenario_per_test:Enter")

    all_freqdist_data = collections.OrderedDict()
    for each_scenario in sc_list:
        print(each_scenario)
        scenario_stats = collections.OrderedDict()
        all_freqdist_data[each_scenario] = scenario_stats

        # The data directory only depends on the scenario, not on the test.
        data_dir = BASE_DATA_DIR + each_scenario + "/"

        for each_mifint_freq_str in CUSTOM_CROPPING_PARAMS_ALL[each_scenario]:
            test_stats = {}
            scenario_stats[each_mifint_freq_str] = test_stats

            # Numeric parts become ints; parts containing "default" or
            # "test" are kept as strings.
            freq_parts = [
                int(s) if (("default" not in s) and ("test" not in s)) else s
                for s in each_mifint_freq_str.split("-")
            ]
            mif_freq = freq_parts[0]
            int_freq = freq_parts[1]

            cpugpu_csv_fname = data_dir + "data_cpugpu-{0}-{1}.csv".format(
                mif_freq, int_freq)
            mem_csv_fname = data_dir + "data_mem-{0}-{1}.csv".format(
                mif_freq, int_freq)

            # The first element of the returned pair is not needed here.
            (_, perfdata) = load_csv(mem_csv_fname, cpugpu_csv_fname)

            for each_fmetric in freq_metrics:
                samples = perfdata[each_fmetric]
                test_stats[each_fmetric] = {
                    'sum': np.sum(samples),
                    'mode': _counter_mode(samples),
                    'mean': np.mean(samples),
                    'min': np.min(samples),
                    'max': np.max(samples),
                    'counter': collections.Counter(samples),
                    # number of consecutive samples whose value differs
                    'transitions': np.count_nonzero(np.diff(samples)),
                }

    return all_freqdist_data
def compute_corrmatrix_all_scenarios(sc_list, met_list):
    """Compute the correlation matrix of each scenario's default-frequency run.

    Args:
        sc_list: Iterable of scenario names (sub-directories of
            ``BASE_DATA_DIR``).
        met_list: Metrics forwarded to ``compute_correlation_matrix``.

    Returns:
        ``OrderedDict`` mapping scenario name to element ``[2]`` of the
        value returned by ``compute_correlation_matrix``.
    """
    MIF_FREQ = "default"
    INT_FREQ = "default"

    all_sc_corr_mat = collections.OrderedDict()
    for each_scenario in sc_list:
        data_dir = BASE_DATA_DIR + each_scenario + "/"

        cpugpu_csv_fname = data_dir + "data_cpugpu-{0}-{1}.csv".format(
            MIF_FREQ, INT_FREQ)
        mem_csv_fname = data_dir + "data_mem-{0}-{1}.csv".format(
            MIF_FREQ, INT_FREQ)

        # Only the perf data is needed; the first element is ignored.
        # The former unused `lbl` local was dropped: it read a SCENARIO_ID
        # global unrelated to the loop variable and could only fail.
        (_, perfdata) = load_csv(mem_csv_fname, cpugpu_csv_fname)

        all_sc_corr_mat[each_scenario] = compute_correlation_matrix(
            perfdata, met_list)[2]

    return all_sc_corr_mat
Ejemplo n.º 5
0
def plot_cpugpumem_dist_all_scenarios(sc_list, mif_freq, int_freq):
    """Draw one box plot per scenario of the CPU, GPU and bus cost samples.

    A 5x3 grid of axes sharing the x axis is created; scenario number ``i``
    is drawn on the ``i``-th axis, so at most 15 scenarios fit.

    Args:
        sc_list: Iterable of scenario names (sub-directories of
            ``BASE_DATA_DIR``).
        mif_freq: Value interpolated into the CSV file names.
        int_freq: Value interpolated into the CSV file names.
    """
    fig, axs = plt.subplots(5, 3, figsize=(8 * 1.2, 8 * 1.2), sharex=True)
    axs = axs.ravel()

    # Defined before the loop so the labelling below also works when
    # sc_list is empty. 'mem_cost' is intentionally not plotted; the
    # 'bus_cost' label is fed from the 'sat_cost' column.
    xticklbls = ['cpu_cost', 'gpu_cost', 'bus_cost']

    for ix, each_scenario in enumerate(sc_list):
        data_dir = BASE_DATA_DIR + each_scenario + "/"
        cpugpu_csv_fname = data_dir + "data_cpugpu-{0}-{1}.csv".format(
            mif_freq, int_freq)
        mem_csv_fname = data_dir + "data_mem-{0}-{1}.csv".format(
            mif_freq, int_freq)
        (_, perfdata) = load_csv(mem_csv_fname, cpugpu_csv_fname)

        y_data = [
            perfdata['cpu_cost'],
            perfdata['gpu_cost'],
            perfdata['sat_cost'],
        ]
        pos = np.arange(1, len(y_data) + 1)

        ax = axs[ix]
        ax.boxplot(y_data, positions=pos)
        ax.set_xticks(pos)

        # The upper y limit is at least 80, and grows to the largest sample.
        peak = np.max([np.max(m) for m in y_data])
        ymax = 80.0 if peak < 80.0 else peak

        ax.set_ylim([-0.5, ymax])
        ax.set_title(each_scenario)

    # With sharex=True only the last three axes (the bottom row) get labels.
    for bottom_ax in (axs[-1], axs[-2], axs[-3]):
        bottom_ax.set_xticklabels(xticklbls, rotation=35)
Ejemplo n.º 6
0
def main():
    """Plot one or more time-series CSV files on a single figure.

    Command line:
        csv_file        one or more CSV files to plot
        --yearly-mean   also overlay a 12-sample rolling mean
        --png FILE      save the figure to FILE instead of showing it

    Each curve's legend label is built from the metadata fields (as
    returned by ``get_metadata``) whose value differs between the files.
    The y label, x label and title come from the last file processed.
    """
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('csv_file', nargs='+')
    parser.add_argument('--yearly-mean', action='store_true')
    parser.add_argument('--png', help='file name for saving png figure')
    o = parser.parse_args()

    fig2 = plt.figure()
    ax2 = plt.subplot(1, 1, 1)

    # Keep only the metadata fields that distinguish the files from each
    # other; fields shared by every file carry no information in a label.
    records = [get_metadata(csv_file) for csv_file in o.csv_file]
    keep_label = {
        field: len(set(r[field] for r in records)) > 1
        for field in records[0]
    }

    # Reuse the metadata computed above instead of parsing each file twice.
    for csv_file, record in zip(o.csv_file, records):
        ts = load_csv(csv_file)
        ts.index = convert_time_units_series(ts.index, years=True)

        label = ', '.join(
            str(value) for key, value in record.items() if keep_label.get(key))

        name = Path(csv_file).name
        cname = ts.name
        # No per-series styling here: matplotlib defaults are used.
        color = None
        zorder = None

        # The series name is expected to look like "<something> (<units>)".
        try:
            units, = re.match(r'.* \((.*)\)', cname).groups()
        except (AttributeError, TypeError):
            # AttributeError: no match (re.match returned None).
            # TypeError: the series name is not a string (e.g. None).
            logging.warning(f'failed to parse units: {cname}')
            units = ''

        l, = ax2.plot(ts.index,
                      ts.values,
                      alpha=0.5,
                      label=label,
                      linewidth=1 if o.yearly_mean else 2,
                      color=color,
                      zorder=zorder)

        # add yearly mean as well, in the same colour as the raw curve
        if o.yearly_mean:
            yearly_mean = ts.rolling(12).mean()
            ax2.plot(ts.index[::12],
                     yearly_mean[::12],
                     alpha=1,
                     linewidth=2,
                     color=l.get_color(),
                     zorder=zorder)

    ax2.legend(fontsize='xx-small')
    ax2.set_ylabel(units)
    ax2.set_xlabel(ts.index.name)
    ax2.set_title(name)

    # Never show negative x values.
    if ax2.get_xlim()[0] < 0:
        ax2.set_xlim(xmin=0)  # start at start_year (i.e. ERA5 start)

    if o.png:
        fig2.savefig(o.png, dpi=300)
    else:
        plt.show()
Ejemplo n.º 7
0
def plot_freq_time_in_state(sc_list,
                            metric,
                            TMP_MIF_FREQ="default",
                            TMP_INT_FREQ="default"):
    """Draw one pie chart per scenario of how often each frequency occurs.

    A 2x8 grid of axes is created; scenario number ``i`` is drawn on the
    ``i``-th axis. Each pie slice is the number of samples of ``metric``
    equal to one of the frequencies returned by ``get_allfreq_list``;
    frequencies that never occur are left out. A shared legend with one
    coloured rectangle per frequency is placed at the top of the figure.

    A scenario whose data cannot be loaded or plotted is reported on
    stdout and skipped; the remaining scenarios are still drawn.

    Args:
        sc_list: Iterable of scenario names (sub-directories of
            ``BASE_DATA_DIR``).
        metric: Key of the perf-data series to analyse.
        TMP_MIF_FREQ: Value interpolated into the CSV file names.
        TMP_INT_FREQ: Value interpolated into the CSV file names.
    """
    f, axarr = plt.subplots(2, 8, sharex=True, sharey=True, figsize=(16, 10))
    f.canvas.set_window_title('plot_time_in_state -' + metric)
    axarr = axarr.ravel()

    # get colors and freq list
    (freq_list, colsd) = get_allfreq_list(metric)

    for ix, each_scenario in enumerate(sc_list):
        print(each_scenario)
        data_dir = BASE_DATA_DIR + each_scenario + "/"
        cpugpu_csv_fname = data_dir + "data_cpugpu-{0}-{1}.csv".format(
            TMP_MIF_FREQ, TMP_INT_FREQ)
        mem_csv_fname = data_dir + "data_mem-{0}-{1}.csv".format(
            TMP_MIF_FREQ, TMP_INT_FREQ)

        try:
            # The title is set first so a scenario without data still shows
            # up as a titled, empty panel.
            axarr[ix].set_title(each_scenario, fontsize=12)
            (_, perfdata) = load_csv(mem_csv_fname, cpugpu_csv_fname)
            freq_data = perfdata[metric]

            # Occurrence count per frequency, in freq_list order.
            freq_data_dict = collections.OrderedDict()
            for each_freq in freq_list:
                fcount = freq_data.count(each_freq)
                if fcount > 0:
                    freq_data_dict[each_freq] = fcount

            print(json.dumps(freq_data_dict, indent=4))

            cols = [colsd[freq] for freq in freq_data_dict]
            # list(): on Python 3 dict.values() is a view, not a sequence.
            pdata = list(freq_data_dict.values())

            wedges, plt_labels, junk = axarr[ix].pie(pdata,
                                                     autopct=my_autopct,
                                                     colors=cols)
            for t in plt_labels:
                t.set_fontsize(10)

        except Exception as err:
            # Best effort: keep plotting the other scenarios, but say why
            # this one was skipped instead of hiding the failure.
            print("plot_freq_time_in_state: skipping {0}: {1!r}".format(
                each_scenario, err))
            continue

    # Proxy rectangles used only as legend handles.
    rects_list = []
    rect_lbl_list = []
    for each_f in freq_list:
        rec = patches.Rectangle((0.72, 0.1), 0.2, 0.6, facecolor=colsd[each_f])
        rects_list.append(rec)
        rect_lbl_list.append(str(each_f))

    # Spread the legend entries over two rows.
    nlines = len(freq_list)
    ncol = int(np.ceil(nlines / 2.))
    leg = plt.figlegend(rects_list,
                        rect_lbl_list,
                        loc='upper center',
                        ncol=ncol,
                        labelspacing=0.,
                        fontsize=14)
    leg.draggable()

    plt.subplots_adjust(hspace=0.09,
                        wspace=0.00,
                        top=0.87,
                        bottom=0.0,
                        left=0.0,
                        right=1.00)
Ejemplo n.º 8
0
def _slope_signs(values, deadband=None):
    """Return the direction (1, -1 or 0) of each consecutive difference.

    When ``deadband`` is given, differences whose magnitude does not exceed
    it are treated as "no change".
    """
    signs = []
    for delta in np.diff(values):
        if deadband is not None and not abs(delta) > deadband:
            delta = 0.0
        if delta == 0:  # no change
            signs.append(0)
        elif delta > 0:  # positive
            signs.append(1)
        elif delta < 0:  # negative
            signs.append(-1)
        # NOTE(review): a NaN difference matches none of the branches and
        # is skipped, which shortens this list relative to the other
        # metrics. Kept as in the original; confirm the inputs are NaN-free.
    return signs


def _classify_transition(cpu_util, sat_total, bus_mif_freq, cpu_freq):
    """Map the slope signs of one tick to a transition type (1-4, 0 = none)."""
    # Every transition type requires the MIF bus frequency to drop.
    if bus_mif_freq != -1:
        return 0
    if cpu_util == 0 and sat_total == -1 and cpu_freq == 1:
        return 1
    if cpu_util == -1 and sat_total == -1 and cpu_freq == 0:
        return 2
    if cpu_util in (0, -1) and cpu_freq == 0:
        return 3
    if cpu_util in (0, -1) and cpu_freq == 1:
        return 4
    return 0


def get_metric_slope_analysis(sc_list):
    """Print, per scenario, the share of ticks falling in each transition type.

    For the default-frequency run of every scenario, the tick-to-tick slope
    sign of several metrics is computed (changes of at most 5.0 in
    ``cpu_util`` and ``sat_total`` count as "no change") and every tick is
    classified into transition type 1-4 or 0. The fraction of ticks of each
    type is stored under ``trans_type<N>_perc`` and the resulting dict is
    pretty-printed. Nothing is returned.

    Args:
        sc_list: Iterable of scenario names (sub-directories of
            ``BASE_DATA_DIR``).
    """
    # The original trace printed another function's name (copy/paste slip).
    print("get_metric_slope_analysis:Enter")

    MIF_FREQ = "default"
    INT_FREQ = "default"

    interested_metrics = [
        'cpu_util', 'sat_total', 'bus_mif_freq', 'bus_int_freq', 'cpu_freq'
    ]
    # decrease precision: small utilisation changes are treated as flat
    deadbands = {'cpu_util': 5.0, 'sat_total': 5.0}

    all_freqdist_data = {}
    for each_scenario in sc_list:
        print(each_scenario)

        data_dir = BASE_DATA_DIR + each_scenario + "/"
        cpugpu_csv_fname = data_dir + "data_cpugpu-{0}-{1}.csv".format(
            MIF_FREQ, INT_FREQ)
        mem_csv_fname = data_dir + "data_mem-{0}-{1}.csv".format(
            MIF_FREQ, INT_FREQ)

        (_, perfdata) = load_csv(mem_csv_fname, cpugpu_csv_fname)

        # slope sign (1, -1 or 0) per tick for every interested metric
        signs = {
            each_metric: _slope_signs(perfdata[each_metric],
                                      deadbands.get(each_metric))
            for each_metric in interested_metrics
        }

        samples = len(signs[interested_metrics[0]])
        result = [
            _classify_transition(signs['cpu_util'][i],
                                 signs['sat_total'][i],
                                 signs['bus_mif_freq'][i],
                                 signs['cpu_freq'][i])
            for i in range(samples)
        ]

        # Fewer than two samples give no transitions at all; report 0.0
        # instead of dividing by zero.
        total = float(len(result))

        def share(kind):
            return float(result.count(kind)) / total if total else 0.0

        all_freqdist_data[each_scenario] = {
            'trans_type1_perc': share(1),
            'trans_type2_perc': share(2),
            'trans_type3_perc': share(3),
            'trans_type4_perc': share(4),
        }

    pprint.pprint(all_freqdist_data)
Ejemplo n.º 9
0
import numpy as np
import pandas as pd
from sklearn import svm
import sklearn.preprocessing as pp
from common import load_csv, write_results_table

# Configuration
group_columns = ['LinkRef']
categorial_columns = []
meta_columns = ['JourneyLinkRef', 'JourneyRef', 'DateTime', 'LineDirectionLinkOrder', 'LinkName']

# Fraction of each group's rows (taken from the start) used for training;
# the remainder is the test set.
TRAIN_FRACTION = .8

results = pd.DataFrame()

# Load and pre-process data
data = load_csv('data/4A_201701_Consistent.csv',
                group_columns = group_columns,
                categorial_columns = categorial_columns,
                meta_columns = meta_columns,
                n_lags = 20,
                n_headways = 0)

# load_csv yields one (group, X, Y, meta) tuple per group.
for group, X, Y, meta in data:

    # Split data into train and test (no shuffling: the first part of each
    # array is the training set). numpy must be imported for np.split.
    X_train, X_test = np.split(X, [int(TRAIN_FRACTION*len(X))])
    Y_train, Y_test = np.split(Y, [int(TRAIN_FRACTION*len(Y))])
    meta_train, meta_test = np.split(meta, [int(TRAIN_FRACTION*len(meta))])

    # Train

    print('Train data set (size, features):',  X_train.shape)

    # Normalizing X and y:
Ejemplo n.º 10
0
            def plot_all_simulations():
                """Overlay every simulation of the current indicator on one figure.

                The figure is written to ``all_<name>.png`` inside the
                location/asset output folder and is not regenerated when that
                file already exists. ``o``, ``name``, ``variables``,
                ``loc_folder``, ``asset_folder`` and (with ``o.ensemble``)
                ``ensemble_files`` are taken from the enclosing scope.

                Variables whose first dataset is an ``ERA5`` instance are drawn
                in black on top of the other curves. With ``o.ensemble`` the
                individual curves are thin and semi-transparent, and the
                ``median``/``lower``/``upper`` columns of each ensemble CSV
                are added as a solid line, two dashed lines and a shaded band.
                """
                png_path = os.path.join(o.output, loc_folder, asset_folder,
                                        'all_' + name + '.png')
                if os.path.exists(png_path):
                    return

                # Figure number 3 is reused and cleared on every call.
                figure = plt.figure(num=3)
                plt.clf()
                axes = figure.add_subplot(1, 1, 1)

                for variable in variables:
                    series = load_csv(variable.csv_file)
                    series.index = convert_time_units_series(series.index,
                                                             years=True)

                    is_era5 = isinstance(variable.datasets[0], ERA5)
                    color, zorder = ('k', 5) if is_era5 else (None, None)

                    # add yearly mean instead of monthly mean
                    if o.yearly_mean:
                        xs = series.index[::12]
                        ys = series.rolling(12).mean()[::12]
                    else:
                        xs = series.index
                        ys = series.values

                    axes.plot(xs,
                              ys,
                              alpha=0.5 if o.ensemble else 1,
                              label=variable.simulation_set,
                              linewidth=1 if o.ensemble else 2,
                              color=color,
                              zorder=zorder)

                # Add ensemble mean
                if o.ensemble:
                    for experiment, ensemble_csv in ensemble_files.items():
                        frame = load_csv(ensemble_csv)
                        frame.index = convert_time_units_series(frame.index,
                                                                years=True)

                        if o.yearly_mean:
                            xs = frame.index[::12]
                            ys = frame.rolling(12).mean().iloc[::12]
                        else:
                            xs = frame.index
                            ys = frame

                        median_line, = axes.plot(
                            xs,
                            ys["median"],
                            alpha=1,
                            label=f"{experiment} (median)",
                            linewidth=2,
                            zorder=4)
                        band_color = median_line.get_color()

                        # Dashed envelope in the same colour as the median.
                        for bound in ("lower", "upper"):
                            axes.plot(xs,
                                      ys[bound],
                                      linewidth=1,
                                      zorder=4,
                                      linestyle="--",
                                      color=band_color)
                        axes.fill_between(xs,
                                          ys["lower"],
                                          ys["upper"],
                                          alpha=0.2,
                                          zorder=-1,
                                          color=band_color)

                axes.legend(fontsize='xx-small')
                # Axis labels come from the last variable drawn above.
                axes.set_ylabel(variable.units)
                axes.set_xlabel(series.index.name)
                axes.set_title(name)

                # Never show negative x values.
                left_limit = axes.get_xlim()[0]
                if left_limit < 0:
                    axes.set_xlim(
                        xmin=0)  # start at start_year (i.e. ERA5 start)

                if o.png_timeseries:
                    # Saved at no less than 300 dpi.
                    figure.savefig(png_path, dpi=max(o.dpi, 300))
Ejemplo n.º 11
0
def main():
    import argparse

    locations = yaml.safe_load(open('locations.yml'))
    variables_def = yaml.safe_load(open('indicators.yml'))
    assets = yaml.safe_load(open('assets.yml'))
    cmip6_yml = yaml.safe_load(open('cmip6.yml'))

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--max-workers",
        type=int,
        default=4,
        help=
        "Number of parallel threads for data download. Hint: use `--max-workers 1` for serial downlaod."
    )
    # g = parser.add_argument_group('variables or asset')
    g = parser.add_mutually_exclusive_group(required=True)
    # g.add_argument('--era5', nargs='*', help='list of ERA5-monthly variables to download (original name, no correction)')
    # g.add_argument('--cmip6', nargs='*', help='list of CMIP6-monthly variables to download')
    g.add_argument('--indicators',
                   nargs='*',
                   default=[],
                   choices=[vdef['name'] for vdef in variables_def],
                   help='list of custom indicators to download')
    g.add_argument(
        '--asset',
        choices=list(assets.keys()),
        help=
        'pre-defined list of variables, defined in assets.yml (experimental)')

    parser.add_argument(
        '--dataset',
        choices=['era5', 'cmip6'],
        help='dataset in combination with for `--indicators` and `--asset`')
    parser.add_argument('-o',
                        '--output',
                        default='indicators',
                        help='output directory, default: %(default)s')
    parser.add_argument('--overwrite',
                        action='store_true',
                        help=argparse.SUPPRESS)

    g = parser.add_argument_group('location')
    g.add_argument('--location',
                   choices=[loc['name'] for loc in locations],
                   help='location name defined in locations.yml')
    g.add_argument('--lon', type=float)
    g.add_argument('--lat', type=float)

    g = parser.add_argument_group('area size controls')
    g.add_argument(
        '--area',
        nargs=4,
        type=float,
        help='area as four numbers: top, left, bottom, right (CDS convention)')
    g.add_argument(
        '--width-km',
        type=float,
        default=1000,
        help=
        "Width (km) around the selected location, when not provided by `area`. %(default)s km by default."
    )
    g.add_argument(
        '--view',
        nargs=4,
        type=float,
        help=
        'area for plot as four numbers: top, left, bottom, right (CDS convention)'
    )

    g = parser.add_argument_group('ERA5 control')
    # g.add_argument('--year', nargs='+', default=list(range(1979, 2019+1)), help='ERA5 years to download, default: %(default)s')
    g.add_argument('--year',
                   nargs='+',
                   default=list(range(1979, 2019 + 1)),
                   help=argparse.SUPPRESS)

    g = parser.add_argument_group('CMIP6 control')
    g.add_argument('--model',
                   nargs='*',
                   default=None,
                   choices=get_all_models())
    g.add_argument('--experiment',
                   nargs='*',
                   choices=cmip6_yml["experiments"],
                   default=['ssp5_8_5'])
    # g.add_argument('--period', default=None, help=argparse.SUPPRESS) # all CMIP6 models and future experiements share the same parameter...
    # g.add_argument('--historical', action='store_true', help='this flag provokes downloading historical data as well and extend back the CMIP6 timeseries to 1979')
    g.add_argument('--historical',
                   action='store_true',
                   default=True,
                   help=argparse.SUPPRESS)
    g.add_argument('--no-historical',
                   action='store_false',
                   dest='historical',
                   help=argparse.SUPPRESS)
    # g.add_argument('--bias-correction', action='store_true', help='align CMIP6 variables with matching ERA5')
    g.add_argument('--bias-correction',
                   action='store_true',
                   default=True,
                   help=argparse.SUPPRESS)
    g.add_argument('--no-bias-correction',
                   action='store_false',
                   dest='bias_correction',
                   help='suppress bias-correction for CMIP6 data')
    g.add_argument(
        '--reference-period',
        default=[1979, 2019],
        nargs=2,
        type=int,
        help='reference period for bias correction (default: %(default)s)')
    g.add_argument('--yearly-bias',
                   action='store_true',
                   help='yearly instead of monthly bias correction')
    g.add_argument(
        '--ensemble',
        action='store_true',
        help=
        'If `--model` is not specified, default to all available models. Also write a csv file with all models as columns, as well as median, lower and upper (5th and 95th percentiles) fields.'
    )

    g = parser.add_argument_group('visualization')
    g.add_argument('--view-region', action='store_true')
    g.add_argument('--view-timeseries', action='store_true')
    g.add_argument('--png-region', action='store_true')
    g.add_argument('--png-timeseries', action='store_true')
    g.add_argument('--dpi',
                   default=100,
                   type=int,
                   help='dop-per-inches (default: %(default)s)')
    g.add_argument('--yearly-mean', action='store_true')

    o = parser.parse_args()

    if not (o.location or (o.lon and o.lat)):
        parser.error(
            'please provide a location, for instance `--location Welkenraedt`, or use custom lon and lat, e.g. `--lon 5.94 --lat 50.67`'
        )

    elif o.location:
        loc = {loc['name']: loc for loc in locations}[o.location]
        o.lon, o.lat = loc['lon'], loc['lat']
        if 'area' in loc and not o.area:
            o.area = loc['area']

    if not o.area:
        o.area = make_area(o.lon, o.lat, o.width_km)

    print('lon', o.lon)
    print('lat', o.lat)

    if not o.asset and not o.indicators:
        parser.error(
            'please provide indicators, for example: `--indicators 2m_temperature` or asset, e.g. `--asset energy`'
        )

    # assets only contain indicators
    if o.asset:
        for vname in assets[o.asset]:
            if vname not in [v['name'] for v in variables_def]:
                parser.error(
                    f'unknown indicator in assets.yml: {vname}. See indicators.yml for indicator definition'
                )
            o.indicators.append(vname)

    # folder structure for CSV results
    loc_folder = o.location.lower() if o.location else f'{o.lat}N-{o.lon}E'
    asset_folder = o.asset if o.asset else 'all'

    if o.model is None:
        if o.ensemble:
            o.model = get_all_models()
        else:
            o.model = 'mpi_esm1_2_lr'

    # loop over indicators
    vdef_by_name = {v['name']: v for v in variables_def}
    for name in o.indicators:

        variables = []  # each variable for the simulation set

        vdef = vdef_by_name[name]
        indicator_def = dict(name=name,
                             units=vdef.get('units'),
                             description=vdef.get('description'),
                             scale=vdef.get('scale', 1),
                             offset=vdef.get('offset', 0))

        vdef2 = vdef.get('era5', {})
        era5_kwargs = dict(area=o.area, year=o.year)
        era5 = parse_indicator(ERA5,
                               defs=vdef2,
                               cls_kwargs=era5_kwargs,
                               **indicator_def)

        era5.simulation_set = 'ERA5'
        era5.set_folder = 'era5'
        era5.alias = name

        if not o.dataset or o.dataset == 'era5' or o.bias_correction:
            variables.append(era5)

        vdef2 = vdef.get('cmip6', {})
        transform = Transform(vdef2.get('scale', 1), vdef2.get('offset', 0))

        if not o.dataset or o.dataset == 'cmip6':
            for model in o.model:
                labels = {
                    x: "{}-{}.{}".format(*x.split("_"))
                    for x in cmip6_yml["experiments"]
                }
                # if o.historical:
                #     historical_kwargs = dict(model=model, experiment='historical')
                #     historical = parse_indicator(CMIP6, defs=vdef2, cls_kwargs=historical_kwargs, **indicator_def)
                # else:
                #     historical = None
                for experiment in o.experiment:
                    cmip6_kwargs = dict(model=model,
                                        experiment=experiment,
                                        historical=o.historical,
                                        area=o.area)
                    cmip6 = parse_indicator(CMIP6,
                                            defs=vdef2,
                                            cls_kwargs=cmip6_kwargs,
                                            **indicator_def)
                    cmip6.reference = era5
                    cmip6.simulation_set = f'CMIP6 - {labels.get(experiment, experiment)} - {model}'
                    cmip6.set_folder = f'cmip6-{model}-{experiment}'
                    cmip6.alias = name
                    # print("indicator variable", experiment, [d.name for d in cmip6.datasets])
                    variables.append(cmip6)

        if not variables:
            logging.warning(f'no variable for {name}')
            continue

        if o.max_workers < 2:
            variables2 = download_all_variables_serial(variables)
        else:
            variables2 = download_all_variables(variables)

        # Diagnose which variables have been excluded
        names = list(set([v.name for v in variables]))
        names2 = list(set([v.name for v in variables2]))

        models = list(
            set([
                v.datasets[0].model for v in variables
                if isinstance(v.datasets[0], CMIP6)
            ]))
        models2 = list(
            set([
                v.datasets[0].model for v in variables2
                if isinstance(v.datasets[0], CMIP6)
            ]))

        print(f"Downloaded {len(variables2)} out of {len(variables)}")
        print(f"... {len(names2)} out of {len(names)} variable types")
        print(f"... {len(models2)} out of {len(models)} models")
        print("CMIP6 models excluded:",
              " ".join([m for m in models if m not in models2]))
        print("CMIP6 models included:", " ".join(models2))

        variables = variables2

        # download and convert to csv
        for v in variables:
            folder = os.path.join(o.output, loc_folder, asset_folder,
                                  v.set_folder)
            v.csv_file = os.path.join(folder, (v.alias or v.variable) + '.csv')

            if os.path.exists(v.csv_file):
                print("Already exitst:", v.csv_file)
                continue

            series = v.load_timeseries(o.lon, o.lat, overwrite=o.overwrite)

            bias_correction_method = vdef.get('bias-correction')

            if o.bias_correction and isinstance(
                    v.datasets[0],
                    CMIP6) and bias_correction_method is not None:
                era5 = v.reference.load_timeseries(o.lon, o.lat)
                #v.set_folder += '-unbiased'
                if o.yearly_bias:
                    series = correct_yearly_bias(series, era5,
                                                 o.reference_period,
                                                 bias_correction_method)
                else:
                    series = correct_monthly_bias(series, era5,
                                                  o.reference_period,
                                                  bias_correction_method)

            os.makedirs(folder, exist_ok=True)
            print("Save to", v.csv_file)
            save_csv(series, v.csv_file)

        if o.ensemble:
            ensemble_files = {}
            import cftime, datetime
            for experiment in o.experiment:
                ensemble_variables = [
                    v for v in variables if isinstance(v.datasets[0], CMIP6)
                    and v.datasets[0].experiment == experiment
                ]
                dates = np.array([
                    cftime.DatetimeGregorian(y, m, 15)
                    for y in range(1979, 2100 + 1) for m in range(1, 12 + 1)
                ])
                index = pd.Index(cftime.date2num(dates, time_units),
                                 name=time_units)

                df = {}
                for v in ensemble_variables:
                    series = load_csv(v.csv_file)
                    series.index = index[:len(series)]
                    df[v.datasets[0].model] = series
                df = pd.DataFrame(df)
                median = df.median(axis=1)
                lower = df.quantile(.05, axis=1)
                upper = df.quantile(.95, axis=1)
                df["median"] = median
                df["lower"] = lower
                df["upper"] = upper
                first = ensemble_variables[0]
                folder = os.path.join(
                    o.output, loc_folder, asset_folder,
                    first.set_folder.replace(first.datasets[0].model,
                                             "ensemble"))
                csv_file = os.path.join(folder, first.alias
                                        or first.name) + '.csv'
                ensemble_files[experiment] = csv_file
                os.makedirs(folder, exist_ok=True)
                print("Save to", csv_file)
                save_csv(df, csv_file)

        if o.view_region or o.view_timeseries or o.png_region or o.png_timeseries:
            import matplotlib.pyplot as plt
            cb = None
            try:
                import cartopy
                import cartopy.crs as ccrs
                kwargs = dict(projection=ccrs.PlateCarree())
            except ImportError:
                logging.warning('install cartopy to benefit from coastlines')
                cartopy = None
                kwargs = {}

            if o.view is None:
                o.view = o.area

            def plot_timeseries(v):
                """Draw the stored time series of ``v`` on figure 2.

                The series is read back from ``v.csv_file`` and its index is passed
                through ``convert_time_units_series(..., years=True)`` before
                plotting.  When ``o.yearly_mean`` is set, a 12-sample rolling mean,
                sampled at every 12th point, is overlaid in the colour of the main
                line.  The figure is saved (same path as the CSV, ``.png``
                extension) only when ``o.png_timeseries`` is set.

                Nothing is drawn when that PNG already exists on disk.
                """
                png_path = v.csv_file.replace('.csv', '.png')
                if os.path.exists(png_path):
                    # A picture from an earlier run is already there.
                    return

                # Figure number 2 is reused for every variable, hence the clear.
                figure = plt.figure(num=2)
                plt.clf()
                axes = figure.add_subplot(1, 1, 1)

                data = load_csv(v.csv_file)
                # convert units for easier reading of graphs
                data.index = convert_time_units_series(data.index, years=True)

                (main_line,) = axes.plot(data.index,
                                         data.values,
                                         label=v.simulation_set)
                axes.legend()
                axes.set_xlabel(data.index.name)
                axes.set_ylabel(v.units)
                axes.set_title(name)

                if o.yearly_mean:
                    # Overlay the rolling mean, keeping one point out of twelve.
                    smoothed = data.rolling(12).mean()
                    (_mean_line,) = axes.plot(data.index[::12],
                                              smoothed[::12],
                                              alpha=1,
                                              linewidth=2,
                                              color=main_line.get_color())

                if o.png_timeseries:
                    figure.savefig(png_path, dpi=o.dpi)

            def plot_region(v):
                """Draw a time-averaged map of ``v`` over ``o.view`` on figure 1.

                For an ``ERA5`` dataset the average is taken over
                ``o.reference_period``; for any other dataset it is taken over
                2071-2100 and the cube is loaded with ``roll=True`` when
                ``o.view[1]`` is negative.  The point ``(o.lon, o.lat)`` is marked
                with a black dot, and coastlines are added when cartopy could be
                imported.  The figure is saved as ``<csv name>-region.png`` only
                when ``o.png_region`` is set.

                Nothing is drawn when that PNG already exists on disk.
                """
                dataset = v.datasets[0]

                png_path = v.csv_file.replace('.csv', '-region.png')
                if os.path.exists(png_path):
                    return

                # Figure number 1 is reused for every variable, hence the clear.
                figure = plt.figure(num=1)
                plt.clf()
                axes = figure.add_subplot(1, 1, 1, **kwargs)

                if isinstance(dataset, ERA5):
                    y1, y2 = o.reference_period
                    roll = False
                    title = f'ERA5: {y1}-{y2}'
                else:
                    y1, y2 = 2071, 2100
                    roll = bool(o.view[1] < 0)
                    experiment_label = labels.get(dataset.experiment,
                                                  dataset.experiment)
                    title = f'{experiment_label} ({dataset.model}): {y1}-{y2}'

                period = slice(str(y1), str(y2))
                cube = v.load_cube(time=period, area=o.view, roll=roll)
                mean_field = cube.mean(dim='time')

                # Rows are reversed before display, as in the original call.
                image = axes.imshow(mean_field.values[::-1],
                                    extent=cube_area(mean_field, extent=True))
                plt.colorbar(image, ax=axes, label=f'{name} ({v.units})')
                axes.set_title(title)
                axes.plot(o.lon, o.lat, 'ko')

                if cartopy:
                    axes.coastlines(resolution='10m')

                if o.png_region:
                    figure.savefig(png_path, dpi=o.dpi)

            for v in variables:

                if o.view_timeseries or o.png_timeseries:
                    plot_timeseries(v)

                if o.view_region or o.png_region:
                    try:
                        plot_region(v)
                    except:
                        logging.warning(f'failed to make map for {v.name}')

            # all simulation sets on one figure
            def plot_all_simulations():
                """Overlay every simulation set of the current variable on figure 3.

                Each entry of ``variables`` is read back from its CSV file and drawn
                as one line; ``ERA5`` series are drawn in black above the others.
                With ``o.yearly_mean`` the 12-sample rolling mean, sampled at every
                12th point, is drawn instead of the raw values.  With ``o.ensemble``
                the individual lines are made thinner and fainter, and for each
                experiment in ``ensemble_files`` the "median", "lower" and "upper"
                columns are added (solid line, dashed lines and a shaded band).

                The figure is saved as ``all_<name>.png`` only when
                ``o.png_timeseries`` is set; nothing is drawn when that file
                already exists.
                """
                out_png = os.path.join(o.output, loc_folder, asset_folder,
                                       'all_' + name + '.png')
                if os.path.exists(out_png):
                    return

                figure = plt.figure(num=3)
                plt.clf()
                axes = figure.add_subplot(1, 1, 1)

                # Individual lines step back visually when an ensemble summary
                # is going to be drawn on top of them.
                if o.ensemble:
                    member_alpha, member_width = 0.5, 1
                else:
                    member_alpha, member_width = 1, 2

                for variable in variables:
                    series = load_csv(variable.csv_file)
                    series.index = convert_time_units_series(series.index,
                                                             years=True)

                    is_era5 = isinstance(variable.datasets[0], ERA5)
                    line_color = 'k' if is_era5 else None
                    line_zorder = 5 if is_era5 else None

                    if o.yearly_mean:
                        # yearly mean instead of monthly values
                        smoothed = series.rolling(12).mean()
                        xs, ys = series.index[::12], smoothed[::12]
                    else:
                        xs, ys = series.index, series.values

                    (_member_line,) = axes.plot(xs,
                                                ys,
                                                alpha=member_alpha,
                                                label=variable.simulation_set,
                                                linewidth=member_width,
                                                color=line_color,
                                                zorder=line_zorder)

                if o.ensemble:
                    for experiment, ensemble_csv in ensemble_files.items():
                        table = load_csv(ensemble_csv)
                        table.index = convert_time_units_series(table.index,
                                                                years=True)

                        if o.yearly_mean:
                            smoothed = table.rolling(12).mean()
                            xs, ys = table.index[::12], smoothed.iloc[::12]
                        else:
                            xs, ys = table.index, table

                        (median_line,) = axes.plot(
                            xs,
                            ys["median"],
                            alpha=1,
                            label=f"{experiment} (median)",
                            linewidth=2,
                            zorder=4)
                        # The bounds and the band reuse the median's colour.
                        band_color = median_line.get_color()
                        for bound in ("lower", "upper"):
                            axes.plot(xs,
                                      ys[bound],
                                      linewidth=1,
                                      zorder=4,
                                      linestyle="--",
                                      color=band_color)
                        axes.fill_between(xs,
                                          ys["lower"],
                                          ys["upper"],
                                          alpha=0.2,
                                          zorder=-1,
                                          color=band_color)

                axes.legend(fontsize='xx-small')
                # NOTE: the axis labels come from the LAST variable handled by the
                # loop above, so this relies on `variables` being non-empty.
                axes.set_ylabel(variable.units)
                axes.set_xlabel(series.index.name)
                axes.set_title(name)

                lower_limit, _upper_limit = axes.get_xlim()
                if lower_limit < 0:
                    # start at start_year (i.e. ERA5 start)
                    axes.set_xlim(xmin=0)

                if o.png_timeseries:
                    figure.savefig(out_png, dpi=max(o.dpi, 300))

            if o.view_timeseries or o.png_timeseries:
                plot_all_simulations()

    if o.view_timeseries or o.view_region:
        plt.show()
def plot_cpugpumem_dist_all_scenarios(sc_list, mif_freq, int_freq, nrows=2):
    """Draw one box-plot panel per scenario for CPU and memory metrics.

    For every scenario in ``sc_list`` the files
    ``data_cpugpu-<mif_freq>-<int_freq>.csv`` and
    ``data_mem-<mif_freq>-<int_freq>.csv`` are read from
    ``BASE_DATA_DIR + scenario + "/"`` via ``load_csv``.  Four series
    (``cpu_util``, ``cpu_cost``, ``sat_total`` and ``sat_cost``) are shown as
    colour-coded boxes on that scenario's axes.

    Parameters:
        sc_list:  sequence of scenario names (sub-directories of
                  ``BASE_DATA_DIR``).
        mif_freq: value substituted into the CSV file names.
        int_freq: value substituted into the CSV file names.
        nrows:    number of subplot rows; the column count is derived from
                  ``len(sc_list)``.

    The function only builds the figure; it neither shows nor saves it.
    """
    # One colour per plotted metric, in the same order as `xticklbls`.
    colsd = [
        '#08519c',
        '#6baed6',  # blues
        '#006d2c',
        '#74c476',  # greens
    ]
    xticklbls = ['cpu_util', 'cpu_cost', 'mem_util', 'mem_cost']

    fig, axs = plt.subplots(nrows,
                            int(np.ceil(len(sc_list) / float(nrows))),
                            figsize=(12 * 1.2, 5 * 1.2),
                            sharex=True,
                            sharey=True)
    axs = axs.ravel()

    for ix, each_scenario in enumerate(sc_list):
        print(each_scenario)
        data_dir = BASE_DATA_DIR + each_scenario + "/"
        cpugpu_csv_fname = data_dir + "data_cpugpu-{0}-{1}.csv".format(
            mif_freq, int_freq)
        mem_csv_fname = data_dir + "data_mem-{0}-{1}.csv".format(
            mif_freq, int_freq)
        (count, perfdata) = load_csv(mem_csv_fname, cpugpu_csv_fname)

        # Same order as `xticklbls` / `colsd`.  The GPU series are not
        # plotted, so they are no longer looked up.
        y_data = [
            perfdata['cpu_util'],
            perfdata['cpu_cost'],
            perfdata['sat_total'],
            perfdata['sat_cost'],
        ]

        pos = np.arange(1, len(y_data) + 1)
        ax = axs[ix]

        bp = ax.boxplot(y_data,
                        positions=pos,
                        patch_artist=True,
                        widths=0.7)

        for box, c in zip(bp['boxes'], colsd):
            # `color=` on a patch sets BOTH edge and face colour, so the
            # fill has to be applied after the outline.
            box.set(color='#000000', linewidth=1)
            box.set(facecolor=c)

        for whisker in bp['whiskers']:
            whisker.set(color='#000000', linewidth=1, linestyle='-')

        for cap in bp['caps']:
            cap.set(color='#000000', linewidth=1)

        for median in bp['medians']:
            median.set(color='#000000', linewidth=1)

        for flier, c in zip(bp['fliers'], colsd):
            flier.set(marker='x', color=c)

        ax.set_xticks(pos)

        # The y axis reaches at least 100, more if the data exceeds it.
        data_max = np.max([np.max(m) for m in y_data])
        ymax = 100.0 if data_max < 100.0 else data_max

        ax.set_ylim([-0.5, ymax])
        ax.set_title(each_scenario, fontsize=14)
        ax.tick_params(axis='y', labelsize=14)

        ax.xaxis.grid(False)
        ax.yaxis.grid(True)

    # Blank the x tick labels on the trailing axes; the figure legend
    # below identifies the boxes instead.
    for ix in range(1, int(len(sc_list) / float(nrows)) + 1):
        axs[-ix].set_xticklabels([])

    # legend: one coloured rectangle per metric
    rects_list = [
        patches.Rectangle((0.72, 0.1), 0.2, 0.6, facecolor=col)
        for _lbl, col in zip(xticklbls, colsd)
    ]

    leg = plt.figlegend(rects_list,
                        xticklbls,
                        loc='upper center',
                        # integer column count on Python 2 and Python 3 alike
                        ncol=len(rects_list) // 2,
                        labelspacing=0.,
                        fontsize=14,
                        frameon=False)
    leg.get_frame().set_facecolor('#FFFFFF')
    leg.get_frame().set_linewidth(0.0)
    leg.draggable()

    plt.subplots_adjust(top=0.88,
                        left=0.025,
                        right=0.995,
                        bottom=0.02,
                        wspace=0.25)
#################
#    MAIN code
#
# Module-level script section: selects one scenario and one MIF/INT
# frequency pair, builds the two CSV paths for it and loads them.
#################
# Scenario name; also the sub-directory of BASE_DATA_DIR that is read.
SCENARIO_ID = "idle1"
DATA_DIR = BASE_DATA_DIR + SCENARIO_ID + "/"
# Frequency values substituted into the CSV file names below.
MIF_FREQ = "default"
INT_FREQ = "default"
# Alternative (disabled) frequency selection:
#MIF_FREQ = 800000
#INT_FREQ = 800000

# File names follow the pattern data_<kind>-<MIF_FREQ>-<INT_FREQ>.csv.
cpugpu_csv_fname = DATA_DIR + "data_cpugpu-{0}-{1}.csv".format(
    MIF_FREQ, INT_FREQ)
mem_csv_fname = DATA_DIR + "data_mem-{0}-{1}.csv".format(MIF_FREQ, INT_FREQ)

# load_csv takes the memory file first, then the cpu/gpu file, and returns
# a (count, perfdata) pair.
(count, perfdata) = load_csv(mem_csv_fname, cpugpu_csv_fname)

# Disabled analysis step:
# plot_cross_correlation(perfdata['bus_mif_freq'],
#                        perfdata['cpu_util_freq'])

# temp crop data (disabled)
#perfdata = _crop_pefdata(perfdata, 586, -1)

# Label identifying this scenario / frequency combination.
lbl = "{0}:mif-{1}:int-{2}".format(SCENARIO_ID, MIF_FREQ, INT_FREQ)

#corr_matrix = compute_correlation_matrix(perfdata, reduced_target_metrics_order_nogpu)[1]
#plot_corr_matrix(corr_matrix, reduced_target_metrics_order_nogpu, SCENARIO_ID)

# plot_overlapped(perfdata, ['cpu_freq',
#                            'bus_mif_freq', 'bus_int_freq',
#                            'cpu_util', 'cpu_cost',
Ejemplo n.º 14
0
import matplotlib.pyplot as plt
from common import load_csv
import sys


# One matplotlib format code per class; the number of codes fixes the
# number of classes.
c = ['r', 'g', 'b', 'c', 'm', 'y', 'k']
KLASS = len(c)
data = load_csv(sys.argv[1])


def _column_for_class(rows, column, klass):
    """Return `column` as floats for rows whose first field mod KLASS is `klass`."""
    return [float(row[column]) for row in rows if int(row[0]) % KLASS == klass]


# Draw each class with its own format code: field 4 on x, field 3 on y.
for i, fmt in enumerate(c):
    x = _column_for_class(data, 4, i)
    y = _column_for_class(data, 3, i)
    plt.plot(x, y, fmt)

plt.show()
Ejemplo n.º 15
0
def _add_scenario_legend(scenario_list, fontsize, **legend_kwargs):
    """Attach a figure legend with one marker entry per scenario."""
    artist_list = []
    for each_sc in scenario_list:
        marker = markers_and_cols_per_scenario[each_sc][0]
        col = markers_and_cols_per_scenario[each_sc][1]
        artist_list.append(
            plt.Line2D((0, 1), (0, 0), color=col, marker=marker,
                       linestyle='', mew=1.5, ms=13, mec=col))

    leg = plt.figlegend(artist_list, scenario_list, loc='upper center',
                        # at least one column, even with fewer than four
                        # scenarios (len // 4 would otherwise be 0)
                        ncol=max(1, len(artist_list) // 4),
                        labelspacing=0., fontsize=fontsize,
                        frameon=False, numpoints=1, **legend_kwargs)
    leg.get_frame().set_facecolor('#FFFFFF')
    leg.get_frame().set_linewidth(0.0)
    leg.draggable()
    return leg


def plot_scatter_allsc_allfreq(scenario_list, scatter_plot=True, line_plot=True):
    """Plot mean CPU / GPU / memory cost per scenario and frequency setting.

    For every scenario in ``scenario_list`` and every entry of
    ``all_mifint_freqs_macroworkload`` that is also a key of
    ``CUSTOM_CROPPING_PARAMS_ALL[scenario]``, the matching CSV pair is loaded
    with ``load_csv`` and the means of ``cpu_cost``, ``gpu_cost`` and
    ``sat_cost`` are recorded against the index of that frequency entry.

    Parameters:
        scenario_list: sequence of scenario names; each must be a key of
                       ``CUSTOM_CROPPING_PARAMS_ALL`` and
                       ``markers_and_cols_per_scenario``.
        scatter_plot:  when true, draw a 1x3 figure of scatter plots
                       (memory, CPU, GPU).
        line_plot:     when true, draw a 1x2 figure (CPU, GPU) with one
                       marker series and a faint connecting line per scenario.

    The function only builds the figures; it neither shows nor saves them.
    """
    CPU_METRIC = 'cpu_cost'
    MEM_METRIC = 'sat_cost'
    GPU_METRIC = 'gpu_cost'

    # Flat lists of [freq_index, mean, marker, colour] for the scatter figure.
    mean_mem_cost = []
    mean_cpu_cost = []
    mean_gpu_cost = []

    # Per-scenario series for the line figure.
    mean_all_metrics = {}

    for each_sc in scenario_list:
        print(each_sc)
        possible_freqs = CUSTOM_CROPPING_PARAMS_ALL[each_sc]
        m = markers_and_cols_per_scenario[each_sc][0]
        c = markers_and_cols_per_scenario[each_sc][1]

        tmp_mem = []
        tmp_cpu = []
        tmp_gpu = []
        tmp_fix = []

        for fix, each_f in enumerate(all_mifint_freqs_macroworkload):
            if each_f not in possible_freqs:
                continue

            miffreq, intfreq = mif_int_freqstr_to_tuple(each_f)

            # get sc data
            data_dir = BASE_DATA_DIR + each_sc + "/"
            cpugpu_csv_fname = data_dir + "data_cpugpu-{0}-{1}.csv".format(miffreq, intfreq)
            mem_csv_fname = data_dir + "data_mem-{0}-{1}.csv".format(miffreq, intfreq)
            (count, perfdata) = load_csv(mem_csv_fname, cpugpu_csv_fname)

            cpu_mean = np.mean(perfdata[CPU_METRIC])
            gpu_mean = np.mean(perfdata[GPU_METRIC])
            mem_mean = np.mean(perfdata[MEM_METRIC])

            mean_cpu_cost.append([fix, cpu_mean, m, c])
            mean_gpu_cost.append([fix, gpu_mean, m, c])
            mean_mem_cost.append([fix, mem_mean, m, c])

            tmp_cpu.append(cpu_mean)
            tmp_gpu.append(gpu_mean)
            tmp_mem.append(mem_mean)
            tmp_fix.append(fix)

        # populate dicts
        mean_all_metrics[each_sc] = {
            'cpu': tmp_cpu,
            'gpu': tmp_gpu,
            'mem': tmp_mem,
            'fix': tmp_fix,
            'marker': m,
            'col': c,
        }

    ##### only scatters ####
    if scatter_plot:
        fig, axs = plt.subplots(1, 3)
        fig.canvas.set_window_title("plot_scatter_allsc_allfreq")
        axs = axs.ravel()

        titles = [MEM_METRIC.replace('sat', 'mem'), CPU_METRIC, GPU_METRIC]
        metric_data = [mean_mem_cost, mean_cpu_cost, mean_gpu_cost]
        for axi, each_metric_data in enumerate(metric_data):
            # Iterating the records directly also copes with an empty list.
            # No Axes.hold(True) call: overplotting is the default, and the
            # method was removed in Matplotlib 3.0.
            for px, py, pm, pc in each_metric_data:
                axs[axi].scatter([px], [py], marker=pm, color=pc, s=90, linewidth=3.5)

            axs[axi].set_title(titles[axi])
            axs[axi].xaxis.grid(False)
            axs[axi].yaxis.grid(False)

        # legend
        _add_scenario_legend(scenario_list, 13)

    ##### line and scatters ####
    if line_plot:
        fsize = 14
        titles = [CPU_METRIC, GPU_METRIC]
        metrics = ['cpu', 'gpu']

        fig, axs = plt.subplots(1, len(metrics), figsize=(12.1, 5.5))
        fig.canvas.set_window_title("plot_line_allsc_allfreq")
        axs = axs.ravel()

        n_freqs = len(all_mifint_freqs_macroworkload)
        freq_labels = [f.replace('000', '').replace('default-', '')
                       for f in all_mifint_freqs_macroworkload]

        for axi, each_met in enumerate(metrics):
            ax_ymax = 0
            for each_sc_k, each_sc_data in mean_all_metrics.items():
                m = each_sc_data['marker']
                c = each_sc_data['col']
                x = each_sc_data['fix']
                y = each_sc_data[each_met]

                if not y:
                    # No frequency matched for this scenario; np.max([])
                    # would raise, and there is nothing to draw.
                    continue

                y_max = np.max(y)
                if y_max > ax_ymax:
                    ax_ymax = y_max

                print(x)
                print(y)
                print("---")

                # Markers first, then a faint line joining them.
                axs[axi].plot(x, y, marker=m, color=c, linewidth=2, linestyle='', mew=2, ms=10, markeredgecolor=c)
                axs[axi].plot(x, y, color=c, linewidth=1, linestyle='-', alpha=0.2)

            axs[axi].set_xlim([-1, n_freqs])
            axs[axi].set_ylim([-2, ax_ymax * 1.05])
            axs[axi].set_title(titles[axi], fontsize=fsize)
            axs[axi].xaxis.grid(False)
            axs[axi].yaxis.grid(False)
            axs[axi].set_xticks(np.arange(0, n_freqs))
            axs[axi].set_xticklabels(freq_labels, rotation=45, fontsize=fsize)
            axs[axi].tick_params(axis='both', labelsize=fsize)

        # legend
        _add_scenario_legend(scenario_list, fsize, columnspacing=1.0)

        plt.subplots_adjust(top=0.83, left=0.03, right=0.99, bottom=0.15, wspace=0.14, hspace=0.18)