Example #1
def clf_yx():
    'Load Lasso Results'
    ts_las, ts_nc = pkload('Ets_final.pk')
    print('--Ets--')
    print_dict(ts_nc)
    tsra_las, tsra_nc = pkload('Etsra_final.pk')
    print('--Etsra--')
    print_dict(tsra_nc)
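All of the examples rely on a few small helpers (pkload, pkdump, print_dict, pltsave) that are not part of this listing. Below is a minimal sketch of what they plausibly look like, assuming plain pickle files and matplotlib figures; the project's actual implementations may differ.

import pickle

import matplotlib.pyplot as plt


def pkload(fname):
    # load whatever object (or tuple of objects) was pickled into fname
    with open(fname, 'rb') as f:
        return pickle.load(f)


def pkdump(fname, obj):
    # pickle obj into fname (file name first, matching the calls in the examples)
    with open(fname, 'wb') as f:
        pickle.dump(obj, f)


def print_dict(d):
    # print a name -> coefficient mapping, one entry per line
    for name, coef in d.items():
        print('{:<20} -> {:<10}'.format(name, round(coef, 4)))


def pltsave(fname):
    # save the current matplotlib figure and release it
    plt.savefig(fname, dpi=300, bbox_inches='tight')
    plt.close()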
Example #2
def cmp_etype():
    feas, Ets_best, means, stds = pkload('Best_las_Ets.pk') 
    feas, Etsra_best, means, stds = pkload('Best_las_Etsra.pk') 

    def outs(bests):
        for name, coef in bests.items():
            print('{:<20} -> {:<10}'.format(name, round(coef, 4)))

    print('Ets')
    outs(Ets_best)
    print('Etsra')
    outs(Etsra_best)
Example #3
def plt_hist(pkfile):
    'Plot a histogram of the coefficients stored in pkfile'
    params, nc = pkload(pkfile)  # nc - name_coef

    'Hist'
    coef_max, coef_min = max(nc.values()), min(nc.values())
    up = math.ceil(coef_max * 100) / 100.0
    # round the lower bound down to two decimals (valid for either sign of coef_min)
    down = math.floor(coef_min * 100) / 100.0

    print('Preparing Hist...')
    fig, ax = plt.subplots()
    plt.title('Coefficient Count ' + str(len(nc.keys())) + ' in total')
    plt.xlabel('Coefficient')
    plt.ylabel('Number')

    ''
    values = list(nc.values())
    bins = np.arange(down, up + 0.1 * up, (up - down) / 5)
    counts, edges = np.histogram(values, bins=bins)  # counts drive the y-ticks below
    ax.hist(values, bins=bins)
    ax.set_xticks(bins)
    ax.set_xlim(down, up)
    ax.set_yticks(np.arange(0, max(counts) + 2, 2))

    print('Save fig...')
    pltsave(pkfile.split('.')[0] + '.png')

    print('Success...')
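A hypothetical call, reusing the pickle from Example #1, which stores a (fitted estimator, name -> coefficient dict) pair in the order plt_hist expects:

plt_hist('Ets_final.pk')  # writes Ets_final.png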
Example #4
def FeaSelection(pkfile):
    'Pre Data'
    print('Preparing Data...')
    params, feas = pkload(pkfile)
    DS = GetDS('Ets', feas.keys())
    y = DS['target']
    X = DS['features']

    'Feature Selection'
    print('Selecting Features...')
    mean_MSEs = []
    std_MSEs = []
    feas_number = len(X[0])
    # grow the feature set one column at a time, in the order given by feas
    for i in range(feas_number):
        X_slice = []
        for j in range(i + 1):
            X_slice.append(X.T[j])
        X_slice = np.array(X_slice).T
        mean_MSE, std_MSE = reg_assemble(y, X_slice)
        mean_MSEs.append(mean_MSE)
        std_MSEs.append(std_MSE)

    'Get Feas Name'
    names = list(feas.keys())
    best_index = mean_MSEs.index(min(mean_MSEs))
    best_feas = {}
    for i in range(best_index + 1):
        best_feas[names[i]] = feas[names[i]]

    'Dump Pickle'
    print('Saving pk...')
    bestreg_pk = (feas, best_feas, mean_MSEs, std_MSEs)
    # e.g. 'PosCoef_las_Ets.pk' -> 'Best_las_Ets.pk'
    pkdump('Best' + pkfile.replace('PosCoef', '', 1), bestreg_pk)
    print('Success...')
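reg_assemble is another helper that is not shown. Given that it takes (y, X_slice) and returns the mean and standard deviation of an MSE, a plausible sketch is a cross-validated linear fit; the model choice and fold count here are assumptions.

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score


def reg_assemble(y, X):
    # cross-validated mean squared error of an ordinary least-squares fit;
    # sklearn's scorer is negated, so flip the sign back to a positive MSE
    scores = cross_val_score(LinearRegression(), X, y,
                             scoring='neg_mean_squared_error', cv=5)
    mse = -scores
    return mse.mean(), mse.std()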
Example #5
def plt_bar():
    'Bar plot of the best-feature coefficients'
    feas, best_feas, best_coefs, meanMSEs, stdMSEs = pkload('BestReg-test.pk')

    'Plot the best feas'
    x = range(len(best_feas))
    fig, ax = plt.subplots()
    plt.bar(x, best_coefs)
    plt.xticks(x, best_feas)
    plt.show()
Example #6
def plt_bep():
    print('Plotting...')
    fig, ((ax1, ax2, ax5), (ax3, ax4, ax6)) = plt.subplots(nrows=2,
                                                           ncols=3,
                                                           figsize=(12, 8))
    plt.suptitle('BEP for Methane Activation')
    plt.tight_layout(pad=2.0, w_pad=2.0, h_pad=2.0)
    plt.subplots_adjust(left=None, bottom=None, right=None, top=0.9, \
            wspace=None, hspace=0.2)

    y, y_p, r2, mse = pkload('ts_Hab2_CH3ab.pk')
    ax1.set_title('(a) $E_{H^{{sp}^2}} + E_{{{CH}_3}^v}$')
    ax1.scatter(y, y_p, edgecolors=(0, 0, 0))
    ax1.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
    ax1.text(-0.5, 1, '${R^2}$=%0.2f, MSE=%0.2f' % (r2, mse))

    y, y_p, r2, mse = pkload('ts_Hab2_CH3ab2.pk')
    ax2.set_title('(b) $E_{H^{{sp}^2}} + E_{{{CH}_3}^p}$')
    ax2.scatter(y, y_p, edgecolors=(0, 0, 0))
    ax2.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
    ax2.text(-0.5, 1, '${R^2}$=%0.2f, MSE=%0.2f' % (r2, mse))

    y, y_p, r2, mse = pkload('ts_Hab3_CH3ab.pk')
    ax3.set_title('(c) $E_{H^{{sp}^3}} + E_{{{CH}_3}^v}$')
    ax3.scatter(y, y_p, edgecolors=(0, 0, 0))
    ax3.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
    ax3.text(-0.5, 1, '${R^2}$=%0.2f, MSE=%0.2f' % (r2, mse))

    y, y_p, r2, mse = pkload('ts_Hab3_CH3ab2.pk')
    ax4.set_title('(d) $E_{H^{{sp}^3}} + E_{{{CH}_3}^p}$')
    ax4.scatter(y, y_p, edgecolors=(0, 0, 0))
    ax4.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
    ax4.text(-0.5, 1, '${R^2}$=%0.2f, MSE=%0.2f' % (r2, mse))

    y, y_p, r2, mse = pkload('tsra_Hab2.pk')
    ax5.set_title('(e) $E_{H^{{sp}^2}}$')
    ax5.scatter(y, y_p, edgecolors=(0, 0, 0))
    ax5.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
    ax5.text(0.4, 1.4, '${R^2}$=%0.2f, MSE=%0.2f' % (r2, mse))

    y, y_p, r2, mse = pkload('tsra_Hab3.pk')
    ax6.set_title('(f) $E_{H^{{sp}^3}}$')
    ax6.scatter(y, y_p, edgecolors=(0, 0, 0))
    ax6.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
    ax6.text(0.4, 1.4, '${R^2}$=%0.2f, MSE=%0.2f' % (r2, mse))

    #plt.show()
    pltsave('bep.png')

    print('Success...')
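Each ts_*/tsra_* pickle read above holds a (y, y_p, r2, mse) tuple for one descriptor combination. The sketch below shows how one such tuple could be produced with an ordinary least-squares fit; the descriptor names 'Hab2' and 'CH3ab' are only guessed from the file name, and GetDS is the dataset helper sketched after Example #8.

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

DS = GetDS('Ets', ['Hab2', 'CH3ab'])  # hypothetical descriptor columns for panel (a)
y = np.asarray(DS['target'])
X = DS['features']

model = LinearRegression().fit(X, y)
y_p = model.predict(X)
pkdump('ts_Hab2_CH3ab.pk',
       (y, y_p, r2_score(y, y_p), mean_squared_error(y, y_p)))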
Example #7
def FeaSelection():
    'Pre Data'
    print('Preparing Data...')
    las, nc = pkload('Ets_final.pk')
    DS = GetDS('Ets', nc.keys())
    y = DS['target']; X = DS['features']

    names = list(nc.keys())

    for i in range(len(names)):
        t = names[i].split('_')[0]
        if t == 'h':
            X.T[i] = np.sin((X.T[i]/2)**2)
        elif t == 'a':
            X.T[i] = X.T[i]**2

    'Feature Selection'
    print('Selecting Features...')
    mean_MSEs = []; std_MSEs = []; r2s = []
    feas_number = len(X[0])
    for i in range(feas_number):
        X_slice = []
        for j in range(i+1):
            X_slice.append(X.T[j])
        X_slice = np.array(X_slice).T
        mean_MSE, std_MSE = reg_assemble(y, X_slice)
        mean_MSEs.append(mean_MSE)
        std_MSEs.append(std_MSE)

    'Get Feas Name'
    best_index = mean_MSEs.index(min(mean_MSEs))
    best_feas = {}
    for i in range(best_index+1):
        best_feas[names[i]] = nc[names[i]]

    'Dump Pickle'
    print('Saving pk...')
    bestreg_pk = (nc, best_feas, mean_MSEs, std_MSEs)
    pkdump('Best_Ets.pk', bestreg_pk) 
    print('Success...')
Example #8
def PreSelection(pkfile):
    print('-' * 20)
    'Load DataSet'
    print('Loading DataSet...')
    DS = GetDS(pkfile.split('.')[0].split('_')[1])

    'Load Pickle'
    print('Loading ' + pkfile + '...')
    gs = pkload(pkfile)
    if pkfile == 'lsr.pk':
        best_gs = gs
    else:
        best_gs = gs.best_estimator_

    'Get Positive Coef'
    print('Collecting Coefficients...')
    # get feature name and coef
    feanames = DS['fea_names']
    coefs = best_gs.coef_
    # keep only the features with a non-zero coefficient
    name_coef = {}
    for name, coef in zip(feanames, coefs):
        if abs(coef) > 0:
            name_coef[name] = coef
    # sort fea by abs value
    nc_sorted = sorted(name_coef.items(),
                       key=lambda d: abs(d[1]),
                       reverse=True)
    nc_dict = {}
    for t in nc_sorted:
        nc_dict[t[0]] = t[1]

    'Dump Pickle'
    print('Dumping Pickle...')
    laspk = nc_dict
    pkdump('PosCoef_' + pkfile, laspk)

    print('Success...')
    print('-' * 20)
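GetDS itself is assumed throughout. From the calls in Examples #4, #7 and #8 it returns a dict with 'target', 'features' and 'fea_names' keys, optionally restricted to a given set of feature names. A minimal sketch of that contract follows; the CSV file name and layout are purely illustrative.

import pandas as pd


def GetDS(etype, fea_names=None):
    # etype selects the target column (e.g. 'Ets'); fea_names optionally
    # restricts and orders the feature columns
    df = pd.read_csv('dataset.csv')  # hypothetical data file
    if fea_names is None:
        fea_names = [c for c in df.columns if c != etype]
    else:
        fea_names = list(fea_names)
    return {
        'target': df[etype].to_numpy(),
        'features': df[fea_names].to_numpy(),
        'fea_names': fea_names,
    }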
Example #9
def plt_curve(pkfile):
    'Load pk'
    feas, best_feas, mean_MSEs, std_MSEs = pkload(pkfile)
    print(best_feas)

    'Plt Learning Curve'
    print('Plotting Learning Curve...')
    plt.title('Feature Selection from LASSO ' +
              pkfile.split('.')[0].split('_')[2], fontsize=16)
    plt.xlabel('Number of Feature Used')
    plt.ylabel('Mean MSE')

    ''
    ax = plt.gca()
    x = range(1, len(feas.keys()) + 1)
    ax.set_xlim(0, len(x) + 1)
    ax.set_xticks(np.arange(0, len(x) + 1, 2))
    ax.set_ylim(0, max(mean_MSEs) * 1.1)
    ax.set_yticks(np.arange(0, max(mean_MSEs) * 1.1, 0.5))
    ax.plot(x, mean_MSEs)

    'Plot best_index'
    best_score = min(mean_MSEs)
    best_index = mean_MSEs.index(best_score)

    # Plot a dotted vertical line at the best score for that scorer marked by x
    ax.plot([x[best_index], ] * 2, [0, best_score], \
            linestyle='-.', color='b', marker='x', markeredgewidth=3, ms=8)

    # Annotate the best score for that scorer
    ax.annotate("%0.2f" % best_score, (x[best_index], best_score + 0.005))

    print('Saving fig...')
    pltsave(pkfile.split('.')[0] + '.png')

    print('Success...')
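A hypothetical call, using the pickle dumped by the feature-selection step in Example #4 (the same file read in Example #2):

plt_curve('Best_las_Ets.pk')  # saves Best_las_Ets.png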
Example #10
def plt_cv():
    'Load Pickle'
    print('Loading Pickle...')
    gslas = pkload('las_Ea.pk')

    'Get Results'
    results = gslas.cv_results_
    scoring = gslas.scorer_

    #def additional():
    'Plt Learning Curve'
    print('Plotting Learning Curve...')
    plt.figure(figsize=(12, 8))  # figsize in inch, 1inch=2.54cm
    plt.title("GridSearchCV for LASSO", fontsize=16)
    plt.xlabel("Alpha")

    # Get the regular numpy array from the MaskedArray
    X_axis = np.array(results['param_alpha'].data, dtype=float)
    ax = plt.gca()

    'R2'
    scorer = 'R2'
    color = 'g'
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.set_ylabel('R2')
    for sample, style in (('train', '--'), ('test', '-')):  # sample
        sample_score_mean = results['mean_%s_%s' %
                                    (sample, scorer)]  # score mean
        sample_score_std = results['std_%s_%s' % (sample, scorer)]  # score std

        ax.fill_between(X_axis, sample_score_mean - sample_score_std, \
                sample_score_mean + sample_score_std, \
                alpha=0.1 if sample == 'test' else 0, color=color)
        ax.plot(X_axis, sample_score_mean, style, color=color, \
                alpha=1 if sample == 'test' else 0.7, \
                label="%s (%s)" % (scorer, sample))

    best_index = np.nonzero(results['rank_test_%s' % scorer] == 1)[0][0]
    best_score = results['mean_test_%s' % scorer][best_index]

    # Plot a dotted vertical line at the best score for that scorer marked by x
    ax.plot([
        X_axis[best_index],
    ] * 2, [0, best_score],
            linestyle='-.',
            color=color,
            marker='x',
            markeredgewidth=3,
            ms=8)

    # Annotate the best score for that scorer
    ax.annotate("%0.2f" % best_score, \
            (X_axis[best_index], best_score + 0.005))

    ax.legend(loc=2)

    'MSE'
    scorer = 'MSE'
    color = 'k'
    ax2 = ax.twinx()
    ax2.set_xlim(0, 1)
    ax2.set_ylim(-1, 2)
    ax2.set_ylabel('MSE')
    for sample, style in (('train', '--'), ('test', '-')):  # sample
        sample_score_mean = results['mean_%s_%s' %
                                    (sample, scorer)]  # score mean
        sample_score_std = results['std_%s_%s' % (sample, scorer)]  # score std

        ax2.fill_between(X_axis, sample_score_mean - sample_score_std, \
                sample_score_mean + sample_score_std, \
                alpha=0.1 if sample == 'test' else 0, color=color)
        ax2.plot(X_axis, sample_score_mean, style, color=color, \
                alpha=1 if sample == 'test' else 0.7, \
                label="%s (%s)" % (scorer, sample))

    best_index = np.nonzero(results['rank_test_%s' % scorer] == 1)[0][0]
    best_score = results['mean_test_%s' % scorer][best_index]

    # Plot a dotted vertical line at the best score for that scorer marked by x
    ax2.plot([
        X_axis[best_index],
    ] * 2, [0, best_score],
             linestyle='-.',
             color=color,
             marker='x',
             markeredgewidth=3,
             ms=8)

    # Annotate the best score for that scorer
    ax2.annotate("%0.2f" % best_score, \
            (X_axis[best_index], best_score + 0.005))

    ax2.legend(loc=1)

    ''
    plt.grid(False)
    pltsave('LearnCurve.png')
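The cv_results_ keys read above ('mean_train_R2', 'rank_test_MSE', 'param_alpha', ...) imply a multi-metric grid search over the Lasso alpha with training scores recorded. The sketch below shows how the pickled las_Ea.pk object could have been produced; the parameter grid, fold count and data loading are assumptions rather than the project's actual settings.

import numpy as np
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV

DS = GetDS('Ea')  # assumed: same dataset helper as in the other examples
y, X = DS['target'], DS['features']

gs = GridSearchCV(Lasso(max_iter=10000),
                  {'alpha': np.linspace(0.01, 1.0, 100)},  # illustrative grid
                  scoring={'R2': 'r2', 'MSE': 'neg_mean_squared_error'},
                  refit='R2', cv=5, return_train_score=True)
gs.fit(X, y)
pkdump('las_Ea.pk', gs)  # the object plt_cv() loads back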