Ejemplo n.º 1
0
def get_classifier(classifier):
  if classifier["name"] == 'linear-ridge':
    c = RidgeClassifier()
  elif classifier["name"] == 'SVC':
    c = SVC()
  elif classifier["name"] == "l2-SVC":
    c = L2KernelClassifier()
  elif classifier["name"] == "fredholm":
    c = L2FredholmClassifier()
  elif classifier["name"] == "TSVM":
    c = SVMLight()
  elif classifier["name"] == "Lap-RLSC":
    c = LapRLSC()
  elif classifier["name"] == "fred_kernel_appr":
    c = FredholmKernelApprClassifier()
  else:
    raise NameError('Not existing classifier: ' + classifier["name"] + '.')
  c.set_params(**classifier["params"])
  return c
Ejemplo n.º 2
0
def get_optimal_blend_weigth(exp_, best_param_,
                             folder, fname, model_fname):
    clf = RidgeClassifier()
    X_test, y_test = exp_.get_test_data()
    clf.set_params(**best_param_)
    clf.fit(X_test, y_test)

    # dump2csv optimal linear weight
    names = np.append(np.array(['intercept'], dtype='S100'), X_test.columns.values)
    coefs = np.append(clf.intercept_, clf.coef_).astype(np.float64)
    optimal_linear_weight = pd.DataFrame(coefs.reshape(1,len(coefs)), columns=names)
    optimal_linear_weight.to_csv(os.path.join(Config.get_string('data.path'),
                                              folder,
                                              fname), index=False)

    # dump2cpkle for ridge model
    model_fname = os.path.join(Config.get_string('data.path'), folder, model_fname)
    with gzip.open(model_fname, 'wb') as gf:
        cPickle.dump(clf, gf, cPickle.HIGHEST_PROTOCOL)
    
    return True
def get_optimal_blend_weigth(exp_, best_param_,
                             folder, fname, model_fname):
    clf = RidgeClassifier()
    X_test, y_test = exp_.get_test_data()
    clf.set_params(**best_param_)
    clf.fit(X_test, y_test)

    # dump2csv optimal linear weight
    names = np.append(np.array(['intercept'], dtype='S100'), X_test.columns.values)
    coefs = np.append(clf.intercept_, clf.coef_).astype(np.float64)
    optimal_linear_weight = pd.DataFrame(coefs.reshape(1,len(coefs)), columns=names)
    optimal_linear_weight.to_csv(os.path.join(Config.get_string('data.path'),
                                              folder,
                                              fname), index=False)

    # dump2cpkle for ridge model
    model_fname = os.path.join(Config.get_string('data.path'), folder, model_fname)
    with gzip.open(model_fname, 'wb') as gf:
        cPickle.dump(clf, gf, cPickle.HIGHEST_PROTOCOL)
    
    return True
Ejemplo n.º 4
0
def get_ridge_plot(best_param_, experiment_, 
                   param_keys_, param_vals_,
                   png_folder,
                   png_fname,
                   score_threshold=0.8):

    parameters = dict(zip(param_keys_, param_vals_))
    del parameters['model_type']

    clf = RidgeClassifier()
    X_train, y_train = experiment_.get_train_data()
    clf.set_params(**best_param_)
    clf.fit(X_train, y_train)    
    best_alpha = best_param_['alpha']
    result = {'alphas':[],
              'coefs':np.zeros( (len(parameters['alpha']), len(X_train.columns.values) + 1) ),
              'scores':[],
              'score':None}


    for i, alpha in enumerate(parameters.get('alpha',None)):
        result['alphas'].append(alpha)
        del best_param_['alpha']
        best_param_['alpha'] = alpha
        clf.set_params(**best_param_)
        clf.fit(X_train, y_train)

        # regularization path
        tmp = np.array([0 for j in xrange(len(X_train.columns.values) + 1)], dtype=np.float32)
        if best_param_['fit_intercept']:
            tmp = np.append(clf.intercept_, clf.coef_)
        else:
            tmp[1:] = clf.intercept_
        result['coefs'][i,:] = tmp
        result['scores'].append(experiment_.get_proba(clf, X_train))
    del X_train, y_train

    # 2. 
    tmp_len = len(experiment_.get_data_col_name())
    index2feature = dict(zip(np.arange(1, tmp_len + 1), 
                             experiment_.get_data_col_name()))
    if best_param_['fit_intercept']:
        index2feature[0] = 'intercept'

    # 3. plot
    gs = GridSpec(2,2)
    ax1 = plt.subplot(gs[:,0])
    ax2 = plt.subplot(gs[0,1])
    ax3 = plt.subplot(gs[1,1])


    # 3.1 feature importance
    labels = np.append(np.array(['intercept'], dtype='S100'), experiment_.get_data_col_name())
    nrows, ncols = result['coefs'].shape
    for ncol in xrange(ncols):
        ax1.plot(np.array(result['alphas']), result['coefs'][:,ncol], label = labels[ncol])
    ax1.legend(loc='best')
    ax1.set_xscale('log')
    ax1.set_title("Regularization Path:%1.3e" % (best_alpha))
    ax1.set_xlabel("alpha", fontsize=10)

    # 3.2 PDF
    X_test, y_test = experiment_.get_test_data()
    result['score'] = clf.decision_function(X_test)
    sns.distplot(result['score'], kde=False, rug=False, ax=ax2)
    ax2.set_title("PDF : Decision_Function")


    # 3.3 CDF
    num_bins = 100
    try:
        counts, bin_edges = np.histogram(result['score'], bins=num_bins, normed=True)
    except:
        counts, bin_edges = np.histogram(result['score'], normed=True)

    cdf = np.cumsum(counts)
    ax3.plot(bin_edges[1:], cdf / cdf.max())
    ax3.set_title("CDF")
    ax3.set_xlabel("Decision_Function:Confidence_Score", fontsize=10)


    png_fname = os.path.join(Config.get_string('data.path'), png_folder, png_fname)
    plt.tight_layout()
    plt.savefig(png_fname)
    plt.close()

    return True
def get_ridge_plot(best_param_, experiment_, 
                   param_keys_, param_vals_,
                   png_folder,
                   png_fname,
                   score_threshold=0.8):

    parameters = dict(zip(param_keys_, param_vals_))
    del parameters['model_type']

    clf = RidgeClassifier()
    X_train, y_train = experiment_.get_train_data()
    clf.set_params(**best_param_)
    clf.fit(X_train, y_train)    
    best_alpha = best_param_['alpha']
    result = {'alphas':[],
              'coefs':np.zeros( (len(parameters['alpha']), len(X_train.columns.values) + 1) ),
              'scores':[],
              'score':None}


    for i, alpha in enumerate(parameters.get('alpha',None)):
        result['alphas'].append(alpha)
        del best_param_['alpha']
        best_param_['alpha'] = alpha
        clf.set_params(**best_param_)
        clf.fit(X_train, y_train)

        # regularization path
        tmp = np.array([0 for j in xrange(len(X_train.columns.values) + 1)], dtype=np.float32)
        if best_param_['fit_intercept']:
            tmp = np.append(clf.intercept_, clf.coef_)
        else:
            tmp[1:] = clf.intercept_
        result['coefs'][i,:] = tmp
        result['scores'].append(experiment_.get_proba(clf, X_train))
    del X_train, y_train

    # 2. 
    tmp_len = len(experiment_.get_data_col_name())
    index2feature = dict(zip(np.arange(1, tmp_len + 1), 
                             experiment_.get_data_col_name()))
    if best_param_['fit_intercept']:
        index2feature[0] = 'intercept'

    # 3. plot
    gs = GridSpec(2,2)
    ax1 = plt.subplot(gs[:,0])
    ax2 = plt.subplot(gs[0,1])
    ax3 = plt.subplot(gs[1,1])


    # 3.1 feature importance
    labels = np.append(np.array(['intercept'], dtype='S100'), experiment_.get_data_col_name())
    nrows, ncols = result['coefs'].shape
    for ncol in xrange(ncols):
        ax1.plot(np.array(result['alphas']), result['coefs'][:,ncol], label = labels[ncol])
    ax1.legend(loc='best')
    ax1.set_xscale('log')
    ax1.set_title("Regularization Path:%1.3e" % (best_alpha))
    ax1.set_xlabel("alpha", fontsize=10)

    # 3.2 PDF
    X_test, y_test = experiment_.get_test_data()
    result['score'] = clf.decision_function(X_test)
    sns.distplot(result['score'], kde=False, rug=False, ax=ax2)
    ax2.set_title("PDF : Decision_Function")


    # 3.3 CDF
    num_bins = 100
    try:
        counts, bin_edges = np.histogram(result['score'], bins=num_bins, normed=True)
    except:
        counts, bin_edges = np.histogram(result['score'], normed=True)

    cdf = np.cumsum(counts)
    ax3.plot(bin_edges[1:], cdf / cdf.max())
    ax3.set_title("CDF")
    ax3.set_xlabel("Decision_Function:Confidence_Score", fontsize=10)


    png_fname = os.path.join(Config.get_string('data.path'), png_folder, png_fname)
    plt.tight_layout()
    plt.savefig(png_fname)
    plt.close()

    return True