コード例 #1
0
def modeled_indices(X, y, df, model_type=None):
    '''
    Fit a price model on (X, y) and attach its predictions and residuals to df.

    INPUT  X           Feature matrix handed to the estimator's fit/predict
           y           Labels (prices) the estimator is fit against
           df          DataFrame with a 'px' column. MODIFIED IN PLACE: gains the
                       'predicted_price', 'residual' and 'price_distance_craig'
                       columns (presumably one row per row of X -- confirm with caller)
           model_type  Optional manual override: 'L1', 'SVR_lin' or 'RF'.
                       When falsy the estimator comes from search_best_params(X, y, df)
    OUTPUT Tuple (df, find_indices(y_hat, y), model).
           The original author describes the indices as sorted with the largest
           negative residuals first -- confirm against find_indices.
           NOTE(review): with a manual override, `model` is the dict of ALL
           candidate estimators (only model[model_type] has been fit); without
           an override it is the single estimator from search_best_params.
    RAISES KeyError when model_type is truthy but not one of the keys above
    '''
    # Manual override option
    if model_type:
        model = {
            'L1': Lasso(alpha=1, tol=.01, warm_start=False, positive=False),
            # kernel must be passed by keyword: recent scikit-learn releases make
            # SVR's constructor keyword-only, so SVR('linear', ...) raises
            # TypeError and would break every override, not just 'SVR_lin'.
            'SVR_lin': SVR(kernel='linear', C=6.3095734448019298, gamma=0.1, degree=1),
            'RF': RandomForestRegressor(min_samples_split=2, n_estimators=10),
        }
        y_hat = model[model_type].fit(X, y).predict(X)
    else:
        model = search_best_params(X, y, df)
        y_hat = model.fit(X, y).predict(X)

    # Used later for identifying 2 stds
    df['predicted_price'] = y_hat
    df['residual'] = df['px'] - df['predicted_price']

    # Residual expressed in units of the residual standard deviation (rather
    # than as a percentage of the predicted price) so it can be shown in stds.
    df['price_distance_craig'] = df['residual'] / df['residual'].std()

    return df, find_indices(y_hat, y), model
コード例 #2
0
def routine(X, y, model, df, pxs):
    '''
    Summarise the price spread of the top-ranked rows of df.

    INPUT  X, y   Feature matrix and labels; `model` is fit on them here
           model  Estimator supporting model.fit(X, y).predict(X)
           df     Original DataFrame; must have 'heading', 'year' and 'px' columns
           pxs    eBay prices, forwarded unchanged to make_pricing_panel
    OUTPUT Tuple (max_price, average_spread): the max and the mean of the
           'spread' column of the pricing panel built from the first 15% of
           rows in the order returned by find_indices
    RAISES ValueError if 15% of len(df) is fewer than 10 rows
    '''
    # this should be in some proportion to the sample size
    top_n_recs = int(len(df) * .15)
    if top_n_recs < 10:
        # ValueError is still caught by callers handling the generic Exception
        raise ValueError("too few observations")

    top_indices = find_indices(model.fit(X, y).predict(X), y)
    top_df = df.iloc[top_indices][['heading', 'year', 'px']][:top_n_recs]

    # Build the panel once and reuse it for both statistics
    spread = make_pricing_panel(top_df, pxs)['spread']
    max_price = spread.max()
    average_spread = spread.mean()

    return max_price, average_spread
コード例 #3
0
def routine(X, y, model, df, pxs):
    '''
    Summarise the price spread of the top-ranked rows of df.

    INPUT  X, y   Feature matrix and labels; `model` is fit on them here
           model  Estimator supporting model.fit(X, y).predict(X)
           df     Original DataFrame; must have 'heading', 'year' and 'px' columns
           pxs    eBay prices, forwarded unchanged to make_pricing_panel
    OUTPUT Tuple (max_price, average_spread): the max and the mean of the
           'spread' column of the pricing panel built from the first 15% of
           rows in the order returned by find_indices
    RAISES ValueError if 15% of len(df) is fewer than 10 rows
    '''
    # this should be in some proportion to the sample size
    top_n_recs = int(len(df) * .15)
    if top_n_recs < 10:
        # ValueError is still caught by callers handling the generic Exception
        raise ValueError("too few observations")

    top_indices = find_indices(model.fit(X, y).predict(X), y)
    top_df = df.iloc[top_indices][['heading', 'year', 'px']][:top_n_recs]

    # Build the panel once and reuse it for both statistics
    spread = make_pricing_panel(top_df, pxs)['spread']
    max_price = spread.max()
    average_spread = spread.mean()

    return max_price, average_spread