def log_stats(filename, msg, dist):
    """Append ``msg`` and a one-line statistical summary of ``dist`` to a file.

    Parameters
    ----------
    filename : str
        Path of the log file; it is opened in append mode.
    msg : str
        Header line written before the summary line.
    dist : iterable
        Samples to summarise.  They are sorted first, then each item is
        passed through ``getfirstorself`` before any statistic is computed.

    Notes
    -----
    ``getfirstorself``, ``score``, ``median``, ``mean`` and ``std`` are looked
    up at module level; this function does not import them itself.
    """
    samples = [getfirstorself(entry) for entry in sorted(dist)]
    template = "min={} q1={} median={} q3={} max={} mean={} stddev={}"
    with open(filename, 'a') as log:
        print(msg, file=log)
        # The statistics are evaluated after the header has been written,
        # in the same left-to-right order as they appear in the template.
        summary = template.format(
            min(samples),
            score(samples, 25),
            median(samples),
            score(samples, 75),
            max(samples),
            mean(samples),
            std(samples),
        )
        print(summary, file=log)
Exemple #2
0
    def plot_mmm(self,
                 ax,
                 index,
                 xscale=1.0,
                 yscale=1.0,
                 xlabel='',
                 ylabel='',
                 do_rate=False):
        """Plot min, 25th/50th/75th percentile and max across hosts over time.

        For every key in ``self.ts.j.hosts`` the series
        ``self.ts.assemble(index, key, 0)`` is reduced to one value per time
        interval; the five statistics are then taken across hosts for each
        interval and drawn against the interval midpoints.

        Parameters
        ----------
        ax : axes object
            Target axes; must provide ``plot`` and ``yaxis``.
        index : object
            Passed through to ``self.ts.assemble`` and ``self.setlabels``.
        xscale, yscale : float
            Divisors applied to the x and y data before plotting.
        xlabel, ylabel : str
            Passed through to ``self.setlabels``.
        do_rate : bool
            If true, plot the finite-difference rate ``diff(v) / diff(t)``;
            otherwise plot the average of consecutive samples.
        """
        tmid = (self.ts.t[:-1] + self.ts.t[1:]) / 2.0
        dt = numpy.diff(self.ts.t)  # loop-invariant, so computed once

        d = []
        for k in self.ts.j.hosts.keys():
            v = self.ts.assemble(index, k, 0)
            if do_rate:
                d.append(numpy.divide(numpy.diff(v), dt))
            else:
                d.append((v[:-1] + v[1:]) / 2.0)

        # One row per host, one column per time interval.  Only the columns
        # that have a matching midpoint in ``tmid`` are used.
        a = numpy.array(d)[:, :len(tmid)]

        # Column-wise statistics in single vectorised calls.  The default
        # linear interpolation of numpy.percentile matches the default of
        # scipy.stats.scoreatpercentile.  NaNs propagate into the result.
        mn = a.min(axis=0)
        p25, p50, p75 = numpy.percentile(a, [25, 50, 75], axis=0)
        mx = a.max(axis=0)

        # Successive ax.plot calls already accumulate on the same axes.  The
        # former ``ax.hold = True`` assignment was dropped: it replaced the
        # (since removed) Axes.hold method with a bool instead of calling it.
        x = tmid / xscale
        ax.plot(x, mn / yscale, '--')
        ax.plot(x, p25 / yscale)
        ax.plot(x, p50 / yscale)
        ax.plot(x, p75 / yscale)
        ax.plot(x, mx / yscale, '--')

        self.setlabels(ax, index, xlabel, ylabel, yscale)
        ax.yaxis.set_major_locator(matplotlib.ticker.MaxNLocator(nbins=4))
        tspl_utils.adjust_yaxis_range(ax, 0.1)
Exemple #3
0
def diffpdfs_param(df, title=None, fig=None, label=None, footer=True):
    """Plot a kernel density estimate of the ``CMAQ - Obs`` differences.

    Parameters
    ----------
    df : DataFrame-like
        Must provide ``CMAQ`` and ``Obs`` columns.  When a new figure is
        created, the first unique values of the ``Species`` and ``Units``
        columns are used for the x-axis label.
    title : str, optional
        Title of the new figure (ignored when ``fig`` is given).
    fig : figure, optional
        Existing figure.  When given, the density is added to its first axes
        and no limits, labels or footer are touched.
    label : str, optional
        Legend label.  On a new figure it defaults to ``'CMAQ - Obs'``.
    footer : bool
        When true and a new figure is created, ``footer_text(df)`` is called.

    Returns
    -------
    None
    """
    from scipy.stats import scoreatpercentile as score
    sns.set_style('ticks')

    # Computed once; the original recomputed this array for every use.
    diff = df.CMAQ.values - df.Obs.values

    if fig is not None:
        ax = fig.get_axes()[0]
        sns.kdeplot(diff, ax=ax, label=label)
        return

    # Clip the x-axis to the central 99.8 % of the differences.
    maxval = score(diff, per=99.9)
    minval = score(diff, per=.1)

    plt.figure(figsize=(10, 7))
    # The original tested ``label == 'None'`` (a string), so the default of
    # None never received the fallback label.  The string form is still
    # accepted for callers that relied on it.
    if label is None or label == 'None':
        label = 'CMAQ - Obs'
    sns.kdeplot(diff, color='darkslategrey', label=label)
    sns.despine()
    plt.xlim([minval, maxval])
    plt.xlabel(df.Species.unique()[0] + ' Difference (' +
               df.Units.unique()[0] + ')')
    plt.title(title)
    plt.gca().axes.set_ylabel('P( Model - Obs )')
    if footer:
        footer_text(df)
    plt.tight_layout()
def get_second_baseline_oneTrace(trace, SD_window, SD_percentile):
    """Estimate a baseline from the quietest (low rolling-SD) parts of a trace.

    Parameters
    ----------
    trace : numpy.ndarray
        1-D trace (the original notes say: after subtraction of the first
        baseline).
    SD_window : int
        Width, in samples, of the sliding window used for the rolling
        standard deviation.  Half of it (integer division) is taken on each
        side of the centre sample.
    SD_percentile : float
        Percentile (0-100) of the rolling-SD distribution that defines the
        target SD value.

    Returns
    -------
    second_baseline : float
        Median of the unique trace values selected by ``idx`` after clipping
        them to the most populated histogram bin; if the histogram cannot be
        built, the median of the unclipped unique values.
    idx : numpy.ndarray
        Squeezed indices into ``rolling_SD`` where the rounded rolling SD
        equals the rounded target SD.
    rolling_SD : numpy.ndarray
        Rolling standard deviation, ``len(trace) - 2 * (SD_window // 2)``
        samples long.
    """
    from scipy.stats import scoreatpercentile as score

    # Integer half-width: a float here breaks slicing on Python 3.
    win = int(SD_window) // 2
    n = trace.shape[0]

    rolling_SD = np.array([trace[s - win:s + win].std()
                           for s in range(win, n - win)])

    # Target SD value at the requested percentile, compared after rounding.
    SD = np.round(score(rolling_SD, SD_percentile))
    idx = np.argwhere(np.round(rolling_SD) == SD)

    # NOTE(review): the histogram below uses trace[win:-win][idx] (aligned
    # with rolling_SD) while the final median uses trace[idx] (not shifted by
    # ``win``).  Kept as in the original -- confirm which one is intended.
    try:
        # np.histogram needs an integer bin count; roughly one bin per ten
        # selected samples.  A count of zero raises ValueError and falls
        # through to the unclipped median below.
        n_bins = int(np.round(idx.shape[0] / 10.0))
        counts, edges = np.histogram(np.round(trace[win:n - win][idx]),
                                     bins=n_bins)
        # First most-populated bin.  ``edges`` has one more entry than
        # ``counts``, so it cannot be indexed with a mask built from counts.
        bin_num = int(np.argmax(counts))
        left_edge = edges[bin_num]
        right_edge = edges[bin_num + 1]

        second_baseline = score(
            np.unique(trace[idx].clip(left_edge, right_edge)), 50)
    except (ValueError, IndexError, TypeError):
        second_baseline = score(np.unique(trace[idx]), 50)
    return second_baseline, np.squeeze(idx), rolling_SD
Exemple #5
0
  def plot_mmm(self,ax,index,xscale=1.0,yscale=1.0,xlabel='',ylabel='',
               do_rate=False):
    """Plot min, 25th/50th/75th percentile and max across hosts over time.

    For every key in ``self.ts.j.hosts`` the series
    ``self.ts.assemble(index, key, 0)`` is reduced to one value per time
    interval; the five statistics are then taken across hosts for each
    interval and drawn against the interval midpoints.

    ax            -- target axes; must provide ``plot`` and ``yaxis``
    index         -- passed through to ``self.ts.assemble`` and
                     ``self.setlabels``
    xscale/yscale -- divisors applied to the x and y data before plotting
    xlabel/ylabel -- passed through to ``self.setlabels``
    do_rate       -- if true, plot ``diff(v) / diff(t)``; otherwise plot the
                     average of consecutive samples
    """
    tmid=(self.ts.t[:-1]+self.ts.t[1:])/2.0
    dt=numpy.diff(self.ts.t)  # loop-invariant, so computed once

    d=[]
    for k in self.ts.j.hosts.keys():
      v=self.ts.assemble(index,k,0)
      if do_rate:
        d.append(numpy.divide(numpy.diff(v),dt))
      else:
        d.append((v[:-1]+v[1:])/2.0)

    # One row per host, one column per time interval.  Only the columns that
    # have a matching midpoint in ``tmid`` are used.
    a=numpy.array(d)[:,:len(tmid)]

    # Column-wise statistics in single vectorised calls.  The default linear
    # interpolation of numpy.percentile matches the default of
    # scipy.stats.scoreatpercentile.  NaNs propagate into the result.
    mn=a.min(axis=0)
    p25,p50,p75=numpy.percentile(a,[25,50,75],axis=0)
    mx=a.max(axis=0)

    # Successive ax.plot calls already accumulate on the same axes.  The
    # former ``ax.hold=True`` assignment was dropped: it replaced the (since
    # removed) Axes.hold method with a bool instead of calling it.
    x=tmid/xscale
    ax.plot(x,mn/yscale,'--')
    ax.plot(x,p25/yscale)
    ax.plot(x,p50/yscale)
    ax.plot(x,p75/yscale)
    ax.plot(x,mx/yscale,'--')

    self.setlabels(ax,index,xlabel,ylabel,yscale)
    ax.yaxis.set_major_locator( matplotlib.ticker.MaxNLocator(nbins=4))
    tspl_utils.adjust_yaxis_range(ax,0.1)
Exemple #6
0
def sp_scatter_bias(df,
                    col1=None,
                    col2=None,
                    ax=None,
                    outline=False,
                    tight=True,
                    global_map=True,
                    map_kwargs=None,
                    cbar_kwargs=None,
                    val_max=None,
                    val_min=None,
                    **kwargs):
    """Scatter the difference ``col2 - col1`` at each latitude/longitude.

    Points are coloured by the signed difference and sized by its magnitude
    relative to ``top`` (capped at 300).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``latitude``, ``longitude``, ``col1`` and ``col2``
        columns.  Rows with a missing value in any of them are dropped.
    col1, col2 : str
        Column names; both are required.
    ax : axes, optional
        Axes to draw on.  When omitted, ``draw_map(**map_kwargs)`` is called.
    outline : bool
        When false, the map outline patch is made fully transparent.
    tight : bool
        When true, a tight layout with zero padding is applied to the figure.
    global_map : bool
        When true, the axis limits are set to [-180, 180] x [-90, 90].
    map_kwargs : dict, optional
        Keyword arguments for ``draw_map``.
    cbar_kwargs : dict, optional
        Accepted for interface compatibility; not used by this function.
    val_max : float, optional
        Colour-scale limit.  Defaults to the 95th percentile of the absolute
        difference.
    val_min : float, optional
        Accepted for interface compatibility; not used by this function.
    **kwargs
        Forwarded to ``DataFrame.plot.scatter``.

    Returns
    -------
    The axes that were drawn on.

    Raises
    ------
    ValueError
        If ``col1`` or ``col2`` is not given.  The original raised and then
        swallowed this error (``except ValueError: exit`` is a no-op), so the
        caller silently received None.
    """
    from scipy.stats import scoreatpercentile as score

    # Validate before any figure or map is created.
    if col1 is None or col2 is None:
        raise ValueError('User must specify col1 and col2 in the dataframe')

    if ax is None:
        ax = draw_map(**(map_kwargs or {}))

    dfnew = df[['latitude', 'longitude', col1, col2]].dropna().copy(deep=True)
    dfnew['sp_diff'] = dfnew[col2] - dfnew[col1]

    if val_max is not None:
        top = val_max
    else:
        top = score(dfnew['sp_diff'].abs(), per=95)

    # Marker area grows with |difference| relative to ``top``, capped at 300.
    dfnew['sp_diff_size'] = (dfnew['sp_diff'].abs() / top * 100.).clip(
        upper=300.)

    dfnew.plot.scatter(x='longitude',
                       y='latitude',
                       c=dfnew['sp_diff'],
                       s=dfnew['sp_diff_size'],
                       vmin=-1 * top,
                       vmax=top,
                       ax=ax,
                       colorbar=True,
                       **kwargs)

    # ``~outline`` is a bitwise NOT (-1 or -2, both truthy), so the outline
    # used to be hidden regardless of the argument.
    if not outline:
        # NOTE(review): ``outline_patch`` looks like the cartopy GeoAxes
        # attribute, which newer cartopy releases replace with
        # ``ax.spines['geo']`` -- confirm against the installed version.
        ax.outline_patch.set_alpha(0)
    # Act on ``ax`` itself rather than on whatever pyplot considers current.
    if global_map:
        ax.set_xlim([-180, 180])
        ax.set_ylim([-90, 90])
    if tight:
        ax.figure.tight_layout(pad=0)
    return ax
Exemple #7
0
def spatial_bias_scatter(df, m, date, vmin=None, vmax=None, savename='',
                         ncolors=15, fact=1.5, cmap='RdBu_r'):
    """Scatter the ``CMAQ - Obs`` bias for one date on a projected map.

    The colour scale is symmetric about zero and bounded by the rounded 95th
    percentile of the absolute bias over the whole of ``df``; only the rows
    whose ``datetime`` equals ``date`` are drawn.  Marker size is the absolute
    bias relative to that bound, capped at 300.

    ``m`` is called as ``m(lon, lat)`` to obtain plot coordinates and is also
    handed to ``colorbar_index``.  ``vmin``, ``vmax`` and ``fact`` are accepted
    but not used here.  When ``savename`` is non-empty the figure is saved to
    ``savename + date + '.jpg'`` and closed.

    Returns the figure, the axes and the colorbar from ``colorbar_index``.
    """
    from numpy import around
    from scipy.stats import scoreatpercentile as score

    f, ax = plt.subplots(figsize=(11, 6), frameon=False)
    ax.set_facecolor('white')

    # Colour-scale bound from the full data set, not just the selected date.
    bias_all = df.CMAQ - df.Obs
    top = around(score(bias_all.abs(), per=95))

    new = df[df.datetime == date]
    x, y = m(new.longitude.values, new.latitude.values)

    c, cmap = colorbar_index(ncolors, cmap, minval=top * -1, maxval=top,
                             basemap=m)
    c.ax.tick_params(labelsize=13)

    colors = new.CMAQ - new.Obs
    ss = ((new.CMAQ - new.Obs).abs() / top * 100.).clip(upper=300.)

    scatter_style = dict(vmin=-1. * top, vmax=top, cmap=cmap,
                         edgecolors='k', linewidths=.25, alpha=.7)
    plt.scatter(x, y, c=colors, s=ss, **scatter_style)

    if savename != '':
        plt.savefig(savename + date + '.jpg', dpi=75.)
        plt.close()
    return f, ax, c
def normalize_oneTrace(trace, first_baseline, second_baseline, rolling_SD, idx, SD_window = 20):
    """Normalise a trace and its rolling SD by the combined baselines.

    Parameters
    ----------
    trace : numpy.ndarray
        1-D trace (the original notes say: output of step 1, after
        subtraction of the first baseline).
    first_baseline : numpy.ndarray
        1-D array with the same length as ``trace``.
    second_baseline : float
        Scalar baseline (the original notes say: output of step 2).
    rolling_SD : numpy.ndarray
        Rolling standard deviation; ``2 * (SD_window // 2)`` samples shorter
        than ``trace``.
    idx : array of int
        Indices into ``rolling_SD`` whose normalised values are summarised.
    SD_window : int
        Window width that was used to compute ``rolling_SD``.

    Returns
    -------
    normed_trace : numpy.ndarray
        ``(trace - second_baseline) / (first_baseline + second_baseline)``.
    normed_SD : float
        Median of the unique normalised rolling-SD values at ``idx``, rounded
        to three decimals before de-duplication.
    """
    from scipy.stats import scoreatpercentile as score

    # Integer half-width: a float here breaks slicing on Python 3.
    win = int(SD_window) // 2

    denominator = first_baseline + second_baseline
    normed_trace = (trace - second_baseline) / denominator

    # rolling_SD has no samples for the first and last ``win`` positions, so
    # trim the denominator to match.  The explicit end index also works when
    # ``win`` is 0.
    normed_rolling_SD = rolling_SD / denominator[win:len(denominator) - win]

    sd_vals = np.unique(np.round(normed_rolling_SD[idx], 3))
    normed_SD = score(sd_vals, 50)

    return normed_trace, normed_SD
def get_first_baseline_oneTrace(trace, window, Fluor_percentile):
    """Compute a sliding-percentile baseline and subtract it from the trace.

    Parameters
    ----------
    trace : numpy.ndarray
        1-D array of shape ``[frames]``.
    window : int
        Width of the sliding window in frames.  Half of it (integer division)
        is taken on each side of the centre sample.
    Fluor_percentile : float
        Percentile (0-100) of the values inside each window that is used as
        the baseline at that sample.

    Returns
    -------
    baseline : numpy.ndarray
        Baseline with the same length as ``trace``.  The first and last
        ``window // 2`` samples, for which no full window exists, are filled
        with a baseline value taken five samples further in from each edge.
    baselined_trace : numpy.ndarray
        ``trace - baseline``.

    Notes
    -----
    The trace has to be long enough for the padding look-ups
    (``pad[win + 5]`` and ``pad[-win - 5]``) to land inside the computed
    baseline.
    """
    from scipy.stats import scoreatpercentile as score

    # Integer half-width: a float here breaks range(), slicing and np.zeros
    # on Python 3.
    win = int(window) // 2

    baseline = np.array([score(trace[s - win:s + win], Fluor_percentile)
                         for s in range(win, trace.shape[0] - win)])

    # Pad the baseline back to the full trace length, then overwrite the
    # padding with a nearby interior baseline value.
    a = np.zeros(win)
    pad = np.hstack((a, baseline, a))
    pad[:win] = pad[win + 5]
    pad[-win:] = pad[-win - 5]
    baseline = pad

    # Subtract the padded baseline from the trace.
    baselined_trace = trace - baseline

    return baseline, baselined_trace
Exemple #10
0
# Some price-return values are None; replace them with 0.0 first so that
# every value in a column can be compared when the percentiles are computed.
# This pass has to finish before the ranking below starts, because each
# percentile looks at the whole column.
for row in hqm_dataframe.index:
    for time_period in time_periods:
        change_col = f'{time_period} Price Return'

        if hqm_dataframe.loc[row, change_col] is None:
            hqm_dataframe.loc[row, change_col] = 0.0

# For each row: rank its return within each time period's column (as a
# fraction), then store the average of those fractions as the HQM score.
# NOTE(review): ``score`` is defined outside this block; dividing by 100
# suggests it is scipy.stats.percentileofscore -- confirm.
for row in hqm_dataframe.index:
    momentum_percentiles = []
    for time_period in time_periods:
        change_col = f'{time_period} Price Return'
        percentile_col = f'{time_period} Return Percentile'

        hqm_dataframe.loc[row, percentile_col] = score(
            hqm_dataframe[change_col], hqm_dataframe.loc[row,
                                                         change_col]) / 100
        momentum_percentiles.append(hqm_dataframe.loc[row, percentile_col])
    hqm_dataframe.loc[row, 'HQM Score'] = mean(momentum_percentiles)

# Keep the 50 rows with the highest HQM score and renumber them from zero.
hqm_dataframe.sort_values('HQM Score', ascending=False, inplace=True)
hqm_dataframe = hqm_dataframe[:50]
hqm_dataframe.reset_index(inplace=True, drop=True)

# Equal allocation across the selected rows.
position_size = float(portfolio_size) / len(hqm_dataframe.index)
Exemple #11
0
def scatter_param(df, title=None, fig=None, label=None, footer=True):
    """Scatter model (``CMAQ``) against observed (``Obs``) values.

    On a new figure the plot also shows the 1:1 line and a least-squares
    regression line, with both axes limited to the larger of the two 99.5th
    percentiles.  On an existing figure only the points and their regression
    line are added to the first axes.

    Parameters
    ----------
    df : DataFrame-like
        Must provide ``CMAQ``, ``Obs``, ``Species`` and ``Units`` columns.
    title : str, optional
        Title of the new figure (ignored when ``fig`` is given).
    fig : figure, optional
        Existing figure to add to.
    label : str, optional
        Legend label for the scattered points.
    footer : bool
        When true and a new figure is created, ``footer_text(df)`` is called.

    Returns
    -------
    None
    """
    # numpy's max is imported under an alias so the builtin is not shadowed.
    from numpy import max as np_max, arange, linspace, isnan
    from scipy.stats import scoreatpercentile as score
    from scipy.stats import linregress
    sns.set_style('ticks')

    species, units = df.Species.unique()[0], df.Units.unique()[0]
    # Only pairs where both values are present enter the statistics.
    mask = ~isnan(df.Obs.values) & ~isnan(df.CMAQ.values)
    maxval1 = score(df.CMAQ.values[mask], per=99.5)
    maxval2 = score(df.Obs.values[mask], per=99.5)
    maxval = np_max([maxval1, maxval2])
    # Call form of print: valid on Python 3 and prints the same on Python 2.
    print(maxval)
    if fig is None:
        plt.figure(figsize=(10, 7))

        plt.scatter(df.Obs,
                    df.CMAQ,
                    c='cornflowerblue',
                    marker='o',
                    edgecolors='w',
                    alpha=.3,
                    label=label)
        x = arange(0, maxval + 1)
        if maxval <= 10.:
            # Integer steps are too coarse for small ranges.
            x = linspace(0, maxval, 25)
        plt.plot(x, x, '--', color='slategrey')
        tt = linregress(df.Obs.values[mask], df.CMAQ.values[mask])
        plt.plot(x, tt[0] * x + tt[1], color='tomato')

        plt.xlim([0, maxval])
        plt.ylim([0, maxval])
        plt.xlabel('Obs ' + species + ' (' + units + ')')
        plt.title(title)
        plt.gca().axes.set_ylabel('Model ' + species + ' (' + units + ')')
        if footer:
            footer_text(df)
        plt.tight_layout()
        plt.grid(alpha=.5)
    else:
        ax = fig.get_axes()[0]
        # scatter returns a single collection, so it cannot be unpacked with
        # ``l, = ...`` and has no get_color(); read its face colour instead.
        points = ax.scatter(df.Obs,
                            df.CMAQ,
                            marker='o',
                            edgecolors='w',
                            alpha=.3,
                            label=label)
        # Same NaN mask as above; unmasked NaNs give a NaN regression.
        tt = linregress(df.Obs.values[mask], df.CMAQ.values[mask])
        xs = df.Obs.unique()
        # Drop the alpha channel so the line is drawn opaque.
        line_color = tuple(points.get_facecolor()[0][:3])
        ax.plot(xs, tt[0] * xs + tt[1], color=line_color)
        # 'Best' is not a valid location string; matplotlib expects 'best'.
        ax.legend(loc='best')
Exemple #12
0
def kdeplots_param(df,
                   title=None,
                   fig=None,
                   label=None,
                   footer=True,
                   cumulative=False):
    """Plot kernel density estimates of the ``Obs`` and ``CMAQ`` columns.

    Parameters
    ----------
    df : DataFrame-like
        Must provide ``CMAQ`` and ``Obs`` columns.  When a new figure is
        created, the first unique values of ``Species`` and ``Units`` are
        used for the axis labels.
    title : str, optional
        Title of the new figure (ignored when ``fig`` is given).
    fig : figure, optional
        Existing figure.  When given, only the ``CMAQ`` density is added to
        its first axes.
    label : str, optional
        Legend label for the ``CMAQ`` curve.
    footer : bool
        When true and a new figure is created, ``footer_text(df)`` is called.
    cumulative : bool
        Draw cumulative distributions instead of densities.  The x-axis is
        limited to ``[0, 99.5th percentile]`` only in the non-cumulative case.

    Returns
    -------
    None
    """
    from numpy import isnan
    from scipy.stats import scoreatpercentile as score
    sns.set_style('ticks')

    if fig is not None:
        ax = fig.get_axes()[0]
        sns.kdeplot(df.CMAQ, ax=ax, label=label, cumulative=cumulative)
        return

    plt.figure(figsize=(13, 8))
    if cumulative:
        sns.kdeplot(df.Obs,
                    color='darkslategrey',
                    cumulative=True,
                    label='Obs')
        sns.kdeplot(df.CMAQ,
                    color='dodgerblue',
                    cumulative=True,
                    label=label)
    else:
        # Missing values are dropped before taking the percentile; otherwise
        # NaNs end up at the top of the sorted data and can turn the axis
        # limit into NaN.
        cmaq = df.CMAQ.values
        obs = df.Obs.values
        maxval1 = score(cmaq[~isnan(cmaq)], per=99.5)
        maxval2 = score(obs[~isnan(obs)], per=99.5)
        maxval = max([maxval1, maxval2])
        sns.kdeplot(df.Obs, color='darkslategrey')
        sns.kdeplot(df.CMAQ, color='dodgerblue', label=label)

    sns.despine()
    if not cumulative:
        plt.xlim([0, maxval])
    plt.xlabel(df.Species.unique()[0] + '  (' + df.Units.unique()[0] + ')')
    plt.title(title)
    plt.gca().axes.set_ylabel('P(' + df.Species.unique()[0] + ')')
    if footer:
        footer_text(df)
    plt.tight_layout()
    plt.grid(alpha=.5)
Exemple #13
0
def fetch_hqm():
    """Download price statistics for the listed tickers and rank them.

    Tickers are read from ``sp_500_stocks.csv`` (column ``Ticker``), queried
    from the IEX sandbox batch endpoint in ten chunks, and collected into a
    DataFrame.  For each of the four look-back periods the row's price return
    is converted to a percentile within its column, and the mean of the four
    percentiles is stored as ``HQM Score``.

    Relies on module-level names: ``pd``, ``np``, ``math``, ``requests``,
    ``PORTFOLIO_SIZE``, ``TOP_XX_STOCKS``, ``TOKEN``, ``score`` and ``mean``.
    NOTE(review): dividing by 100 suggests ``score`` is
    scipy.stats.percentileofscore -- confirm.

    Returns
    -------
    pandas.DataFrame
        One row per successfully fetched ticker, sorted by ``HQM Score`` in
        descending order.

    Notes
    -----
    A chunk whose request or JSON decoding fails is skipped with a message.
    A ticker whose data is missing or unusable is skipped on its own; it no
    longer discards the remaining tickers of the same chunk.
    """
    hqm_columns = [
        'Ticker',
        'Company Name',
        'Price',
        'Shares to Buy',
        'HQM Score',
        'One-Year Price Return',
        'One-Year Return Percentile',
        'Six-Month Price Return',
        'Six-Month Return Percentile',
        'Three-Month Price Return',
        'Three-Month Return Percentile',
        'One-Month Price Return',
        'One-Month Return Percentile'
    ]
    time_periods = [
        'One-Year',
        'Six-Month',
        'Three-Month',
        'One-Month',
    ]

    stocks = pd.read_csv('sp_500_stocks.csv')
    smaller_chunks = np.array_split(stocks['Ticker'], 10)
    position_size = math.floor(PORTFOLIO_SIZE/TOP_XX_STOCKS)

    # Rows are collected in a list and turned into a DataFrame once:
    # DataFrame.append copied the whole frame on every call and was removed
    # in pandas 2.0.
    rows = []
    for stocks_chunk in smaller_chunks:
        stocks_list = ','.join(stocks_chunk)
        batch_api_url = f'https://sandbox.iexapis.com/stable/stock/market/batch?symbols={stocks_list}&types=price,stats&token={TOKEN}'
        try:
            req_result = requests.get(batch_api_url)
            print(req_result.status_code)
            data = req_result.json()
            print(data.keys())
        except Exception as exc:
            # Network or decoding failure: report it and move on to the next
            # chunk instead of hiding the cause.
            print(f"Houston, we have a problem: {exc!r}")
            continue

        for symbol in stocks_chunk:
            try:
                stats = data[symbol]['stats']
                stock_price = data[symbol]['price']
                rows.append([
                    symbol,
                    stats['companyName'],
                    stock_price,
                    math.floor(position_size/stock_price),
                    'N/A',  # HQM Score, filled in below
                    stats['year1ChangePercent'],
                    'N/A',  # One-Year Return Percentile
                    stats['month6ChangePercent'],
                    'N/A',  # Six-Month Return Percentile
                    stats['month3ChangePercent'],
                    'N/A',  # Three-Month Return Percentile
                    stats['month1ChangePercent'],
                    'N/A',  # One-Month Return Percentile
                ])
            except (KeyError, TypeError, ZeroDivisionError) as exc:
                # Missing symbol, missing field, or an unusable price.
                print(f"Houston, we have a problem with {symbol}: {exc!r}")

    hqm_dataframe = pd.DataFrame(rows, columns=hqm_columns)

    # Missing returns (None in the API response) count as 0.0 so that every
    # value can be ranked.
    for time_period in time_periods:
        col_price = f'{time_period} Price Return'
        hqm_dataframe[col_price] = pd.to_numeric(
            hqm_dataframe[col_price]).fillna(0.0)

    for row in hqm_dataframe.index:
        momentum_percentiles = []
        co_name = hqm_dataframe.loc[row, 'Ticker']
        print(co_name)
        for time_period in time_periods:
            col_price = f'{time_period} Price Return'
            col_percentile = f'{time_period} Return Percentile'
            hqm_dataframe.loc[row, col_percentile] = score(hqm_dataframe[col_price], hqm_dataframe.loc[row, col_price])/100
            momentum_percentiles.append(hqm_dataframe.loc[row, col_percentile])
        hqm_dataframe.loc[row, 'HQM Score'] = mean(momentum_percentiles)

    # Highest momentum first.  Truncating to the top rows is left to the
    # caller.
    hqm_dataframe.sort_values('HQM Score', ascending=False, inplace=True)
    return hqm_dataframe
Exemple #14
0
                                                       index=hqm_columns),
                                             ignore_index=True)

################### CALCULATE MOMENTUM PERCENTILES #########################
from statistics import mean

time_periods = ['One-Year', 'Six-Month', 'Three-Month', 'One-Month']

# Every missing value in the frame becomes 0.0 before ranking.
hqm_dataframe.fillna(value=0.0, inplace=True)

# For each row, rank its return inside each period's column and store the
# result in the matching percentile column.  The HQM score is the mean of the
# four stored values.  ``score`` comes from outside this block.
for row in hqm_dataframe.index:
    momentum_percentiles = []
    for time_period in time_periods:
        change_col = f'{time_period} Price Return'
        percentile_col = f'{time_period} Return Percentile'
        a = hqm_dataframe[change_col]
        b = hqm_dataframe.loc[row, change_col]
        hqm_dataframe.loc[row, percentile_col] = score(a, b)
        momentum_percentiles.append(hqm_dataframe.loc[row, percentile_col])
    hqm_dataframe.loc[row, 'HQM Score'] = mean(momentum_percentiles)

# Keep the highest-scoring rows.
# NOTE(review): an earlier comment here said "50 best", but the slice keeps
# only the first five rows -- confirm which is intended.
hqm_dataframe.sort_values('HQM Score', ascending=False, inplace=True)
hqm_dataframe = hqm_dataframe[:5]
Exemple #15
0
def diffscatter_param(df, title=None, fig=None, label=None, footer=True):
    """Scatter the model bias (``CMAQ - Obs``) against the observed value.

    Rows with any missing value are dropped first.  On a new figure the axes
    are limited to the 99.9th percentile of ``Obs`` (x) and the 0.1st to
    99.9th percentile of the bias (y), and a dashed zero line is drawn.  On an
    existing figure only the points are added to its first axes, followed by
    a legend.

    Parameters
    ----------
    df : DataFrame-like
        Must provide ``CMAQ`` and ``Obs`` columns; ``Species`` and ``Units``
        are read for the axis labels when a new figure is created.
    title : str, optional
        Title of the new figure (ignored when ``fig`` is given).
    fig : figure, optional
        Existing figure to add to.
    label : str, optional
        Legend label for the scattered points.
    footer : bool
        When true and a new figure is created, ``footer_text(df)`` is called.

    Returns
    -------
    None
    """
    from numpy import isnan
    from scipy.stats import scoreatpercentile as score

    sns.set_style('ticks')
    df = df.dropna()

    # Pairs where both the observation and the model value are present.
    valid = ~(isnan(df.Obs.values) | isnan(df.CMAQ.values))
    obs = df.Obs.values[valid]
    bias = df.CMAQ.values[valid] - obs

    if fig is not None:
        ax = fig.get_axes()[0]
        ax.scatter(obs,
                   bias,
                   marker='o',
                   edgecolors='w',
                   alpha=.3,
                   label=label)
        plt.legend(loc='best')
        return

    species, units = df.Species.unique()[0], df.Units.unique()[0]
    maxval = score(obs, per=99.9)
    minvaly = score(bias, per=.1)
    maxvaly = score(bias, per=99.9)

    plt.figure(figsize=(10, 7))
    plt.scatter(obs,
                bias,
                c='cornflowerblue',
                marker='o',
                edgecolors='w',
                alpha=.3,
                label=label)
    # Zero-bias reference line.
    plt.plot((0, maxval), (0, 0), '--', color='darkslategrey')

    plt.xlim([0, maxval])
    plt.ylim([minvaly, maxvaly])
    unit_suffix = species + ' (' + units + ')'
    plt.xlabel('Obs ' + unit_suffix)
    plt.title(title)
    plt.gca().axes.set_ylabel('Model - Obs ' + unit_suffix)
    if footer:
        footer_text(df)
    plt.tight_layout()
Exemple #16
0
    rv_dataframe[column].fillna(rv_dataframe[column].mean(), inplace=True)

# Selects the rows that still contain a missing value.  The result is not
# assigned, so outside an interactive session this line has no effect.
rv_dataframe[rv_dataframe.isnull().any(axis=1)]

from scipy.stats import percentileofscore as score

# Maps each valuation metric column to the column that receives its
# percentile.  'PE Percentile' was previously misspelled 'PE Percentaile',
# which is not the spelling used by the other percentile columns.
# NOTE(review): confirm no later code reads the misspelled column name.
metrics = {
    'Price-to-Earnings Ratio': 'PE Percentile',
    'Price-to-Book Ratio': 'PB Percentile',
    'Price-to-Sales Ratio': 'PS Percentile',
    'EV/EBITDA': 'EV/EBITDA Percentile',
    'EV/GP': 'EV/GP Percentile'
}
for metric, percentile_col in metrics.items():
    # The metric column is not modified inside the loop, so look it up once.
    metric_values = rv_dataframe[metric]
    for row in rv_dataframe.index:
        rv_dataframe.loc[row, percentile_col] = score(
            metric_values, rv_dataframe.loc[row, metric]) / 100

#print(rv_dataframe)

from statistics import mean

# RV score: mean of the row's metric percentiles.
for row in rv_dataframe.index:
    value_percentiles = [
        rv_dataframe.loc[row, percentile_col]
        for percentile_col in metrics.values()
    ]

    rv_dataframe.loc[row, 'RV Score'] = mean(value_percentiles)
#print(rv_dataframe)

# Lowest scores first; keep the first 50 rows.
rv_dataframe.sort_values('RV Score', ascending=True, inplace=True)
rv_dataframe = rv_dataframe[:50]
Exemple #17
0
# CALCULATING DAY CHANGE OF STOCKS
# One value per column of ``sp500``: the sum of its period-over-period
# percentage changes.

dc = [sp500[ticker].pct_change().sum() for ticker in sp500.columns]

sp500_momentum = pd.DataFrame(columns = ['symbol', 'day_change'])
sp500_momentum['symbol'] = sp500.columns
sp500_momentum['day_change'] = dc

# CALCULATING MOMENTUM
# Each symbol's summed change is ranked against all symbols and stored as a
# fraction.  ``score`` comes from outside this block.

sp500_momentum['momentum'] = [
    score(sp500_momentum.day_change, change) / 100
    for change in sp500_momentum['day_change'].values
]

sp500_momentum['momentum'] = sp500_momentum['momentum'].astype(float)
print(sp500_momentum.head())

# Ten symbols with the largest momentum, as a one-column frame renumbered
# from zero.
top_picks = sp500_momentum.nlargest(10, 'momentum')[['symbol']].reset_index(drop = True)
print(top_picks)

# BACKTEST
# Equal allocation of the portfolio across the selected symbols.

portfolio_val = 1000000
per_stock_val = portfolio_val / len(top_picks)

day_close = []
for i in top_picks['symbol']:
    data = sp500[i]