Python getData Examples, getdata.getData Python Examples

Example #1

0

Show file

File: survival.py Project: EJHortala/books-2

def main():
    '''Compare the survival curves of two motion-sickness experiments.

    The data in this example give the life table for motion sickness data
    from an experiment with vertical movement at a frequency of 0.167 Hz and
    acceleration 0.111 g, and of a second experiment with 0.333 Hz and
    acceleration of 0.222 g.

    Returns p, the first value returned by logrank(data1, data2).
    '''

    # get the data
    # Raw string: '\D' and '\d' are not valid escape sequences, so the
    # non-raw literal only produced the intended path by accident.
    data_dir = r'..\Data\data_altman'
    data1 = getData('altman_13_2.txt', subDir=data_dir)
    data2 = getData('altman_13_3.txt', subDir=data_dir)

    # Determine the Kaplan-Meier curves; only the 3rd and 4th returned
    # items are used (as x and y of the step plots below)
    (_, _, t1, sp1, _) = kaplanmeier(data1)
    (_, _, t2, sp2, _) = kaplanmeier(data2)

    # Make a combined plot for both datasets. Consecutive plot calls draw
    # into the same axes, so no plt.hold(True) is needed (that function no
    # longer exists in current matplotlib).
    plt.step(t1, sp1, where='post')
    plt.step(t2, sp2, 'r', where='post')

    plt.legend(['Data1', 'Data2'])
    plt.ylim(0, 1)
    plt.xlabel('Time')
    plt.ylabel('Survival Probability')
    plt.show()

    # Check the hypothesis that the two survival curves are the same
    # --- >>> START stats <<< ---
    (p, _) = logrank(data1, data2)
    # --- >>> STOP stats <<< ---

    return p  # supposed to be 0.073326322306832212

Example #2

0

Show file

File: survival.py Project: fluxium/statsintro

def main():
    '''Plot Kaplan-Meier curves for two data sets and compare them.

    The data in this example give the life table for motion sickness data
    from an experiment with vertical movement at a frequency of 0.167 Hz and
    acceleration 0.111 g, and of a second experiment with 0.333 Hz and
    acceleration of 0.222 g.

    Returns p, the first item of the pair returned by logrank().
    '''

    # get the data (raw string: the backslashes are path separators and
    # '\D' / '\d' are not valid escape sequences)
    subdir = r'..\Data\data_altman'
    data1 = getData('altman_13_2.txt', subDir=subdir)
    data2 = getData('altman_13_3.txt', subDir=subdir)

    # Determine the Kaplan-Meier curves; the remaining returned values
    # are not needed for the plot
    (_, _, t1, sp1, _) = kaplanmeier(data1)
    (_, _, t2, sp2, _) = kaplanmeier(data2)

    # Make a combined plot for both datasets; both step curves end up in
    # the same axes without the obsolete plt.hold(True) call
    plt.step(t1, sp1, where='post')
    plt.step(t2, sp2, 'r', where='post')

    plt.legend(['Data1', 'Data2'])
    plt.ylim(0, 1)
    plt.xlabel('Time')
    plt.ylabel('Survival Probability')
    plt.show()

    # Check the hypothesis that the two survival curves are the same
    # --- >>> START stats <<< ---
    (p, _) = logrank(data1, data2)
    # --- >>> STOP stats <<< ---

    return p    # supposed to be 0.073326322306832212

Example #3

0

Show file

File: analyzer.py Project: piivonen/BajarPulse

def construct_ranking_table(portfolio,lookbackDays=None,oldTable=None,enddate=dt.today(),index=None,silent=False):
    """Build a per-stock performance table for a portfolio, ranked against an index.

    One row is created for every stock in ``portfolio.stocks``. The rows are
    ordered by (stock gain - index gain), best first, and numbered in a
    ``Rank`` column.

    portfolio: object exposing ``stocks``; each stock must provide
        ``ticker``, ``shares_owned`` and ``investment``.
    lookbackDays: number of days to look back for baseline prices whenever
        ``oldTable`` cannot supply a baseline.
    oldTable: table indexed by ticker that is tried first for the baseline
        values (presumably an earlier result of this function - confirm).
    enddate: accepted for interface compatibility; the body does not read it.
    index: reference index ticker; looked up per stock when None.
    silent: when True, the two summary lines are not printed.

    Returns the ranked pandas DataFrame, indexed by symbol.
    """
    dfarr = []
    for stock in portfolio.stocks:
        if index is None:
            idx = getdata.getIndexTicker(getdata.getData([stock.ticker],getdata.getParamDict('stock exchange')))
        else:
            idx = index
        # Latest available close within the last five days
        indexCurrVal = getdata.get_history([idx],
                                           dt.today()-relativedelta(days=5))[-1:]['Close'].values[0]
        # Best effort: prefer the old table, fall back to the price history.
        # "except Exception" keeps that fallback but no longer swallows
        # KeyboardInterrupt / SystemExit as the bare "except:" did.
        # NOTE(review): the old-table baseline is read from the
        # '%Gain Index (Period)' column, i.e. a percentage rather than an
        # index level - confirm this is intended.
        try:
            indexPrevVal = oldTable.ix[stock.ticker]['%Gain Index (Period)']
        except Exception:
            indexPrevVal = getdata.get_history([idx],
                                               dt.today()-relativedelta(days=lookbackDays))['Close'].values[0]
        # Start from a one-row frame with a dummy column 0 (dropped below)
        dftemp = pd.DataFrame(data=[0],index=[0])
        dftemp['Symbol'] = stock.ticker
        dftemp['Name'] = getdata.getData([stock.ticker],"n")
        dftemp['Price'] = getdata.get_history([stock.ticker],
                                              dt.today()-relativedelta(days=5))[-1:]['Close'].values[0]
        dftemp['Number Owned'] = stock.shares_owned
        dftemp['Current Value'] = dftemp['Price']*dftemp['Number Owned']
        try:
            dftemp['Previous Value'] = oldTable.ix[stock.ticker]['Current Value']
        except Exception:
            dftemp['Previous Value'] = dftemp['Number Owned']*getdata.get_history([stock.ticker],
                                                  dt.today()-relativedelta(days=lookbackDays))['Close'].values[0]
        dftemp['%Gain (Period)'] = 100*(dftemp['Current Value'] - dftemp['Previous Value'])/dftemp['Previous Value']
        dftemp['%Gain Index (Period)'] = 100*(indexCurrVal - indexPrevVal)/indexPrevVal
        dftemp['Index'] = idx
        # Helper column used only for sorting; removed before returning
        dftemp['GainSt - GainIdx'] = dftemp['%Gain (Period)'] - dftemp['%Gain Index (Period)']
        dftemp['Total Investment'] = stock.investment
        dftemp['%Total Gain'] = 100*(dftemp['Current Value'] - stock.investment)/stock.investment
        dftemp = dftemp.drop(0,axis=1)
        dftemp.index = dftemp['Symbol']
        dftemp = dftemp.drop('Symbol',axis=1)
        dfarr.append(dftemp)
    dftoret = pd.concat(dfarr)
    dftoret = dftoret.sort(columns='GainSt - GainIdx',ascending=False)
    dftoret = dftoret.drop('GainSt - GainIdx',axis=1)
    dftoret['Rank'] = range(1,len(dftoret)+1)
    if not silent:
        better = dftoret[dftoret['%Gain (Period)']>dftoret['%Gain Index (Period)']].index.values
        worse = dftoret[dftoret['%Gain (Period)']<dftoret['%Gain Index (Period)']].index.values
        # Single-argument print calls give the same output on Python 2 and 3
        print("Stocks doing better than the market: {0}".format(better))
        print("Stocks doing worse than the market: {0}".format(worse))
    return dftoret

Example #4

0

Show file

File: anovaOneway.py Project: josef-pkt/statsintro

def anova_oneway():
    ''' One-way ANOVA: test if results from 3 groups are equal.

    Prints the F statistic and p-value computed with scipy, followed by
    the ANOVA table obtained with statsmodels.
    '''

    # Get the data
    data = getData('altman_910.txt')

    # Sort them into groups, according to column 1
    group1 = data[data[:,1]==1,0]
    group2 = data[data[:,1]==2,0]
    group3 = data[data[:,1]==3,0]

    # First, check if the variances are equal, with the "Levene"-test
    (W,p) = stats.levene(group1, group2, group3)
    if p<0.05:
        print('Warning: the p-value of the Levene test is <0.05: p={0}'.format(p))

    # Do the one-way ANOVA
    F_statistic, pVal = stats.f_oneway(group1, group2, group3)

    # Print the results. Every print call takes exactly one argument, so
    # the output is the same under Python 2 and Python 3 (the tuple is
    # printed as a tuple in both).
    print('Altman 910:')
    print((F_statistic, pVal))
    if pVal < 0.05:
        print('One of the groups is significantly different.')

    # Elegant alternative implementation, with pandas & statsmodels
    df = pd.DataFrame(data, columns=['value', 'treatment'])
    model = ols('value ~ C(treatment)', df).fit()
    print(anova_lm(model))

Example #5

0

Show file

File: oneSample.py Project: josef-pkt/statsintro

def check_mean():
    '''Data from Altman, check for significance of mean value.
    Compare average daily energy intake (kJ) over 10 days of 11 healthy women, and
    compare it to the recommended level of 7725 kJ.

    Prints mean/SD, a confidence interval, and the outcome of a one-sample
    t-test and a Wilcoxon signed rank test against the recommended level.
    '''
    # Get data from Altman
    data = getData('altman_91.txt')

    # Watch out: by default the SD is calculated with 1/N!
    myMean = np.mean(data)
    mySD = np.std(data, ddof=1)
    # Single-argument print calls behave identically on Python 2 and 3
    print('Mean and SD: {0:4.2f} and {1:4.2f}'.format(myMean, mySD))

    # Confidence intervals, from the t-distribution with len(data)-1
    # degrees of freedom; isf(0.025) leaves 2.5% in the upper tail
    tf = stats.t(len(data)-1)
    ci = np.mean(data) + stats.sem(data)*np.array([-1,1])*tf.isf(0.025)
    print('The confidence intervals are {0:4.2f} to {1:4.2f}.'.format(ci[0], ci[1]))

    # Check for significance
    checkValue = 7725
    t, prob = stats.ttest_1samp(data, checkValue)
    if prob < 0.05:
        print('{0:4.2f} is significantly different from the mean (p={1:5.3f}).'.format(checkValue, prob))

    # For not normally distributed data, use the Wilcoxon signed rank test
    (rank, pVal) = stats.wilcoxon(data-checkValue)
    if pVal < 0.05:
        issignificant = 'unlikely'
    else:
        issignificant = 'likely'

    print('It is ' + issignificant + ' that the value is {0:d}'.format(checkValue))

Example #6

0

Show file

def paired_data():
    '''Paired-sample analysis.

    Compares the mean daily energy intake (in kJ) over 10 pre-menstrual and
    10 post-menstrual days and prints the p-values of a paired t-test and
    of a Wilcoxon test on the differences.'''

    # Daily intake of energy in kJ for 11 women; column 0 holds the "pre"
    # values and column 1 the "post" values
    intake = getData('altman_93.txt')

    # Column-wise mean and SD are evaluated here, but the results are
    # not stored or printed
    mean(intake, axis=0)
    std(intake, axis=0, ddof=1)

    before, after = intake[:, 0], intake[:, 1]
    change = after - before

    # A paired t-test (two measurements on the same experimental unit,
    # e.g. before and after a treatment) is a one-sample t-test of the
    # differences against 0.
    # p < 0.05 => alternative hypothesis: the mean difference is not 0
    _, p_value = stats.ttest_1samp(change, 0)
    print("paired t-test", p_value)

    # Rank-based alternative for ordinal-scale or not normally
    # distributed data
    _, p_value = stats.wilcoxon(change)
    print("paired wilcoxon-test", p_value)

Example #7

0

Show file

File: oneSample.py Project: EJHortala/books-2

def check_mean():
    '''Data from Altman, check for significance of mean value.
    Compare average daily energy intake (kJ) over 10 days of 11 healthy
    women, and compare it to the recommended level of 7725 kJ.

    Returns the p-value of the one-sample t-test.
    '''
    # Get data from Altman.
    # Raw string: the backslashes are path separators, and '\D' / '\d'
    # are not valid escape sequences in a normal string literal.
    data = getData('altman_91.txt', subDir=r'..\Data\data_altman')

    # Watch out: by default the SD is calculated with 1/N!
    myMean = np.mean(data)
    mySD = np.std(data, ddof=1)
    print('Mean and SD: {0:4.2f} and {1:4.2f}'.format(myMean, mySD))

    # Confidence intervals, using the 97.5% quantile of the t-distribution
    # with len(data)-1 degrees of freedom
    tf = stats.t(len(data)-1)
    ci = np.mean(data) + stats.sem(data)*np.array([-1,1])*tf.ppf(0.975)
    print('The confidence intervals are {0:4.2f} to {1:4.2f}.'.format(ci[0], ci[1]))

    # Check for significance
    checkValue = 7725
    # --- >>> START stats <<< ---
    t, prob = stats.ttest_1samp(data, checkValue)
    if prob < 0.05:
        print('{0:4.2f} is significantly different from the mean (p={1:5.3f}).'.format(checkValue, prob))

    # For not normally distributed data, use the Wilcoxon signed rank test
    (rank, pVal) = stats.wilcoxon(data-checkValue)
    if pVal < 0.05:
        issignificant = 'unlikely'
    else:
        issignificant = 'likely'
    # --- >>> STOP stats <<< ---

    print('It is ' + issignificant + ' that the value is {0:d}'.format(checkValue))

    return prob # should be 0.018137235176105802

Example #8

0

Show file

File: anovaOneway.py Project: ing7t/kod

def anova_byHand():
    """Calculate the ANOVA by hand.

    Prints and returns the pair (F, p): the F statistic and the upper-tail
    probability of the F-distribution at that value.
    """

    # Get the data (raw string: '\D' and '\d' are not valid escapes)
    data = getData('altman_910.txt', subDir=r'..\Data\data_altman')

    # Convert them to pandas-format and group them by their group value
    df = pd.DataFrame(data, columns=['values', 'group'])
    groups = df.groupby('group')

    # The overall mean does not change inside the loop: compute it once
    grand_mean = df['values'].mean()

    # The "total sum-square" is the squared deviation from the mean.
    # It is shown for illustration only and not used further below.
    ss_total = np.sum((df['values'] - grand_mean)**2)

    # Calculate ss_treatment and ss_error
    (ss_treatments, ss_error) = (0, 0)
    for val, group in groups:
        group_mean = group['values'].mean()
        ss_error += sum((group['values'] - group_mean)**2)
        ss_treatments += len(group) * (group_mean - grand_mean)**2

    df_groups = len(groups) - 1
    df_residuals = len(data) - len(groups)
    F = (ss_treatments / df_groups) / (ss_error / df_residuals)
    # Separate name for the distribution, so that "df" keeps referring
    # to the DataFrame
    f_dist = stats.f(df_groups, df_residuals)
    p = f_dist.sf(F)

    print('ANOVA-Results: F = {0}, and p<{1}'.format(F, p))

    return (F, p)

Example #9

0

Show file

def getSchedule():
    """Return a borderless table with at most five upcoming games.

    A game is listed when its 'start_time' lies after "now minus one day"
    (localized via ``utc``). Columns: Home, Away, Score, Date, Time.
    """

    def _short_name(raw_name):
        # Keep the part of the name from its last space onward (the space
        # itself is kept, as before). A name without any space is returned
        # whole: rfind() yields -1 there, which previously cut the name
        # down to its final character.
        readable = raw_name.replace('_', " ")
        cut = readable.rfind(" ")
        return readable if cut == -1 else readable[cut:]

    x = PrettyTable()
    x.border = False
    dtm = datetime.datetime.now()
    dtm = dtm - timedelta(days=1)
    fdtm = utc.localize(dtm)
    bs = gd.getData(2)
    x.field_names = ["Home", "Away", "Score", "Date", "Time"]
    shown = 0
    for game in bs:
        if shown >= 5:
            # The table is full; remaining games cannot add rows
            break
        start = game['start_time']
        if fdtm < start:
            score = str(game['home_team_score']) + "-" + str(game['away_team_score'])
            x.add_row([
                _short_name(game['home_team'].name),
                _short_name(game['away_team'].name),
                score,
                start.strftime("%d %b"),
                start.strftime("%H:%M")
            ])
            shown = shown + 1
    return x

Example #10

0

Show file

File: app.py Project: naaaaaaaaaaaf/fushinsha-generator

def genModel(elements):
    """Generate and export one model file per element.

    For every name in ``elements`` the data returned by getdata.getData()
    are passed to exportModel.generateAndExport(), together with the
    target path 'chainfiles/<name>.json' and a size of 2 for 'iti' and 3
    for everything else.
    """
    # "YYYYMM" strings for the month-end dates between 2018-01 and 2021-01
    date_index = pandas.date_range(start="2018-01", end="2021-01", freq="M").to_series().dt.strftime("%Y%m")
    for element in elements:
        path = 'chainfiles/' + element + '.json'
        size = 2 if element == 'iti' else 3
        # Named "chain_data" instead of "list", so that the builtin is
        # not shadowed
        chain_data = getdata.getData(element, date_index)
        exportModel.generateAndExport(chain_data, path, size)

Example #11

0

Show file

File: multivariate.py Project: b-rodrigues/statsintro

def paired_data():
    '''Analyse paired measurements.

    Mean daily intake over 10 pre-menstrual and 10 post-menstrual days
    (in kJ) is compared; the p-values of both tests are printed.'''

    # Daily intake of energy in kJ for 11 women
    measurements = getData('altman_93.txt')

    # Evaluated per column; the results are discarded
    mean(measurements, axis=0)
    std(measurements, axis=0, ddof=1)

    pre_values = measurements[:,0]
    post_values = measurements[:,1]
    difference = post_values - pre_values

    # Two measurements on the same experimental unit (e.g. before and
    # after a treatment): test the differences against zero.
    # p < 0.05 => the difference in mean is not equal to 0
    t_result = stats.ttest_1samp(difference, 0)
    p_value = t_result[1]
    print("paired t-test", p_value)

    # Alternative for ordinal-scale data or data that are not normally
    # distributed
    wilcoxon_result = stats.wilcoxon(difference)
    p_value = wilcoxon_result[1]
    print("paired wilcoxon-test", p_value)

Example #12

0

Show file

File: twoSample.py Project: drlinus/PY_1st

def paired_data():
    '''Paired comparison of energy intake.

    Mean daily intake over 10 pre-menstrual and 10 post-menstrual days
    (in kJ) is compared. Returns the p-value of the Wilcoxon test.'''

    # Daily intake of energy in kJ for 11 women
    intake = getData('altman_93.txt', subDir=r'..\Data\data_altman')

    # Column statistics are evaluated; their results are not kept
    np.mean(intake, axis=0)
    np.std(intake, axis=0, ddof=1)

    delta = intake[:, 1] - intake[:, 0]    # post minus pre

    # --- >>> START stats <<< ---
    # Paired t-test (two measurements on the same experimental unit,
    # e.g. before and after a treatment), done as a one-sample test of
    # the differences.
    # p < 0.05 => alternative hypothesis: the mean difference is not 0
    _, p_value = stats.ttest_1samp(delta, 0)
    print(("paired t-test", p_value))

    # For ordinal-scale or not normally distributed data
    _, p_value = stats.wilcoxon(delta)
    # --- >>> STOP stats <<< ---
    print(("Wilcoxon-Signed-Rank-Sum test", p_value))

    return p_value  # should be 0.0033300139117459797

Example #13

0

Show file

File: _interface.py Project: JcYBalaBalA/Graduation-Project

def train_interface(path="save", modle="began",dataset="MNIST"):
    """Create the output folders and train the selected model.

    path: root output folder; "<path>/models" and "<path>/images" are
        created below it.
    modle: one of "acgan", "began" or "dcgan" (key into the option table).
    dataset: forwarded to gd.getData().
    """
    modPath = path + "/models"
    imgPath = path + "/images"

    for folder in (path, imgPath, modPath):
        os.makedirs(folder, exist_ok=True)

    # Order of the values: n_epochs batch_size lr b1 b2 n_cpu latent_dim
    # img_size channels sample_interval n_classes (acgan only)
    opts = {
        "acgan" : [200,64,0.0002,0.5,0.999,8,100,32,1,400,10],
        "began" : [200,64,0.0002,0.5,0.999,8,62,32,1,400],
        "dcgan" : [200,64,0.0002,0.5,0.999,8,100,32,1,400]
    }
    opt = mod.model_opt(opts[modle])
    print(modle + "_opt:")
    opt.list_all_member()

    # Keep the configured value only when CUDA is actually available
    cuda_available = torch.cuda.is_available()
    opt.cuda = opt.cuda if cuda_available else False

    loaders = gd.getData(dataset,opt.batch_size,gd.channel_1_transform(opt.img_size))
    trainloader, testloader = loaders

    if modle == "acgan":
        # acgan takes the loader as a positional argument
        mod.acgan().train(opt, trainloader, modPath, imgPath)
    elif modle == "began":
        mod.began().train(opt, modPath, imgPath, dataloader=trainloader)
    elif modle == "dcgan":
        mod.dcgan().train(opt, modPath, imgPath, dataloader=trainloader)

Example #14

0

Show file

def anova_byHand():
    """Work through the ANOVA computation step by step.

    This is normally left to a library; the function shows how the
    underlying values are obtained. Returns the tuple (F, p).
    """

    # Load the data and label its two columns
    raw = getData('altman_910.txt', subDir='.')
    frame = pd.DataFrame(raw, columns=['values', 'group'])
    by_group = frame.groupby('group')

    # Total sum of squares: squared deviations from the overall mean
    # (computed for illustration; not used in the F statistic below)
    ss_total = np.sum((frame['values'] - frame['values'].mean())**2)

    # Accumulate the within-group (error) and between-group (treatment)
    # sums of squares
    ss_treatments = 0
    ss_error = 0
    for _, members in by_group:
        within = (members['values'] - members['values'].mean())**2
        ss_error += sum(within)
        between = members['values'].mean() - frame['values'].mean()
        ss_treatments += len(members) * between**2

    n_groups = len(by_group)
    df_groups = n_groups - 1
    df_residuals = len(raw) - n_groups
    ms_treatments = ss_treatments / df_groups
    ms_error = ss_error / df_residuals
    F = ms_treatments / ms_error
    p = stats.f(df_groups, df_residuals).sf(F)

    print(('ANOVA-Results: F = {0}, and p<{1}'.format(F, p)))

    return (F, p)

Example #15

0

Show file

File: twoSample.py Project: CeasarSS/books

def paired_data():
    '''Paired analysis of energy intake.

    Compares mean daily intake over 10 pre-menstrual and 10 post-menstrual
    days (in kJ). Returns the p-value of the Wilcoxon test.'''

    # Daily intake of energy in kJ for 11 women
    table = getData('altman_93.txt', subDir=r'..\Data\data_altman')

    # Per-column mean and SD; the values are computed but not kept
    mean(table, axis=0)
    std(table, axis=0, ddof=1)

    first_col, second_col = table[:,0], table[:,1]
    paired_diff = second_col - first_col    # post - pre

    # --- >>> START stats <<< ---
    # Paired t-test: two measurements on the same experimental unit,
    # e.g. before and after a treatment.
    # p < 0.05 => alternative hypothesis: the mean difference is not 0
    t_out = stats.ttest_1samp(paired_diff, 0)
    p_value = t_out[1]
    print(("paired t-test", p_value))

    # Alternative when the data are on an ordinal scale or not normally
    # distributed
    w_out = stats.wilcoxon(paired_diff)
    p_value = w_out[1]
    # --- >>> STOP stats <<< ---
    print(("Wilcoxon-Signed-Rank-Sum test", p_value))

    return p_value # should be 0.0033300139117459797

Example #16

0

Show file

def getTeamBoxScores():
    """Return a borderless table of team box scores, or 0 without data.

    Columns: Team, Outcome, FG%, 3P%, AST, TREB, BLK, STL, TO. Total
    rebounds are the sum of offensive and defensive rebounds.
    """

    def _percentage(made, attempted):
        # Rounded percentage of made attempts. Zero attempts are reported
        # as 0 instead of raising ZeroDivisionError.
        attempted = float(attempted)
        if attempted == 0:
            return 0
        return round(float(made) * 100.0 / attempted)

    x = PrettyTable()
    x.border = False
    stand = gd.getData(1)
    # Covers the empty list as before, and also a missing (None) result
    if not stand:
        return 0

    x.field_names = [
        "Team", "Outcome", "FG%", "3P%", "AST", "TREB", "BLK", "STL", "TO"
    ]
    for entry in stand:
        x.add_row([
            entry['team'].name.replace('_', " "),
            entry['outcome'].name,
            _percentage(entry['made_field_goals'],
                        entry['attempted_field_goals']),
            _percentage(entry['made_three_point_field_goals'],
                        entry['attempted_three_point_field_goals']),
            entry['assists'],
            entry['offensive_rebounds'] + entry['defensive_rebounds'],
            entry['blocks'],
            entry['steals'],
            entry['turnovers']
        ])
    return x

Example #17

0

Show file

File: analyzer.py Project: amol-desai/BajarPulse

def calc_sharpe_ratio_sym(data,
                          symbol,
                          lookbackDays,
                          enddate=None,
                          index=None,
                          silent=False):
    """Return the Sharpe ratio of a symbol relative to a reference index.

    The ratio is (mean return of symbol - mean return of index) divided by
    the standard deviation of their return difference, over the window
    from ``enddate - lookbackDays`` to ``enddate``.

    data: DataFrame containing at least data for the required time period
        for the required symbol and index
    symbol: symbol for which the Sharpe ratio is to be calculated
    lookbackDays: number of days to look back from enddate
    enddate: last day of the window; defaults to dt.today() evaluated at
        call time (the former default was fixed once, when the function
        was defined)
    index: reference index to be used; looked up for the symbol when None
    silent: silence the print statement
    """
    if enddate is None:
        enddate = dt.today()
    if index is None:
        index = getdata.getIndexTicker(
            getdata.getData([symbol], getdata.getParamDict('stock exchange')))
    data = append_return(data)
    window_start = enddate - relativedelta(days=lookbackDays)
    r_sym = data.ix[symbol]['return'].ix[window_start:enddate]
    r_index = data.ix[index]['return'].ix[window_start:enddate]
    r_sym_mean = r_sym.mean()
    r_index_mean = r_index.mean()
    std_sym_wrt_index = (r_sym - r_index).std()
    answer = (r_sym_mean - r_index_mean) / std_sym_wrt_index
    if not silent:
        # The message used the undefined name "days" (NameError); the
        # look-back length is what is meant. A single-argument print call
        # works on both Python 2 and Python 3.
        print("{0} day Sharpe Ratio for {1} = {2}".format(
            lookbackDays, symbol, answer))
    return answer

Example #18

0

Show file

File: anovaOneway.py Project: phaustin/statsintro

def anova_byHand():
    """ Calculate the ANOVA by hand. While you would normally not do that, this function shows
    how the underlying values can be calculated.

    Returns the tuple (F, p), which is also printed.
    """

    # Get the data. The path is a raw string because "\D" and "\d" are
    # not valid escape sequences in a normal string literal.
    data = getData("altman_910.txt", subDir=r"..\Data\data_altman")

    # Convert them to pandas-format and group them by their group value
    df = pd.DataFrame(data, columns=["values", "group"])
    groups = df.groupby("group")

    # Overall mean: constant across the loop below, so computed only once
    overall_mean = df["values"].mean()

    # The "total sum-square" is the squared deviation from the mean
    # (illustrative; the F statistic below does not use it)
    ss_total = np.sum((df["values"] - overall_mean) ** 2)

    # Calculate ss_treatment and ss_error
    (ss_treatments, ss_error) = (0, 0)
    for val, group in groups:
        group_mean = group["values"].mean()
        ss_error += sum((group["values"] - group_mean) ** 2)
        ss_treatments += len(group) * (group_mean - overall_mean) ** 2

    df_groups = len(groups) - 1
    df_residuals = len(data) - len(groups)
    F = (ss_treatments / df_groups) / (ss_error / df_residuals)
    # Own name for the F-distribution instead of re-using "df", which
    # refers to the DataFrame
    f_distribution = stats.f(df_groups, df_residuals)
    p = f_distribution.sf(F)

    print("ANOVA-Results: F = {0}, and p<{1}".format(F, p))

    return (F, p)

Example #19

0

Show file

def correlation():
    '''Pearson correlation, and two types of rank correlation (Spearman, Kendall)
    comparing age and %fat (measured by dual-photon absorptiometry) for 18 normal adults.

    Prints a dict with all three coefficients and returns the Pearson one.
    '''

    # Get the data. Raw string: the backslashes are path separators, and
    # '\D' / '\d' are not valid escape sequences.
    data = getData('altman_11_1.txt', subDir=r'..\Data\data_altman')
    x = data[:, 0]
    y = data[:, 1]

    # --- >>> START stats <<< ---
    # Calculate correlations; each call returns (coefficient, p-value)
    # and only the coefficient is kept
    corr = {}
    corr['pearson'], _ = stats.pearsonr(x, y)
    corr['spearman'], _ = stats.spearmanr(x, y)
    corr['kendall'], _ = stats.kendalltau(x, y)
    # --- >>> STOP stats <<< ---

    print(corr)

    # Assert that Spearman's rho is just the correlation of the ranksorted data
    np.testing.assert_almost_equal(
        corr['spearman'],
        stats.pearsonr(stats.rankdata(x), stats.rankdata(y))[0])

    return corr['pearson']  # should be 0.79208623217849117

Example #20

0

Show file

File: multivariate.py Project: josef-pkt/statsintro

def regression_line():
    '''Fit a line, using the powerful "ordinary least square" method of pandas

    Prints the summary of the fitted model of 'Vcf' against 'glucose'.
    '''

    # Get the data
    data = getData('altman_11_6.txt')

    df = pd.DataFrame(data, columns=['glucose', 'Vcf'])
    # NOTE(review): pd.ols only exists in old pandas releases - confirm
    # the pandas version this is meant to run with.
    model = pd.ols(y=df['Vcf'], x=df['glucose'])
    # print() with one argument gives the same output on Python 2 and 3
    print(model.summary)

Example #21

0

Show file

def regression_line():
    '''Fit a line, using the powerful "ordinary least square" method of pandas

    The model regresses 'Vcf' on 'glucose'; its summary is printed.
    '''

    # Get the data
    data = getData('altman_11_6.txt')

    df = pd.DataFrame(data, columns=['glucose', 'Vcf'])
    # NOTE(review): pd.ols is not available in recent pandas versions -
    # verify against the pandas release in use.
    model = pd.ols(y=df['Vcf'], x=df['glucose'])
    # Function-call form: valid on Python 3, same output on Python 2
    print(model.summary)

Example #22

0

Show file

def getTeams():
    """Return a borderless one-column table of team names.

    The entries from gd.getData(3) are sorted in place by 'wins',
    highest first, before their names are added.
    """
    table = PrettyTable()
    table.border = False
    standings = gd.getData(3)
    standings.sort(key=lambda entry: entry['wins'], reverse=True)
    table.field_names = ["Team Name"]
    for entry in standings:
        # Underscores in the team name are shown as spaces
        team_name = entry['team'].name.replace('_', " ")
        table.add_row([team_name])
    return table

Example #23

0

Show file

 def __getPicUrls(self):
     """Collect 'Code' and picture URL of every record into self.picdic.

     Each record of the transformed data gets its own entry, keyed by the
     running counter self.index. Returns self.picdic.
     """
     source = getData()
     source.transform()
     for record in source.data:
         # Register the (still empty) entry first, then fill it in
         entry = self.picdic[self.index] = {}
         entry['Code'] = record['Code']
         # Note the differing key names: 'PicUrls' in, 'PicUrl' out
         entry['PicUrl'] = record['PicUrls']
         self.index += 1
     return self.picdic

Example #24

0

Show file

File: portfolio.py Project: amol-desai/BajarPulse

    def getHistory(self,start_date):
        """Return the price history of all stocks and their indices.

        Collects the ticker of every stock in self.stocks plus the ticker
        of the index looked up for it (each index only once), then
        requests the history of all of them from start_date on.
        """
        tickers = []
        for stock in self.stocks:
            ticker = stock.ticker
            tickers.append(ticker)
            index = getdata.getIndexTicker(getdata.getData([ticker],
                                getdata.getParamDict('stock exchange')))
            if index not in tickers:
                tickers.append(index)
        # The return belongs after the loop. It used to be indented with
        # tabs, which Python 3 rejects (TabError) and Python 2 reads as
        # part of the "if" body, ending the loop after the first stock.
        return getdata.get_history(tickers,start_date)

Example #25

0

Show file

File: multivariate.py Project: sampathweb/statsintro

def regression_line():
    '''Ordinary-least-squares line fit with pandas.

    Regresses 'Vcf' on 'glucose', prints the model summary and returns
    the 'f-stat' entry of the model's f_stat.
    '''

    raw = getData('altman_11_6.txt', subDir=r'..\Data\data_altman')
    frame = pd.DataFrame(raw, columns=['glucose', 'Vcf'])

    # NOTE(review): pd.ols is only provided by old pandas releases -
    # confirm the targeted version.
    response = frame['Vcf']
    predictor = frame['glucose']
    fit = pd.ols(y=response, x=predictor)
    print(fit.summary)

    return fit.f_stat['f-stat'] # should be 4.4140184331462571

Example #26

0

Show file

def home_page():
    """Render the main page with the products derived from the data.

    The IMAGE_URL of every product is printed before rendering.
    """
    # Extending the data with synonyms.getSynonyms() results is
    # currently switched off.
    products = getproducts.getProducts(getdata.getData())
    for product in products:
        print(product.IMAGE_URL)
    return render_template("main_page.html", products=products)

Example #27

0

Show file

File: file_spider.py Project: q97585248/spider0002

 def __getFileUrls(self):
     """Copy code, file mark and file URL of every record into self.filedic.

     Entries are keyed by the running counter self.index, which is
     increased once per record. Returns self.filedic.
     """
     source = getData()
     source.transform()
     for record in source.data:
         # The entry is registered before it is filled
         entry = self.filedic[self.index] = {}
         for field in ('Code', 'FileMark', 'FileUrl'):
             entry[field] = record[field]
         self.index += 1
     return self.filedic

Example #28

0

Show file

File: etme.py Project: schlosser/EtMe

def home_page():
    """Look up the products for the current data and render the main page."""
    search_terms = getdata.getData()

    # Synonym expansion (synonyms.getSynonyms for every entry of the
    # data) is disabled at the moment.

    products = getproducts.getProducts(search_terms)
    for item in products:
        # Log the image URL of each listing
        print(item.IMAGE_URL)
    return render_template("main_page.html", products=products)

Example #29

0

Show file

 def getHistory(self, start_date):
     """Return the history for every stock ticker and its index ticker.

     Each index ticker is added only once. The history is requested after
     all stocks have been processed: the former "return" inside the "if"
     ended the loop at the first stock and yielded None when no new index
     was found.
     """
     tickers = []
     for stock in self.stocks:
         ticker = stock.ticker
         tickers.append(ticker)
         index = getdata.getIndexTicker(
             getdata.getData([ticker],
                             getdata.getParamDict('stock exchange')))
         if index not in tickers:
             tickers.append(index)
     return getdata.get_history(tickers, start_date)

Example #30

0

Show file

File: multivariate.py Project: ing7t/kod

def regression_line():
    """Line fit via the "ordinary least square" method of pandas.

    Prints the model summary; returns the "f-stat" entry of model.f_stat.
    """

    # Load the two-column data set and name the columns
    values = getData("altman_11_6.txt", subDir=r"..\Data\data_altman")
    table = pd.DataFrame(values, columns=["glucose", "Vcf"])

    # NOTE(review): pd.ols is missing from current pandas - check which
    # pandas version this code targets.
    ols_model = pd.ols(y=table["Vcf"], x=table["glucose"])
    print(ols_model.summary)

    f_value = ols_model.f_stat["f-stat"]
    return f_value  # should be 4.4140184331462571

Example #31

0

Show file

File: multivariate.py Project: EJHortala/books-2

def regression_line():
    '''Least-squares regression line, computed with pandas.

    'Vcf' is modelled as a function of 'glucose'. The summary is printed
    and the model's 'f-stat' value returned.
    '''

    columns = ['glucose', 'Vcf']
    df = pd.DataFrame(
        getData('altman_11_6.txt', subDir=r'..\Data\data_altman'),
        columns=columns)

    # NOTE(review): pd.ols was dropped from newer pandas releases -
    # confirm the environment.
    result = pd.ols(y=df['Vcf'], x=df['glucose'])
    print(result.summary)

    return result.f_stat['f-stat']  # should be 4.4140184331462571

Example #32

0

Show file

def getStandings():
    """Return a borderless standings table: team, wins, losses, division.

    The entries from gd.getData(3) are sorted in place by 'wins',
    largest first.
    """
    table = PrettyTable()
    table.border = False
    standings = gd.getData(3)
    standings.sort(key=lambda row: row['wins'], reverse=True)
    table.field_names = ["Team", "W", "L", "Division"]
    for row in standings:
        team_name = row['team'].name.replace('_', " ")
        division = row['division'].name
        table.add_row([team_name, row['wins'], row['losses'], division])
    return table

Example #33

0

Show file

def anova_oneway():
    ''' One-way ANOVA: test if results from 3 groups are equal.

    Twenty-two patients undergoing cardiac bypass surgery were randomized to one of three ventilation groups:

    Group I: Patients received 50% nitrous oxide and 50% oxygen mixture continuously for 24 h.
    Group II: Patients received a 50% nitrous oxide and 50% oxygen mixture only during the operation.
    Group III: Patients received no nitrous oxide but received 35-50% oxygen for 24 h.

    The data show red cell folate levels for the three groups after 24h' ventilation.

    Returns the tuple (F_statistic, pVal) of the scipy one-way ANOVA.
    '''

    # Get the data. Raw string: the backslashes are path separators, and
    # '\D' / '\d' are not valid escape sequences.
    print('One-way ANOVA: -----------------')
    data = getData('altman_910.txt', subDir=r'..\Data\data_altman')

    # Sort them into groups, according to column 1
    group1 = data[data[:, 1] == 1, 0]
    group2 = data[data[:, 1] == 2, 0]
    group3 = data[data[:, 1] == 3, 0]

    # --- >>> START stats <<< ---
    # First, check if the variances are equal, with the "Levene"-test
    (W, p) = stats.levene(group1, group2, group3)
    if p < 0.05:
        print('Warning: the p-value of the Levene test is <0.05: p={0}'.format(p))

    # Do the one-way ANOVA
    F_statistic, pVal = stats.f_oneway(group1, group2, group3)
    # --- >>> STOP stats <<< ---

    # Print the results
    print('Data form Altman 910:')
    print((F_statistic, pVal))
    if pVal < 0.05:
        print('One of the groups is significantly different.')

    # Elegant alternative implementation, with pandas & statsmodels
    df = pd.DataFrame(data, columns=['value', 'treatment'])
    model = ols('value ~ C(treatment)', df).fit()
    anovaResults = anova_lm(model)
    print(anovaResults)

    # Check if the two results are equal. If they are, there is no output
    np.testing.assert_almost_equal(F_statistic, anovaResults['F'][0])

    # should be (3.711335988266943, 0.043589334959179327)
    return (F_statistic, pVal)

Example #34

0

Show file

File: anovaTwoway.py Project: josef-pkt/statsintro

def anova_interaction():
    '''ANOVA with interaction: Measurement of fetal head circumference,
    by four observers in three fetuses.

    Prints the ANOVA table of the fitted model.
    '''

    # Get the data
    data = getData('altman_12_6.txt')

    # Bring them in dataframe-format
    df = pd.DataFrame(data, columns=['hs', 'fetus', 'observer'])

    # Determine the ANOVA with interaction: both factors plus their
    # interaction term
    formula = 'hs ~ C(fetus) + C(observer) + C(fetus):C(observer)'
    lm = ols(formula, df).fit()
    # Function-call form: valid on Python 3, same output on Python 2
    print(anova_lm(lm))

Example #35

0

Show file

def anova_interaction():
    '''ANOVA with interaction: Measurement of fetal head circumference,
    by four observers in three fetuses.

    The ANOVA table of the fitted linear model is printed.
    '''

    # Get the data
    data = getData('altman_12_6.txt')

    # Bring them in dataframe-format
    df = pd.DataFrame(data, columns=['hs', 'fetus', 'observer'])

    # Determine the ANOVA with interaction (main effects of fetus and
    # observer, plus the fetus:observer term)
    formula = 'hs ~ C(fetus) + C(observer) + C(fetus):C(observer)'
    lm = ols(formula, df).fit()
    # print() with a single argument behaves the same on Python 2 and 3
    print(anova_lm(lm))

Example #36

0

Show file

File: survival.py Project: CeasarSS/books

def main():
    '''Plot the Kaplan-Meier curves of two data sets and compare them.

    Returns p, the first value of the pair returned by logrank().
    '''
    # get the data
    # Raw string: '\D' and '\d' are not valid escape sequences, the
    # backslashes are meant literally as path separators.
    data_dir = r'..\Data\data_altman'
    data1 = getData('altman_13_2.txt', subDir=data_dir)
    data2 = getData('altman_13_3.txt', subDir=data_dir)

    # Determine the Kaplan-Meier curves (only the 3rd and 4th returned
    # items are plotted)
    (_, _, t1, sp1, _) = kaplanmeier(data1)
    (_, _, t2, sp2, _) = kaplanmeier(data2)

    # Make a combined plot for both datasets. The second curve is added
    # to the same axes automatically; plt.hold(True) is not needed and
    # is no longer part of matplotlib.
    plt.step(t1, sp1, where='post')
    plt.step(t2, sp2, 'r', where='post')

    plt.legend(['Data1', 'Data2'])
    plt.ylim(0, 1)
    plt.xlabel('Time')
    plt.ylabel('Survival Probability')
    plt.show()

    # Check the hypothesis that the two survival curves are the same
    (p, _) = logrank(data1, data2)

    return p    # supposed to be 0.073326322306832212

Example #37

0

Show file

def main():
    '''Plot two Kaplan-Meier curves and test whether they differ.

    The two datasets are read from 'altman_13_2.txt' and 'altman_13_3.txt'.
    Both survival curves are drawn into a single figure, then a logrank
    test is run on the two datasets.

    Returns the p-value obtained from logrank().
    '''
    # get the data
    # The directory is written as a raw string because '\D' and '\d' are
    # invalid escape sequences in a normal string literal.
    sub_dir = r'..\Data\data_altman'
    data1 = getData('altman_13_2.txt', subDir=sub_dir)
    data2 = getData('altman_13_3.txt', subDir=sub_dir)

    # Determine the Kaplan-Meier curves
    (p1, r1, t1, sp1, se1) = kaplanmeier(data1)
    (p2, r2, t2, sp2, se2) = kaplanmeier(data2)

    # Make a combined plot for both datasets. pyplot adds each new line to
    # the current axes by default; plt.hold() no longer exists in
    # Matplotlib >= 3.0 and was never needed for this.
    plt.step(t1, sp1, where='post')
    plt.step(t2, sp2, 'r', where='post')

    plt.legend(['Data1', 'Data2'])
    plt.ylim(0, 1)
    plt.xlabel('Time')
    plt.ylabel('Survival Probability')
    plt.show()

    # Check the hypothesis that the two survival curves are the same
    (p, X2) = logrank(data1, data2)

    return p  # supposed to be 0.073326322306832212

Example #38

0

Show file

File: anovaOneway.py Project: CeasarSS/books

def anova_oneway():
    ''' One-way ANOVA: test if results from 3 groups are equal.

    Twenty-two patients undergoing cardiac bypass surgery were randomized to one of three ventilation groups:

    Group I: Patients received 50% nitrous oxide and 50% oxygen mixture continuously for 24 h.
    Group II: Patients received a 50% nitrous oxide and 50% oxygen mixture only during the operation.
    Group III: Patients received no nitrous oxide but received 35-50% oxygen for 24 h.

    The data show red cell folate levels for the three groups after 24h' ventilation.

    Returns the tuple (F_statistic, pVal) computed by scipy's f_oneway.
    Raises AssertionError if the statsmodels ANOVA table yields a
    different F value.
    '''

    # Get the data
    print('One-way ANOVA: -----------------')
    # Raw string: '\D' and '\d' are invalid escape sequences in a normal
    # string literal and trigger a warning on current Python versions.
    data = getData('altman_910.txt', subDir=r'..\Data\data_altman')

    # Sort them into groups, according to column 1
    group1 = data[data[:,1]==1,0]
    group2 = data[data[:,1]==2,0]
    group3 = data[data[:,1]==3,0]

    # --- >>> START stats <<< ---
    # First, check if the variances are equal, with the "Levene"-test
    (W,p) = stats.levene(group1, group2, group3)
    if p<0.05:
        print('Warning: the p-value of the Levene test is <0.05: p={0}'.format(p))

    # Do the one-way ANOVA
    F_statistic, pVal = stats.f_oneway(group1, group2, group3)
    # --- >>> STOP stats <<< ---

    # Print the results
    print('Data form Altman 910:')
    print((F_statistic, pVal))
    if pVal < 0.05:
        print('One of the groups is significantly different.')

    # Elegant alternative implementation, with pandas & statsmodels
    df = pd.DataFrame(data, columns=['value', 'treatment'])
    model = ols('value ~ C(treatment)', df).fit()
    anovaResults = anova_lm(model)
    print(anovaResults)

    # Check if the two results are equal. If they are, there is no output.
    # The 'F' column is indexed by row labels, so the first row is taken
    # explicitly by position with .iloc instead of relying on the deprecated
    # integer fallback of Series[...].
    np.testing.assert_almost_equal(F_statistic, anovaResults['F'].iloc[0])

    return (F_statistic, pVal) # should be (3.711335988266943, 0.043589334959179327)

Example #39

0

Show file

File: pandas_intro.py Project: b-rodrigues/statsintro

def example_altman():
    '''Example from Altman, "Practical statistics for medical research".

    Loads 'altman_94.txt', splits the values in column 0 into a "lean"
    group (column 1 == 1) and an "obese" group (column 1 == 0), prints the
    group means, shows a boxplot, and runs an independent two-sample t-test.

    Returns the p-value of the t-test (previously the test result was
    computed and thrown away).
    '''

    data = getData('altman_94.txt')

    lean = pd.Series(data[data[:,1]==1,0])
    obese = pd.Series(data[data[:,1]==0,0])

    df = pd.DataFrame({'lean':lean, 'obese':obese})

    print(df.mean())
    plt.show()

    df.boxplot()
    plt.show()

    # Keep the outcome of the test so that callers can actually use it.
    (tVal, p) = stats.ttest_ind(lean, obese)
    return p

Example #40

0

Show file

File: multivariate.py Project: fluxium/statsintro

def regression_line():
    '''Fit a straight line with the "ordinary least squares" routine of pandas.

    Data from 24 type 1 diabetic patients, relating fasting blood glucose
    (mmol/l) to mean circumferential shortening velocity (%/sec), derived
    from echocardiography.

    Prints the model summary and returns the F statistic of the fit.
    '''

    # Load the table and label its two columns
    table = pd.DataFrame(
        getData('altman_11_6.txt', subDir=r'..\Data\data_altman'),
        columns=['glucose', 'Vcf'])
    predictor = table['glucose']
    response = table['Vcf']

    # --- >>> START stats <<< ---
    # NOTE(review): pd.ols only exists in old pandas releases (it was removed
    # in pandas 0.20) - confirm the pandas version this module targets.
    fit = pd.ols(y=response, x=predictor)
    print(fit.summary)
    # --- >>> STOP stats <<< ---

    f_value = fit.f_stat['f-stat']
    return f_value # should be 4.4140184331462571

Example #41

0

Show file

File: pandas_intro.py Project: TankMermaid/statsintro

def example_altman():
    '''Example from Altman, "Practical statistics for medical research".

    Reads 'altman_94.txt' and separates column 0 into two groups using the
    flag in column 1 (1 -> "lean", 0 -> "obese"). The group means are
    printed, a boxplot is shown, and the groups are compared with an
    independent two-sample t-test.

    Returns the p-value of that t-test; the original code discarded it.
    '''

    data = getData('altman_94.txt')

    lean = pd.Series(data[data[:, 1] == 1, 0])
    obese = pd.Series(data[data[:, 1] == 0, 0])

    df = pd.DataFrame({'lean': lean, 'obese': obese})

    print(df.mean())
    plt.show()

    df.boxplot()
    plt.show()

    # Hand the test outcome back instead of silently dropping it.
    (tVal, p) = stats.ttest_ind(lean, obese)
    return p

Example #42

0

Show file

def regression_line():
    '''Regress 'Vcf' on 'glucose' using the OLS helper shipped with pandas.

    Data from 24 type 1 diabetic patients, relating fasting blood glucose
    (mmol/l) to mean circumferential shortening velocity (%/sec), derived
    from echocardiography.

    The model summary is printed; the F statistic of the fit is returned.
    '''

    # Get the data and put it into a labelled frame
    raw = getData('altman_11_6.txt', subDir=r'..\Data\data_altman')
    frame = pd.DataFrame(raw, columns=['glucose', 'Vcf'])

    # --- >>> START stats <<< ---
    # NOTE(review): pd.ols is only available in old pandas releases (removed
    # in pandas 0.20) - verify against the pandas version in use.
    ols_fit = pd.ols(y=frame['Vcf'], x=frame['glucose'])
    summary = ols_fit.summary
    print(summary)
    # --- >>> STOP stats <<< ---

    return ols_fit.f_stat['f-stat']  # should be 4.4140184331462571

Example #43

0

Show file

File: multivariate.py Project: josef-pkt/statsintro

def correlation():
    '''Pearson correlation, and two types of rank correlation (Spearman,
    Kendall), between the 'age' and 'fat' columns of 'altman_11_1.txt'.

    The three coefficients are collected in a dict keyed by method name and
    printed; nothing is returned.
    '''

    # Load the data straight into a labelled dataframe
    frame = pd.DataFrame(getData('altman_11_1.txt'), columns=['age', 'fat'])
    age = frame['age']
    fat = frame['fat']

    # One coefficient per method, in the order pearson, spearman, kendall
    corr = {}
    for method in ('pearson', 'spearman', 'kendall'):
        corr[method] = age.corr(fat, method=method)

    print(corr)

Example #44

0

Show file

def correlation():
    '''Compute Pearson, Spearman and Kendall correlations for the data in
    'altman_11_1.txt', whose two columns are labelled 'age' and 'fat'.

    Prints a dict mapping each method name to its coefficient.
    '''

    # Get the data and bring them into the dataframe-format
    values = getData('altman_11_1.txt')
    table = pd.DataFrame(values, columns=['age', 'fat'])

    # Calculate the correlations, one entry per method
    methods = ['pearson', 'spearman', 'kendall']
    corr = {}
    for name in methods:
        coefficient = table['age'].corr(table['fat'], method=name)
        corr[name] = coefficient

    print(corr)

Example #45

0

Show file

File: twoSample.py Project: drlinus/PY_1st

def unpaired_data():
    ''' Then some unpaired comparison: 24 hour total energy expenditure (MJ/day),
    in groups of lean and obese women.

    Runs a two-sample t-test and a Mann-Whitney U test on the two groups,
    prints both p-values and plots the raw values of each group.

    Returns the p-value of the Mann-Whitney test.
    '''

    # Get the data: energy expenditure in MJ and stature (0=obese, 1=lean)
    energ = getData('altman_94.txt', subDir=r'..\Data\data_altman')

    # Group them: column 1 holds the group flag, column 0 the measurement
    group1 = energ[energ[:, 1] == 0, 0]
    group2 = energ[energ[:, 1] == 1, 0]

    # --- >>> START stats <<< ---
    # two-sample t-test
    # null hypothesis: the two groups have the same mean
    # this test assumes the two groups have the same variance...
    # (can be checked with tests for equal variance)
    # independent groups: e.g., how boys and girls fare at an exam
    # dependent groups: e.g., how the same class fare at 2 different exams
    t_statistic, p_value = stats.ttest_ind(group1, group2)

    # p_value < 0.05 => alternative hypothesis:
    # they don't have the same mean at the 5% significance level
    print(("two-sample t-test", p_value))

    # For non-normally distributed data, perform the two-sample wilcoxon test
    # a.k.a Mann Whitney U
    u, p_value = stats.mannwhitneyu(group1, group2)
    print(("Mann-Whitney test", p_value))
    # --- >>> STOP stats <<< ---

    # Plot the data. Both plot calls draw into the same axes by default, so
    # the former plt.hold(True) (removed in Matplotlib 3.0) is not needed.
    plt.plot(group1, 'bx', label='obese')
    plt.plot(group2, 'ro', label='lean')
    plt.legend(loc=0)
    plt.show()

    return p_value  # should be 0.0010608066929400244

Example #46

0

Show file

File: twoSample.py Project: CeasarSS/books

def unpaired_data():
    ''' Then some unpaired comparison: 24 hour total energy expenditure (MJ/day),
    in groups of lean and obese women.

    The two groups are compared with a two-sample t-test and with a
    Mann-Whitney U test; both p-values are printed and the raw data of each
    group are plotted.

    Returns the p-value of the Mann-Whitney test.
    '''

    # Get the data: energy expenditure in MJ and stature (0=obese, 1=lean)
    energ = getData('altman_94.txt', subDir=r'..\Data\data_altman')

    # Group them
    group1 = energ[:, 1] == 0
    group1 = energ[group1][:, 0]
    group2 = energ[:, 1] == 1
    group2 = energ[group2][:, 0]

    # The bare mean(group1) / mean(group2) calls were dropped: their results
    # were never used and they only worked with a star-imported mean().

    # --- >>> START stats <<< ---
    # two-sample t-test
    # null hypothesis: the two groups have the same mean
    # this test assumes the two groups have the same variance...
    # (can be checked with tests for equal variance)
    # independent groups: e.g., how boys and girls fare at an exam
    # dependent groups: e.g., how the same class fare at 2 different exams
    t_statistic, p_value = stats.ttest_ind(group1, group2)

    # p_value < 0.05 => alternative hypothesis:
    # they don't have the same mean at the 5% significance level
    print(("two-sample t-test", p_value))

    # For non-normally distributed data, perform the two-sample wilcoxon test
    # a.k.a Mann Whitney U
    u, p_value = stats.mannwhitneyu(group1, group2)
    print(("Mann-Whitney test", p_value))
    # --- >>> STOP stats <<< ---

    # Plot the data. pyplot keeps adding lines to the current axes by
    # default; plt.hold() no longer exists in Matplotlib >= 3.0.
    plt.plot(group1, 'bx', label='obese')
    plt.plot(group2, 'ro', label='lean')
    plt.legend(loc=0)
    plt.show()

    return p_value  # should be 0.0010608066929400244

Example #47

0

Show file

File: 2-way.py Project: mlskit/astromlskit

def anova_interaction():
    '''ANOVA with interaction: Measurement of fetal head circumference,
    by four observers in three fetuses, from a study investigating the
    reproducibility of ultrasonic fetal head circumference data.

    Prints the ANOVA table and returns the F value of its first row.
    '''

    # Get the data
    # Raw string: '\D' and '\d' are invalid escape sequences in a normal
    # string literal and trigger a warning on current Python versions.
    data = getData('altman_12_6.txt', subDir=r'..\Data\data_altman')

    # Bring them in dataframe-format
    df = pd.DataFrame(data, columns=['hs', 'fetus', 'observer'])

    # --- >>> START stats <<< ---
    # Determine the ANOVA with interaction
    formula = 'hs ~ C(fetus) + C(observer) + C(fetus):C(observer)'
    lm = ols(formula, df).fit()
    anovaResults = anova_lm(lm)
    # --- >>> STOP stats <<< ---
    print(anovaResults)

    # The 'F' column is indexed by row labels; select the first row by
    # position explicitly rather than via the deprecated Series[0] fallback.
    return anovaResults['F'].iloc[0]

Example #48

0

Show file

File: anovaTwoway.py Project: phaustin/statsintro

def anova_interaction():
    """ANOVA with interaction: Measurement of fetal head circumference,
    by four observers in three fetuses, from a study investigating the
    reproducibility of ultrasonic fetal head circumference data.

    The ANOVA table is printed; the F value in its first row is returned.
    """

    # Get the data. The directory is a raw string because '\D' and '\d'
    # are not valid escape sequences in an ordinary string literal.
    data = getData("altman_12_6.txt", subDir=r"..\Data\data_altman")

    # Bring them in dataframe-format
    df = pd.DataFrame(data, columns=["hs", "fetus", "observer"])

    # --- >>> START stats <<< ---
    # Determine the ANOVA with interaction
    formula = "hs ~ C(fetus) + C(observer) + C(fetus):C(observer)"
    lm = ols(formula, df).fit()
    anovaResults = anova_lm(lm)
    # --- >>> STOP stats <<< ---
    print(anovaResults)

    # Positional access must go through .iloc: the column is label-indexed
    # and integer keys on Series[...] are deprecated as positions.
    return anovaResults["F"].iloc[0]

Example #49

0

Show file

File: analyzer.py Project: piivonen/BajarPulse

def calc_sharpe_ratio_sym(data,symbol,lookbackDays,enddate=None,index=None,silent=False):
  """Return the Sharpe ratio of ``symbol`` measured against a reference index.

  The ratio is (mean symbol return - mean index return) divided by the
  standard deviation of the return difference, over the window
  [enddate - lookbackDays, enddate].

  Parameters:
    data: Dataframe containing at least data for the required time period
      for the required symbol and index
    symbol: Symbol for which sharpe ratio is to be calculated
    lookbackDays: # of days to look back from the enddate to calculate
      sharpe ratio
    enddate: Last day for sharpe ratio calculation; None (the default)
      means "today", evaluated at call time
    index: Reference index to be used; looked up via getdata when None
    silent: silence print statements
  """
  # Resolve "today" per call: a dt.today() default in the signature would be
  # evaluated only once, when the function is defined.
  if enddate is None:
    enddate = dt.today()
  if index is None:
    index = getdata.getIndexTicker(getdata.getData([symbol],getdata.getParamDict('stock exchange')))
  data = append_return(data)
  startdate = enddate-relativedelta(days=lookbackDays)
  # .loc replaces the removed .ix indexer; every key used here is a label.
  r_sym = data.loc[symbol]['return'].loc[startdate:enddate]
  r_index = data.loc[index]['return'].loc[startdate:enddate]
  r_sym_mean = r_sym.mean()
  r_index_mean = r_index.mean()
  std_sym_wrt_index = (r_sym-r_index).std()
  answer = (r_sym_mean-r_index_mean)/std_sym_wrt_index
  if not silent:
    # The old message referenced an undefined name `days`; the look-back
    # window length is what was meant.
    print("{0} day Sharpe Ratio for {1} = {2}".format(lookbackDays, symbol, answer))
  return answer

Example #50

0

Show file

File: multivariate.py Project: ing7t/kod

def correlation():
    """Pearson correlation, and two types of rank correlation (Spearman, Kendall)
    between the first two columns of 'altman_11_1.txt'.

    Prints a dict with the three coefficients and returns the Pearson
    coefficient. Raises AssertionError if Spearman's rho differs from the
    Pearson correlation of the rank-transformed data.
    """
    # NOTE(review): the previous docstring described blood glucose data from
    # diabetic patients - confirm what 'altman_11_1.txt' actually contains.

    # Get the data. Raw string: '\D' and '\d' are not valid escape sequences
    # in a normal string literal.
    data = getData("altman_11_1.txt", subDir=r"..\Data\data_altman")
    x = data[:, 0]
    y = data[:, 1]

    # Calculate correlations (each scipy call returns coefficient and p-value)
    corr = {}
    corr["pearson"], _ = stats.pearsonr(x, y)
    corr["spearman"], _ = stats.spearmanr(x, y)
    corr["kendall"], _ = stats.kendalltau(x, y)

    print(corr)

    # Assert that Spearman's rho is just the correlation of the ranksorted data
    testing.assert_almost_equal(corr["spearman"], stats.pearsonr(stats.rankdata(x), stats.rankdata(y))[0])

    return corr["pearson"]  # should be 0.79208623217849117

Example #51

0

Show file

File: anovaOneway.py Project: ing7t/kod

def anova_oneway():
    ''' One-way ANOVA: test if results from 3 groups are equal.

    Reads 'altman_910.txt', splits column 0 into three groups by the label
    in column 1, checks equality of variances with the Levene test, runs the
    one-way ANOVA with scipy and cross-checks the F value against a
    statsmodels fit.

    Returns the tuple (F_statistic, pVal) from scipy's f_oneway.
    Raises AssertionError if the two F values disagree.
    '''

    # Get the data
    print('One-way ANOVA: -----------------')
    # Raw string: '\D' and '\d' are invalid escape sequences in a normal
    # string literal.
    data = getData('altman_910.txt', subDir=r'..\Data\data_altman')

    # Sort them into groups, according to column 1
    group1 = data[data[:, 1] == 1, 0]
    group2 = data[data[:, 1] == 2, 0]
    group3 = data[data[:, 1] == 3, 0]

    # First, check if the variances are equal, with the "Levene"-test
    (W, p) = stats.levene(group1, group2, group3)
    if p < 0.05:
        print('Warning: the p-value of the Levene test is <0.05: p={0}'.format(
            p))

    # Do the one-way ANOVA
    F_statistic, pVal = stats.f_oneway(group1, group2, group3)

    # Print the results
    print('Data form Altman 910:')
    print((F_statistic, pVal))
    if pVal < 0.05:
        print('One of the groups is significantly different.')

    # Elegant alternative implementation, with pandas & statsmodels
    df = pd.DataFrame(data, columns=['value', 'treatment'])
    model = ols('value ~ C(treatment)', df).fit()
    anovaResults = anova_lm(model)
    print(anovaResults)

    # Check if the two results are equal. If they are, there is no output.
    # .iloc[0] picks the first row by position; plain [0] on the
    # label-indexed 'F' column relies on a deprecated fallback.
    np.testing.assert_almost_equal(F_statistic, anovaResults['F'].iloc[0])

    return (F_statistic,
            pVal)  # should be (3.711335988266943, 0.043589334959179327)

Example #52

0

Show file

File: pandas_intro.py Project: sampathweb/statsintro

def example_altman():
    '''Example from Altman, "Practical statistics for medical research".

    Loads 'altman_94.txt', splits column 0 into a "lean" group
    (column 1 == 1) and an "obese" group (column 1 == 0), prints the group
    means, shows a boxplot and compares the groups with a two-sample t-test.

    Returns the p-value of the t-test.
    '''

    # Raw string for the directory: '\D' and '\d' are invalid escape
    # sequences in a normal string literal.
    data = getData(r'altman_94.txt', subDir=r'..\Data\data_altman')

    lean = pd.Series(data[data[:,1]==1,0])
    obese = pd.Series(data[data[:,1]==0,0])

    df = pd.DataFrame({'lean':lean, 'obese':obese})

    print(df.mean())
    plt.show()

    df.boxplot()
    plt.show()

    (tVal, p) = stats.ttest_ind(lean, obese)
    if p < 0.05:
        print('"lean" significantly different from "obese": p={0}'.format(p))
    else:
        print('No difference between "lean" and "obese"')

    return p    # supposed to be 0.00079899821117005397

Example #53

0

Show file

File: multivariate.py Project: fluxium/statsintro

def correlation():
    '''Pearson correlation, and two types of rank correlation (Spearman, Kendall)
    comparing age and %fat (measured by dual-photon absorptiometry) for 18 normal adults.

    Prints a dict with the three coefficients and returns the Pearson
    coefficient. Raises AssertionError if Spearman's rho differs from the
    Pearson correlation of the rank-transformed data.
    '''

    # Get the data. Raw string: '\D' and '\d' are not valid escape sequences
    # in a normal string literal.
    data = getData('altman_11_1.txt', subDir=r'..\Data\data_altman')
    x = data[:,0]
    y = data[:,1]

    # --- >>> START stats <<< ---
    # Calculate correlations (each call returns the coefficient and a p-value)
    corr = {}
    corr['pearson'], _ = stats.pearsonr(x,y)
    corr['spearman'], _ = stats.spearmanr(x,y)
    corr['kendall'], _ = stats.kendalltau(x,y)
    # --- >>> STOP stats <<< ---

    print(corr)

    # Assert that Spearman's rho is just the correlation of the ranksorted data
    np.testing.assert_almost_equal(corr['spearman'], stats.pearsonr(stats.rankdata(x), stats.rankdata(y))[0])

    return corr['pearson']  # should be 0.79208623217849117

Example #54

0

Show file

File: multivariate.py Project: EJHortala/books-2

def correlation():
    '''Pearson correlation, and two types of rank correlation (Spearman, Kendall)
    between the first two columns of 'altman_11_1.txt'.

    The three coefficients are printed as a dict; the Pearson coefficient is
    returned. An AssertionError is raised if Spearman's rho does not match
    the Pearson correlation of the ranked data.
    '''
    # NOTE(review): the earlier docstring spoke of glucose measurements in
    # diabetic patients - verify which data 'altman_11_1.txt' really holds.

    # Get the data; the directory is a raw string because '\D' and '\d'
    # are invalid escape sequences in an ordinary literal.
    data = getData('altman_11_1.txt', subDir=r'..\Data\data_altman')
    x = data[:, 0]
    y = data[:, 1]

    # Calculate correlations
    corr = {}
    corr['pearson'], _ = stats.pearsonr(x, y)
    corr['spearman'], _ = stats.spearmanr(x, y)
    corr['kendall'], _ = stats.kendalltau(x, y)

    print(corr)

    # Assert that Spearman's rho is just the correlation of the ranksorted data
    testing.assert_almost_equal(
        corr['spearman'],
        stats.pearsonr(stats.rankdata(x), stats.rankdata(y))[0])

    return corr['pearson']  # should be 0.79208623217849117

Example #55

0

Show file

File: anovaOneway.py Project: EJHortala/books-2

def anova_oneway():
    ''' One-way ANOVA: test if results from 3 groups are equal.

    The values in column 0 of 'altman_910.txt' are grouped by the label in
    column 1 (1, 2 or 3). After a Levene test for equal variances, the
    one-way ANOVA is computed with scipy and verified against the ANOVA
    table of a statsmodels OLS fit.

    Returns the tuple (F_statistic, pVal) from scipy's f_oneway.
    Raises AssertionError if both implementations give different F values.
    '''

    # Get the data
    print('One-way ANOVA: -----------------')
    # The directory is a raw string: '\D' and '\d' are not valid escape
    # sequences in a normal string literal.
    data = getData('altman_910.txt', subDir=r'..\Data\data_altman')

    # Sort them into groups, according to column 1
    group1 = data[data[:,1]==1,0]
    group2 = data[data[:,1]==2,0]
    group3 = data[data[:,1]==3,0]

    # First, check if the variances are equal, with the "Levene"-test
    (W,p) = stats.levene(group1, group2, group3)
    if p<0.05:
        print('Warning: the p-value of the Levene test is <0.05: p={0}'.format(p))

    # Do the one-way ANOVA
    F_statistic, pVal = stats.f_oneway(group1, group2, group3)

    # Print the results
    print('Data form Altman 910:')
    print((F_statistic, pVal))
    if pVal < 0.05:
        print('One of the groups is significantly different.')

    # Elegant alternative implementation, with pandas & statsmodels
    df = pd.DataFrame(data, columns=['value', 'treatment'])
    model = ols('value ~ C(treatment)', df).fit()
    anovaResults = anova_lm(model)
    print(anovaResults)

    # Check if the two results are equal. If they are, there is no output.
    # First row by position via .iloc - the 'F' column carries row labels,
    # and integer keys on Series[...] are deprecated as positions.
    np.testing.assert_almost_equal(F_statistic, anovaResults['F'].iloc[0])

    return (F_statistic, pVal) # should be (3.711335988266943, 0.043589334959179327)

Example #56

0

Show file

from tradestats import tradestats
from plot import plot_net_value
from configue import M, T
import pandas as pd

# set the display parameters for pandas DataFrame: show every row and column
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

# set some basic parameters
init_capital = 3000
quantity = 1
fee_rate = 0.0001

# do the backtest
data_1m = getData()
signaltrade_result = signaltrade(data_1m, 0.003, M, T, quantity, fee_rate)
orderbook = signaltrade_result[1]
tradedate = signaltrade_result[2]
# get detailed backtest performance
stats = tradestats(orderbook, init_capital, tradedate)

# save backtest performance into excel file.
# The context manager writes and closes the workbook on exit, even if one of
# the to_excel calls raises; ExcelWriter.save() no longer exists in
# pandas >= 2.0. The sheet name is passed by keyword, as newer pandas expects.
with pd.ExcelWriter('backtest_result.xlsx') as writer:
    stats.to_excel(writer, sheet_name='stats', index=False)
    orderbook.to_excel(writer, sheet_name='orderbook', index=False)

# plot the net value figure
start_time = signaltrade_result[0][tradedate[0]].loc[0, 'time']

Example #57

0

Show file

File: survival.py Project: b-rodrigues/statsintro

    chi2 = (O1 - E1) ** 2 / V
    p = stats.chi2.sf(chi2, 1)

    print("X^2 = {0}".format(chi2))
    if p < 0.05:
        print("p={0}, the two survival curves are signifcantly different.".format(p))
    else:
        print("p={0}, the two survival curves are not signifcantly different.".format(p))

    return (p, chi2)


if __name__ == "__main__":
    # Script entry point: compare the survival curves of two datasets.

    # get the data
    data1 = getData("altman_13_2.txt")
    data2 = getData("altman_13_3.txt")

    # Determine the Kaplan-Meier curves
    (p1, r1, t1, sp1, se1) = kaplanmeier(data1)
    (p2, r2, t2, sp2, se2) = kaplanmeier(data2)

    # Make a combined plot for both datasets. Consecutive pyplot calls draw
    # into the same axes by default, so plt.hold(True) is unnecessary - and
    # it raises AttributeError on Matplotlib >= 3.0, where it was removed.
    plt.step(t1, sp1, where="post")
    plt.step(t2, sp2, "r", where="post")

    plt.legend(["Data1", "Data2"])
    plt.ylim(0, 1)
    plt.xlabel("Time")
    plt.ylabel("Survival Probability")

Example #58

0

Show file

File: nnMain.py Project: XkhldY/kaggle_sfcrime

def main():
    """Main function for sf-crime machine learning
    From training data try to predict the category of crime
    given the date and location.

    Interactive driver (Python 2: uses raw_input and print statements).
    Each outer iteration loads 'train.csv', trains a network with one hidden
    layer for one or more user-supplied lambda values, reports training and
    test accuracy, and optionally writes predictions for 'test.csv' to a
    submission file.
    """

    again = True

    while again:
        # Ask how to split the training file; the answer is passed to getData
        p = float(raw_input('Percent of data to train on: '))
        ran = raw_input('Shuffle data?(y/n) ')
        if ran == 'y' or ran == 'Y':
            ran = True
        else:
            ran = False

        # setup matrices from train.csv file
        out = getData('train.csv', perc=p, rand=ran)
        X = np.array(out['X'])
        Y = out['Y']
        X_test = np.array(out['X_test'])
        Y_test = out['Y_test']
        crimes = out['crimes']

        # calculate mean and standard deviation
        # (np.mean/np.std without an axis: one scalar over all entries of X);
        # the same mu/sigma are reused for every test set below
        mu = np.mean(X)
        sigma = np.std(X)
        X = normalize(X, mu, sigma)
        X_test = normalize(X_test, mu, sigma)
        
        # get dimensions of matrices:
        # m rows in X, n columns in X, k columns in Y,
        # k_h = hidden layer width, the integer mean of n and k
        m = len(X)
        n = len(X[0])
        k = len(Y[0])
        k_h = (n + k) // 2
        print 'Dimensions: m =', m, 'n =', n, 'k =', k, 'k_h =', k_h

        # randomly initialize Theta with values in [-epsilon, epsilon)
        epsilon = 0.15
        Theta1 = np.random.rand(n, k_h)
        Theta1 = Theta1 * 2 * epsilon - epsilon
        Theta2 = np.random.rand(k_h, k)
        Theta2 = Theta2 * 2 * epsilon - epsilon
        # prepend a row of ones to each matrix (the row that multiplies the
        # constant column added to the activations further down)
        one = np.ones(k_h)
        one = np.reshape(one, (1, k_h))
        Theta1 = np.concatenate((one, Theta1), axis=0)
        one = np.ones(k)
        one = np.reshape(one, (1, k))
        Theta2 = np.concatenate((one, Theta2), axis=0)
        # flatten both matrices into the single vector the optimizer works on
        Theta1 = np.ndarray.flatten(Theta1)
        Theta2 = np.ndarray.flatten(Theta2)
        Theta = np.append(Theta1, Theta2)

        # minimize costFunction of Theta
        new_lam = True
        while new_lam:
            lam = float(raw_input('Enter lambda: '))
            # every lambda starts from the same initial Theta
            xopt = fmin_bfgs(costFunction, Theta, 
                             fprime=gradient, args=(X,Y,lam)
                             )
            # split the optimized vector back into the two weight matrices
            Theta1 = np.reshape(xopt[0:(n+1)*k_h], (n + 1, k_h))
            Theta2 = np.reshape(xopt[(n+1)*k_h:], (k_h + 1, k))

            # accuracy against training set: forward pass, then count the
            # rows whose strongest output matches the strongest entry of Y
            m = len(X)
            one = np.ones(m)
            one = np.reshape(one, (m, 1))
            a1 = np.concatenate((one, X), axis=1)
            a2 = sigmoid(np.dot(a1, Theta1))
            a2 = np.concatenate((one, a2), axis=1)
            test = sigmoid(np.dot(a2, Theta2))
            correct = 0
            for i in range(len(test)):
                j = np.argmax(test[i])
                if j == np.argmax(Y[i]):
                    correct += 1
            print 'Training set accuracy =', 100.0 * correct / len(test)

            # if there is a test matrix test accuracy of Theta
            # (same forward pass as above, applied to the held-out rows)
            if len(X_test) > 0:
                m = len(X_test)
                one = np.ones(m)
                one = np.reshape(one, (m, 1))
                a1 = np.concatenate((one, X_test), axis=1)
                a2 = sigmoid(np.dot(a1, Theta1))
                a2 = np.concatenate((one, a2), axis=1)
                test = sigmoid(np.dot(a2, Theta2))
                correct = 0
                for i in range(len(test)):
                    j = np.argmax(test[i])
                    if j == np.argmax(Y_test[i]):
                        correct += 1
                print 'Test set accuracy =', 100.0 * correct / len(test)
            new_lam = raw_input('Different lambda?(y/n) ')
            if new_lam == 'y' or new_lam == 'Y':
                new_lam = True
            else:
                new_lam = False

        sub = raw_input('Create submission file?(y/n) ')
        if sub == 'y' or sub == 'Y':
            # create predictions for kaggle test data set, using the
            # Theta1/Theta2 of the most recently tried lambda
            out = getData('test.csv', perc=1.0, test=True)
            X_test = out['X']
            X_test = normalize(X_test, mu, sigma)
            m = len(X_test)
            one = np.ones(m)
            one = np.reshape(one, (m, 1))
            a1 = np.concatenate((one, X_test), axis=1)
            a2 = sigmoid(np.dot(a1, Theta1))
            a2 = np.concatenate((one, a2), axis=1)
            ans = sigmoid(np.dot(a2, Theta2))

            # write to submission csv file: header 'Id,<crime names>', then
            # one line per prediction row with the row number as Id
            # NOTE(review): the file is opened without a with-block, so it
            # stays open if a write raises
            sub_file = raw_input('Enter submission file name: ')
            f = open(sub_file, 'w')
            header ='Id'
            for c in crimes:
                header += ',' + c
            f.write(header + '\n')
            for i in range(len(ans)):
                f.write(str(i) + ',' + ','.join(map(str, ans[i])) + '\n')
            f.close()

        again = raw_input('Run again? (y/n) ')
        if again == 'y' or again == 'Y':
            again = True
        else:
            again = False

Example #59

0

Show file

File: test_stats.py Project: b-rodrigues/statsintro

 def test_getdata(self):
     # The first value of the first row of 'altman_93.txt' must be 5260
     first_value = getData('altman_93.txt')[0][0]
     self.assertEqual(first_value, 5260)