def prep_data_tests(data_tests, values, index, columns, reset_index=None,
                    prefix='Unknown_', impute_grouping=['Wafer_ID'],
                    cont_func=['median'], missing=True, Threshold=20):
    data_pivot_tests = pivot_data(data_tests, values, index, columns, reset_index, prefix)
    data_tests_pivot_imputed, columns_drop, wafers_drop = drop_missing_values(
        data_pivot_tests, Threshold)
    data_tests_pivot_imputed = replace_missing_values(
        data_tests_pivot_imputed, impute_grouping,
        data_tests_pivot_imputed.columns, missing)
    data_tests_wafer_medians = stats(data_tests_pivot_imputed, ['Wafer_ID'],
                                     cont_func, '_wafer_')
    data_tests_fablot_medians = stats(data_tests_pivot_imputed, ['FabLot', 'Wafer_ID'],
                                      cont_func, '_fablot_')
    data_tests_stats = pd.merge(data_tests_wafer_medians.reset_index(),
                                data_tests_fablot_medians.reset_index(),
                                on=['Wafer_ID'],
                                how='inner').set_index(['FabLot', 'Wafer_ID'])
    data_tests.to_csv('data_model_tests.csv', index=True)
    data_tests_stats.to_csv('data_model_tests_stats.csv', index=True)
    return data_tests_pivot_imputed, columns_drop, wafers_drop, data_tests_stats
def main():
    # initializing
    # stls is the stop-loss limit: the maximum number of long positions we can hold.
    # Without such a constraint you would go long an indefinite number of times
    # as long as the market condition keeps triggering the signal;
    # in a whipsaw condition, that is suicidal.
    stls = 3
    ticker = 'NVDA'
    stdate = '2015-04-01'
    eddate = '2018-02-15'

    # slicer is used for plotting;
    # a three-year dataset with 750 data points would be too much to show
    slicer = 700

    # downloading data
    df = yf.download(ticker, start=stdate, end=eddate)
    trading_signals = signal_generation(df, heikin_ashi, stls)

    viz = trading_signals[slicer:]
    plot(viz, ticker)

    portfolio_details = portfolio(viz)
    profit(portfolio_details)

    stats(portfolio_details, trading_signals, stdate, eddate)
def analyzeTweets(keyword, option):
    # once in analyze mode, determine which analysis / visualization to do
    df = readSQL(keyword)
    df['datetime'] = pd.to_datetime(df['datetime'])
    n = len(df)
    if option == 'stats':
        stats(df, stripHash(keyword))
        return
    elif option == 'interval':
        interval(df, keyword, n)
        return
    elif option == 'line':
        line(df, keyword, n)
        return
    elif option == 'dist':
        dist(df, keyword, n)
        return
    elif option == 'scatter':
        scatter(df, keyword, n)
        return
    elif option == 'pie':
        pie(keyword)
        return
    elif option == 'map':
        sentMap(df, keyword)
        return
def eval_bias_variance_learner_cont(env, agent, sup, T, num_samples=1):
    s = env.reset()
    biases = []
    variances = []
    for i in range(num_samples):
        bias, variance, t = 0, 0, 0
        while t < T:
            a = agent.sample_action(s)      # \E_D(\pi^D_\theta(s))
            a_sup = sup.intended_action(s)  # \pi^*(s)
            # For variance, at each state sample an action from a random model
            # and compare it to that from the expected model
            a_ensemble_list = agent.intended_actions(s)
            ensemble_idx = np.random.randint(len(a_ensemble_list))
            a_ensemble = a_ensemble_list[ensemble_idx]  # \pi^D_\theta(s)
            # Need to evaluate bias/variance on the learner's distribution
            next_s, r, done, _ = env.step(a)
            s = next_s
            bias += np.sum((a - a_sup) ** 2)
            variance += np.sum((a - a_ensemble) ** 2)
            t += 1
            if done:
                break
        bias /= float(t)
        variance /= float(t)
        biases.append(bias)
        variances.append(variance)
    return stats(biases), stats(variances)
def strstats(v):
    if v.dtype == float:
        return "mean %f std %f min %f 10th %f 50th %f 90th %f max %f" % stats(v)
    elif v.dtype == int:
        return "mean %d std %d min %d 10th %d 50th %d 90th %d max %d" % stats(v)
    else:
        raise Exception("weird dtype %s" % v.dtype)
def main():
    # parse_ftp()
    pop = 'uganda'
    coverage = '4x'
    dn = 'DNAse-seq'
    dn = sys.argv[-1]
    print(pop, coverage, dn,)

    l_vcfs = glob.glob('../pipeline/%s%s/out_UnifiedGenotyper/*.vcf' % (pop, coverage,))
    l_vcfs_sorted = sort_nicely(l_vcfs)
    l_vcfs_sorted = ['../pipeline/uganda4x/out_UnifiedGenotyper/UnifiedGenotyper.2.2.vcf']
    #### l_vcfs_sorted = l_vcfs_sorted[:10]
    sep = '/'

    # l_vcfs_exome = glob.glob('/nfs/t149_1kg/phase1_v3/ALL.chr*.exome.1000GApr12.vcf')
    # l_vcfs_exome_sorted = sort_nicely(l_vcfs_exome)
    # l_vcfs = glob.glob('/nfs/t149_1kg/phase1_v3/ALL.chr*.phase1_release_v3.20101123.snps_indels_svs.genotypes.vcf.gz')
    # l_vcfs_sorted = sort_nicely(l_vcfs)
    # sep = '|'

    ## gnuplot.contour_plot(
    ##     path_dat='INDEL_%s.dat' % (dn),
    ##     bool_remove=False,
    ##     xlabel='dist_m_i_n (1000bp)', ylabel='INDEL length',
    ##     bool_log=False,
    ##     x1=0, x2=50, y1=1, y2=20,
    ## )
    ## stop

    d_lengths = loop_UG_out(l_vcfs_sorted, dn, sep)

    plot_contour(dn, d_lengths)

    s = ''
    for k1 in d_lengths.keys():
        for k2, v in d_lengths.items():
            s += '%s %s %s\n' % (k1, k2, v)
    fd = open('%s.dict', 'w')
    fd.write(s)
    fd.close()

    stats(d_lengths, dn,)

    ########
    plot_length_distribution(pop, coverage, d_lengths,)

    return
def print_entry(key, value):
    def stats(s, units=""):
        conf = "{:0.1f}({:0.2f}%)".format(s['ci']['abs'], s['ci']['perc'])
        return "{:8.1f}{} +/- {:15s}".format(s['average'], units, conf)
    print "{:>50s} {} {}".format(key,
                                 stats(value['time_stat'], units="ms"),
                                 stats(value['count_stat']))
def printrestart(restart):
    '''Print quick summary info about the current state of the sampler.'''
    print "restart info: "
    print "  current shape of chain: (nwalkers x niterations x ndim) ", np.shape(restart['chain'])
    print "  autocorrelation lengths for each parameter: ", restart['acor']
    stats(restart['acor'])
    print "  acceptance rate for each walker: ", restart['accept']
    stats(restart['accept'])
def probsplots(allprobs, fn, chain, burnin=0):
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt

    nwalker = np.shape(allprobs)[0]
    iters = np.shape(allprobs)[1]
    ndim = np.shape(chain)[2]

    # plot the trace of the probabilities for every walker.
    fig, ax = plt.subplots()
    for walker in range(nwalker):
        ax.plot(allprobs[walker, burnin:], alpha=.3, ls='--')
    plt.savefig(fn + '_probs.png')
    plt.close(fig)
    print "saved " + fn + '_probs.png'

    fig, ax = plt.subplots()
    kss = []
    kssthetas = np.zeros((ndim, iters - 1))
    # chain ~ walker x iter x dim
    for iter in range(iters - 1):
        kss.append(ksprob(allprobs[:, iter], allprobs[:, iters - 1]))
        for k in range(ndim):
            kssthetas[k, iter] = ksprob(chain[:, iter, k], chain[:, iters - 1, k])
    ax.scatter(range(iters - 1)[-25:], kss[-25:], s=30, marker='+')
    colors = ['k', 'r', 'green', 'orange', 'lightblue', 'grey', 'purple',
              'pink', 'yellow', 'blue', 'lightgreen', 'darkgreen'] * 5
    for k in range(ndim):
        ax.plot(range(iters - 1)[-25:], kssthetas[k, -25:], color=colors[k])
    ax.set_xlabel('iter')
    ax.set_ylabel('ks prob vs last iteration')
    ax.set_ylim(-0.01, 1.02)
    plt.savefig(fn + '_ks.png')
    plt.close(fig)

    changes = np.zeros(nwalker)
    for walker in range(nwalker):
        for iter in range(iters - 1):
            if allprobs[walker, iter] != allprobs[walker, iter + 1]:
                changes[walker] += 1.0
    changes = changes / float(iters - 1.0)
    print "long-term acceptance fraction stats: "
    stats(changes)

    acor = np.zeros(nwalker)
    for walker in range(nwalker):
        acor[walker] = emcee.autocorr.integrated_time(
            allprobs[walker, burnin:], window=min([50, iters / 2]))
    print "acor stats: "
    stats(acor)
def main(argv=None):
    if argv is None:
        argv = sys.argv
    parser = ArgumentParser(
        'Report mean and standard deviation for the stream of numbers read from stdin',
        formatter_class=ArgumentDefaultsHelpFormatter)
    args = parser.parse_args(argv[1:])
    stats = Stats()
    for line in sys.stdin:
        stats(float(line))
    print(stats)
def analyze_results(config, test_method, answer_path, list_of_req, quality_bonus):
    full_data, accepted_sessions = data_cleaning(answer_path, test_method)
    n_workers = number_of_uniqe_workers(full_data)
    print(f"{n_workers} workers participated in this batch.")
    stats(answer_path)
    # votes_per_file, votes_per_condition = transform(accepted_sessions)
    if len(accepted_sessions) > 1:
        print("Transforming data (the ones with 'accepted_and_use' == 1) --> group per clip")
        use_condition_level = config.has_option('general', 'condition_pattern')
        votes_per_file, vote_per_condition = transform(
            test_method, accepted_sessions,
            config.has_option('general', 'condition_pattern'))
        votes_per_file_path = os.path.splitext(answer_path)[0] + '_votes_per_clip.csv'
        votes_per_cond_path = os.path.splitext(answer_path)[0] + '_votes_per_cond.csv'

        condition_keys = []
        if config.has_option('general', 'condition_pattern'):
            condition_keys = config['general']['condition_keys'].split(',')
            condition_keys.append('Unknown')
        headers = create_headers_for_per_file_report(test_method, condition_keys)
        write_dict_as_csv(votes_per_file, votes_per_file_path, headers=headers)
        print(f'   Votes per file are saved in: {votes_per_file_path}')
        if use_condition_level:
            write_dict_as_csv(vote_per_condition, votes_per_cond_path)
            print(f'   Votes per condition are saved in: {votes_per_cond_path}')

        bonus_file = os.path.splitext(answer_path)[0] + '_quantity_bonus_report.csv'
        quantity_bonus_df = calc_quantity_bonuses(full_data, list_of_req, bonus_file)

        if quality_bonus:
            quality_bonus_path = os.path.splitext(answer_path)[0] + '_quality_bonus_report.csv'
            if 'all' not in list_of_req:
                quantity_bonus_df = calc_quantity_bonuses(full_data, ['all'], None)
            if use_condition_level:
                votes_to_use = vote_per_condition
            else:
                votes_to_use = votes_per_file
            calc_quality_bonuses(quantity_bonus_df, accepted_sessions, votes_to_use,
                                 config, quality_bonus_path, n_workers, test_method,
                                 use_condition_level)
def writer(information, outfile, features, end_structure):
    dist = stats([int(item[1]) for item in information])
    length_stem = stats([item[2] for item in information])
    terminal_structure = stats([item[3] for item in information])
    ordered_ends = sorted(end_structure.items(), key=operator.itemgetter(1), reverse=True)
    with open(outfile, 'w') as outp:
        outp.write("distance of nearest structure (discounting base pairing in 5' terminal 50 nt, due to dubious nature of this interaction)\n")
        outp.write("mean,min,max,median,std_dev\n")
        for item in dist[0:-2]:
            outp.write(str(item) + ",")
        outp.write(str(dist[-1]))
        outp.write("\n")
        outp.write("\n")
        outp.write("length of base-pairing for nearest structure (ignores one nucleotide mismatch)\n")
        outp.write("mean,min,max,median,std_dev\n")
        for item in length_stem[0:-2]:
            outp.write(str(item) + ",")
        outp.write(str(length_stem[-1]))
        outp.write("\n")
        outp.write("\n")
        outp.write("how structured is the 3' terminal 50 nt (percent base paired, will discount base pairing in 5' terminal 50 nt, due to dubious nature of this interaction)\n")
        outp.write("mean,min,max,median,std_dev\n")
        for item in terminal_structure[0:-2]:
            outp.write(str(item) + ",")
        outp.write(str(terminal_structure[-1]))
        outp.write("\n")
        outp.write("\n")
        q = 1
        outp.write("3' terminal 50 nt ordered as a function of structure (top = most structured, bottom = least structured)\n")
        outp.write("rank,percent_stranded,coordinate,gene_feature\n")
        for item in ordered_ends:
            outp.write(str(q) + ',' + str(item[1]) + ',' + str(item[0]) + ',' +
                       str(features[item[0]]) + "\n")
            q += 1
def get_lda_individual(self, topic_num, alpha, eta, text):
    """get individual lda model"""
    s = stats(360)
    post = s.get_data_by_day()
    nonOrg = post.loc[post['label'] == 1]
    lyrics = post.loc[post['label'] == 2]
    topic = LDATopicModel()
    c = Count_Vect()
    # change here to define lyrics or quote
    if text in 'lyrics':
        text = lyrics[['text', 'userid']]
        text['text'] = text['text'].apply(lambda x: c.remove_noise(str(x)))
        text = c.get_precocessed_text(text)
        text['text'] = text['text'].apply(lambda x: x.split())
        dictionary = gensim.corpora.Dictionary(text['text'])  # generate dictionary
        bow_corpus = [dictionary.doc2bow(doc) for doc in text['text']]
        model, coherence = topic.get_lda_score_eval(dictionary, bow_corpus, topic_num, alpha, eta)
    elif text in 'quotes':
        text = nonOrg[['text', 'userid']]
        text['text'] = text['text'].apply(lambda x: c.remove_noise(str(x)))
        text = c.get_precocessed_text(text)
        text['text'] = text['text'].apply(lambda x: x.split())
        dictionary = gensim.corpora.Dictionary(text['text'])  # generate dictionary
        bow_corpus = [dictionary.doc2bow(doc) for doc in text['text']]
        model, coherence = topic.get_lda_score_eval(dictionary, bow_corpus, topic_num, alpha, eta)
    # model evaluation
    return model, coherence
def regression(self, pre_var, data):
    s = stats(self.days)
    # data = s.get_count_quote()
    X = data[pre_var]
    y = data["cesd_sum"]
    model = sm.OLS(y, X).fit()
    print(model.summary())
def answer(series, field, p, q, r=0, file="test.png"):
    df1 = series[field]
    plot_autocorrelations(df1)
    # %%
    df1_model = create_model(df1, p, q, r)
    stats(df1_model)
    # %%
    analise_model(df1_model)
    # %%
    plot_autocorrelations(df1_model.resid, "Residuals")
    # %%
    extract_resid_stats(df1_model)
    # %%
    plot_predictions(df1, df1_model, file)
    return df1_model
def eval_sim_err_statistics_cont(env, sup, T, num_samples=1):
    losses = []
    for i in range(num_samples):
        tmp_states, int_actions, taken_actions, _ = collect_traj(env, sup, T)
        int_actions = np.array(int_actions)
        taken_actions = np.array(taken_actions)
        errors = (int_actions - taken_actions) ** 2.0
        errors = np.sum(errors, axis=1)
        losses.append(np.mean(errors))
    return stats(losses)
def mmm():
    mmmArray = re.split(',', modmedian.get())
    mmmdesired_array = [int(numeric_string) for numeric_string in mmmArray]
    df = pd.DataFrame({
        " rating1": mmmdesired_array,
        "dummy": range(len(mmmdesired_array))
    })
    f, (ax_box, ax_hist) = plt.subplots(2, sharex=True,
                                        gridspec_kw={"height_ratios": (0.2, 1)})
    mean = df[' rating1'].mean()
    median = df[' rating1'].median()
    mode = df[' rating1'].mode().get_values()[0]

    sns.boxplot(df[" rating1"], ax=ax_box)
    ax_box.axvline(mean, color='r', linestyle='--')
    ax_box.axvline(median, color='g', linestyle='-')
    ax_box.axvline(mode, color='b', linestyle='-')

    sns.distplot(df[" rating1"], ax=ax_hist)
    ax_hist.axvline(mean, color='r', linestyle='--')
    ax_hist.axvline(median, color='g', linestyle='-')
    ax_hist.axvline(mode, color='b', linestyle='-')

    plt.legend({'Mean': mean, 'Median': median, 'Mode': mode})
    label = 'Mode=' + str(mode) + ', Median=' + str(median) + ', Mean=' + str(mean)
    ax_box.set(xlabel=label)
    if stats(mmmdesired_array) == 0.0:
        ax_hist.set(xlabel="Symmetric (Zero Skewness)")
    elif stats(mmmdesired_array) > 1:
        ax_hist.set(xlabel="Skewed to the Right (Positive Skewness)")
    else:
        ax_hist.set(xlabel="Skewed to the Left (Negative Skewness)")
    plt.show()
def F(env, pi1, pi2, sup, T, num_samples=1):
    losses = []
    for i in range(num_samples):
        # collect trajectory with states visited and actions taken by agent
        tmp_states, _, _, _ = collect_traj(env, pi1, T)
        tmp_actions = np.array([pi2.intended_action(s) for s in tmp_states])
        sup_actions = np.array([sup.intended_action(s) for s in tmp_states])
        errors = 1.0 - np.mean(sup_actions == tmp_actions)
        # compute the mean error on that trajectory
        # (may not be T samples since the game ends early on failures)
        losses.append(np.mean(errors))
    # compute the mean and sem on averaged losses.
    return stats(losses)
def main():
    file = sys.argv[1]
    id = sys.argv[2]
    df = pd.read_csv(file)
    if id == 'average':
        df = df.drop(['student'], 1)
        df = df.reindex(sorted(df.columns), axis=1)
        dates = list(df)
        means = df[dates].mean()
        df = pd.DataFrame(means).transpose()
        stats(df)
    else:
        id = int(id)
        frame = df.loc[df['student'] == id]
        if frame.empty:
            print("Unknown student ID")
            return
        frame = frame.drop(['student'], 1)
        stats(frame)
def eval_agent_statistics_disc(env, agent, sup, T, num_samples=1):
    """
    Evaluate in the given environment along the agent's distribution
    for T timesteps on num_samples.
    """
    losses = []
    for i in range(num_samples):
        tmp_states, _, tmp_actions, _ = collect_traj(env, agent, T)
        sup_actions = np.array([sup.intended_action(s) for s in tmp_states])
        errors = (-(sup_actions == tmp_actions)).astype(int)
        losses.append(np.mean(errors))
    return stats(losses)
def eval_sup_statistics_disc(env, agent, sup, T, num_samples=1):
    """
    Evaluate on the supervisor's trajectory in the given env for T timesteps.
    """
    losses = []
    for i in range(num_samples):
        tmp_states, _, _, _ = collect_traj(env, sup, T)
        tmp_actions = np.array([agent.intended_action(s) for s in tmp_states])
        sup_actions = np.array([sup.intended_action(s) for s in tmp_states])
        errors = (-(sup_actions == tmp_actions)).astype(int)
        losses.append(np.mean(errors))
    return stats(losses)
def get_lda(self):
    """get lda topics"""
    s = stats(180)
    post = s.get_data_by_day()
    nonOrg = post.loc[post['label'] == 1]
    lyrics = nonOrg.loc[nonOrg['tag'] == 2]
    topic = LDATopicModel()
    c = Count_Vect()
    text = lyrics[['text', 'userid']]
    text['text'] = text['text'].apply(lambda x: c.remove_noise(str(x)))
    text = c.get_precocessed_text(text)
    topics, model = topic.get_lda_score(text, 10)
    return topics, model
def eval_agent_statistics_discrete(env, lnr, sup, T, num_samples=1):
    """
    Evaluate loss in the given environment along the agent's distribution
    for T timesteps on num_samples.
    """
    losses = []
    for i in range(num_samples):
        # collect trajectory with states visited and actions taken by agent
        tmp_states, _, tmp_actions, _ = collect_traj(env, lnr, T)
        sup_actions = np.array([sup.intended_action(s) for s in tmp_states])
        errors = 1.0 - np.mean(sup_actions == tmp_actions)
        # compute the mean error on that trajectory
        # (may not be T samples since the game ends early on failures)
        losses.append(np.mean(errors))
    # compute the mean and sem on averaged losses.
    return stats(losses)
def hierarchical(pause=False, **kwargs):
    """Binary networks with overlapping nodes and hierarchies

    This program is an implementation of the algorithm described in the paper
    'Directed, weighted and overlapping benchmark graphs for community detection
    algorithms', written by Andrea Lancichinetti and Santo Fortunato. In particular,
    this program is to produce binary networks with overlapping nodes and hierarchies.

    -N     [number of nodes]
    -k     [average degree]
    -maxk  [maximum degree]
    -t1    [minus exponent for the degree sequence]
    -t2    [minus exponent for the community size distribution]
    -minc  [minimum for the micro community sizes]
    -maxc  [maximum for the micro community sizes]
    -on    [number of overlapping nodes]
    -om    [number of memberships of the overlapping nodes]
    -minC  [minimum for the macro community size]
    -maxC  [maximum for the macro community size]
    -mu1   [mixing parameter for the macro communities (see Readme file)]
    -mu2   [mixing parameter for the micro communities (see Readme file)]

    Example2:
    ./hbenchmark -f flags.dat
    ./hbenchmark -N 10000 -k 20 -maxk 50 -mu2 0.3 -minc 20 -maxc 50 -minC 100 -maxC 1000 -mu1 0.1
    """
    prog = _get_file('lfr_benchmarks/new/hierarchical_bench2_2/hbenchmark')
    args = [prog] + makeargs(kwargs)
    print "Arguments are: ", " ".join(args)
    with tmpdir_context(chdir=True, prefix="tmp-lfrbenchmark", dir=tmpbase):
        retcode = subprocess.call(args)
        assert retcode == 0
        g = networkx.Graph()
        for n, c in read_file('community_first_level.dat'):
            g.add_node(n - 1, microC=c - 1)
        for n, c in read_file('community_second_level.dat'):
            g.add_node(n - 1, macroC=c - 1)
        for n1, n2 in read_file('network.dat'):
            g.add_edge(n1 - 1, n2 - 1)
    g.graph['stats'] = stats(g)
    if pause:
        import fitz.interact
        fitz.interact.interact()
    return g
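# Usage sketch (an assumption, not from the source): the docstring above maps the
# hbenchmark CLI flags to keyword arguments, so a call of the following shape would
# generate a small hierarchical benchmark graph. The specific flag values here are
# illustrative only and mirror the docstring's example invocation.
if __name__ == "__main__":
    g = hierarchical(N=1000, k=20, maxk=50, mu2=0.3,
                     minc=20, maxc=50, minC=100, maxC=1000, mu1=0.1)
    print "Generated graph with", g.number_of_nodes(), "nodes and", g.number_of_edges(), "edges"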
def toHTML(logDirName):
    logDir = os.getcwd() + '/' + 'table' + '-' + logDirName
    out = open(logDirName + '.html', 'w')
    out.write('<html><body>')
    prefs = []
    for root, dirs, files in os.walk(logDir):
        for name in files:
            if re.compile(".*-.*.out$").match(name):
                prefs.append(name.split('-')[0])
    prefs = set(prefs)
    out.write('<table border=1>')
    out.write("<tr><td>Experiment</td><td>xml</td><td>log</td><td>status</td><td>errors</td><td>warnings</td><td>yikes</td><td>previous</td></tr>\n")
    for p in prefs:
        for root, dirs, files in os.walk(logDir):
            for name in files:
                if re.compile(p + "-.*.out$").match(name):
                    print name
                    out.write('<tr>')
                    out.write('<td><b>%s - %s</b></td>' % (p, name.split('-')[1]))
                    outFile = ("%s/%s" % (root, name))
                    out.write('<td><a href=\"%s\">%s</a></td>' % (outFile, outFile.split('/')[-1]))
                    out.write('<td><a href=\"%s/%s\">log</a></td>' % (root, name.replace('.out', '.log')))
                    out.write(' %s ' % stats(root + '/' + name))
                    i = 0
                    out.write('<td>')
                    while '%s.%d' % (name, i) in files:
                        out.write(' :: <a href=\"%s/%s.%d\">%d</a>' % (root, name, i, i))
                        i = i + 1
                    out.write('</td>')
                    out.write('</tr>\n')
    out.write('</table>')
    out.write('<br>\n')
    out.write('</body></html>\n\n\n')
    out.close()
def get_elo(results):
    """ "results" is an array of length 2*n+1 with aggregated frequencies for n games."""
    results = LLRcalc.regularize(results)
    games, mu, var = stats(results)
    stdev = math.sqrt(var)

    # 95% confidence interval for mu
    mu_min = mu + Phi_inv(0.025) * stdev / math.sqrt(games)
    mu_max = mu + Phi_inv(0.975) * stdev / math.sqrt(games)

    el = elo(mu)
    elo95 = (elo(mu_max) - elo(mu_min)) / 2.0
    los = Phi((mu - 0.5) / (stdev / math.sqrt(games)))

    return el, elo95, los
def computeClippedImageStats(im, low=3, high=3, ignore=None):
    import collections
    im = im[~(np.isnan(im) | np.isinf(im))]
    if ignore is not None:
        for i in ignore:
            im = im[im != i]
    tmp = im
    if low != 0 and high != 0 and tmp.min() != tmp.max():
        _, low, upp = scipy.stats.sigmaclip(tmp, low=low, high=high)
        if not np.isnan(low) and not np.isnan(upp) and low != upp:
            tmp = im[(im > low) & (im < upp)]
    mean1 = np.nanmean(tmp)
    sig1 = np.nanstd(tmp)
    stats = collections.namedtuple('stats', 'mean stdev min max')
    return stats(mean=mean1, stdev=sig1, min=np.nanmin(im), max=np.nanmax(im))
def F2(env, pi1, pi2, sup, T, num_samples=1):
    losses = []
    for i in range(num_samples):
        # collect trajectory with states visited and actions taken by agent
        tmp_states, _, _, _ = collect_traj(env, pi1, T)
        tmp_actions = np.array([pi2.intended_action(s) for s in tmp_states])
        tmp_scores = np.array([pi2.decision_function(s) for s in tmp_states])
        sup_actions = np.array([sup.intended_action(s) for s in tmp_states])
        n = len(sup_actions)
        hinge = hinge_loss(sup_actions, tmp_scores)
        penalty = pi2.est.alpha * .5 * np.square(np.linalg.norm(pi2.est.coef_))
        print("hinge: " + str(hinge))
        print("penalty: " + str(penalty))
        errors = hinge / n + penalty
        # compute the mean error on that trajectory
        # (may not be T samples since the game ends early on failures)
        losses.append(np.mean(errors))
    # compute the mean and sem on averaged losses.
    return stats(losses)
def eval_sup_statistics_cont(env, agent, sup, T, num_samples=1):
    """
    Evaluate loss on the supervisor's trajectory in the given env for T timesteps.
    """
    losses = []
    for i in range(num_samples):
        # collect states visited by the supervisor (actions are sampled so not collected)
        tmp_states, _, _, _ = collect_traj(env, sup, T)
        # get intended actions from the agent and supervisor
        tmp_actions = np.array([agent.intended_action(s) for s in tmp_states])
        sup_actions = np.array([sup.intended_action(s) for s in tmp_states])
        errors = (sup_actions - tmp_actions) ** 2.0
        # compute the mean error on that traj
        errors = np.sum(errors, axis=1)
        losses.append(np.mean(errors))
    # generate statistics, same as above
    return stats(losses)
def rows(infile, outfile, minstep, maxstep):
    out = open(outfile, 'w')
    data = np.loadtxt(infile, skiprows=1)
    for x in range(1, data.shape[1]):
        out.write(stats(infile, x, minstep, maxstep) + ' ' + repr(x) + '-' + repr(x + 1) + '\n')
p5 = portfolio_return(data, marketcapweights)[pd.datetime(1994, 1, 1):]

drag1 = p3 - p1
drag2 = p4 - p5


def stats(x):
    ann_mean = x.mean() * 12
    ann_std = x.std() * (12 ** .5)
    geo_mean = ann_mean - (ann_std ** 2) / 2.0
    sharpe = geo_mean / ann_std
    return (ann_mean, ann_std, geo_mean, sharpe)


print stats(p1)
print stats(p3)
print stats(p4)
print stats(p5)

toplot = pd.concat([p1, p3, p4, p5], axis=1)
toplot.columns = ["Optimised", "Handcraft", "Equal", "Market Cap"]
toplot.cumsum().plot()
show()

p1.cumsum().plot(color="black", ls="solid")
p3.cumsum().plot(color="gray", ls="solid")
p4.cumsum().plot(color="black", ls="dashed")
p5.cumsum().plot(color="gray", ls="dashed")
    meso_array = scipy.array(meso_list)
    thermo_array = scipy.array(thermo_list)
    meso_mean = scipy.mean(meso_array)
    meso_std = scipy.std(meso_array)
    thermo_mean = scipy.mean(thermo_array)
    thermo_std = scipy.std(thermo_array)
    p_val = scipy.stats.ttest_ind(thermo_array, meso_array)[1]
    s = ('\nMm: ' + str(meso_mean) + '\nSTDm: ' + str(meso_std) +
         '\nMt: ' + str(thermo_mean) + '\nSTDt: ' + str(thermo_std) +
         '\nP: ' + str(p_val))
    return s


###### get the stats and write to outfile
ofile = open('results5A-10k-P5050-thesisnorm-wnq.txt', 'w')

nip_stats = stats(meso_nips, thermo_nips)
s1 = 'Normalized N_ip: ' + nip_stats + '\n'
ofile.write(s1)

ratio_stats = stats(meso_ratios, thermo_ratios)
s2 = '\nRatio rep atr: ' + ratio_stats + '\n'
ofile.write(s2)

iso_stats = stats(meso_isos, thermo_isos)
s3 = '\nFraction isolated: ' + iso_stats + '\n'
ofile.write(s3)

branched_stats = stats(meso_branches, thermo_branches)
s4 = '\nFraction branched: ' + branched_stats + '\n'
ofile.write(s4)

meso_nip_data = str(meso_nips)
thermo_nip_data = str(thermo_nips)
meso_ratio_data = str(meso_ratios)
thermo_ratio_data = str(thermo_ratios)
        sum=s,
        lines=l,
        mean=m,
        variance=v,
        std_dev=std_dev,
        min=min_value,
        max=max_value,
        median=median_value,
        confidence=confidence,
        low_limit=ci[0],
        high_limit=ci[1])
    return result


if __name__ == "__main__":
    parse_args()
    result = stats(sys.stdin, field, delimiter, skip, confidence, navalue)
    print_st("Field", result.field)
    print_st("Lines", result.lines)
    print_st("Mean", result.mean)
    print_st("Variance", result.variance)
    print_st("StdDev", result.std_dev)
    print_st("Sum", result.sum)
    print_st("Min", result.min)
    print_st("Max", result.max)
    print_st("Median", result.median)
    print_st("Confidence", result.confidence)
    print_st("Cnf.Itv.L", result.low_limit)
    print_st("Cnf.Itv.U", result.high_limit)
# plt.show()


def stats(binding_data, proximity_data):
    n_bins = 50
    hist_b, bins_b = np.histogram(binding_data,
                                  bins=np.linspace(np.min(binding_data), np.max(binding_data), n_bins))
    hist_p, bins_p = np.histogram(proximity_data,
                                  bins=np.linspace(np.min(binding_data), np.max(binding_data), n_bins))
    # print hist_b, hist_p
    # print "Binding data> mean:%s, median:%s, std:%s" % (np.mean(binding_data), np.median(binding_data), np.std(binding_data))
    # print "Proximity data> mean:%s, median:%s, std:%s" % (np.mean(proximity_data), np.median(proximity_data), np.std(proximity_data))
    # print scipy.stats.spearmanr(hist_b, hist_p)
    # print bins_b, bins_p
    print ks_2samp(hist_b, hist_b)
    print ks_2samp(hist_b, hist_p)


mob_name = 'crystals_mob.txt'
aln_name = 'crystals_aln.csv'
# mob_name = 'models_mob.txt'
# aln_name = 'models_aln.csv'

mob_dict, aln_dict = get_data(mob_name, aln_name)
data = combine_data(mob_dict, aln_dict)
binding_data, proximity_data = mobility_stats(data)
print len(binding_data), len(proximity_data)
# plots(binding_data, proximity_data)
stats(binding_data, proximity_data)
    L = L.ravel()

    # obtain the indices to sort and unsort the flattened array
    i_sort = np.argsort(L)[::-1]
    i_unsort = np.argsort(i_sort)

    L_cumsum = L[i_sort].cumsum()
    L_cumsum /= L_cumsum[-1]

    xbins = 0.5 * (xbins[1:] + xbins[:-1])
    ybins = 0.5 * (ybins[1:] + ybins[:-1])

    return xbins, ybins, L_cumsum[i_unsort].reshape(shape)


Msin_median, Msin_std, Msin_range = stats(Msin_samples)
print('Msini med,std,range', Msin_median / (1.898 * 10 ** 30), Msin_std, Msin_range / (1.898 * 10 ** 30))
Period_median, Period_std, Period_range = stats(Period_samples)
print('Period med,std,range', Period_median, Period_std, Period_range)
Tzero_median, Tzero_std, Period_range = stats(Tzero_samples)
print('Tzero med,std,range', Tzero_median, Tzero_std, Period_range)

# Msini_pdf = mlab.normpdf(Msin_samples, Msin_median, Msin_std)
# Period_pdf = mlab.normpdf(Period_samples, Period_median, Period_std)
# Tzero_pdf = mlab.normpdf(Tzero_samples, Tzero_median, Tzero_std)

Msin, Period, sig_MP = compute_sigma_level(Msin_samples, Period_samples)
Msin, Tzero, sig_MT = compute_sigma_level(Msin_samples, Tzero_samples)
Tzero, Period, sig_TP = compute_sigma_level(Tzero_samples, Period_samples)

Abest = 77.47156475290217
        data.append(float(item))

    s = sum(data)
    l = len(data)
    m = mean(data, s)
    v = variance(data, m)
    min_value = 0 if len(data) == 0 else min(data)
    max_value = 0 if len(data) == 0 else max(data)
    std_dev = v ** 0.5
    median_value = median(data)
    ci = mean_confidence_interval(data, confidence)
    return (field, l, m, v, std_dev, s, min_value, max_value, median_value, ci)


if __name__ == "__main__":
    parse_args()
    field, l, m, v, std_dev, s, min_value, max_value, median_value, ci = stats(
        sys.stdin, field, delimiter, skip, confidence, navalue)
    print_st("Field", field)
    print_st("Lines", l)
    print_st("Mean", m)
    print_st("Variance", v)
    print_st("StdDev", std_dev)
    print_st("Sum", s)
    print_st("Min", min_value)
    print_st("Max", max_value)
    print_st("Median", median_value)
    print_st("Confidence", confidence)
    print_st("Cnf.Itv.L", ci[0])
    print_st("Cnf.Itv.U", ci[1])
root = '/nfs/christoq_ls/nkern/C4/MassRich/TRUTH'

# Load Data
truth = fits.open(data_root + '/c4_cluster_truth_revH100_rev5.fits')[1].data
truth['m200mean'] *= 1e10
truth['m200crit'] *= 1e10

Rich = truth['n10virm19']
Mass = truth['m200crit']

rich_low, rich_high, rich_step = 3.3, 4.8, 0.3
richbins = np.arange(rich_low, rich_high, rich_step)
mass_low, mass_high, mass_step = 14.25, 15.15, 0.2
massbins = np.arange(mass_low, mass_high, mass_step)

d = stats(Rich, Mass, rich_low, rich_high, rich_step, log10=False, fitline=True)
globals().update(d)

plot = True
if plot == True:
    fig, ax = mp.subplots(1, 2, figsize=(13, 7))

    ## ax[0] is MassRich
    p1, = ax[0].plot(X, Y, 'ko', alpha=.2)
    ax[0].set_xlim(X.min() - .1, X.max() + .1)
    ax[0].set_ylim(Y.min() - .1, Y.max() + .5)
    ax[0].set_xlabel('log( N200 )', fontsize=15)
    ax[0].set_ylabel('log( Mass )', fontsize=15)
    ax[0].fill_between(richbins, Y.min() - .1, Y.max() + .5, alpha=.1)
    ax[0].plot(X, X * model.coef_ + model.intercept_, 'r')

    ## ax[1] is Scatter
def summaryStats(df, filterColumns, returnColumns, regress=None, debug=False):
    if regress is None:
        regressionColumns = [None]
    else:
        regressionColumns = [None] + regress

    groupby = {}
    data = df
    key = 'ALL'
    val = 'ALL'

    statColumns = ['return', 'total', 'win_ct', 'lose_ct', 'win_ratio',
                   'lose_ratio', 'return_med', 'return_avg', 'return_stddev',
                   'return_min', 'return_max']
    if regress is not None:
        # prepend 'variable', then append the regression statistics
        statColumns.reverse()
        statColumns.append('variable')
        statColumns.reverse()
        statColumns.append('slope')
        statColumns.append('intercept')
        statColumns.append('r')
        statColumns.append('r_low')
        statColumns.append('r_high')
        statColumns.append('2_tail_prob')
        statColumns.append('std_err')
    # prepend the grouping columns
    statColumns.reverse()
    statColumns.append('columnValue')
    statColumns.append('columnKey')
    statColumns.reverse()

    rows = []
    for returns in returnColumns:
        returnsData = data.dropna(subset=[returns])
        for var in regressionColumns:
            if var is not None and len(returnsData) > 0:
                if debug:
                    print key, val, var, returns
                regressionData = returnsData.dropna(subset=[var])
                tmpFilters = regressionData[var]
            else:
                regressionData = returnsData
                tmpFilters = []
            tmpReturns = regressionData[returns]
            results = stats(var, tmpFilters, tmpReturns)
            row = {'columnKey': key, 'columnValue': val,
                   'variable': var, 'return': returns}
            row.update(results)
            rows.append(row)

    for col in filterColumns:
        g = data[col].unique()
        groupby[col] = filter(None, [v if pd.notnull(v) else None for v in g])
    keys = groupby.keys()
    for key in keys:
        for val in groupby[key]:
            filteredData = data[data[key] == val]
            for returns in returnColumns:
                returnsData = filteredData.dropna(subset=[returns])
                for var in regressionColumns:
                    if var is not None:
                        regressionData = returnsData.dropna(subset=[var])
                        tmpFilters = regressionData[var]
                    else:
                        regressionData = returnsData
                        tmpFilters = []
                    tmpReturns = regressionData[returns]
                    results = stats(var, tmpFilters, tmpReturns)
                    row = {'columnKey': key, 'columnValue': val,
                           'variable': var, 'return': returns}
                    row.update(results)
                    rows.append(row)

    data = pd.DataFrame(rows, columns=statColumns)
    try:
        data['r'] = data['r'].real
        data['r_low'] = data['r_low'].real
        data['r_high'] = data['r_high'].real
    except:
        pass
    return data