Python set_eng_float_format Beispiele, pandas.set_eng_float_format Python Beispiele

Beispiel #1

0

Datei anzeigen

def calculate_ndvi_and_cloud_percent_for_the_parcel(df_ext, cloud_categories):
    """Build a per-reference table with NDVI and cloud percentage.

    Parameters
    ----------
    df_ext : pandas.DataFrame
        Band statistics. The code reads the columns ``date_part`` (epoch
        seconds), ``hist``, ``band``, ``mean``, ``count``, ``std`` and
        ``reference``. The frame itself is not modified.
    cloud_categories
        Passed through unchanged to ``get_cloudyness``.

    Returns
    -------
    pandas.DataFrame
        The B04 and B08 rows merged on ``reference`` (per-band columns carry
        the band name as suffix, e.g. ``meanB04``), plus the columns
        ``ndvi``, ``utm_number`` and ``ndvi_std``. An empty DataFrame is
        returned when either band has no rows.

    Notes
    -----
    When a result is computed, the pandas display precision and the
    engineering float format are changed globally as a side effect.
    """
    # Work on a copy so the caller's DataFrame is never changed.
    df = df_ext.copy()
    # Convert the epoch timestamp to a datetime (local time, as
    # ``datetime.fromtimestamp`` does without a tz argument).
    df['date_part'] = df['date_part'].map(datetime.datetime.fromtimestamp)
    # Element [1] of the helper's result is stored as the cloud percentage.
    df['cloud_pct'] = df['hist'].apply(
        lambda s: get_cloudyness(s, cloud_categories)[1])

    bands = ['B04', 'B08']
    columns = ['date_part', 'mean', 'count', 'std', 'cloud_pct', 'reference']
    # Treat each band separately.
    df0 = df[df['band'] == bands[0]][columns]
    df1 = df[df['band'] == bands[1]][columns]

    # NDVI needs both bands; without them there is nothing to compute.
    if len(df0) == 0 or len(df1) == 0:
        return pd.DataFrame()

    # Merge back into one DataFrame based on reference that should be unique.
    dff = pd.merge(df0, df1, on='reference', suffixes=(bands[0], bands[1]))

    mean0, mean1 = f"mean{bands[0]}", f"mean{bands[1]}"
    std0, std1 = f"std{bands[0]}", f"std{bands[1]}"

    dff['ndvi'] = (dff[mean1] - dff[mean0]) / (dff[mean1] + dff[mean0])
    dff['utm_number'] = dff['reference'].apply(get_utm_number_from_reference)
    # Column names are derived from ``bands`` so this line stays in step with
    # the merge suffixes above.
    dff['ndvi_std'] = dff.apply(
        lambda x: calculate_ndvi_std_from_band_mean_and_std(
            x[mean0], x[mean1], x[std0], x[std1]),
        axis=1)

    # Use the fully qualified option name: the bare 'precision' pattern is
    # ambiguous in recent pandas releases and is rejected there.
    pd.set_option('display.precision', 3)
    pd.set_eng_float_format(accuracy=3)
    return dff

Beispiel #2

0

Datei anzeigen

def t_test(data=None, independent=None, dependent=None):
    """Run an independent-samples t-test and summarise it in a DataFrame.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding both variables.
    independent : column label
        Grouping column; it must contain exactly two distinct values.
    dependent : column label
        Column with the measured values.

    Returns
    -------
    pandas.DataFrame or None
        One column ``'Value'`` with the absolute difference between means,
        its standard error, the t statistic, the p-value, the bounds of the
        95% confidence interval of the difference and Cohen's d. ``None`` is
        returned (after printing a message) when the grouping column does
        not have exactly two groups.

    Notes
    -----
    Sets the pandas engineering float format globally as a side effect.
    Assumes ``parammct`` returns a table with rows ``'Mean'``, ``'n'`` and
    ``'SD'`` and one column per group, and that ``stats`` is ``scipy.stats``
    -- confirm against the rest of the module.
    """
    pd.set_eng_float_format(accuracy=3, use_eng_prefix=False)
    independent_groups = pd.unique(data[independent])
    if len(independent_groups) > 2:
        print('There are more than 2 groups in the independent variable')
        print('t-test is not the correct statistical test to run in that circumstance,')
        print('consider running an ANOVA')
        return
    if len(independent_groups) < 2:
        # Previously this fell through and failed with an IndexError below.
        print('There are fewer than 2 groups in the independent variable')
        return

    mct = parammct(data=data, independent=independent, dependent=dependent)

    group0 = data[dependent][data[independent] == independent_groups[0]]
    group1 = data[dependent][data[independent] == independent_groups[1]]
    t_test_value, p_value = stats.ttest_ind(group0, group1)

    # Positional access via .iloc: plain [0]/[1] on a label-indexed Series is
    # deprecated and would be label-based for integer group names.
    means, sizes, sds = mct.loc['Mean'], mct.loc['n'], mct.loc['SD']
    mean0, mean1 = means.iloc[0], means.iloc[1]
    n0, n1 = sizes.iloc[0], sizes.iloc[1]
    sd0, sd1 = sds.iloc[0], sds.iloc[1]

    degrees_of_freedom = n0 + n1 - 2
    difference_mean = np.abs(mean0 - mean1)
    pooled_sd = np.sqrt(((n0 - 1) * sd0 ** 2 + (n1 - 1) * sd1 ** 2) /
                        degrees_of_freedom)
    sedifference = pooled_sd * np.sqrt((1 / n0) + (1 / n1))

    # The interval half-width is the *critical* t value times the standard
    # error. Using the observed t statistic (as before) always collapsed the
    # interval to (0, 2 * difference).
    t_critical = stats.t.ppf(0.975, degrees_of_freedom)
    margin = t_critical * sedifference
    difference_mean_cilower = difference_mean - margin
    difference_mean_ciupper = difference_mean + margin

    cohend = difference_mean / pooled_sd
    t_test_result = pd.DataFrame(
        [difference_mean, sedifference, t_test_value, p_value,
         difference_mean_cilower, difference_mean_ciupper, cohend],
        index=['Difference between means', 'SE difference', 't-test', 'p-value',
               'Lower bound difference CI', 'Upper bound difference CI', 'Cohen\'s d'],
        columns=['Value'])

    return t_test_result

Beispiel #3

0

Datei anzeigen

def chi_square(data=None, variable1=None, variable2=None):
    """Display a contingency table and a chi-square test for two columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding both variables.
    variable1, variable2
        Column names; both are converted with ``str`` before use.

    Returns
    -------
    None
        Results are shown with ``display``. Nothing is shown when
        ``input_check_categorical_categorical`` returns a truthy value or
        when either column has fewer than two distinct values.

    Notes
    -----
    Sets the pandas engineering float format globally as a side effect.
    The test is run without continuity correction (``correction=False``).
    """
    pd.set_eng_float_format(accuracy=3, use_eng_prefix=False)

    variable1 = str(variable1)
    variable2 = str(variable2)
    if input_check_categorical_categorical(data, variable1, variable2):
        return

    values_var1 = pd.unique(data[variable1])
    values_var2 = pd.unique(data[variable2])

    problem_found = False
    # Report the offending column by *name*; the message used to print the
    # array of its unique values instead.
    for name, categories in ((variable1, values_var1), (variable2, values_var2)):
        if len(categories) < 2:
            print(name, 'has less than two categories. It has:', len(categories))
            problem_found = True
    if problem_found:
        return

    contingency_table = pd.crosstab(data[variable1], data[variable2])
    display(Markdown('**Contingency Table**'))
    display(contingency_table)

    chi2_test = stats.chi2_contingency(contingency_table, correction=False)

    # The first four items of the result are reported under these labels.
    chi2_result = pd.Series(
        [chi2_test[0], chi2_test[1], chi2_test[2], chi2_test[3]],
        index=['Chi-square value', 'p-value', 'Degrees of freedom', 'Expected frequencies'])
    chi2_result = pd.DataFrame(chi2_result, columns=['Value'])
    display(Markdown('**Results Chi-square test**'))
    display(chi2_result)

    return

Beispiel #4

0

Datei anzeigen

def logistic_reg(data=None, independent=None, dependent=None):
    """Fit and display a one-predictor logistic regression with intercept.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding both variables. It is not modified.
    independent, dependent
        Column names (converted with ``str``). The dependent column must
        contain exactly two distinct values.

    Returns
    -------
    None
        The model summary and the coefficient confidence intervals are shown
        with ``display`` and a plot of actual values against predicted
        probabilities is drawn. Nothing is shown when
        ``input_check_categorical`` returns a truthy value or when the
        dependent column is not binary.

    Notes
    -----
    Sets the pandas engineering float format globally as a side effect.
    """
    pd.set_eng_float_format(accuracy=3, use_eng_prefix=False)

    independent = str(independent)
    dependent = str(dependent)
    if input_check_categorical(data, independent, dependent):
        return

    n_categories = len(pd.unique(data[dependent]))
    if n_categories != 2:
        print('Dependent variable must have two categories')
        print(dependent, 'variable has', n_categories, 'categories')
        return

    # Build the design matrix on a separate frame: the constant column used
    # to be written into the caller's DataFrame.
    design = data[[independent]].assign(interceptant=1)
    logReg = sm.Logit(data[dependent], design)
    regression = logReg.fit()
    display(regression.summary())
    display(Markdown('**Coefficients confidence intervals**'))
    display(regression.conf_int())

    predicted_values = regression.predict()
    plt.plot(data[independent], data[dependent], 'o', label='Actual values')
    plt.plot(data[independent], predicted_values, 'ok', label='Predicted probabilities')
    plt.xlabel(independent, fontsize=14)
    plt.ylabel('Probability ' + dependent, fontsize=14)
    plt.ylim(-0.05, 1.05)
    plt.legend()
    plt.show()

    return

Beispiel #5

0

Datei anzeigen

Datei: stats.py Projekt: karlssonper/fantasyfootballstats

def printStats(players):
    """Print a table of the (up to) 25 players with the highest total points.

    Parameters
    ----------
    players : dict
        Maps a key to a player object. The attributes ``name``, ``totalP``,
        ``avgP``, ``medianP``, ``stddevP``, ``totalT``, ``avgT`` and
        ``stddevT`` are read from each player.

    The table has one row per player (labelled by ``player.name``), the
    columns ``T, A, M, V, TT, AT, VT`` in the attribute order listed above,
    and is sorted by ``T`` in descending order.

    Sets the pandas engineering float format globally as a side effect.
    """
    ranked = sorted(players.values(), key=lambda p: p.totalP)
    d = {}
    # The 25 highest totals, best first.
    for player in reversed(ranked[-25:]):
        d[player.name] = [player.totalP, player.avgP, player.medianP,
                          player.stddevP, player.totalT,
                          player.avgT, player.stddevT]
    pd.set_eng_float_format(accuracy=1, use_eng_prefix=True)
    idx = ["T", "A", "M", "V", "TT", "AT", "VT"]
    table = pd.DataFrame(d, index=idx).transpose()
    # ``DataFrame.sort`` no longer exists in pandas; ``sort_values`` is its
    # replacement. A parenthesised single argument prints identically under
    # the Python 2 print statement and the Python 3 function.
    print(table.sort_values("T", ascending=False))

Beispiel #6

0

Datei anzeigen

def tukey(data=None, independent=None, dependent=None):
    """Display a Tukey HSD summary and its simultaneous-interval plot.

    ``independent`` and ``dependent`` are column names of ``data`` and are
    converted with ``str`` before use. When
    ``input_check_numerical_categorical`` returns a truthy value nothing is
    shown. The function always returns ``None``.

    Sets the pandas engineering float format globally as a side effect.
    """
    pd.set_eng_float_format(accuracy=3, use_eng_prefix=False)

    group_column, value_column = str(independent), str(dependent)
    if input_check_numerical_categorical(data, group_column, value_column):
        return None

    # Values first, group labels second.
    comparison = multi.MultiComparison(data[value_column], data[group_column])
    outcome = comparison.tukeyhsd()
    display(outcome.summary())
    outcome.plot_simultaneous()
    return None

Beispiel #7

0

Datei anzeigen

def estimate_relative_error_in_nominal_capacitance(df):
    """Summarise the relative error of measured vs. nominal capacitance.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``C`` and ``test_capacitor``. Rows containing any
        missing value are ignored. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        ``describe()`` statistics of ``(C - test_capacitor) /
        test_capacitor``, one row per distinct ``test_capacitor`` value.

    The mean and standard deviation of the per-group means, and the
    per-group ``mean``/``std`` columns, are printed as percentages. Sets the
    pandas engineering float format globally as a side effect.
    """
    cleaned_df = df.dropna()
    nominal = cleaned_df['test_capacitor']
    # Compute the relative error for every row first and group afterwards;
    # this avoids reading the grouping column inside ``groupby.apply``.
    relative_error = (cleaned_df['C'] - nominal) / nominal
    C_relative_error = relative_error.groupby(nominal).describe()
    pd.set_eng_float_format(accuracy=1, use_eng_prefix=True)
    print(
        'Estimated relative error in nominal capacitance values = %.1f%% '
        ' +/-%.1f%%' % (C_relative_error['mean'].mean() * 100,
                        C_relative_error['mean'].std() * 100))
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(C_relative_error[['mean', 'std']] * 100)
    print('')

    return C_relative_error

Beispiel #8

0

Datei anzeigen

Datei: feedback_benchmarks.py Projekt: wheeler-microfluidics/dropbot-calibration

def estimate_relative_error_in_nominal_capacitance(df):
    """Summarise the relative error of measured vs. nominal capacitance.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``C`` and ``test_capacitor``. Rows containing any
        missing value are ignored. The frame is not modified.

    Returns
    -------
    pandas.DataFrame
        ``describe()`` statistics of ``(C - test_capacitor) /
        test_capacitor``, one row per distinct ``test_capacitor`` value.

    The mean and standard deviation of the per-group means, and the
    per-group ``mean``/``std`` columns, are printed as percentages. Sets the
    pandas engineering float format globally as a side effect.
    """
    cleaned_df = df.dropna()
    nominal = cleaned_df['test_capacitor']
    # Row-wise relative error, then grouped statistics; no ``groupby.apply``
    # over the grouping column is needed.
    relative_error = (cleaned_df['C'] - nominal) / nominal
    C_relative_error = relative_error.groupby(nominal).describe()
    pd.set_eng_float_format(accuracy=1, use_eng_prefix=True)
    print('Estimated relative error in nominal capacitance values = %.1f%% '
          ' +/-%.1f%%' % (C_relative_error['mean'].mean() * 100,
                          C_relative_error['mean'].std() * 100))
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(C_relative_error[['mean', 'std']] * 100)
    print('')

    return C_relative_error

Beispiel #9

0

Datei anzeigen

def anova(data=None, independent=None, dependent=None):
    """Return a one-way ANOVA table with an added eta-squared column.

    Parameters
    ----------
    data : pandas.DataFrame
        Table holding both variables.
    independent, dependent
        Column names (converted with ``str``); they are joined into the
        model formula ``"<dependent> ~ <independent>"``.

    Returns
    -------
    pandas.DataFrame or None
        The type-2 ANOVA table with ``PR(>F)`` renamed to ``p`` and a new
        ``Eta squared`` column. In the ``F``, ``p`` and ``Eta squared``
        columns the ``Residual`` row holds an empty string. ``None`` is
        returned when ``input_check_numerical_categorical`` returns a truthy
        value.

    Notes
    -----
    Sets the pandas engineering float format globally as a side effect.
    """
    pd.set_eng_float_format(accuracy=3, use_eng_prefix=False)

    independent = str(independent)
    dependent = str(dependent)
    if input_check_numerical_categorical(data, independent, dependent):
        return

    formula = dependent + ' ~ ' + independent
    model = ols(formula, data=data).fit()
    aov_table = sm.stats.anova_lm(model, typ=2)
    aov_table.rename(columns={'PR(>F)': 'p'}, inplace=True)

    # The table is indexed by term name, so first/second row must be taken
    # with .iloc; integer keys through plain [] are deprecated for
    # label-indexed Series.
    effect_f = aov_table['F'].iloc[0]
    effect_p = aov_table['p'].iloc[0]
    ss_effect = aov_table['sum_sq'].iloc[0]
    ss_residual = aov_table['sum_sq'].iloc[1]
    eta_sq = ss_effect / (ss_effect + ss_residual)

    row_labels = [independent, 'Residual']
    aov_table['F'] = pd.Series([effect_f, ''], index=row_labels)
    aov_table['p'] = pd.Series([effect_p, ''], index=row_labels)
    aov_table['Eta squared'] = pd.Series([eta_sq, ''], index=row_labels)

    return aov_table

Beispiel #10

0

Datei anzeigen

import textwrap

import numpy as np
import pandas as pd

from matplotlib import pylab as plt
from matplotlib.gridspec import GridSpec
from matplotlib import ticker
from matplotlib.ticker import MaxNLocator
from mpl_toolkits.axes_grid.anchored_artists import AnchoredText

from .kplot import tprop
from .. import tfind
from .. import tval

# Module-level defaults, applied once when this module is imported.
# pandas: show floats in engineering notation (3 decimals, SI prefixes).
pd.set_eng_float_format(accuracy=3,use_eng_prefix=True)
# matplotlib: two-colour line cycle for all axes.
# NOTE(review): newer matplotlib releases configure this through
# `axes.prop_cycle`; confirm `color_cycle` is still accepted by the
# matplotlib version this module targets.
plt.rc('axes',color_cycle=['RoyalBlue','Tomato'])
# matplotlib: default font size 8.
plt.rc('font',size=8)
def print_traceback(f):
    """
    Decorator so that we can fail gracefully from a plotting mishap.

    The wrapped function is called normally and its result returned. If it
    raises an ``Exception``, the traceback is printed and also written into
    the current matplotlib axes (wrapped at 50 characters); the wrapper then
    returns ``None`` instead of propagating the error.
    """
    # Imported here because the module header does not import these names.
    import functools
    import traceback

    @functools.wraps(f)
    def wrapper_function(*args, **kwargs):
        try:
            return f(*args, **kwargs)
        except Exception:
            ax = plt.gca()
            error = traceback.format_exc()
            print(error)
            error = textwrap.fill(error, 50)
            ax.text(0, 1, error, transform=ax.transAxes, va='top')

    # Without this return the decorator replaced every decorated function
    # with None.
    return wrapper_function

Beispiel #11

0

Datei anzeigen

Datei: population_summary_stats.py Projekt: jmaddock/wiki_languages

 def write_csv(self):
     """Write ``self.result`` to ``self.outfile`` as UTF-8 CSV without index.

     When ``self.outfile`` is falsy nothing is written and a message is
     logged through ``utils.log`` instead.
     """
     if not self.outfile:
         utils.log('No outfile specified!')
         return
     # NOTE(review): this sets a pandas display option globally; confirm it
     # is meant to influence the CSV output.
     pd.set_eng_float_format(accuracy=3, use_eng_prefix=True)
     self.result.to_csv(self.outfile, encoding='utf-8', index=False)

Beispiel #12

0

Datei anzeigen

Datei: same_time_test.py Projekt: TerYang/Preprocessing-data-Encoding

                       sep=' ',
                       index=False,
                       header=False,
                       mode='a',
                       encoding='utf-8',
                       float_format='%.4f',
                       index_label=None)
    # data.to_pickle(des,compression='zip')
    print('*' * 40)


if __name__ == "__main__":
    # Script entry point: report the start time, then configure how pandas
    # displays floating point numbers.
    print('program  started at:', time.asctime(time.localtime(time.time()))
          )  #time.strftime('%Y-%m-%d,%H:%M:%S', time.localtime(time.time()))

    # Engineering notation with 7 decimals and SI prefixes.
    pd.set_eng_float_format(accuracy=7, use_eng_prefix=True)
    # Fully qualified option name: the bare 'precision' pattern is ambiguous
    # in recent pandas releases and is rejected there.
    pd.set_option('display.precision', 7)
    # pd.set_option('chop_threshold', .5)

    # source_addr = "/home/gjj/PycharmProjects/ADA/netsData/hackingData/GANdata/from_raw_change_scaler/2/data/Attack_free_dataset2.pkl"
    # dire_addr = "/home/gjj/PycharmProjects/ADA/netsData/hackingData/GANdata/from_raw_change_scaler/"
    # dire_addr = "/home/gjj/PycharmProjects/ADA/netsData/hackingData/GANdata/from_raw_change_scaler/2/data"

    # print('program  start at:', time.strftime('%Y-%m-%d,%H:%M:%S', time.localtime(time.time())))
    # # print('data from :%s'%source_addr)
    # print()
    # os.chdir(os.path.dirname(dire_addr))
    # dire_url = os.path.join(dire_addr, 'Attack_free_dataset_64.txt')
    # print("\ncurrent at:{}".format(os.getcwd()))
    # print()
    """pkl to txt"""

Beispiel #13

0

Datei anzeigen

Datei: routing_results_summary.py Projekt: cfobel/cyvpr

def _summarize_channel_widths(data):
    """Return the only row of a one-row table, otherwise ``data.describe()``."""
    if len(data) == 1:
        return data.iloc[0]
    # Also taken for an empty table, which previously left the summary
    # variable unbound and failed with a NameError.
    return data.describe()


def main(routing_hdf_path, net_file_namebase):
    """Build a text summary of the routing results of one net-file.

    Parameters
    ----------
    routing_hdf_path
        Path of the HDF file (converted with ``str``); opened read-only.
    net_file_namebase : str
        Name of the node below the file root whose ``route_states`` table is
        loaded; it is also used in the report title.

    Returns
    -------
    tuple
        ``(report_text, routing_results)`` where ``routing_results`` is the
        ``pandas.DataFrame`` built from the ``route_states`` rows.

    Notes
    -----
    The pandas engineering float format is active only while the report is
    built. The previous ``float_format`` and ``column_space`` display
    options are restored on exit, also when an error occurs.
    """
    option_names = ('display.float_format', 'display.column_space')
    format_opts = dict((k, pd.get_option(k)) for k in option_names)
    # Format floats to:
    #
    #   * Avoid small float values being displayed as zero _(e.g.,
    #     critical-path-delay)_.
    #   * Use engineering postfix to make it easier to compare values
    #     at-a-glance _(e.g., `u` for micro, `n` for nano, etc.)_.
    pd.set_eng_float_format(accuracy=3, use_eng_prefix=True)
    try:
        h5f = ts.open_file(str(routing_hdf_path), 'r')
        try:
            # Load the data from the `route_states` table of the HDF file
            # into a `pandas.DataFrame` instance.
            net_file_routings = getattr(h5f.root, net_file_namebase)
            data = np.array([v.fetch_all_fields()
                             for v in net_file_routings.route_states],
                            dtype=net_file_routings.route_states.dtype)
            routing_results = pd.DataFrame(data)
        finally:
            # Close the file even when reading fails.
            h5f.close()

        string_io = StringIO.StringIO()
        indent = 4 * ' '
        separator = '\n' + 70 * '-' + '\n'

        def emit(text):
            # Same output as `print >> string_io, text`: the text followed
            # by one newline. Valid syntax on Python 2 and 3.
            string_io.write('%s\n' % (text,))

        emit('# [%s] Routing results summary #\n' % net_file_namebase)
        min_success_summary = _summarize_channel_widths(
            min_success_data(routing_results))
        emit('## Minimum routable channel-width summary ##\n')
        emit(prefix_lines(min_success_summary, indent))

        emit(separator)

        # The summary form is chosen from the size of the max-failed data
        # itself; it used to be chosen from the min-success data.
        max_failed_summary = _summarize_channel_widths(
            max_failed_data(routing_results))
        emit('## Maximum unroutable channel-width summary ##\n')
        emit(prefix_lines(max_failed_summary, indent))

        incomplete_routing_searches = np.where(
            min_success_max_failed_channel_width_diff(routing_results) != 1)
        if len(incomplete_routing_searches[0]):
            emit('Incomplete routings:')
            emit('\n'.join(['  * `%s`' %
                            pformat(routing_results
                                    ['block_positions_sha1'][i])
                            for i in incomplete_routing_searches[0]]))

        emit(separator)

        emit('## Missing routability result routing configurations'
             ' ##\n')
        emit('\n'.join(['  * `%s`' % pformat(v) for v in
                        missing_routability_result_configs
                        (routing_results)]))

        emit(separator + '\n')

        return string_io.getvalue(), routing_results
    finally:
        # Restore every saved option, including a saved value of None
        # (which means "no custom float format"); skipping None left the
        # engineering format switched on for the caller.
        for k, v in format_opts.items():
            pd.set_option(k, v)

Beispiel #14

0

Datei anzeigen

#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


# In[2]:


# Show floats in engineering notation with 4 decimals (pandas display option).
pd.set_eng_float_format(accuracy=4)


# In[3]:


# Load two CSV files from an absolute, machine-specific Windows path.
# NOTE(review): presumably the ASHRAE energy-prediction training data and the
# matching weather table, judging by the path names -- adjust the paths
# before running on another machine.
train = pd.read_csv(r'E:\Users\quadr\Documents\datascience-arquivos\Kaggle\Ashrae_Energy_Prediction\train.csv')
w_train = pd.read_csv(r'E:\Users\quadr\Documents\datascience-arquivos\Kaggle\Ashrae_Energy_Prediction\weather_train.csv')


# In[4]:


# Load a third CSV file (building metadata, per the file name) from the same directory.
b_meta = pd.read_csv(r'E:\Users\quadr\Documents\datascience-arquivos\Kaggle\Ashrae_Energy_Prediction\building_metadata.csv')

Beispiel #15

0

Datei anzeigen

 def eng():
     """Configure how pandas displays floating point numbers.

     After the call floats are shown with thousands separators and five
     decimals, and ``display.precision`` is 7. The options are global.
     """
     import pandas as pd
     pd.set_eng_float_format(accuracy=3, use_eng_prefix=True)
     # NOTE: this assignment replaces the float formatter installed by the
     # call above, so the fixed-point format below is the one in effect.
     # The format spec used to be '{:, .5f}', which is invalid and raised a
     # ValueError as soon as a float was displayed.
     pd.options.display.float_format = '{:,.5f}'.format
     # Fully qualified option name: the bare 'precision' pattern is
     # ambiguous in recent pandas releases and is rejected there.
     pd.set_option('display.precision', 7)

Beispiel #16

0

Datei anzeigen

def set_format():
    """Display pandas floats in engineering notation with two decimals.

    Exponents are shown in ``E+03`` style rather than as SI prefixes. The
    setting is global.
    """
    decimals = 2
    pd.set_eng_float_format(accuracy=decimals, use_eng_prefix=False)