예제 #1
0
def _most_common(series):
    """Return the most frequent value of *series*, or None if it has none."""
    modes = series.mode()
    # mode() returns all tied values in sorted order; an empty or all-NaN
    # series yields an empty result, which must not be indexed.
    return modes.iloc[0] if not modes.empty else None


def station_stats(df):
    """Print the most popular start station, end station and trip.

    Args:
        df: pandas DataFrame with 'Start Station' and 'End Station' columns.

    Side effects:
        Adds a 'combo' column ("<start> to <end>") to ``df`` and prints the
        statistics plus the elapsed time to stdout.
    """

    print('\nCalculating The Most Popular Stations and Trip...\n')
    print('')
    start_time = time.time()

    # The station names are read straight from their own columns; there is
    # no pandas-level ``pd.to_string`` and no ``.dt.start`` accessor.
    common_start_station = _most_common(df['Start Station'])

    print('Most Common Start Station:', common_start_station)
    print('')

    common_end_station = _most_common(df['End Station'])

    print('Most Common End Station:', common_end_station)
    print('')

    # A trip is identified by its (start, end) pair joined into one label.
    df['combo'] = df['Start Station'] + ' to ' + df['End Station']
    common_station_combo = _most_common(df['combo'])

    print('Most common Combination:', common_station_combo)
    print('')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-' * 40)
예제 #2
0
파일: code.py 프로젝트: hirax/PyWeaver
def f():
    """Build a typed DataFrame from the grid held in ``display['grid_data']``.

    Reads the 'data', 'columns' and 'column_types' entries of the
    module-level ``display['grid_data']`` mapping. The last data row is
    dropped, then each column is converted according to its declared type:
    'Number' -> numeric, 'Date' -> datetime, anything else -> text.

    Returns:
        pandas.DataFrame with one converted column per entry in 'columns'.
    """

    grid = display['grid_data']
    cols = grid['columns']
    rows = grid['data'][:-1]  # Get rid of the empty bottom row
    df = pd.DataFrame(rows, columns=cols)

    # Column names and their declared types are parallel lists.
    for c, t in zip(cols, grid['column_types']):
        if t == 'Number':
            df[c] = pd.to_numeric(df[c])
        elif t == 'Date':
            df[c] = pd.to_datetime(df[c])
        else:
            # Default: convert to text. pandas has no top-level
            # ``to_string`` function, so cast the column element-wise.
            df[c] = df[c].astype(str)

    return df
# RENAME FOR CONVENIENCE
df.columns = [
    'Week', 'Day', 'NonUrgent', 'Urgent', 'TypeA', 'TypeB', 'TypeC', 'Fiscal',
    'Traffic', 'Banking1', 'Banking2', 'Banking3', 'Total'
]

# Running row number, used below to assign each row to a month.
df['No'] = range(len(df))
# Rows 0-18 -> month 1, rows 19-38 -> month 2, rows 39 and later -> month 3.
df['Month'] = np.where(df['No'] > 38, 3, 2)
df['Month'] = np.where(df['No'] < 19, 1, df['Month'])

# Day of month derived from the week number and the day code.
# NOTE(review): Week == 1 with Day == 2 yields day 0, which is not a valid
# date - confirm the input never contains that combination.
df['dayofmonth'] = 7 * (df['Week'] - 1) + (df['Day'] - 2)

# Rows assigned to month 3 are shifted back by two days.
df['dayofmonth'] = np.where(df['No'] > 38, df['dayofmonth'] - 2,
                            df['dayofmonth'])

# Convert element-wise and zero-pad to two digits. ``str(series)`` would
# stringify the repr of the whole column, and pandas has no top-level
# ``to_string`` function.
df['daystring'] = df['dayofmonth'].astype(str).str.zfill(2)

# ISO-style "1999-MM-DD" label; the month is zero-padded as well so that it
# matches the '%Y-%m-%d' format used for parsing below.
df['datetime_string'] = ("1999-" + df['Month'].astype(str).str.zfill(2) +
                         "-" + df['daystring'])

# Parse the string assembled above (previously an unrelated, undefined
# ``df1`` frame was parsed and ``datetime_string`` was never used).
df['datetime'] = pd.to_datetime(df['datetime_string'], format='%Y-%m-%d')

new_df = dp.apply_order_fill_index(df, datetime_column, index_col_name,
                                   expected_interval)

# ###########################################################################################################
# CREATE AN INDEX COLUMN TO MANIPULATE
# IN MANY SCENARIOS YOU MIGHT WANT TO MAKE THIS AWARE OF THE GAPS IN THE
예제 #4
0
# Sum along axis 0 (for a DataFrame: one total per column).
df.sum(axis=0)


# In[82]:


# ADVANCED #2: explore the dataset using to_string(), columns, index, dtypes,
# shape, info() and describe().
# NOTE(review): this rebinds the name `pd` to the object returned by
# read_csv, so from here on `pd` is the loaded data rather than the pandas
# module (assuming `pd` was the pandas alias); any later `pd.read_csv(...)`
# or `pd.DataFrame(...)` call will fail. A distinct name would be safer, but
# the cells below rely on the current binding.
pd=pd.read_csv(r'C:\Users\Nat\Downloads\insurance.csv')
print(pd)


# In[83]:


# Render the loaded data as one string; here `pd` is the loaded frame (see
# the rebinding above), so this is the frame's own to_string() method.
pd.to_string()


# In[84]:


# Column labels of the loaded data.
pd.columns


# In[85]:


# Same expression as the previous cell.
pd.columns


# In[86]:
예제 #5
0
# --- Merging / joining / updating ---
pd.merge(left, right, left_on='key', right_index=True, how='outer')
result = pd.merge(left.reset_index(), right.reset_index(), on=['key'], how='inner').set_index(['key','Y'])
df1=df
df2=df
# combine_first keeps df1's values and fills its missing entries from df2
# (useful for update scenarios).
result = df1.combine_first(df2)
# update overwrites df1 in place with df2's non-NA values.
df1.update(df2)
left.join(right, on=['abc', 'xy'], how='inner')


# --- Pivot tables ---
# NOTE(review): placeholder only - a list has no .pivot(); bind `df` to a
# real DataFrame before running the calls below.
df=[]
df.pivot(index='date', columns='variable', values='value')
# aggfunc: function to use for aggregation, defaulting to numpy.mean.
# aggfunc examples: 'mean', np.sum, 'size', ['mean', 'sum']
pd.pivot_table(df, values='D', index=['B'], columns=['A', 'C'], aggfunc=np.sum,margins=True, fill_value=0)
# to_string is a DataFrame method, not a top-level pandas function;
# na_rep sets the text shown for missing values.
df.to_string(na_rep='')

# --- Stack / unstack ---
# level examples: ['animal', 'hair_length'], [1, 2]
df.stack(level=['animal', 'hair_length'])
df.stack(level=[1, 2])
df.stack('exp')
df.unstack(0)

# --- Cross tabulation, binning, dummies, factorize ---
pd.crosstab(df.A, df.B, values=df.C, aggfunc=np.sum, normalize=True,  margins=True)
ages = np.array([10, 15, 13, 12, 23, 25, 28, 59, 60])
c = pd.cut(ages, bins=[0, 18, 35, 70])
pd.get_dummies(pd.cut(ages, bins=[0, 18, 35, 70]))
# prefix examples: 'new_prefix', ['from_A', 'from_B']
pd.get_dummies(df, columns=['A','B'], prefix='new_prefix')
labels, uniques = pd.factorize(df['a'])
cols = np.array(['key', 'row', 'item', 'col'])
# Prefix each (stringified) random integer with its column's label.
df = cols + pd.DataFrame((np.random.randint(5, size=(20, 4))// [2, 1, 2, 1]).astype(str))
df.explode('values')