def station_stats(df):
    """Print the most common start station, end station and trip.

    Args:
        df: pandas DataFrame with 'Start Station' and 'End Station' columns
            holding string station names.

    Returns:
        None. All results are written to stdout.

    Raises:
        KeyError: if either station column is missing from ``df``.
    """
    print('\nCalculating The Most Popular Stations and Trip...\n')
    print('')
    start_time = time.time()

    # mode() can return several tied values; [0] takes the first of them.
    common_start_station = df['Start Station'].mode()[0]
    print('Most Common Start Station:', common_start_station)
    print('')

    common_end_station = df['End Station'].mode()[0]
    print('Most Common End Station:', common_end_station)
    print('')

    # Build the trip labels in a local Series so the caller's frame is not
    # modified as a side effect of computing statistics.
    combo = df['Start Station'] + ' to ' + df['End Station']
    common_station_combo = combo.mode().loc[0]
    print('Most common Combination:', common_station_combo)
    print('')

    print("\nThis took %s seconds." % (time.time() - start_time))
    print('-' * 40)
def f():
    """Build a typed DataFrame from the grid held in the module-level ``display``.

    Reads ``display['grid_data']`` and uses its 'data', 'columns' and
    'column_types' entries. The last data row is dropped, then each column is
    converted according to its declared type: 'Number' -> numeric,
    'Date' -> datetime, anything else -> text.

    Returns:
        pandas.DataFrame with one column per entry in 'columns'.
    """
    grid = display['grid_data']
    cols = grid['columns']
    rows = grid['data'][:-1]  # Get rid of the empty bottom row
    df = pd.DataFrame(rows, columns=cols)

    # Pair each column name with its declared type.
    for c, t in zip(cols, grid['column_types']):
        if t == 'Number':
            df[c] = pd.to_numeric(df[c])
        elif t == 'Date':
            df[c] = pd.to_datetime(df[c])
        else:
            # Default: convert to text. pandas has no top-level to_string();
            # astype(str) converts the column element by element.
            df[c] = df[c].astype(str)
    return df
# RENAME FOR CONVENIENCE
df.columns = [
    'Week', 'Day', 'NonUrgent', 'Urgent', 'TypeA', 'TypeB', 'TypeC',
    'Fiscal', 'Traffic', 'Banking1', 'Banking2', 'Banking3', 'Total',
]

# The row number drives the month assignment:
# rows 0-18 -> month 1, rows 19-38 -> month 2, rows 39 and up -> month 3.
df['No'] = range(0, len(df))
df['Month'] = np.where(df['No'] > 38, 3, 2)
df['Month'] = np.where(df['No'] < 19, 1, df['Month'])

# Day of month derived from the week and day numbers; rows past 38 are
# shifted back by two days.
df['dayofmonth'] = 7 * (df['Week'] - 1) + (df['Day'] - 2)
df['dayofmonth'] = np.where(df['No'] > 38, df['dayofmonth'] - 2, df['dayofmonth'])

# Convert element by element: str(series) would stringify the whole Series
# (index, values and dtype footer) into one value, and pandas has no
# top-level to_string(). zfill(2) left-pads single-digit days with "0".
df['daystring'] = df['dayofmonth'].astype(str).str.zfill(2)
df['datetime_string'] = "1999-" + df['Month'].astype(str) + "-" + df['daystring']

# NOTE(review): the original parsed df1['Actual Settlement Date'] here; df1 is
# not defined in this section and the string built above was otherwise
# unused, so the date is parsed from 'datetime_string' instead - confirm.
df['datetime'] = pd.to_datetime(df['datetime_string'], format='%Y-%m-%d')
new_df = dp.apply_order_fill_index(df, datetime_column, index_col_name, expected_interval)

# ###########################################################################################################
# CREATE AN INDEX COLUMN TO MANIPULATE
# IN MANY SCENARIOS YOU MIGHT WANT TO MAKE THIS AWARE OF THE GAPS IN THE
df.sum(axis=0)


# In[82]:


# ADVANCED
# 2 Explore the dataset using functions like to_string(), columns, index,
#   dtypes, shape, info() and describe()
# NOTE(review): this rebinds the name `pd` from the pandas module to the
# loaded DataFrame, so every `pd.` call below is a DataFrame attribute and
# pandas itself is no longer reachable through `pd` after this cell.
pd = pd.read_csv(r'C:\Users\Nat\Downloads\insurance.csv')
print(pd)


# In[83]:


# to_string(), columns, index, dtypes, shape, info() and describe()
pd.to_string()


# In[84]:


pd.columns


# In[85]:


pd.columns


# In[86]:
pd.merge(left, right, left_on='key', right_index=True, how='outer')
result = pd.merge(
    left.reset_index(), right.reset_index(), on=['key'], how='inner'
).set_index(['key', 'Y'])

df1 = df
df2 = df
# Overwrite data; used for update scenarios.
result = df1.combine_first(df2)
# Overwrite data (update() modifies df1 in place).
df1.update(df2)
left.join(right, on=['abc', 'xy'], how='inner')

# Pivot table
# Placeholder frame: the original rebound df to a list, which has none of
# the DataFrame methods called below.
df = pd.DataFrame()
df.pivot(index='date', columns='variable', values='value')
# aggfunc: function to use for aggregation, defaulting to numpy.mean.
# aggfunc: 'mean', np.sum, 'size', ['mean', 'sum']
pd.pivot_table(df, values='D', index=['B'], columns=['A', 'C'],
               aggfunc=np.sum, margins=True, fill_value=0)
# to_string() is a DataFrame method, not a top-level pandas function;
# na_rep sets the text shown for missing values.
df.to_string(na_rep='')

# level: ['animal', 'hair_length'], [1, 2]
df.stack(level=['animal', 'hair_length'])
df.stack(level=[1, 2])
df.stack('exp')
df.unstack(0)

pd.crosstab(df.A, df.B, values=df.C, aggfunc=np.sum, normalize=True, margins=True)

ages = np.array([10, 15, 13, 12, 23, 25, 28, 59, 60])
c = pd.cut(ages, bins=[0, 18, 35, 70])
pd.get_dummies(pd.cut(ages, bins=[0, 18, 35, 70]))
# prefix: 'new_prefix', ['from_A', 'from_B']
pd.get_dummies(df, columns=['A', 'B'], prefix='new_prefix')

labels, uniques = pd.factorize(df['a'])

cols = np.array(['key', 'row', 'item', 'col'])
df = cols + pd.DataFrame((np.random.randint(5, size=(20, 4)) // [2, 1, 2, 1]).astype(str))
df.explode('values')