def basic_set(csvStr, csvStr2):
    """Read two CSV files, touch the first rows of each, return the first.

    Args:
        csvStr: Location of the first CSV, passed straight to ``pd.read_csv``.
        csvStr2: Location of the second CSV. Its ``tpep_pickup_datetime`` and
            ``tpep_dropoff_datetime`` columns are parsed as dates.

    Returns:
        The dataframe read from ``csvStr``. The dataframe read from
        ``csvStr2`` is loaded and accessed but not returned.
    """
    # Import CSV
    ray_df = pd.read_csv(csvStr)
    ray_df2 = pd.read_csv(
        csvStr2,
        parse_dates=['tpep_pickup_datetime', 'tpep_dropoff_datetime'],
    )

    # Accesses: the head() results were never read, so they are no longer
    # bound to names; the calls themselves are kept.
    ray_df.head()
    ray_df2.head()

    return ray_df
def test_from_csv_delimiter():
    """Check that ``pd.read_csv`` agrees with pandas on a '|'-delimited CSV.

    ``setup_csv_file`` is asked to write the fixture with ``'|'`` as the
    delimiter, so both readers are given the same separator. Without it,
    both would parse the file with the default ``','`` and the comparison
    would not exercise delimiter handling at all.
    """
    delimiter = '|'
    setup_csv_file(SMALL_ROW_SIZE, delimiter=delimiter)
    try:
        pandas_df = pandas.read_csv(TEST_CSV_FILENAME, sep=delimiter)
        ray_df = pd.read_csv(TEST_CSV_FILENAME, sep=delimiter)

        assert ray_df_equals_pandas(ray_df, pandas_df)
    finally:
        # Run the teardown even when the read or the assertion fails.
        teardown_csv_file()
import ray.dataframe as pd
# import pandas as pd

# Scenario 12: load the yellow_tripdata CSV, index it by pickup timestamp,
# then resample and roll over the passenger counts. A progress line is
# printed after every step.
PREVIEW_ROWS = 3

print("############ 12: Test Ray TimeSeries #############")

# Step 1: read the CSV.
ray_df = pd.read_csv("yellow_tripdata_2015-01-01.csv")
print(" Read_CSV finished. Result:")
print(ray_df.head(PREVIEW_ROWS))

# Step 2: make the pickup timestamp column the index.
ray_df = ray_df.set_index("tpep_pickup_datetime")
print(" set_index finished. Result:")
print(ray_df.head(PREVIEW_ROWS))

# Step 3: convert the index values to datetimes.
datetime_index = pd.to_datetime(ray_df.index)
ray_df.index = datetime_index
print(" to_datetime finished. Result:")
print(ray_df.head(PREVIEW_ROWS))

# Step 4: resample passenger counts per day and take the mean.
resample_source = ray_df.passenger_count
ray_resamp = resample_source.resample("1d")
print(" resample finished")
ray_mn = ray_resamp.mean()
print(" mean of resample finished. Result:")
print(ray_mn)

# Step 5: rolling window of 10 rows over passenger counts.
rolling_source = ray_df.passenger_count
ray_roll = rolling_source.rolling(10)
print(" rolling aggregation finished. Result:")
print(ray_roll)
# The rolling mean is computed but not printed.
result = ray_roll.mean()
import ray.dataframe as pd
# import pandas as pd

# Scenario 3: read a CSV and convert one column to datetimes in place,
# printing the first rows before and after the conversion.
PICKUP_COLUMN = "tpep_pickup_datetime"
PREVIEW_ROWS = 3

print("############ 3: Test Ray Convert [Column] toDateTime #############")

# Step 1: read the CSV.
ray_df = pd.read_csv("yellow_1of3.csv")
print(" Read_CSV finished. Result:")
print(ray_df.head(PREVIEW_ROWS))

# Step 2: replace the pickup column with its datetime conversion.
pickup_as_datetime = pd.to_datetime(ray_df[PICKUP_COLUMN])
ray_df[PICKUP_COLUMN] = pickup_as_datetime

# The preview is printed before the banner, hence "Result above".
print(ray_df.head(PREVIEW_ROWS))
print(" to_datetime(df[column]) finished. Result above.")
import ray.dataframe as pd
# import pandas as pd

# Scenario 10: read the trip CSV with parsed date columns, merge it with a
# small payment-name lookup table, then group the merged frame by name.
PREVIEW_ROWS = 3
DATE_COLUMNS = ["tpep_pickup_datetime", "tpep_dropoff_datetime"]

print("############ 10: Test Ray Merge #############")

# Step 1: read the CSV, parsing both timestamp columns.
ray_df = pd.read_csv("yellow_tripdata_2015-01-01.csv", parse_dates=DATE_COLUMNS)
print(" Read_CSV finished. Result:")
print(ray_df.head(PREVIEW_ROWS))

# Step 2: build the lookup table from payment code to payment name.
payment_table = {
    "num": [1, 2, 3, 4, 5, 6],
    "payment_name": [
        "Credit Card",
        "Cash",
        "No Charge",
        "Dispute",
        "Unknown",
        "Voided trip",
    ],
}
ray_payments = pd.DataFrame(payment_table)
print(" new DataFrame finished. Result:")
print(ray_payments)

# Step 3: join trips to the lookup table on the payment code.
ray_df2 = ray_df.merge(ray_payments, left_on="payment_type", right_on="num")
print(" merge finished. Result:")
print(ray_df2.head(PREVIEW_ROWS))

# Step 4: group the merged frame by payment name.
grouping_key = ray_df2.payment_name
ray2_groupby = ray_df2.groupby(grouping_key)
print(" groupby on merge finished. Result:")
print(ray2_groupby.head(PREVIEW_ROWS))

# Ray does not support getting column from groupby below
# result = ray2_groupby.tip_amount.mean()
# print(' mean on groupby finished. Result:')
'''
a = pd.DataFrame([1,2,3])
print(a[0:10])
dates = ts.trade_cal()
test =dates['calendarDate'].tolist()
d= {"date":test}
open_dates = dates.loc[dates['isOpen']==1].reset_index()
test.reverse()
d['date'].reverse()
end = open_dates.loc[open_dates.loc[open_dates['calendarDate']=='2014-03-26'].index +10, 'calendarDate']
print(end.values[0])
'''


def _format_trade_date(raw):
    """Return ``raw`` as text with dashes inserted after characters 4 and 6.

    The value is converted with ``str`` once and sliced as ``[0:4]``,
    ``[4:6]`` and ``[6:9]``, the same slices the inline lambda used.
    """
    text = str(raw)
    return text[0:4] + "-" + text[4:6] + "-" + text[6:9]


# Time the load-and-prepare section; `a` and `b` are the start/end readings.
a = clock()
k_data = pd.read_csv('/home/wxl/p/StockBrain/backend/data/ashareeodprices.csv', low_memory=False)

# Keep only the rows for one S_INFO_WINDCODE.
stock = k_data.query('S_INFO_WINDCODE == "600031.SH"')

# Work on an explicit copy so the column assignment below writes to an
# independent frame instead of a slice of the query result.
stocks = stock.copy()
stocks['TRADE_DT'] = stocks['TRADE_DT'].apply(_format_trade_date)
#, meta=('TRADE_DT', 'str')
stocks.set_index('TRADE_DT', inplace=True)
b = clock()

# Earlier experiments kept for reference (they appear to target dask).
#print('finish reading')
#pre_k_data = dd.from_pandas(pre_k_data, npartitions = nCores)
#pre_k_data['TRADE_DT'] = pre_k_data.map_partitions(lambda df: df['TRADE_DT'].apply(lambda x: str(x)[0:4]+"-"+str(x)[4:6]+"-"+str(x)[6:9]) )
#k_data['TRADE_DT'] = k_data['TRADE_DT'].apply(lambda x: str(x)[0:4]+"-"+str(x)[4:6]+"-"+str(x)[6:9])
#print('finish design')
#pre_k_data.compute(get=get)
#stock = k_data.loc[k_data['S_INFO_WINDCODE'] == '600031.SH']
#print(stock.index)