def evaluate_steady_state(images_root, results_root, device):
    """Record one long ``perform_ncr`` run per loss type and store the raw curves.

    The image named by ``get_npr_general_proxy_file()`` is read from
    ``images_root`` and moved to ``device``. ``perform_ncr`` is then run once
    with ``ssim_loss=False`` and once with ``ssim_loss=True`` inside a
    ``record_nst`` recorder. From each recording only the content-loss column
    (renamed to ``loss``) and the ``"SSIM score"`` column (renamed to
    ``ssim_score``) are kept, rows in which both are missing are dropped, and
    the frame is written without its index to
    ``<results_root>/steady_state/raw/se.csv`` or ``.../ssim.csv``.
    """
    total_steps = 200_000
    proxy_path = path.join(images_root, get_npr_general_proxy_file())
    target_image = read_image(proxy_path).to(device)

    # Arguments shared by both runs; only ``ssim_loss`` differs between them.
    ncr_kwargs = {
        "level_steps": (0, total_steps),
        "quiet": False,
        "print_steps": intgeomspace(1, total_steps, num=1000),
        "diagnose_ssim_score": True,
    }
    raw_root = path.join(results_root, "steady_state", "raw")

    for use_ssim in (False, True):
        loss_type = "SSIM" if use_ssim else "SE"

        with record_nst(quiet=True) as recorder:
            perform_ncr(target_image, ssim_loss=use_ssim, **ncr_kwargs)

        new_names = {
            f"Content loss ({loss_type})": "loss",
            "SSIM score": "ssim_score",
        }
        curves = recorder.extract().rename(columns=new_names)
        curves = curves[["ssim_score", "loss"]].dropna(axis="index", how="all")

        out_file = path.join(raw_root, f"{loss_type.lower()}.csv")
        df_to_csv(curves, out_file, index=False)
def process_ssim_window(results_root):
    """Reduce the raw SSIM-window results to per-configuration medians.

    Reads ``<results_root>/ssim_window/raw.csv`` (first column as index),
    takes the median over every ``(window_type, output_shape, radius)``
    group, drops the ``seed`` column and writes the result to
    ``processed.csv`` in the same directory via ``df_to_csv``.
    """
    root = path.join(results_root, "ssim_window")
    group_keys = ["window_type", "output_shape", "radius"]

    raw = pd.read_csv(path.join(root, "raw.csv"), index_col=0)
    medians = raw.groupby(group_keys).median().drop("seed", axis="columns")

    df_to_csv(medians, path.join(root, "processed.csv"))
def _delte_order(self, term, child_order_acceptance_id):
    """Drop one order from the table of *term* and sync the CSV file.

    The row labelled ``child_order_acceptance_id`` is removed in place from
    ``self.child_orders[term]``. If rows remain, the table is written to
    ``self.p_child_orders_path[term]`` with its index; if none remain, that
    file is removed with ``rm_file`` instead.
    """
    orders = self.child_orders[term]
    csv_path = self.p_child_orders_path[term]

    orders.drop(index=[child_order_acceptance_id], inplace=True)

    # Keep the CSV file in step with the in-memory table.
    if len(orders):
        df_to_csv(str(csv_path), orders, index=True)
    else:
        rm_file(csv_path)
    logger.debug(f'{str(csv_path)} が更新されました。')
def write_input_CSVs():
    """Write the dev and test input files under ``./input`` via ``ud2csv``.

    For the dev split the sentences are first written to
    ``./input/models/sentences_dev.txt``. Then, for dev and test in that
    order, the task-1 CSV, the task-1 "gd" CSV, the task-2.2 CSV and the
    grammatical-labels CSV are produced. Both splits use the same
    dependency file ``./semeval_data/dep-stx/pos-gold-dep-auto.conll.txt``.
    """
    data_dir = "./semeval_data"

    def export_split(task1_file, task22_file, ud_file,
                     task1_csv, task1_gd_csv, task22_csv, labels_csv):
        # One split: three direct CSV exports plus the grammatical-label frame.
        ud2csv.task1_to_csv(task1_file, ud_file, task1_csv)
        ud2csv.task1_to_csv_gd(task1_file, ud_file, task1_gd_csv)
        ud2csv.task22_to_csv(task22_file, ud_file, task22_csv)
        labels_df = ud2csv.task22_to_df_withFrameArgsDependencies(
            task22_file, ud_file)
        df_to_csv(labels_df, labels_csv)

    # Task 2.1 inputs (``/test/task-2.1.txt``, ``/dev/task-2.1.txt``) are not used.
    ud_test = data_dir + "/dep-stx/pos-gold-dep-auto.conll.txt"
    task1_test = data_dir + "/test/task-1.txt"
    task22_test = data_dir + "/test/task-2.2.txt"

    ud_dev = data_dir + "/dep-stx/pos-gold-dep-auto.conll.txt"
    task1_dev = data_dir + "/dev/task-1.txt"
    task22_dev = data_dir + "/dev/task-2.2.txt"

    # ---------------------------------------------------------------- Dev
    print('writing sentences for dev')
    ud2csv.ud_sentences_to_file(ud_dev, './input/models/sentences_dev.txt')

    print('writing csvs for dev')
    export_split(
        task1_dev, task22_dev, ud_dev,
        task1_csv='./input/train_task1_dev.csv',
        task1_gd_csv='./input/gd_task1_dev.csv',
        task22_csv='./input/train_task22_dev.csv',
        labels_csv='./input/all_grammaticalLabels_dev.csv',
    )

    # ---------------------------------------------------------------- Test
    print('writing csvs for test')
    export_split(
        task1_test, task22_test, ud_test,
        task1_csv='./input/train_task1_test.csv',
        task1_gd_csv='./input/gd_task1_test.csv',
        task22_csv='./input/train_task22_test.csv',
        labels_csv='./input/all_grammaticalLabels_test.csv',
    )
def update_unrealized_profit(self, term):
    """Recompute the ``profit`` columns of the table of *term* and save it.

    Does nothing when ``self.child_orders[term]`` is empty. Otherwise every
    row gets ``size * (current BUY price - price) - total_commission_yen``
    as ``profit``, rows whose ``child_order_state`` is ``'ACTIVE'`` are reset
    to 0, ``cumsum_profit`` is rebuilt as the running sum of ``profit`` and
    the table is written to ``self.p_child_orders_path[term]`` with its index.
    """
    orders = self.child_orders[term]
    if orders.empty:
        return

    buy_price_now = self.latest_summary['BUY']['now']['price']
    orders['profit'] = (orders['size'] * (buy_price_now - orders['price'])
                        - orders['total_commission_yen'])

    is_active = orders['child_order_state'] == 'ACTIVE'
    orders.loc[is_active, 'profit'] = 0
    orders['cumsum_profit'] = orders['profit'].cumsum()

    # Update the CSV file.
    csv_path = str(self.p_child_orders_path[term])
    df_to_csv(csv_path, orders, index=True)
    logger.debug(f'{csv_path} が更新されました。')
def benchmark_ncr(images_root, results_root, device):
    """Run ``perform_ncr`` for every image, loss variation and seed; store the scores.

    The loss variations are ``ssim_loss=False`` (with no weight ratio)
    followed by ``ssim_loss=True`` with each of the weight ratios
    ``0.0, 3.0, 9.0, inf``. Every variation is run with seeds ``0..4`` on
    each file returned by ``get_npr_general_files()``. Output and target are
    passed through the transform from ``get_eval_transform`` and scored with
    ``SimplifiedMSSIM``. One row per run is written to
    ``<results_root>/ncr_benchmark/raw.csv`` via ``df_to_csv``.
    """
    target_files = get_npr_general_files()

    weight_ratios = (0.0, 3.0, 9.0, np.inf)
    loss_variations = [(False, None)]
    loss_variations.extend((True, ratio) for ratio in weight_ratios)
    seeds = np.arange(5)

    score_fn = SimplifiedMSSIM().to(device)

    records = []
    for target_file in target_files:
        target_name, _ = path.splitext(path.basename(target_file))
        target_image = read_image(path.join(images_root, target_file)).to(device)

        eval_transform = get_eval_transform(target_image)
        target_image_eval = eval_transform(target_image)

        # Same iteration order as the product of variations and seeds.
        for ssim_loss, weight_ratio in loss_variations:
            for seed in seeds:
                output_image = perform_ncr(
                    target_image,
                    seed=seed,
                    ssim_loss=ssim_loss,
                    ssim_component_weight_ratio=weight_ratio,
                )
                score = score_fn(eval_transform(output_image), target_image_eval)
                records.append(
                    (target_name, ssim_loss, weight_ratio, seed, score.cpu().item())
                )

    columns = ("name", "ssim_loss", "ssim_component_weight_ratio", "seed", "ssim_score")
    results = pd.DataFrame.from_records(records, columns=columns)
    df_to_csv(results, path.join(results_root, "ncr_benchmark", "raw.csv"))
def process_ncr_benchmark(results_root):
    """Turn the raw NCR benchmark into a median table per image and loss type.

    Reads ``<results_root>/ncr_benchmark/raw.csv`` (first column as index),
    collapses ``ssim_loss`` and ``ssim_component_weight_ratio`` into a single
    ``loss_type`` label, takes the median per ``(name, loss_type)``, drops
    ``seed``, unstacks the loss types into columns and writes the table to
    ``processed.csv`` in the same directory via ``df_to_csv``.
    """
    root = path.join(results_root, "ncr_benchmark")
    loss_columns = ["ssim_loss", "ssim_component_weight_ratio"]

    def describe_loss(row):
        # ``row`` carries exactly the two values of ``loss_columns``.
        uses_ssim, weight_ratio = row
        if uses_ssim:
            return f"SSIM_loss__component_weight_ratio_{weight_ratio:g}"
        return "SE loss"

    raw = pd.read_csv(path.join(root, "raw.csv"), index_col=0)
    labels = raw[loss_columns].apply(describe_loss, axis="columns")
    labelled = raw.assign(loss_type=labels).drop(columns=loss_columns)

    medians = labelled.groupby(["name", "loss_type"]).median()
    table = medians.drop(columns="seed").unstack()

    df_to_csv(table, path.join(root, "processed.csv"))
def evaluate_ssim_window(images_root, results_root, device):
    """Score ``perform_ncr`` for every window type, output shape, radius and seed.

    The image named by ``get_npr_general_proxy_file()`` is the target. For
    each combination of window type (``gauss``, ``box``), output shape
    (``same``, ``valid``) and radius (1 to 9), one image filter is built and
    ``perform_ncr`` is run with seeds ``0..4``. Output and target are passed
    through the transform from ``get_eval_transform`` and scored with
    ``SimplifiedMSSIM``. One row per run is written to
    ``<results_root>/ssim_window/raw.csv`` via ``df_to_csv``.
    """
    target_file = path.join(images_root, get_npr_general_proxy_file())
    target_image = read_image(target_file).to(device)

    eval_transform = get_eval_transform(target_image)
    target_image_eval = eval_transform(target_image)

    def build_filter(window_type, output_shape, radius):
        common = {"output_shape": output_shape, "padding_mode": "replicate"}
        if window_type != "gauss":
            # Any other window type ("box") uses the box filter.
            return BoxFilter(radius=radius, **common)
        return GaussFilter(radius=radius, std=radius / 3.0, **common)

    window_types = ("gauss", "box")
    output_shapes = ("same", "valid")
    radii = range(1, 10)
    seeds = range(5)

    score_fn = SimplifiedMSSIM().to(device)

    records = []
    for window_type in window_types:
        for output_shape in output_shapes:
            for radius in radii:
                image_filter = build_filter(window_type, output_shape, radius)
                for seed in seeds:
                    output_image = perform_ncr(
                        target_image, seed=seed, image_filter=image_filter
                    )
                    score = score_fn(eval_transform(output_image), target_image_eval)
                    records.append(
                        (window_type, output_shape, radius, seed, score.cpu().item())
                    )

    columns = ("window_type", "output_shape", "radius", "seed", "ssim_score")
    results = pd.DataFrame.from_records(records, columns=columns)
    df_to_csv(results, path.join(results_root, "ssim_window", "raw.csv"))
def load_latest_child_orders(self,
                             term,
                             child_order_cycle,
                             child_order_acceptance_id,
                             related_child_order_acceptance_id='no_id'):
    """Fetch one child order from the API, merge it into the table of *term* and save it.

    ``get_child_orders`` is polled until it returns a non-empty frame. If
    more than 5 seconds have passed after a call, the order is removed via
    ``self._delte_order`` and the method returns ``None`` early.

    Otherwise the fetched frame gets the bookkeeping columns
    ``child_order_cycle``, ``related_child_order_acceptance_id``,
    ``total_commission_yen``, ``profit`` and ``volume`` (``price * size``) and
    is stored in ``self.child_orders[term]`` under the label
    ``child_order_acceptance_id``. For a ``'COMPLETED'`` order the commission
    in yen is filled in, and for a completed ``'SELL'`` the realized profit
    against the related order is computed and stored. Finally
    ``cumsum_profit`` is rebuilt and the table is written to
    ``self.p_child_orders_path[term]``.

    Args:
        term: Key into ``self.child_orders`` / ``self.p_child_orders_path``.
        child_order_cycle: Value stored in the ``child_order_cycle`` column.
        child_order_acceptance_id: Id passed to the API and used as row label.
        related_child_order_acceptance_id: Row label of the related order
            read for the SELL profit calculation; ``'no_id'`` when there is
            none.
    """
    logger.debug(f'child_order_acceptance_id: {child_order_acceptance_id}')
    # get a child order from api
    child_orders_tmp = pd.DataFrame()
    start_time = time.time()
    # Poll (without sleeping) until the API returns a non-empty frame.
    while child_orders_tmp.empty:
        child_orders_tmp = get_child_orders(
            product_code=self.product_code,
            region='Asia/Tokyo',
            child_order_acceptance_id=child_order_acceptance_id)
        # NOTE(review): this timeout is checked after every call, so it also
        # fires when the call that just returned was non-empty but more than
        # 5 s have elapsed — confirm that dropping the order then is intended.
        if time.time() - start_time > 5:
            logger.warning(
                f'{child_order_acceptance_id} はすでに存在しないため、ファイルから削除します。')
            self._delte_order(
                term=term,
                child_order_acceptance_id=child_order_acceptance_id)
            return
    # Bookkeeping columns that are not part of the API response.
    child_orders_tmp['child_order_cycle'] = child_order_cycle
    child_orders_tmp[
        'related_child_order_acceptance_id'] = related_child_order_acceptance_id
    child_orders_tmp['total_commission_yen'] = 0
    child_orders_tmp['profit'] = 0
    child_orders_tmp[
        'volume'] = child_orders_tmp['price'] * child_orders_tmp['size']
    # Insert or overwrite the row labelled by the acceptance id
    # (assumes the fetched frame is indexed by that id — TODO confirm).
    if self.child_orders[term].empty:
        self.child_orders[term] = child_orders_tmp
    else:
        self.child_orders[term].loc[
            child_order_acceptance_id] = child_orders_tmp.loc[
                child_order_acceptance_id]
    if self.child_orders[term].at[child_order_acceptance_id,
                                  'child_order_state'] == 'COMPLETED':
        # Compute the trading commission in yen (price * total_commission).
        total_commission = self.child_orders[term].at[
            child_order_acceptance_id, 'total_commission']
        price = self.child_orders[term].at[child_order_acceptance_id, 'price']
        self.child_orders[term].at[
            child_order_acceptance_id,
            'total_commission_yen'] = price * total_commission
        # Log the execution for orders without a related order and for SELLs.
        if self.child_orders[term].at[child_order_acceptance_id, 'related_child_order_acceptance_id'] == 'no_id' \
                or self.child_orders[term].at[child_order_acceptance_id, 'side'] == 'SELL':
            logger.info(
                f'[{self.product_code} {term} {child_order_cycle} {self.child_orders[term].at[child_order_acceptance_id, "side"]} {child_order_acceptance_id}] 約定しました!'
            )
        # A completed SELL realizes profit against its related order:
        # sell proceeds minus the related order's cost minus both commissions.
        if self.child_orders[term].at[child_order_acceptance_id,
                                      'side'] == 'SELL':
            sell_price = self.child_orders[term].at[
                child_order_acceptance_id, 'price']
            sell_size = self.child_orders[term].at[
                child_order_acceptance_id, 'size']
            sell_commission = self.child_orders[term].at[
                child_order_acceptance_id, 'total_commission_yen']
            buy_price = self.child_orders[term].at[
                related_child_order_acceptance_id, 'price']
            buy_size = self.child_orders[term].at[
                related_child_order_acceptance_id, 'size']
            buy_commission = self.child_orders[term].at[
                related_child_order_acceptance_id, 'total_commission_yen']
            profit = sell_price * sell_size - buy_price * buy_size
            profit -= sell_commission + buy_commission
            logger.info(
                f'[{self.product_code} {term} {child_order_cycle}] {profit}円の利益が発生しました。'
            )
            self.child_orders[term].at[child_order_acceptance_id,
                                       'profit'] = profit
    self.child_orders[term]['cumsum_profit'] = self.child_orders[
        term]['profit'].cumsum()
    # Update the CSV file.
    df_to_csv(str(self.p_child_orders_path[term]),
              self.child_orders[term],
              index=True)
    logger.debug(f'{str(self.p_child_orders_path[term])} が更新されました。')
def _add_open_sell_unrealized_profit(total, short_orders, latest_summary):
    """Add the unrealized profit behind every ACTIVE SELL order to *total*.

    For each row of *short_orders* with ``side == "SELL"`` and
    ``child_order_state == "ACTIVE"``, the row labelled by its
    ``related_child_order_acceptance_id`` is looked up and
    ``(current SELL price - price) * size - total_commission_yen`` of that
    related row is added. Returns the updated total.
    """
    df_active_sell_order = short_orders.query(
        'side == "SELL" and child_order_state == "ACTIVE"')
    if df_active_sell_order.empty:
        return total

    sell_price_now = latest_summary['SELL']['now']['price']
    related_ids = df_active_sell_order[
        'related_child_order_acceptance_id'].values.tolist()
    for related_id in related_ids:
        # ``.at`` is required: plain ``frame[row, col]`` is a column lookup.
        total += (sell_price_now - short_orders.at[related_id, 'price']) \
            * short_orders.at[related_id, 'size'] \
            - short_orders.at[related_id, 'total_commission_yen']
    return total


def calc_profit(product_code, child_orders, latest_summary):
    """Update the daily, monthly and yearly profit CSV files.

    The original note lists the prices the calculation is meant to rely on:
    the current price and the previous day's, month's and year's close.

    Today's row in ``daily_profit.csv`` (under ``PROFIT_DIR``) is created or
    updated with realized, unrealized and total profit for *product_code*
    and with the sums over all per-product columns. The current month's
    daily rows are then summed into ``monthly_profit.csv`` and the current
    year's monthly rows into ``yearly_profit.csv``. Dates use UTC+9.

    Args:
        product_code: Prefix of the per-product columns
            (``{product_code}_realized_profit`` etc.).
        child_orders: Mapping with the keys ``'long'``, ``'dca'`` and
            ``'short'``, each holding an order table with a ``profit``
            column (and ``cumsum_profit`` for the first-run branch).
        latest_summary: Nested mapping;
            ``latest_summary['SELL']['now']['price']`` is read as the
            current sell price.
    """
    p_profit_dir = Path(PROFIT_DIR)
    p_daily_profit_path = p_profit_dir.joinpath('daily_profit.csv')
    p_monthly_profit_path = p_profit_dir.joinpath('monthly_profit.csv')
    p_yearly_profit_path = p_profit_dir.joinpath('yearly_profit.csv')

    current_datetime = datetime.datetime.now(
        datetime.timezone(datetime.timedelta(hours=9)))

    # [start of this month, start of next month] as pandas timestamps.
    current_month_start_datetime = current_datetime.replace(
        day=1, hour=0, minute=0, second=0, microsecond=0)
    current_month_end_datetime = current_month_start_datetime + relativedelta(
        months=+1)
    current_month_start_datetime = pd.to_datetime(current_month_start_datetime)
    current_month_end_datetime = pd.to_datetime(current_month_end_datetime)

    # [start of this year, start of next year] as pandas timestamps.
    current_year_start_datetime = current_datetime.replace(
        month=1, day=1, hour=0, minute=0, second=0, microsecond=0)
    current_year_end_datetime = current_year_start_datetime + relativedelta(
        years=+1)
    current_year_start_datetime = pd.to_datetime(current_year_start_datetime)
    current_year_end_datetime = pd.to_datetime(current_year_end_datetime)

    # Row labels used in the daily / monthly / yearly files.
    current_date = current_datetime.strftime('%Y/%m/%d')
    current_month = current_datetime.strftime('%Y/%m')
    current_year = current_datetime.strftime('%Y')

    # ------------------------------------------------------------- daily
    if path_exists(p_daily_profit_path):
        df_daily_profit = read_csv(str(p_daily_profit_path))
        df_daily_profit = df_daily_profit.set_index('date')

        realized_profit_all = 0
        unrealized_profit_all = 0
        if not child_orders['long'].empty:
            unrealized_profit_all += float(
                child_orders['long']['profit'].sum())
        if not child_orders['dca'].empty:
            # Fixed: this used to add the 'long' table a second time.
            unrealized_profit_all += float(
                child_orders['dca']['profit'].sum())
        if not child_orders['short'].empty:
            realized_profit_all += float(
                child_orders['short']['profit'].sum())
            unrealized_profit_all = _add_open_sell_unrealized_profit(
                unrealized_profit_all, child_orders['short'], latest_summary)

        realized_profit = realized_profit_all
        unrealized_profit = unrealized_profit_all
        # Today's share = all-time value minus what earlier days recorded.
        # NOTE(review): with exactly one stored row that is not today nothing
        # is subtracted — confirm the ``>= 2`` guard is intended.
        if len(df_daily_profit) >= 2:
            is_other_day = df_daily_profit.index != current_date
            realized_profit -= float(
                df_daily_profit.loc[
                    is_other_day,
                    f'{product_code}_realized_profit'].sum())
            unrealized_profit -= float(
                df_daily_profit.loc[
                    is_other_day,
                    f'{product_code}_unrealized_profit'].sum())
        realized_profit = round(realized_profit, 1)
        unrealized_profit = round(unrealized_profit, 1)

        df_daily_profit.at[
            current_date,
            f'{product_code}_total_profit'] = realized_profit + unrealized_profit
        df_daily_profit.at[
            current_date, f'{product_code}_realized_profit'] = realized_profit
        df_daily_profit.at[
            current_date,
            f'{product_code}_unrealized_profit'] = unrealized_profit

        # Collect the per-product columns of each kind. The unprefixed
        # aggregate columns do not match because of the leading underscore.
        unrealized_profit_list = []
        realized_profit_list = []
        total_profit_list = []
        for col_name in df_daily_profit.columns.tolist():
            if col_name.endswith('_unrealized_profit'):
                unrealized_profit_list.append(col_name)
            elif col_name.endswith('_realized_profit'):
                realized_profit_list.append(col_name)
            elif col_name.endswith('_total_profit'):
                total_profit_list.append(col_name)

        df_daily_profit = df_daily_profit.fillna(0)
        unrealized_profit_sum = df_daily_profit.loc[
            current_date, unrealized_profit_list].values.sum()
        realized_profit_sum = df_daily_profit.loc[
            current_date, realized_profit_list].values.sum()
        total_profit_sum = df_daily_profit.loc[
            current_date, total_profit_list].values.sum()
        df_daily_profit.at[current_date,
                           'total_profit'] = round(total_profit_sum, 1)
        df_daily_profit.at[current_date,
                           'realized_profit'] = round(realized_profit_sum, 1)
        df_daily_profit.at[current_date,
                           'unrealized_profit'] = round(unrealized_profit_sum, 1)
        df_to_csv(str(p_daily_profit_path), df_daily_profit, index=True)
    else:
        # First run: no daily file yet, start it with today's row.
        realized_profit = 0
        unrealized_profit = 0
        if not child_orders['long'].empty:
            unrealized_profit = child_orders['long']['cumsum_profit'].values[-1]
        if not child_orders['dca'].empty:
            unrealized_profit += child_orders['dca']['cumsum_profit'].values[-1]
        if not child_orders['short'].empty:
            realized_profit = child_orders['short']['cumsum_profit'].max()
            # Fixed: the inline loop here indexed the frame without ``.at``
            # and raised KeyError as soon as an ACTIVE SELL order existed.
            unrealized_profit = _add_open_sell_unrealized_profit(
                unrealized_profit, child_orders['short'], latest_summary)
        realized_profit = round(realized_profit, 1)
        unrealized_profit = round(unrealized_profit, 1)

        daily_profit = [
            {
                'date': current_date,
                'total_profit': realized_profit + unrealized_profit,
                'realized_profit': realized_profit,
                'unrealized_profit': unrealized_profit,
                f'{product_code}_total_profit':
                    realized_profit + unrealized_profit,
                f'{product_code}_realized_profit': realized_profit,
                f'{product_code}_unrealized_profit': unrealized_profit,
            },
        ]
        df_daily_profit = pd.DataFrame(daily_profit)
        df_daily_profit = df_daily_profit.set_index('date')
        df_to_csv(str(p_daily_profit_path), df_daily_profit, index=True)

    # ----------------------------------------------------------- monthly
    # Re-index by timestamp so the current month can be sliced out.
    df_daily_profit.index = pd.to_datetime(df_daily_profit.index)
    df_daily_profit.index = df_daily_profit.index.tz_localize('Asia/Tokyo')
    df_daily_profit_current_month = df_daily_profit[
        current_month_start_datetime:current_month_end_datetime]
    df_daily_profit_current_month_sum = df_daily_profit_current_month.sum()

    if path_exists(p_monthly_profit_path):
        df_monthly_profit = read_csv(str(p_monthly_profit_path))
        df_monthly_profit = df_monthly_profit.set_index('date')
        current_month_sum_dict = df_daily_profit_current_month_sum.to_dict()
        if current_month in df_monthly_profit.index.tolist():
            # Overwrite this month's values; unseen columns are created.
            for col_name, val in current_month_sum_dict.items():
                if col_name in df_monthly_profit.columns:
                    df_monthly_profit.at[current_month, col_name] = val
                else:
                    df_monthly_profit[col_name] = val
        else:
            # Append a row for this month, 0 where the daily sums have no value.
            current_month_profit = []
            for col_name in df_monthly_profit.columns.tolist():
                if col_name in current_month_sum_dict.keys():
                    current_month_profit.append(
                        current_month_sum_dict[col_name])
                else:
                    current_month_profit.append(0)
            df_monthly_profit.loc[current_month] = current_month_profit
        df_to_csv(str(p_monthly_profit_path), df_monthly_profit, index=True)
    else:
        current_month_profit_dict = {'date': current_month}
        current_month_profit_dict.update(
            df_daily_profit_current_month_sum.to_dict())
        df_monthly_profit = pd.DataFrame([current_month_profit_dict])
        df_monthly_profit = df_monthly_profit.set_index('date')
        df_to_csv(str(p_monthly_profit_path), df_monthly_profit, index=True)

    # ------------------------------------------------------------ yearly
    df_monthly_profit.index = pd.to_datetime(df_monthly_profit.index)
    df_monthly_profit.index = df_monthly_profit.index.tz_localize('Asia/Tokyo')
    df_monthly_profit_current_year = df_monthly_profit[
        current_year_start_datetime:current_year_end_datetime]
    df_monthly_profit_current_year_sum = df_monthly_profit_current_year.sum()

    if path_exists(p_yearly_profit_path):
        df_yearly_profit = read_csv(str(p_yearly_profit_path))
        # Cast to str so the labels compare equal to ``current_year``.
        df_yearly_profit['date'] = df_yearly_profit['date'].astype(str)
        df_yearly_profit = df_yearly_profit.set_index('date')
        current_year_sum_dict = df_monthly_profit_current_year_sum.to_dict()
        if current_year in df_yearly_profit.index.tolist():
            for col_name, val in current_year_sum_dict.items():
                if col_name in df_yearly_profit.columns:
                    df_yearly_profit.at[current_year, col_name] = val
                else:
                    df_yearly_profit[col_name] = val
        else:
            current_year_profit = []
            for col_name in df_yearly_profit.columns.tolist():
                if col_name in current_year_sum_dict.keys():
                    current_year_profit.append(current_year_sum_dict[col_name])
                else:
                    current_year_profit.append(0)
            df_yearly_profit.loc[current_year] = current_year_profit
        df_to_csv(str(p_yearly_profit_path), df_yearly_profit, index=True)
    else:
        current_year_profit_dict = {'date': current_year}
        current_year_profit_dict.update(
            df_monthly_profit_current_year_sum.to_dict())
        df_yearly_profit = pd.DataFrame([current_year_profit_dict])
        df_yearly_profit = df_yearly_profit.set_index('date')
        df_to_csv(str(p_yearly_profit_path), df_yearly_profit, index=True)
def calc_volume(product_code, child_orders):
    """Update the daily, monthly and yearly trading-volume CSV files.

    The all-time buy and sell volumes are summed from the ``volume`` column
    of ``child_orders['short']`` (split by ``side``) and
    ``child_orders['long']`` (counted as buy volume). Today's row in
    ``daily_volume.csv`` (under ``VOLUME_DIR``) is created or updated with
    the per-product and aggregate volumes. The current month's daily rows are
    then summed into ``monthly_volume.csv`` and the current year's monthly
    rows into ``yearly_volume.csv``. Dates use UTC+9.

    Args:
        product_code: Prefix of the per-product columns
            (``{product_code}_buy_volume`` etc.).
        child_orders: Mapping with at least the keys ``'short'`` and
            ``'long'``, each holding an order table with a ``volume`` column.
    """
    p_volume_dir = Path(VOLUME_DIR)
    p_daily_volume_path = p_volume_dir.joinpath('daily_volume.csv')
    p_monthly_volume_path = p_volume_dir.joinpath('monthly_volume.csv')
    p_yearly_volume_path = p_volume_dir.joinpath('yearly_volume.csv')
    current_datetime = datetime.datetime.now(
        datetime.timezone(datetime.timedelta(hours=9)))
    # [start of this month, start of next month] as pandas timestamps.
    current_month_start_datetime = current_datetime.replace(day=1,
                                                            hour=0,
                                                            minute=0,
                                                            second=0,
                                                            microsecond=0)
    current_month_end_datetime = current_month_start_datetime + relativedelta(
        months=+1)
    current_month_start_datetime = pd.to_datetime(current_month_start_datetime)
    current_month_end_datetime = pd.to_datetime(current_month_end_datetime)
    # [start of this year, start of next year] as pandas timestamps.
    current_year_start_datetime = current_datetime.replace(month=1,
                                                           day=1,
                                                           hour=0,
                                                           minute=0,
                                                           second=0,
                                                           microsecond=0)
    current_year_end_datetime = current_year_start_datetime + relativedelta(
        years=+1)
    current_year_start_datetime = pd.to_datetime(current_year_start_datetime)
    current_year_end_datetime = pd.to_datetime(current_year_end_datetime)
    # Row labels used in the daily / monthly / yearly files.
    current_date = current_datetime.strftime('%Y/%m/%d')
    current_month = current_datetime.strftime('%Y/%m')
    current_year = current_datetime.strftime('%Y')
    # All-time volumes over the order tables.
    buy_volume_all = 0
    sell_volume_all = 0
    if not child_orders['short'].empty:
        df_buy_volume = child_orders['short'].loc[
            child_orders['short']['side'] == 'BUY', 'volume']
        df_sell_volume = child_orders['short'].loc[
            child_orders['short']['side'] == 'SELL', 'volume']
        if not df_buy_volume.empty:
            buy_volume_all += float(df_buy_volume.sum())
        if not df_sell_volume.empty:
            sell_volume_all += float(df_sell_volume.sum())
    if not child_orders['long'].empty:
        # Every row of the 'long' table is counted as buy volume.
        df_buy_volume = child_orders['long']['volume']
        if not df_buy_volume.empty:
            buy_volume_all += float(df_buy_volume.sum())
    if path_exists(p_daily_volume_path):
        df_daily_volume = read_csv(str(p_daily_volume_path))
        df_daily_volume = df_daily_volume.set_index('date')
        buy_volume = buy_volume_all
        sell_volume = sell_volume_all
        # Today's share = all-time value minus what earlier days recorded.
        # NOTE(review): with exactly one stored row that is not today nothing
        # is subtracted — confirm the ``>= 2`` guard is intended.
        if len(df_daily_volume) >= 2:
            buy_volume -= float(
                df_daily_volume.loc[df_daily_volume.index != current_date,
                                    f'{product_code}_buy_volume'].sum())
            sell_volume -= float(
                df_daily_volume.loc[df_daily_volume.index != current_date,
                                    f'{product_code}_sell_volume'].sum())
        buy_volume = round(buy_volume, 1)
        sell_volume = round(sell_volume, 1)
        df_daily_volume.at[
            current_date,
            f'{product_code}_total_volume'] = buy_volume + sell_volume
        df_daily_volume.at[current_date,
                           f'{product_code}_buy_volume'] = buy_volume
        df_daily_volume.at[current_date,
                           f'{product_code}_sell_volume'] = sell_volume
        # Collect the per-product columns of each kind. The unprefixed
        # aggregate columns do not match because of the leading underscore.
        buy_volume_list = []
        sell_volume_list = []
        total_volume_list = []
        for col_num in df_daily_volume.columns.tolist():
            if col_num.endswith('_buy_volume'):
                buy_volume_list.append(col_num)
            elif col_num.endswith('_sell_volume'):
                sell_volume_list.append(col_num)
            elif col_num.endswith('_total_volume'):
                total_volume_list.append(col_num)
        df_daily_volume = df_daily_volume.fillna(0)
        total_volume_sum = df_daily_volume.loc[current_date,
                                               total_volume_list].values.sum()
        buy_volume_sum = df_daily_volume.loc[current_date,
                                             buy_volume_list].values.sum()
        sell_volume_sum = df_daily_volume.loc[current_date,
                                              sell_volume_list].values.sum()
        df_daily_volume.at[current_date,
                           'total_volume'] = round(total_volume_sum, 1)
        df_daily_volume.at[current_date,
                           'buy_volume'] = round(buy_volume_sum, 1)
        df_daily_volume.at[current_date,
                           'sell_volume'] = round(sell_volume_sum, 1)
        df_to_csv(str(p_daily_volume_path), df_daily_volume, index=True)
    else:
        # First run: no daily file yet, start it with today's row.
        buy_volume_all = round(buy_volume_all, 1)
        sell_volume_all = round(sell_volume_all, 1)
        daily_volume = [
            {
                'date': current_date,
                'total_volume': buy_volume_all + sell_volume_all,
                'buy_volume': buy_volume_all,
                'sell_volume': sell_volume_all,
                f'{product_code}_total_volume':
                buy_volume_all + sell_volume_all,
                f'{product_code}_buy_volume': buy_volume_all,
                f'{product_code}_sell_volume': sell_volume_all,
            },
        ]
        df_daily_volume = pd.DataFrame(daily_volume)
        df_daily_volume = df_daily_volume.set_index('date')
        df_to_csv(str(p_daily_volume_path), df_daily_volume, index=True)
    # Re-index by timestamp so the current month can be sliced out.
    df_daily_volume.index = pd.to_datetime(df_daily_volume.index)
    df_daily_volume.index = df_daily_volume.index.tz_localize('Asia/Tokyo')
    df_daily_volume_current_month = df_daily_volume[
        current_month_start_datetime:current_month_end_datetime]
    df_daily_volume_current_month_sum = df_daily_volume_current_month.sum()
    if path_exists(p_monthly_volume_path):
        df_monthly_volume = read_csv(str(p_monthly_volume_path))
        df_monthly_volume = df_monthly_volume.set_index('date')
        if current_month in df_monthly_volume.index.tolist():
            # Overwrite this month's values; unseen columns are created.
            current_month_sum_dict = df_daily_volume_current_month_sum.to_dict(
            )
            for col_name, val in current_month_sum_dict.items():
                if col_name in df_monthly_volume.columns:
                    df_monthly_volume.at[current_month, col_name] = val
                else:
                    df_monthly_volume[col_name] = val
        else:
            # Append a row for this month, 0 where the daily sums have no value.
            current_month_sum_dict = df_daily_volume_current_month_sum.to_dict(
            )
            current_month_volume = []
            for col_name in df_monthly_volume.columns.tolist():
                if col_name in current_month_sum_dict.keys():
                    current_month_volume.append(
                        current_month_sum_dict[col_name])
                else:
                    current_month_volume.append(0)
            df_monthly_volume.loc[current_month] = current_month_volume
        df_to_csv(str(p_monthly_volume_path), df_monthly_volume, index=True)
    else:
        current_month_volume_dict = {'date': current_month}
        current_month_volume_dict.update(
            df_daily_volume_current_month_sum.to_dict())
        df_monthly_volume = pd.DataFrame([current_month_volume_dict])
        df_monthly_volume = df_monthly_volume.set_index('date')
        df_to_csv(str(p_monthly_volume_path), df_monthly_volume, index=True)
    # Same roll-up one level higher: this year's months into the yearly file.
    df_monthly_volume.index = pd.to_datetime(df_monthly_volume.index)
    df_monthly_volume.index = df_monthly_volume.index.tz_localize('Asia/Tokyo')
    df_monthly_volume_current_year = df_monthly_volume[
        current_year_start_datetime:current_year_end_datetime]
    df_monthly_volume_current_year_sum = df_monthly_volume_current_year.sum()
    if path_exists(p_yearly_volume_path):
        df_yearly_volume = read_csv(str(p_yearly_volume_path))
        # Cast to str so the labels compare equal to ``current_year``.
        df_yearly_volume['date'] = df_yearly_volume['date'].astype(str)
        df_yearly_volume = df_yearly_volume.set_index('date')
        if current_year in df_yearly_volume.index.tolist():
            current_year_sum_dict = df_monthly_volume_current_year_sum.to_dict(
            )
            for col_name, val in current_year_sum_dict.items():
                if col_name in df_yearly_volume.columns:
                    df_yearly_volume.at[current_year, col_name] = val
                else:
                    df_yearly_volume[col_name] = val
        else:
            current_year_sum_dict = df_monthly_volume_current_year_sum.to_dict(
            )
            current_year_volume = []
            for col_name in df_yearly_volume.columns.tolist():
                if col_name in current_year_sum_dict.keys():
                    current_year_volume.append(current_year_sum_dict[col_name])
                else:
                    current_year_volume.append(0)
            df_yearly_volume.loc[current_year] = current_year_volume
        df_to_csv(str(p_yearly_volume_path), df_yearly_volume, index=True)
    else:
        current_year_volume_dict = {'date': current_year}
        current_year_volume_dict.update(
            df_monthly_volume_current_year_sum.to_dict())
        df_yearly_volume = pd.DataFrame([current_year_volume_dict])
        df_yearly_volume = df_yearly_volume.set_index('date')
        df_to_csv(str(p_yearly_volume_path), df_yearly_volume, index=True)
def task1_to_csv_gd(task1_file, udfile, target_file):
    """Build the frame from ``task1_to_df_gd`` and write it to *target_file*.

    Returns the frame that was written.
    """
    frame = task1_to_df_gd(task1_file, udfile)
    df_to_csv(frame, target_file)
    return frame
def task22_to_csv(task22_file, udfile, target_file):
    """Build the frame from ``task22_to_df`` and write it to *target_file*.

    Returns the frame that was written.
    """
    frame = task22_to_df(task22_file, udfile)
    df_to_csv(frame, target_file)
    return frame
# Load the saved inference weights for this model / input-output type.
model.load_weights(model_type + "_stock_" + input_output_type + "_inference.h5")
# NOTE(review): the score is computed on X_train / Y_train but logged as
# "Testing MSE" — confirm which data set is intended.
testing_score = model_score(model, X_train, Y_train)
info("=================================")
info("Testing MSE: %.5f%%" % testing_score)
info("=================================")
# Predict over the full input and reduce the predictions with the "mean" mode.
predicted_close = model.predict(final_test_for_all)
print(predicted_close.shape)
predicted_close = stock_rnn_model.get_avereage_predicted_close(predicted_close, "mean")
# Append the predicted closes to the first-half closes, then denormalize
# the combined series against ``close_df``.
predicted_close_df = pd.DataFrame({'close': predicted_close[:, 0]})
predicted_norm_df = pd.concat([first_half_close_df, predicted_close_df], ignore_index=True)
predicted_df = denormalize_dataframe(close_df, predicted_norm_df)
info("Predicted close for whole dataset:")
print(predicted_df)
info("Change percentage error: %s%%" % get_change_error(close_df, predicted_df))
# Extend the raw closes and dates by the ``future_day`` entries so both
# lists can be attached to the prediction frame as columns.
close_list = close_df['close'].values.tolist()
close_list = close_list + get_nan_price(future_day)
predicted_df['raw_close'] = close_list
date_list = raw_df['date'].values.tolist()
date_list = date_list + get_prediction_date(future_day)
predicted_df['date'] = date_list
utils.df_to_csv(predicted_df, "./diff.csv")
# Unless everything is shown, restrict both frames to the last
# ``show_interval`` rows and report the error on that window.
if show_interval != "all":
    close_df = close_df.tail(show_interval)
    predicted_df = predicted_df.tail(show_interval)
    info("Change percentage error: %s%%" % get_change_error(close_df, predicted_df))
plot_stock(predicted_df)
# Merge the downloaded 2330 data into the default CSV and write the result.
info("Load default 2330 csv.")
default_df = utils.read_csv_to_df(default_csv)
#print(default_df)
info("Load downloaded 2330 csv.")
new_df = utils.read_csv_to_df(download_csv)
# Rename the columns, reverse the row order and renumber the index.
rename_df = rename_dataframe(new_df).iloc[::-1].reset_index(drop=True)
rename_df = rename_date(rename_df)
#print(rename_df)
info("Try to get detail information from other websites.")
detail_info_df = get_detail_info_df(rename_df)
detail_info_df = rename_date(detail_info_df)
#print(detail_info_df)
info("Try to merge downloaded csv and detail information.")
# The last date already present in the default CSV is passed to the merge.
default_last_date = default_df.tail(1).iloc[0]['date']
recent_df = merge_rename_df_and_detail_info_df(default_last_date, rename_df, detail_info_df)
#print(recent_df)
recent_df = calculate_ma_and_bband(default_df, recent_df)
info("Create new csv successfully.")
info("Try to merge default csv and new csv.")
# Append the recent rows and drop every row that contains a missing value.
final_df = pd.concat([default_df, recent_df], ignore_index=True)
final_df = final_df.dropna()
utils.df_to_csv(final_df, merged_csv)
print(final_df)
info("Check the file: %s" % merged_csv)
info("Create lastest csv successfully.")
# aggregate and save flow # flow is saved to file in overwrite mode print('Total number of points:', len(recovered_trajectory_df)) start_time = time.time() i = -1 # dummy for i, row in recovered_trajectory_df.iterrows(): if i < checkpoint: pass elif i == 0: flow_df.loc[round_time(row['time'], interval=interval), row['road_id']] += 1 else: if i % 10000 == 0: print('Saving result at index %s. Time spent: %s s' % (i, int(time.time() - start_time))) df_to_csv(flow_df, flow_path, index=True) with open(checkpoint_path, 'w') as f: f.write(str(i)) if row['vehicle_id'] != previous_vehicle_id or row[ 'trajectory_id'] != previous_trajectory_id: # new trajectory flow_df.loc[round_time(row['time'], interval=interval), row['road_id']] += 1 elif row['road_id'] != previous_road_id: # appear in this road flow_df.loc[round_time(row['time'], interval=interval), row['road_id']] += 1 previous_vehicle_id, previous_trajectory_id, previous_road_id = row[ 'vehicle_id'], row['trajectory_id'], row['road_id'] print('Saving result at index', i) df_to_csv(flow_df, flow_path, index=True) with open(checkpoint_path, 'w') as f: f.write(str(i))