def __init__(self):
    """Load the bitFlyer price history and initialise the trading state.

    The original body bound every value to a local variable, so the
    windows, the balances and the counters were thrown away as soon as
    the constructor returned. They are now stored on the instance.
    """
    self.price_idx = 0
    self.trade = TradeClass()
    price_data = self.trade.read_bitflyer_json()
    print("price_data idx 0-10" + str(price_data[0:10]))
    print("price_data idx last 10" + str(price_data[-1]))

    self.input_len = 400
    self.n_actions = 3
    self.obs_size = self.input_len + self.n_actions

    # Work on a shallow copy so the object returned by the reader is not
    # shared with the window lists built below.
    training_set = copy.copy(price_data)
    self.X_train = []
    self.y_train = []
    # Each sample is the `input_len` prices preceding index i; its target
    # is the price at i. The last 1001 prices are never used as targets.
    for i in range(self.input_len, len(training_set) - 1001):
        self.X_train.append(training_set[i - self.input_len:i])
        self.y_train.append(training_set[i])

    self.price = self.y_train
    self.money = 300
    self.before_money = self.money
    self.ethereum = 0.01
    self.total_money = self.money + np.float64(self.price[0] * self.ethereum)
    self.first_total_money = self.total_money
    self.pass_count = 0
    self.buy_sell_count = 0  # buy increments, sell decrements
    self.pass_renzoku_count = 0  # presumably a consecutive-pass counter
def create_data():
    """Build sliding-window inputs X and multi-step targets Y.

    Reads the price history through ``TradeClass.read_bitflyer_json``.
    Every X entry holds ``INPUT_LEN`` consecutive prices; the matching Y
    entry holds the ``SEQUENTIAL_NUM`` prices that follow it, as floats.
    Both lists have the same length by construction.

    Returns:
        The tuple ``(X, Y)``.
    """
    history = TradeClass().read_bitflyer_json()
    total = len(history)

    X = [
        [history[start + offset] for offset in range(INPUT_LEN)]
        for start in range(total - INPUT_LEN - SEQUENTIAL_NUM)
    ]

    # The prices right after a window are the teacher data for that window.
    Y = [
        [float(history[first + step]) for step in range(SEQUENTIAL_NUM)]
        for first in range(INPUT_LEN, total - SEQUENTIAL_NUM)
    ]

    print(Y[0])
    print("len(Y)" + str(len(Y)))
    print("len(X))" + str(len(X)))
    return X, Y
def __init__(self):
    """Read the bitFlyer history and prepare windows and portfolio state."""
    self.price_idx = 0
    self.trade = TradeClass()
    training_set = self.trade.read_bitflyer_json()
    print("price_data idx 0-10" + str(training_set[0:10]))
    print("price_data idx last 10" + str(training_set[-1]))

    self.input_len = 400
    self.n_actions = 3
    self.obs_size = self.input_len + self.n_actions

    # One window of `input_len` prices per target index; the final 1001
    # prices are never used as targets.
    window = self.input_len
    target_indices = range(window, len(training_set) - 1001)
    self.X_train = [training_set[t - window:t] for t in target_indices]
    self.y_train = [training_set[t] for t in target_indices]
    self.price = self.y_train

    # Portfolio: cash plus a small crypto holding valued at the first price.
    self.money = 300
    self.before_money = self.money  # plain int, so no copy is needed
    self.cripto = 0.1
    self.total_money = self.money + np.float64(self.price[0] * self.cripto)
    self.first_total_money = self.total_money

    # Bookkeeping counters.
    self.pass_count = 0
    self.buy_sell_count = 0  # buy increments, sell decrements
    self.pass_renzoku_count = 0
    self.buy_sell_fee = 0.00001
def __init__(self):
    """Read the bitFlyer history and set up windows, portfolio and spaces."""
    self.price_idx = 0
    self.trade = TradeClass()
    training_set = self.trade.read_bitflyer_json()
    print("price_data idx 0-10" + str(training_set[0:10]))
    print("price_data idx last 10" + str(training_set[-1]))

    # Sizes of the pieces that make up one observation.
    self.length_data = len(training_set)
    self.input_len = 400
    self.n_actions = 3
    self.asset_info_len = 2
    self.buy_sell_count_len = 4
    # TODO: asset_info_len is not part of the observation size yet.
    self.observe_size = self.input_len + self.buy_sell_count_len

    # One window of `input_len` prices per target index; the final 1001
    # prices are never used as targets.
    window = self.input_len
    target_indices = range(window, self.length_data - 1001)
    self.data = [training_set[t - window:t] for t in target_indices]
    self.y_train = [training_set[t] for t in target_indices]
    self.price = self.y_train

    # Portfolio state.
    self.money = 500
    self.before_money = self.money  # plain int, so no copy is needed
    self.cripto = 0.00001
    self.total_money = self.money + np.float64(self.price[0] * self.cripto)
    self.first_total_money = self.total_money
    self.begin_total_money = self.money + self.cripto * self.price[0]
    self.current_asset = [self.cripto, self.money]
    self.buy_sell_fee = 0.00001

    # Counters and position books.
    self.pass_count = 0
    self.buy_sell_count = 0  # buy increments, sell decrements
    self.pass_renzoku_count = 0
    self.inventory = []
    self.buy_inventory = []
    self.sell_inventory = []
    self.total_profit = 0

    # Gym spaces: three discrete actions, flat observation vector.
    self.action_space = gym.spaces.Discrete(3)
    self.MAP = np.array([0] * self.observe_size)
    self.observation_space = gym.spaces.Box(low=0,
                                            high=3,
                                            shape=self.MAP.shape)

    print("LENGTH OF LOOP NUM:" + str(len(self.data)))
import copy
import os
import random
import sys
import traceback

import numpy as np
from sklearn import preprocessing

from trade_class import TradeClass
# from trade_class import buy_simple, sell_simple, pass_simple
# from trade_class import SellAndCalcAmoutUsingPrediction, BuyAndCalcAmoutUsingPrediction

# NOTE(review): `cuda` is not imported in the visible part of this file;
# it presumably comes from an import above this view - confirm.
cp = cuda.cupy
ss = preprocessing.StandardScaler()

print(os.path.basename(__file__))

# Load the price history once at import time.
tradecl = TradeClass()
price_data = tradecl.ReadPoloniexCSV()

# Print full arrays instead of the truncated summary.
np.set_printoptions(threshold=np.inf)
print("price_data idx 0-10" + str(price_data[0:10]))
print("price_data idx last 10" + str(price_data[-1]))

# Network input layout: a price window plus a few discrete values.
input_price_len = 400
input_discrete_value_size = 3
total_input_size = input_price_len + input_discrete_value_size
n_actions = 3
# obs_size = input_len+n_actions#shape#env.observation_space.shape[0]


def standarization(x, axis=None):
    # NOTE(review): only this first statement is visible in this view; the
    # rest of the function appears to be cut off.
    x = np.array(x)
# coding: utf-8
# https://qiita.com/aaatsushi_bb/items/0b605c0f27493f005c88
import numpy
from trade_class import TradeClass
import xgboost as xgb
import copy
import numpy as np
# sklearn.grid_search was deprecated in scikit-learn 0.18 and removed in
# 0.20; GridSearchCV now lives in sklearn.model_selection. Fall back to the
# old location only for very old installations.
try:
    from sklearn.model_selection import GridSearchCV
except ImportError:
    from sklearn.grid_search import GridSearchCV
# NOTE(review): the alias bound by the next line is immediately rebound to
# matplotlib.pyplot by the line after it.
import matplotlib as plt
import matplotlib.pyplot as plt

# Fetch the Poloniex price history once at import time.
trade = TradeClass()
time_date, price_data = trade.getDataPoloniex()
max_price = max(price_data)

# Sliding windows: the 60 prices before index i are the features and the
# price at i is the label. The last 10001 prices are never used as labels.
training_set = copy.copy(price_data)
X_train = []
y_train = []
for i in range(60, len(training_set) - 10001):
    X_train.append(training_set[i - 60:i])
    y_train.append(training_set[i])

# Relabel through the project helper, then convert both to float64 arrays.
X_train, y_train = trade.PercentageLabel(X_train, y_train)
X_train, y_train = np.array(X_train, dtype='float64'), np.array(y_train, dtype='float64')
from scipy.sparse import coo_matrix
class FxEnv(gym.Env):
    """Gym environment replaying bitFlyer prices for a buy/sell/pass agent.

    Action 0 buys, action 1 sells and any other action passes.
    """

    def __init__(self):
        """Load the price history and initialise the simulated portfolio."""
        self.price_idx = 0
        self.trade = TradeClass()
        training_set = self.trade.read_bitflyer_json()
        print("price_data idx 0-10" + str(training_set[0:10]))
        print("price_data idx last 10" + str(training_set[-1]))

        self.input_len = 400
        self.n_actions = 3
        self.obs_size = self.input_len + self.n_actions

        # Each sample is the `input_len` prices preceding index i; its
        # target is the price at i. The last 1001 prices are never targets.
        self.X_train = []
        self.y_train = []
        for i in range(self.input_len, len(training_set) - 1001):
            self.X_train.append(training_set[i - self.input_len:i])
            self.y_train.append(training_set[i])

        self.price = self.y_train
        self.money = 300
        self.cripto = 0.1
        self.total_money = self.money + np.float64(self.price[0] * self.cripto)
        # _step() computes the reward as total_money - before_money, so the
        # baseline must be the total asset value. Starting from cash only
        # made the first reward include the whole crypto holding.
        self.before_money = self.total_money
        self.first_total_money = self.total_money
        self.pass_count = 0
        self.buy_sell_count = 0  # buy increments, sell decrements
        self.pass_renzoku_count = 0
        self.buy_sell_fee = 0.00001

    def _reset(self):
        """Rewind to the first window and return it.

        NOTE(review): only the price index is rewound; balances and
        counters keep their current values.
        """
        self.price_idx = 0
        return self.X_train[self.price_idx]

    def _seed(self, seed=None):
        """Accept a seed for API compatibility; nothing here is random."""
        pass

    def buy_simple(self, money, cripto, total_money, current_price):
        """Spend 10% of `money` (plus fee) on crypto at `current_price`.

        Returns the updated ``(money, cripto, total_money)``, or the inputs
        unchanged when the purchase would leave no money.
        """
        first_money, first_cripto, first_total_money = money, cripto, total_money
        spend = money * 0.1
        money -= spend * (1 + self.buy_sell_fee)
        if money <= 0.0:
            return first_money, first_cripto, first_total_money

        cripto += float(spend / current_price)
        total_money = money + cripto * current_price
        return money, cripto, total_money

    def sell_simple(self, money, cripto, total_money, current_price):
        """Sell 10% of `cripto` at `current_price`; the fee is paid in crypto.

        Returns the updated ``(money, cripto, total_money)``, or the inputs
        unchanged when the sale would leave no crypto.
        """
        first_money, first_cripto, first_total_money = money, cripto, total_money
        spend = cripto * 0.1
        cripto -= spend * (1 + self.buy_sell_fee)
        if cripto <= 0.0:
            return first_money, first_cripto, first_total_money

        money += float(spend * current_price)
        total_money = money + float(cripto * current_price)
        return money, cripto, total_money

    def pass_simple(self, money, cripto, total_money, current_price):
        """Trade nothing; only revalue the holdings at `current_price`."""
        total_money = money + float(cripto * current_price)
        return money, cripto, total_money

    def _step(self, action):
        """Advance one price step, apply `action` and compute the reward.

        Returns:
            ``(observation, reward, done, info)``. The observation is the
            list ``[price_window, buy_sell_num_array, current_asset]``.
            ``done`` is True on the last available window (it used to be
            always False, so the episode ended in an IndexError). ``info``
            is an empty dict (it used to be None).
        """
        self.price_idx += 1
        current_price = self.X_train[self.price_idx][-1]

        # Direction flags are taken before the action changes the counter.
        if self.buy_sell_count >= 1:
            buy_sell_num_array = [1.0, 0.0, self.buy_sell_count]
        else:
            buy_sell_num_array = [0.0, 1.0, self.buy_sell_count]
        self.trade.update_trading_view(current_price, action)

        pass_reward = 0
        if action == 0:
            print("buy")
            self.buy_sell_count += 1
            self.money, self.cripto, self.total_money = self.buy_simple(
                self.money, self.cripto, self.total_money, current_price)
        elif action == 1:
            print("sell")
            self.buy_sell_count -= 1
            self.money, self.cripto, self.total_money = self.sell_simple(
                self.money, self.cripto, self.total_money, current_price)
        else:
            print("PASS")
            self.money, self.cripto, self.total_money = self.pass_simple(
                self.money, self.cripto, self.total_money, current_price)
            pass_reward = 0.0  # -0.001)#0.01 is default
            self.pass_count += 1

        reward = self.total_money - self.before_money + pass_reward
        # Penalise piling further onto an already one-sided position;
        # otherwise add a constant bonus.
        if self.buy_sell_count >= 5 and action == 0:
            print("buy_sell" + str(self.buy_sell_count) + "回 action==" + str(action))
            reward -= float(abs(self.buy_sell_count)**2)
            print(reward)
        elif self.buy_sell_count <= -5 and action == 1:
            print("buy_sell" + str(self.buy_sell_count) + "回 action==" + str(action))
            reward -= float(abs(self.buy_sell_count)**2)
            print(reward)
        else:
            # Original author's note: a bonus of about 1.0 worked well.
            reward += 1.1

        self.before_money = self.total_money

        # Periodic progress report and chart.
        if self.price_idx % 2000 == 1000:
            print("last action:" + str(action))
            print("TOTAL MONEY" + str(self.total_money))
            print("100回中passは" + str(self.pass_count) + "回")
            self.pass_count = 0
            self.trade.draw_trading_view()

        current_asset = [self.cripto, self.money]
        done = self.price_idx >= len(self.X_train) - 1

        # return obs, reward, done, info
        return [
            self.X_train[self.price_idx], buy_sell_num_array, current_asset
        ], reward, done, {}
# coding: utf-8
import copy
import datetime
import time

import matplotlib.pyplot as plt
import numpy as np

from trade_class import TradeClass

# Fetch the Poloniex price history once at import time.
trade = TradeClass()
time_date, price_data = trade.getDataPoloniex()
max_price = max(price_data)

# Sliding windows: the 60 prices before an index are the features and the
# price at that index is the label. The last 10001 prices are never labels.
training_set = copy.copy(price_data)
label_indices = range(60, len(training_set) - 10001)
X_train = [training_set[i - 60:i] for i in label_indices]
y_train = [training_set[i] for i in label_indices]

# Relabel through the project helper, then convert both to float64 arrays.
X_train, y_train = trade.PercentageLabel(X_train, y_train)
X_train = np.array(X_train, dtype='float64')
y_train = np.array(y_train, dtype='float64')

from scipy.sparse import coo_matrix
X_sparse = coo_matrix(X_train)
if bid_ask == "bid": print("cleaning order Bid Try"+str(counter)+"回目") bids_top_price = public_zaif.depth('btc_jpy')['bids'][0][0] limit_price = bids_top_price + counter*5 else: print("cleaning order Ask Try"+str(counter)+"回目") asks_top_price = public_zaif.depth('btc_jpy')['asks'][0][0] limit_price = asks_top_price - counter*5 leverage_zaif.change_position(type="futures", leverage_id=id, group_id=1, price=int(price), limit=int(limit_price)) print("Done. cleaning order") time.sleep(10) except: traceback.print_exc() print(sys.exc_info()[0]) trade=TradeClass() history=get_price() print("History data is here. execute get_price() function") print(history) #買いは+、売りは− leverage_pos_num = -33 for num in range(99999999999): #time.sleep(60*5)#本当は5分×60セット待たなければならない try: time.sleep(20) public_zaif = ZaifPublicApi() a = public_zaif.last_price(('btc_jpy')) print(str(a)) last_price = int(a['last_price'])
class FxEnv(gym.Env):
    """Gym environment replaying bitFlyer prices with long/short inventories.

    Action 1 covers open shorts if there are any, otherwise opens a long.
    Action 2 closes open longs if there are any, otherwise opens a short.
    Any other action does nothing.
    """

    def __init__(self):
        """Load the price history and set up windows, portfolio and spaces."""
        self.price_idx = 0
        self.trade = TradeClass()
        training_set = self.trade.read_bitflyer_json()
        print("price_data idx 0-10" + str(training_set[0:10]))
        print("price_data idx last 10" + str(training_set[-1]))

        self.length_data = len(training_set)
        self.input_len = 400
        self.n_actions = 3
        self.asset_info_len = 2
        self.buy_sell_count_len = 4
        # TODO: asset_info_len is not part of the observation size yet.
        self.observe_size = self.input_len + self.buy_sell_count_len

        # Each sample is the `input_len` prices preceding index i; its
        # target is the price at i. The last 1001 prices are never targets.
        self.data = []
        self.y_train = []
        for i in range(self.input_len, self.length_data - 1001):
            self.data.append(training_set[i - self.input_len:i])
            self.y_train.append(training_set[i])

        self.price = self.y_train
        self.money = 500
        self.before_money = self.money
        self.cripto = 0.00001
        self.total_money = self.money + np.float64(self.price[0] * self.cripto)
        self.first_total_money = self.total_money
        self.pass_count = 0
        self.buy_sell_count = 0  # buy increments, sell decrements
        self.pass_renzoku_count = 0
        self.buy_sell_fee = 0.00001
        self.current_asset = [self.cripto, self.money]

        self.action_space = gym.spaces.Discrete(3)
        self.MAP = np.array([0 for idx in range(0, self.observe_size)])
        self.inventory = []
        self.observation_space = gym.spaces.Box(low=0,
                                                high=3,
                                                shape=self.MAP.shape)
        self.begin_total_money = self.money + self.cripto * self.price[0]
        print("LENGTH OF LOOP NUM:" + str(len(self.data)))

        # Entry prices of open long / short positions, oldest first.
        self.buy_inventory = []
        self.sell_inventory = []
        self.total_profit = 0

    def _reset(self):
        """Rewind to the first window and return the initial observation.

        NOTE(review): only the price index is rewound; inventories and
        total_profit keep their current values.
        """
        self.price_idx = 0
        # TODO: +self.current_asset
        return self.data[self.price_idx] + [0, 0, 0, 0]

    def _seed(self, seed=None):
        """Ignore `seed` and return the raw history length.

        NOTE(review): returning the data length from a seed hook is
        unusual; kept as-is for callers that may rely on it.
        """
        return self.length_data

    def return_lenghth_steps(self):
        """Return the length of the raw price history."""
        return self.length_data

    def _render(self, mode='', close=False):
        """Display hook (mainly for a GUI); intentionally does nothing."""
        pass

    def buy_simple(self, money, cripto, total_money, current_price):
        """Spend 10% of `money` (plus fee) on crypto at `current_price`.

        Returns the updated ``(money, cripto, total_money)``, or the inputs
        unchanged when the purchase would leave no money.
        """
        first_money, first_cripto, first_total_money = money, cripto, total_money
        spend = money * 0.1
        money -= spend * (1 + self.buy_sell_fee)
        if money <= 0.0:
            return first_money, first_cripto, first_total_money

        cripto += float(spend / current_price)
        total_money = money + cripto * current_price
        return money, cripto, total_money

    def sell_simple(self, money, cripto, total_money, current_price):
        """Sell 10% of `cripto` at `current_price`; the fee is paid in crypto.

        Returns the updated ``(money, cripto, total_money)``, or the inputs
        unchanged when the sale would leave no crypto.
        """
        first_money, first_cripto, first_total_money = money, cripto, total_money
        spend = cripto * 0.1
        cripto -= spend * (1 + self.buy_sell_fee)
        if cripto <= 0.0:
            return first_money, first_cripto, first_total_money

        money += float(spend * current_price)
        total_money = money + float(cripto * current_price)
        return money, cripto, total_money

    def pass_simple(self, money, cripto, total_money, current_price):
        """Trade nothing; only revalue the holdings at `current_price`."""
        total_money = money + float(cripto * current_price)
        return money, cripto, total_money

    def buy_lot(self, money, cripto, total_money, current_price):
        """Buy a fixed lot costing 0.1% of `current_price` (plus fee).

        Returns ``(money, cripto, total_money, EMPTY_MONEY_FLAG)``. The flag
        is True, and the other values are the unchanged inputs, when the
        purchase would leave no money.
        """
        first_money, first_cripto, first_total_money = money, cripto, total_money
        spend = current_price * 0.001
        money -= spend * (1 + self.buy_sell_fee)
        EMPTY_MONEY_FLAG = False
        if money <= 0.0:
            EMPTY_MONEY_FLAG = True
            return first_money, first_cripto, first_total_money, EMPTY_MONEY_FLAG

        cripto += float(spend / current_price)
        total_money = money + cripto * current_price
        return money, cripto, total_money, EMPTY_MONEY_FLAG

    def sell_lot(self, money, cripto, total_money, current_price):
        """Sell 0.1% of `cripto` at `current_price`; the fee is paid in crypto.

        Returns ``(money, cripto, total_money, EMPTY_MONEY_FLAG)``. The flag
        is True, and the other values are the unchanged inputs, when the
        sale would leave no crypto.
        """
        first_money, first_cripto, first_total_money = money, cripto, total_money
        spend = cripto * 0.001
        cripto -= spend * (1 + self.buy_sell_fee)
        EMPTY_MONEY_FLAG = False
        if cripto <= 0.0:
            EMPTY_MONEY_FLAG = True
            return first_money, first_cripto, first_total_money, EMPTY_MONEY_FLAG

        money += float(spend * current_price)
        total_money = money + float(cripto * current_price)
        return money, cripto, total_money, EMPTY_MONEY_FLAG

    def _close_positions(self, inventory, current_price, is_short):
        """Close the oldest tenth of `inventory` (at least one position).

        Adds each realised profit to ``self.total_profit`` and returns the
        mean profit of the closed positions.
        """
        # The original closed int(len / 10) positions, which is zero for an
        # inventory of 1-9 entries: such a position could never be closed.
        n_close = max(1, len(inventory) // 10)
        realised = 0
        for _ in range(n_close):
            entry_price = inventory.pop(0)
            if is_short:
                profit = entry_price - current_price
            else:
                profit = current_price - entry_price
            realised += profit
            self.total_profit += profit
        # The original divided only the last profit by the count; the mean
        # over all closed positions is used instead.
        return realised / n_close

    def _step(self, action):
        """Advance one price step, apply `action` and compute the reward.

        `action` may be an int, or a list / ndarray of scores, in which
        case the index of the largest score is used.

        Returns:
            ``(observation, reward, done, info)``. The observation is the
            price window followed by
            ``[len_buy, len_sell, buy_flag, sell_flag]``.
        """
        # Plain lists have no .tolist(); convert ndarrays only.
        if isinstance(action, np.ndarray):
            action = action.tolist()
        if isinstance(action, list):
            action = action.index(max(action))

        self.price_idx += 1
        current_price = self.data[self.price_idx][-1]
        self.trade.update_trading_view(current_price, action)

        # Inventory sizes are reported as they were before this action.
        len_buy = len(self.buy_inventory)
        len_sell = len(self.sell_inventory)
        if len_buy > 40:
            buy_flag, sell_flag = 1, 0
        elif len_sell > 40:
            buy_flag, sell_flag = 0, 1
        else:
            buy_flag, sell_flag = 0, 0
        buy_sell_array = [len_buy, len_sell, buy_flag, sell_flag]

        reward = 0
        if action == 1 and self.sell_inventory:
            # Cover open short positions.
            reward = self._close_positions(self.sell_inventory,
                                           current_price, True)
        elif action == 1 and len(self.buy_inventory) < 50:
            # Open a long position.
            self.buy_inventory.append(current_price)
        elif action == 2 and self.buy_inventory:
            # Close open long positions.
            reward = self._close_positions(self.buy_inventory,
                                           current_price, False)
        elif action == 2 and len(self.sell_inventory) < 50:
            # Open a short position.
            self.sell_inventory.append(current_price)

        print("inventory(sell) : " + str(len(self.sell_inventory)) +
              " inventory(buy) : " + str(len(self.buy_inventory)))
        print("TOTAL PROFIT: " + str(self.total_profit))
        # The periodic call to self.trade.draw_trading_view() was disabled
        # (`if False:`) in the original and is left out here.

        # self.data is shorter than the raw history, so the episode ends at
        # the last window, not at length_data - 1.
        done = self.price_idx >= len(self.data) - 1

        # return obs, reward, done, info
        return self.data[self.price_idx] + buy_sell_array, reward, done, {}
# from chainerrl.action_value import DiscreteActionValue
# from chainerrl.action_value import QuadraticActionValue
# from chainerrl.optimizers import rmsprop_async
import copy
import datetime
import random
import time

import numpy as np
import poloniex
from chainerrl import links
from chainerrl import policies

from trade_class import TradeClass

# Load the bitFlyer price history once at import time and show both ends.
trade = TradeClass()
price_data = trade.read_bitflyer_json()
print("price_data idx 0-10" + str(price_data[0:10]))
print("price_data idx last 10" + str(price_data[-1]))

# Disabled Poloniex loader, kept by the author inside a string literal.
'''
def getDataPoloniex():
    polo = poloniex.Poloniex()
    polo.timeout = 10
    chartUSDT_BTC = polo.returnChartData('USDT_BTC', period=300, start=time.time() - 1440 * 60 * 500, end=time.time())  # 1440(min)*60(sec)=DAY
    tmpDate = [chartUSDT_BTC[i]['date'] for i in range(len(chartUSDT_BTC))]
    date = [datetime.datetime.fromtimestamp(tmpDate[i]) for i in range(len(tmpDate))]
    data = [float(chartUSDT_BTC[i]['open']) for i in range(len(chartUSDT_BTC))]
    return date, data
'''
# time_date, price_data = getDataPoloniex()
class FxEnv(gym.Env):
    """Gym environment replaying bitFlyer prices for a buy/sell/pass agent.

    Action 0 buys, action 1 sells and any other action passes. The
    observation is the price window followed by four position indicators
    and the two asset balances.
    """

    def __init__(self):
        """Load the price history and set up windows, portfolio and spaces."""
        self.price_idx = 0
        self.trade = TradeClass()
        training_set = self.trade.read_bitflyer_json()
        print("price_data idx 0-10" + str(training_set[0:10]))
        print("price_data idx last 10" + str(training_set[-1]))

        self.input_len = 400
        self.n_actions = 3
        self.asset_info_len = 2
        self.buy_sell_count_len = 4
        self.observe_size = (self.input_len + self.buy_sell_count_len +
                             self.asset_info_len)

        # Each sample is the `input_len` prices preceding index i; its
        # target is the price at i. The last 1001 prices are never targets.
        self.X_train = []
        self.y_train = []
        for i in range(self.input_len, len(training_set) - 1001):
            self.X_train.append(training_set[i - self.input_len:i])
            self.y_train.append(training_set[i])

        self.price = self.y_train
        self.money = 500
        self.cripto = 0.00001
        self.total_money = self.money + np.float64(self.price[0] * self.cripto)
        # _step() rewards the change in total asset value, so the baseline
        # must be the total, not cash alone.
        self.before_money = self.total_money
        self.first_total_money = self.total_money
        self.pass_count = 0
        self.buy_sell_count = 0  # buy increments, sell decrements
        self.pass_renzoku_count = 0
        self.buy_sell_fee = 0.00001
        self.current_asset = [self.cripto, self.money]

        self.action_space = gym.spaces.Discrete(3)
        self.MAP = np.array([0 for idx in range(0, self.observe_size)])
        self.inventory = []
        self.observation_space = gym.spaces.Box(
            low=0,
            high=3,
            shape=self.MAP.shape
        )
        # Printed as "begin MONEY" in _step(); the original stored the
        # first price here instead of the starting asset value.
        self.begin_total_money = self.total_money
        print("LENGTH OF LOOP NUM:" + str(len(self.X_train)))

    def _reset(self):
        """Rewind to the first window and return the initial observation.

        NOTE(review): only the price index is rewound; balances and
        counters keep their current values.
        """
        self.price_idx = 0
        return self.X_train[self.price_idx] + [0, 0, 0, 0] + self.current_asset

    def _seed(self, seed=None):
        """Accept a seed for API compatibility; nothing here is random."""
        pass

    def _render(self, mode='', close=False):
        """Display hook (mainly for a GUI); intentionally does nothing."""
        pass

    def buy_simple(self, money, cripto, total_money, current_price):
        """Spend 10% of `money` (plus fee) on crypto at `current_price`.

        Returns the updated ``(money, cripto, total_money)``, or the inputs
        unchanged when the purchase would leave no money.
        """
        first_money, first_cripto, first_total_money = money, cripto, total_money
        spend = money * 0.1
        money -= spend * (1 + self.buy_sell_fee)
        if money <= 0.0:
            return first_money, first_cripto, first_total_money

        cripto += float(spend / current_price)
        total_money = money + cripto * current_price
        return money, cripto, total_money

    def sell_simple(self, money, cripto, total_money, current_price):
        """Sell 10% of `cripto` at `current_price`; the fee is paid in crypto.

        Returns the updated ``(money, cripto, total_money)``, or the inputs
        unchanged when the sale would leave no crypto.
        """
        first_money, first_cripto, first_total_money = money, cripto, total_money
        spend = cripto * 0.1
        cripto -= spend * (1 + self.buy_sell_fee)
        if cripto <= 0.0:
            return first_money, first_cripto, first_total_money

        money += float(spend * current_price)
        total_money = money + float(cripto * current_price)
        return money, cripto, total_money

    def pass_simple(self, money, cripto, total_money, current_price):
        """Trade nothing; only revalue the holdings at `current_price`."""
        total_money = money + float(cripto * current_price)
        return money, cripto, total_money

    def buy_lot(self, money, cripto, total_money, current_price):
        """Buy a fixed lot costing 0.1% of `current_price` (plus fee).

        Returns ``(money, cripto, total_money, EMPTY_MONEY_FLAG)``. The flag
        is True, and the other values are the unchanged inputs, when the
        purchase would leave no money.
        """
        first_money, first_cripto, first_total_money = money, cripto, total_money
        spend = current_price * 0.001
        money -= spend * (1 + self.buy_sell_fee)
        EMPTY_MONEY_FLAG = False
        if money <= 0.0:
            EMPTY_MONEY_FLAG = True
            return first_money, first_cripto, first_total_money, EMPTY_MONEY_FLAG

        cripto += float(spend / current_price)
        total_money = money + cripto * current_price
        return money, cripto, total_money, EMPTY_MONEY_FLAG

    def sell_lot(self, money, cripto, total_money, current_price):
        """Sell 0.1% of `cripto` at `current_price`; the fee is paid in crypto.

        Returns ``(money, cripto, total_money, EMPTY_MONEY_FLAG)``. The flag
        is True, and the other values are the unchanged inputs, when the
        sale would leave no crypto.
        """
        first_money, first_cripto, first_total_money = money, cripto, total_money
        spend = cripto * 0.001
        cripto -= spend * (1 + self.buy_sell_fee)
        EMPTY_MONEY_FLAG = False
        if cripto <= 0.0:
            EMPTY_MONEY_FLAG = True
            return first_money, first_cripto, first_total_money, EMPTY_MONEY_FLAG

        money += float(spend * current_price)
        total_money = money + float(cripto * current_price)
        return money, cripto, total_money, EMPTY_MONEY_FLAG

    def _step(self, action):
        """Advance one price step, apply `action` and compute the reward.

        `action` may be an int, or a list / ndarray of scores, in which
        case the index of the largest score is used.

        Returns:
            ``(observation, reward, done, info)``. ``done`` is True on the
            last available window (it used to be always False, so the
            episode ended in an IndexError).
        """
        # Plain lists have no .tolist(); convert ndarrays only.
        if isinstance(action, np.ndarray):
            action = action.tolist()
        if isinstance(action, list):
            action = action.index(max(action))

        self.price_idx += 1
        current_price = self.X_train[self.price_idx][-1]

        # Position indicators are taken before the action changes the
        # counter: direction one-hot, magnitude, and whether the magnitude
        # is still below 10.
        magnitude = abs(self.buy_sell_count)
        between_range = 0.0 if magnitude >= 10 else 1.0
        if self.buy_sell_count >= 0:
            buy_sell_num_array = [1.0, 0.0, magnitude, between_range]
        else:
            buy_sell_num_array = [0.0, 1.0, magnitude, between_range]

        self.trade.update_trading_view(current_price, action)

        reward = 0
        if action == 0:
            print("buy")
            self.buy_sell_count += 1
            self.money, self.cripto, self.total_money = self.buy_simple(
                self.money, self.cripto, self.total_money, current_price)
            # Small bonus for moving the counter back towards zero.
            if self.buy_sell_count < 0:
                reward += 0.01
            else:
                reward -= 0.01
        elif action == 1:
            # The original also required len(self.inventory) > 0, but that
            # list is never filled, so every sell was treated as a pass.
            # sell_simple() already refuses to sell when no crypto is left.
            print("sell")
            self.buy_sell_count -= 1
            self.money, self.cripto, self.total_money = self.sell_simple(
                self.money, self.cripto, self.total_money, current_price)
            if self.buy_sell_count > 0:
                reward += 0.01
            else:
                reward -= 0.01
        else:
            print("PASS")
            self.money, self.cripto, self.total_money = self.pass_simple(
                self.money, self.cripto, self.total_money, current_price)
            self.pass_count += 1

        reward += 0.01 * (self.total_money - self.before_money)
        print("buy_sell" + str(self.buy_sell_count) + "回 action==" + str(action))
        self.before_money = self.total_money
        # The periodic report / self.trade.draw_trading_view() block was
        # disabled (`if False:`) in the original and is left out here.

        print("begin MONEY: " + str(self.begin_total_money))
        print("current MONEY: " + str(self.total_money))
        print("price_IDX: " + str(self.price_idx))
        print("Reward: " + str(reward))

        self.current_asset = [self.cripto, self.money]
        done = self.price_idx >= len(self.X_train) - 1

        # return obs, reward, done, info
        return (self.X_train[self.price_idx] + buy_sell_num_array +
                self.current_asset), reward, done, {}
import copy
import os
import random
import sys
import traceback

import numpy as np
from keras.callbacks import EarlyStopping
from keras.layers.core import Dense, Activation
from keras.layers.recurrent import LSTM
from keras.optimizers import Adam
from sklearn import preprocessing
from sklearn.utils import shuffle

from trade_class import TradeClass

ss = preprocessing.StandardScaler()

print(os.path.basename(__file__))

# Load the price history once at import time.
tradecl = TradeClass()
price_data = tradecl.ReadPoloniexCSV()

# Print full arrays instead of the truncated summary.
np.set_printoptions(threshold=np.inf)
print("price_data idx 0-10" + str(price_data[0:10]))
print("price_data idx last 10" + str(price_data[-1]))

# Network input layout: a price window plus a few discrete values.
input_price_len = 400
input_discrete_value_size = 3
total_input_size = input_price_len + input_discrete_value_size
n_actions = 3
# obs_size = input_len+n_actions#shape#env.observation_space.shape[0]


# Standardise the data so that deep learning can train on it more easily.
def standarization(x, axis=None):
    # NOTE(review): only this first statement is visible in this view; the
    # rest of the function appears to be cut off.
    x = np.array(x)
class FxEnv(gym.Env):
    """Gym environment replaying bitFlyer prices for a buy/sell/pass agent.

    Action 0 buys, action 1 sells and any other action passes.

    The original kept all state in ``__init__`` locals and then read it
    through bare names in ``_reset`` / ``_step``, which fails at runtime
    (for example ``buy_sell_count += 1`` raises UnboundLocalError). The
    state now lives on the instance.

    ``buy_simple``, ``sell_simple``, ``pass_simple`` and ``agent`` are not
    defined in this class; they are expected to be module-level names
    supplied elsewhere in the file.
    """

    def __init__(self):
        """Load the price history and initialise the simulated portfolio."""
        self.price_idx = 0
        self.trade = TradeClass()
        price_data = self.trade.read_bitflyer_json()
        print("price_data idx 0-10" + str(price_data[0:10]))
        print("price_data idx last 10" + str(price_data[-1]))

        self.input_len = 400
        self.n_actions = 3
        self.obs_size = self.input_len + self.n_actions

        # Each sample is the `input_len` prices preceding index i; its
        # target is the price at i. The last 1001 prices are never targets.
        training_set = copy.copy(price_data)
        self.X_train = []
        self.y_train = []
        for i in range(self.input_len, len(training_set) - 1001):
            self.X_train.append(training_set[i - self.input_len:i])
            self.y_train.append(training_set[i])

        self.price = self.y_train
        self.money = 300
        self.ethereum = 0.01
        self.total_money = self.money + np.float64(self.price[0] * self.ethereum)
        # _step() rewards the change in total asset value, so the baseline
        # must be the total, not cash alone.
        self.before_money = self.total_money
        self.first_total_money = self.total_money
        self.pass_count = 0
        self.buy_sell_count = 0  # buy increments, sell decrements
        self.pass_renzoku_count = 0

    def _reset(self):
        """Rewind to the first window and return it."""
        self.price_idx = 0
        return self.X_train[0]

    def _step(self, action):
        """Advance one price step, apply `action` and compute the reward.

        Returns:
            ``(observation, reward, done, info)`` where the observation is
            the current price window, ``done`` is True on the last
            available window and ``info`` is an empty dict.
        """
        self.price_idx += 1
        current_price = self.X_train[self.price_idx][-1]
        self.trade.update_trading_view(current_price, action)

        pass_reward = 0
        if action == 0:
            print("buy")
            self.buy_sell_count += 1
            self.money, self.ethereum, self.total_money = buy_simple(
                self.money, self.ethereum, self.total_money, current_price)
        elif action == 1:
            print("sell")
            self.buy_sell_count -= 1
            self.money, self.ethereum, self.total_money = sell_simple(
                self.money, self.ethereum, self.total_money, current_price)
        else:
            print("PASS")
            self.money, self.ethereum, self.total_money = pass_simple(
                self.money, self.ethereum, self.total_money, current_price)
            pass_reward = 0.0  # -0.001)#0.01 is default
            self.pass_count += 1

        reward = self.total_money - self.before_money + pass_reward
        # Penalise piling further onto an already one-sided position;
        # otherwise add a constant bonus.
        if self.buy_sell_count >= 5 and action == 0:
            print("buy_sell" + str(self.buy_sell_count) + "回 action==" + str(action))
            reward -= float(abs(self.buy_sell_count)**2)
            print(reward)
        elif self.buy_sell_count <= -5 and action == 1:
            print("buy_sell" + str(self.buy_sell_count) + "回 action==" + str(action))
            reward -= float(abs(self.buy_sell_count)**2)
            print(reward)
        else:
            # Original author's note: a bonus of about 1.0 worked well.
            reward += 1.1

        self.before_money = self.total_money

        # Periodic progress report, chart and agent checkpoint.
        if self.price_idx % 2000 == 1000:
            print("last action:" + str(action))
            print("TOTAL MONEY" + str(self.total_money))
            print("100回中passは" + str(self.pass_count) + "回")
            self.pass_count = 0
            self.trade.draw_trading_view()
            agent.save('chainerRLAgent')

        done = self.price_idx >= len(self.X_train) - 1

        # return obs, reward, done, info
        return self.X_train[self.price_idx], reward, done, {}