import logging
from random import choice, randint, shuffle
from time import time

import numpy as np

# INTERVALS, CURRENCIES, PERIODS, DATA, ACTIONS and the helpers (calculateActions,
# loadData, getBackgroundKnowledge, loadQ, saveQ, train, summarizeActions) are
# defined elsewhere in this module.


def main(debug):
    interval = choice(INTERVALS)
    minutes = 0
    while True:
        minutes += 1
        seconds_to_run = 60 * minutes
        seconds_info_intervals = seconds_to_run / 4
        logging.error('Training each currency for {0} minutes'.format(minutes))

        # shuffle(CURRENCIES)
        for currency, min_trail in CURRENCIES.items():
            pip_mul = 100. if 'JPY' in currency else 10000.
            actions = calculateActions(min_trail)

            # last 2000 ticks, with indicator columns added
            df = loadData(currency, interval)
            df = df[-2000:]
            df = getBackgroundKnowledge(df, PERIODS)

            alpha = 0.
            epsilon = 0.
            q = loadQ(currency, interval)

            time_start = time()
            time_interval = 0
            epoch = 0
            rewards = []
            errors = []
            ticks = []
            logging.warning('Training {0} on {1} with {2} ticks...'.format(currency, interval, len(df)))

            while True:
                epoch += 1
                logging.info(' ')
                logging.info('=' * 20)
                logging.info('EPOCH {0}'.format(epoch))

                # train on a random suffix of the data
                index_start = randint(0, len(df) - 20)
                df_inner = df.iloc[index_start:]
                logging.info('Epoch: at {0} with {1} ticks'.format(index_start, len(df_inner)))

                q, r, error, tick = train(df_inner, q, alpha, epsilon, PERIODS, actions, pip_mul)

                # results
                rewards.append(r)
                errors.append(error * pip_mul)
                ticks.append(tick)

                # win ratio over the retained reward history
                wins = [1. if reward > 0. else 0. for reward in rewards]
                win_ratio = np.mean(wins)

                # adjust values: decay exploration and learning rate as the win ratio improves
                epsilon = np.sqrt((1 - win_ratio) * 100.) / 100.
                alpha = epsilon / 2.

                # only log and check for exit at the reporting interval
                if time() - time_start > time_interval or debug:
                    # prune history lengths
                    while len(rewards) > 1000 + minutes * 1000:
                        rewards.pop(0)
                        errors.pop(0)
                        ticks.pop(0)

                    # RMSD of the retained errors (in pips)
                    rmsd = np.sqrt(np.mean([e ** 2 for e in errors]))
                    logging.info('RMSD: {0}'.format(rmsd))
                    logging.warning('{0} [{1:05d}] RMSD {2:03d} PPT {3:03d} WR {5:.0f}% [ticks:{6:.1f} sum:{4:.1f}, a:{7:.2f}, e:{8:.2f}]'.format(
                        currency,
                        epoch,
                        int(rmsd),
                        int(np.mean(rewards) * pip_mul),
                        sum(rewards),
                        np.mean(wins) * 100,
                        np.mean(ticks),
                        alpha * 100,
                        epsilon * 100,
                    ))

                    # exit once the time budget for this currency is spent
                    if (time() - time_start >= seconds_to_run) or debug:
                        break

                    # saveQ(currency, interval, q)
                    time_interval += seconds_info_intervals

            saveQ(currency, interval, q)
            summarizeActions(q)

            if debug:
                break  # currencies

        if debug:
            break  # forever
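# A minimal standalone sketch of the schedule used above, assuming win_ratio
# lies in [0, 1]; the helper name `sqrt_schedule` is illustrative and not part
# of the original module. Exploration (epsilon) and the learning rate (alpha)
# both decay towards zero as the rolling win ratio improves.
def sqrt_schedule(win_ratio):
    """Return (alpha, epsilon) for a rolling win ratio in [0, 1]."""
    epsilon = np.sqrt((1 - win_ratio) * 100.) / 100.  # equals sqrt(1 - win_ratio) / 10
    alpha = epsilon / 2.
    return alpha, epsilon

# e.g. sqrt_schedule(0.00) -> (0.050, 0.100)
#      sqrt_schedule(0.75) -> (0.025, 0.050)
#      sqrt_schedule(1.00) -> (0.000, 0.000)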
# Variant of the trainer: currency settings come from the DATA list rather than
# the CURRENCIES dict, and alpha/epsilon follow pre-fitted quadratics in the
# win ratio instead of the sqrt schedule above.
def main(debug):
    minutes = 0
    while True:
        minutes += 1
        seconds_to_run = 60 * minutes
        seconds_info_intervals = seconds_to_run / 5
        logging.error('Training each currency for {0} minutes'.format(minutes))

        shuffle(DATA)
        for info in DATA:
            logging.debug('Currency info: {0}'.format(info))
            currency = info['currency']
            interval = info['intervals'][0]
            pip_mul = info['pip_mul']

            df = loadData(currency, interval, 'train')
            df = getBackgroundKnowledge(df, PERIODS)

            # both start at zero; they are re-fitted from the win ratio after every epoch
            alpha = 0.
            epsilon = alpha / 2.
            q = loadQ(currency, interval)

            time_start = time()
            time_interval = seconds_info_intervals
            epoch = 0
            rewards = []
            errors = []
            ticks = []
            logging.warning('Training {0} on {1} with {2} ticks [m:{3}]'.format(
                currency,
                interval,
                len(df),
                minutes,
            ))

            while True:
                epoch += 1
                logging.info(' ')
                logging.info('=' * 20)
                logging.info('EPOCH {0}'.format(epoch))

                # train on a random suffix of the data
                index_start = randint(0, len(df) - 20)
                df_inner = df.iloc[index_start:]
                logging.info('Epoch: at {0} with {1} ticks'.format(index_start, len(df_inner)))

                q, r, error, tick = train(df_inner, q, alpha, epsilon, PERIODS, ACTIONS, pip_mul, info['std'])

                # results
                error *= pip_mul
                rewards.append(r)
                errors.append(error)
                ticks.append(tick)

                # win ratio over the retained reward history
                wins = [1. if reward > 0. else 0. for reward in rewards]
                win_ratio = np.mean(wins)

                # adjust values: pre-fitted quadratics in the win ratio
                alpha = (1.0102052281586786e+000
                         + (-2.0307383627607809e+000 * win_ratio)
                         + (1.0215546892913909e+000 * win_ratio ** 2))
                epsilon = (3.9851080604500078e-001
                           + (2.1874724815820201e-002 * win_ratio)
                           + (-4.1444101741886652e-001 * win_ratio ** 2))

                # only do updates at interval
                if time() - time_start > time_interval or debug:
                    # prune history lengths
                    while len(rewards) > 1000 + minutes * 1000:
                        rewards.pop(0)
                        errors.pop(0)
                        ticks.pop(0)

                    # RMSD of the retained errors (in pips)
                    rmsd = np.sqrt(np.mean([e ** 2 for e in errors]))
                    logging.warning('{0} [{1:05d}] RMSD {2:03d} PPT {3:03d} WR {5:.0f}% [ticks:{6:.1f} sum:{4:.1f}, a:{7:.2f}, e:{8:.2f}]'.format(
                        currency,
                        epoch,
                        int(rmsd),
                        int(np.mean(rewards) * pip_mul),
                        sum(rewards),
                        np.mean(wins) * 100,
                        np.mean(ticks),
                        alpha * 100,
                        epsilon * 100,
                    ))

                    # exit once the time budget for this currency is spent
                    if time_interval >= seconds_to_run or debug:
                        break

                    # continue: bump the interval and checkpoint Q
                    time_interval += seconds_info_intervals
                    saveQ(currency, interval, q)

            saveQ(currency, interval, q)
            summarizeActions(q)

            if debug:
                break  # currencies

        if debug:
            break  # forever
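# A minimal standalone sketch of the fitted quadratic schedules hard-coded
# above; the helper name `poly_schedule` is illustrative only, and how the
# coefficients were fitted is not shown in this module. Both curves fall to
# roughly zero at win_ratio = 1, so learning and exploration taper off as
# performance approaches 100% wins.
def poly_schedule(win_ratio):
    """Return (alpha, epsilon) for a rolling win ratio in [0, 1]."""
    alpha = (1.0102052281586786
             - 2.0307383627607809 * win_ratio
             + 1.0215546892913909 * win_ratio ** 2)
    epsilon = (0.39851080604500078
               + 0.021874724815820201 * win_ratio
               - 0.41444101741886652 * win_ratio ** 2)
    return alpha, epsilon

# e.g. poly_schedule(0.0) -> (~1.01, ~0.40)
#      poly_schedule(0.5) -> (~0.25, ~0.31)
#      poly_schedule(1.0) -> (~0.00, ~0.01)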
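# A hypothetical entry point, assuming the module is run as a script; the
# original source does not show its __main__ guard, so the --debug flag name
# and the logging setup here are assumptions.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Q-learning currency trainer')
    parser.add_argument('--debug', action='store_true',
                        help='run a single training epoch on one currency and exit')
    args = parser.parse_args()

    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
    main(args.debug)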