# NOTE(review): line breaks and indentation were lost in this fragment, and
# because the physical line starts with '#', Python reads all of it as one
# comment -- nothing below executes until the layout is restored.
# The `else:` has no matching `if` in view, so the branch condition is
# unknown from here; nesting described below is inferred from statement order.
#
# Before `else:` (train / validate / test split):
#   * creates args.out_dir when it does not exist, then calls
#     utils.create_hdf5 for three paths named
#     train_/validate_/test_<start_day>_<end_day>;
#   * opens three datelist_*.txt files in 'w' mode and hands each one to
#     utils.save_dates_to_file together with the matching *_set_str list;
#   * only afterwards rebinds the *_set_str names to
#     utils.gen_lkbk_days(day_list=...), so the files receive the earlier
#     values -- TODO confirm this ordering is intended.
#   No close() on train_f / validate_f / test_f is visible here; verify that
#   save_dates_to_file closes them, otherwise wrap the opens in `with`.
#
# After `else:` (single run of business days):
#   * draws start_day with sample(trading_days, 1)[0], then replaces it with
#     args.chosen_day (parsed as %Y%m%d) when that is not None, so the random
#     draw is discarded in that case;
#   * builds args.ndays business days (freq='B') starting at start_day; the
#     "60" in the inline comment is not enforced by the code;
#   * creates args.out_dir when missing and calls utils.create_hdf5 with a
#     path named after start_day.
# The exists()/makedirs() pair can race with another process creating the
# directory; os.makedirs(args.out_dir, exist_ok=True) avoids that where the
# interpreter supports it -- TODO confirm target Python version.
# h5 savefile if not os.path.exists(args.out_dir): os.makedirs(args.out_dir) train_store = utils.create_hdf5(args.out_dir+'/train_'+args.start_day+'_'+args.end_day) validate_store = utils.create_hdf5(args.out_dir+'/validate_'+args.start_day+'_'+args.end_day) test_store = utils.create_hdf5(args.out_dir+'/test_'+args.start_day+'_'+args.end_day) # save dates to file train_f = open(args.out_dir+'/datelist_train_'+args.start_day+'_'+args.end_day+'.txt', 'w') validate_f = open(args.out_dir+'/datelist_validate_'+args.start_day+'_'+args.end_day+'.txt', 'w') test_f = open(args.out_dir+'/datelist_test_'+args.start_day+'_'+args.end_day+'.txt', 'w') utils.save_dates_to_file(train_f, train_set_str) utils.save_dates_to_file(validate_f, validate_set_str) utils.save_dates_to_file(test_f, test_set_str) train_set_str = utils.gen_lkbk_days(day_list=train_set) validate_set_str = utils.gen_lkbk_days(day_list=validate_set) test_set_str = utils.gen_lkbk_days(day_list=test_set) else: # Pick a date at random # Generate a list of 60 business days starting from the random date chosen start_day = sample(trading_days, 1)[0] if args.chosen_day is not None: start_day = datetime.strptime(args.chosen_day, '%Y%m%d') training_set = date_range(start_day, periods=args.ndays, freq='B') train_set_str = [date.date().strftime('%Y%m%d') for date in training_set] # h5 savefile if not os.path.exists(args.out_dir): os.makedirs(args.out_dir) train_store = utils.create_hdf5(args.out_dir+'/'+start_day.strftime('%Y%m%d'))
# NOTE(review): line breaks and indentation were lost in this fragment; the
# statements sit on one physical line with no separators, and everything
# after the first '#' is read by Python as a comment. Restore the layout
# before running. Nesting described below is inferred from statement order.
#
# What the statements do, in order:
#   * read date strings from `f` (opened outside this fragment), dropping the
#     last character of every line via line[:-1]; a final line without a
#     trailing newline would lose a real character -- line.rstrip('\n') is
#     the safer spelling;
#   * parse each string as %Y%m%d and set the time of day to 09:30;
#   * open '<out_dir>/log_<args.dataset minus its last 3 characters>.txt' in
#     'w' mode (presumably stripping a '.h5' suffix -- TODO confirm); no
#     close() is visible in this fragment;
#   * create three empty dicts (today_data_all, lkbk_days_data_all,
#     multiplier), none of which is used before the fragment ends;
#   * loop over training_set by index (enumerate / direct iteration would do,
#     since `i` is only used to fetch `today`);
#   * per day: call utils.gen_lkbk_days(today=today), parse the result into
#     datetimes, log 'processing <today>' through utils._print;
#   * inside `try:` compute utils.day_time_range(today), select 'Volume' and
#     'Close' for `today` from input_comp, multiply them, divide by the sum,
#     replace NaN with 0, and reorder descending via reversed np.argsort.
# `liqs.index[:]` takes the whole index, so no top-n cut is applied here
# despite the name topn_input -- TODO confirm whether a limit was intended.
# The `try:` has no except/finally in view; the fragment ends inside its body.
# `.ix` with three indexers implies a 3-axis container (presumably a pandas
# Panel); both `.ix` and Panel are gone from current pandas, so this code is
# tied to an old pandas release -- TODO confirm pinned version.
training_set_str = [line[:-1] for line in f] training_set = [ datetime.strptime(x, '%Y%m%d').replace(hour=9, minute=30) for x in training_set_str ] # file to backup console prints log_file = open(args.out_dir + '/log_' + args.dataset[:-3] + '.txt', 'w') today_data_all = {} lkbk_days_data_all = {} multiplier = {} for i in range(len(training_set)): today = training_set[i] lkbk_days = utils.gen_lkbk_days(today=today) lkbk_days = [datetime.strptime(x, '%Y%m%d') for x in lkbk_days] utils._print(log_file, 'processing %s' % today) try: today_time_range = utils.day_time_range(today) vols = input_comp.ix[:, today, 'Volume'] closes = input_comp.ix[:, today, 'Close'] liqs = vols * closes liqs = liqs / liqs.sum() liqs = liqs.fillna(0) # get top input components by liquidity liqs = liqs[np.argsort(liqs)[::-1]] topn_input = liqs.index[:]
# NOTE(review): line breaks and indentation were lost in this fragment, and
# because the physical line starts with '#', Python reads all of it as one
# comment -- nothing below executes until the layout is restored.
# Nesting described below is inferred from statement order.
#
# What the statements do, in order:
#   * open '<in_dir>/datelist_<args.dataset minus its last 3 characters>.txt'
#     for reading (presumably stripping a '.h5' suffix -- TODO confirm); the
#     handle `f` is never closed in this fragment, so prefer `with open(...)`;
#   * read one date string per line, dropping the last character of every
#     line via line[:-1]; a final line without a trailing newline would lose
#     a real character -- line.rstrip('\n') is the safer spelling;
#   * parse each string as %Y%m%d and set the time of day to 09:30;
#   * open '<out_dir>/log_<...>.txt' in 'w' mode; no close() is visible here;
#   * create three empty dicts (today_data_all, lkbk_days_data_all,
#     multiplier), none of which is used before the fragment ends;
#   * loop over training_set by index (enumerate / direct iteration would do,
#     since `i` is only used to fetch `today`);
#   * per day: call utils.gen_lkbk_days(today=today), parse the result into
#     datetimes, log 'processing <today>' through utils._print;
#   * inside `try:` compute utils.day_time_range(today), select 'Volume' and
#     'Close' for `today` from input_comp, multiply them, divide by the sum,
#     replace NaN with 0, and reorder descending via reversed np.argsort.
# `liqs.index[:]` takes the whole index, so no top-n cut is applied here
# despite the name topn_input -- TODO confirm whether a limit was intended.
# The `try:` has no except/finally in view; the fragment ends inside its body.
# `.ix` with three indexers implies a 3-axis container (presumably a pandas
# Panel); both `.ix` and Panel are gone from current pandas, so this code is
# tied to an old pandas release -- TODO confirm pinned version.
# fetch/generate dates f = open(args.in_dir+'/datelist_'+args.dataset[:-3]+'.txt') training_set_str = [line[:-1] for line in f] training_set = [datetime.strptime(x, '%Y%m%d').replace(hour=9, minute=30) for x in training_set_str] # file to backup console prints log_file = open(args.out_dir+'/log_'+args.dataset[:-3]+'.txt', 'w') today_data_all = {} lkbk_days_data_all = {} multiplier = {} for i in range(len(training_set)): today = training_set[i] lkbk_days = utils.gen_lkbk_days(today=today) lkbk_days = [datetime.strptime(x,'%Y%m%d') for x in lkbk_days] utils._print(log_file, 'processing %s' % today) try: today_time_range = utils.day_time_range(today) vols = input_comp.ix[:, today, 'Volume'] closes = input_comp.ix[:, today, 'Close'] liqs = vols * closes liqs = liqs / liqs.sum() liqs = liqs.fillna(0) # get top input components by liquidity liqs = liqs[np.argsort(liqs)[::-1]] topn_input = liqs.index[:]
# NOTE(review): line breaks and indentation were lost in this fragment, and
# because the physical line starts with '#', Python reads all of it as one
# comment -- nothing below executes until the layout is restored.
# The `else:` has no matching `if` in view, and the fragment stops in the
# middle of the final utils.create_hdf5( call, so its last argument is
# unknown from here. Nesting described below is inferred from statement order.
#
# Before `else:` (train / validate / test split):
#   * opens three datelist_{train,validate,test}_<start_day>_<end_day>.txt
#     files in 'w' mode under args.out_dir and hands each one to
#     utils.save_dates_to_file together with the matching *_set_str list;
#   * only afterwards rebinds the *_set_str names to
#     utils.gen_lkbk_days(day_list=...), so the files receive the earlier
#     values -- TODO confirm this ordering is intended.
#   No close() on train_f / validate_f / test_f is visible here; verify that
#   save_dates_to_file closes them, otherwise wrap the opens in `with`.
#
# After `else:` (single run of business days):
#   * draws start_day with sample(trading_days, 1)[0], then replaces it with
#     args.chosen_day (parsed as %Y%m%d) when that is not None, so the random
#     draw is discarded in that case;
#   * builds args.ndays business days (freq='B') starting at start_day; the
#     "60" in the inline comment is not enforced by the code;
#   * creates args.out_dir when it does not exist, then begins a
#     utils.create_hdf5 call whose path starts with args.out_dir + '/'.
# The exists()/makedirs() pair can race with another process creating the
# directory; os.makedirs(args.out_dir, exist_ok=True) avoids that where the
# interpreter supports it -- TODO confirm target Python version.
# save dates to file train_f = open( args.out_dir + '/datelist_train_' + args.start_day + '_' + args.end_day + '.txt', 'w') validate_f = open( args.out_dir + '/datelist_validate_' + args.start_day + '_' + args.end_day + '.txt', 'w') test_f = open( args.out_dir + '/datelist_test_' + args.start_day + '_' + args.end_day + '.txt', 'w') utils.save_dates_to_file(train_f, train_set_str) utils.save_dates_to_file(validate_f, validate_set_str) utils.save_dates_to_file(test_f, test_set_str) train_set_str = utils.gen_lkbk_days(day_list=train_set) validate_set_str = utils.gen_lkbk_days(day_list=validate_set) test_set_str = utils.gen_lkbk_days(day_list=test_set) else: # Pick a date at random # Generate a list of 60 business days starting from the random date chosen start_day = sample(trading_days, 1)[0] if args.chosen_day is not None: start_day = datetime.strptime(args.chosen_day, '%Y%m%d') training_set = date_range(start_day, periods=args.ndays, freq='B') train_set_str = [date.date().strftime('%Y%m%d') for date in training_set] # h5 savefile if not os.path.exists(args.out_dir): os.makedirs(args.out_dir) train_store = utils.create_hdf5(args.out_dir + '/' +