import argparse

from data_sampler import DataSampler


def main():
    # Parse data-generation options from the command line. The flags are
    # required, so the redundant defaults have been dropped.
    parser = argparse.ArgumentParser()
    parser.add_argument('--mp', required=True,
                        help="multipath component: '0' disables it")
    parser.add_argument('--nb_samples', required=True,
                        help='number of samples to generate')
    parser.add_argument('--cn0', required=True,
                        help='carrier-to-noise ratio (dB-Hz)')
    parser.add_argument('--discr', required=True,
                        help='discriminator grid size')
    args = parser.parse_args()

    multipath_option = args.mp != '0'
    print('check multipath option before sampling: ', args.mp, multipath_option)

    # Coherent integration period.
    Tint = 20e-3

    # Correlator grid dimensions.
    discr_size_fd = int(args.discr)
    scale_code = int(args.discr)

    # Multipath intervals.
    delta_tau_interv = [0, 3 / 2]
    delta_dopp_max = min(3 / Tint, 800)
    delta_dopp_interv = [-delta_dopp_max, delta_dopp_max]
    delta_phase = 0
    alpha_att_interv = [0.5, 0.9]

    cn0_log = int(args.cn0)
    print('CHECK CN0 ratio: ', cn0_log)

    # Doppler interval.
    dopp_max = min(5.5 / Tint, 800 + 2.5 / Tint)
    dopp_interval = [-dopp_max, dopp_max]
    # Code-delay interval.
    tau_interval = [-3 / 2, 5 / 2]
    print('MODS IN INTERVALS CHECKED')

    noise_i_path = r'corr_noise_generator/outputs/i_channel/*.csv'
    noise_q_path = r'corr_noise_generator/outputs/q_channel/*.csv'
    save_path = r'synth_data/discr_{}/'.format(discr_size_fd)

    data_sampler = DataSampler(
        discr_size_fd=discr_size_fd,
        scale_code=scale_code,
        Tint=Tint,
        multipath_option=multipath_option,
        delta_tau_interv=delta_tau_interv,
        delta_dopp_interv=delta_dopp_interv,
        delta_phase=delta_phase,
        alpha_att_interv=alpha_att_interv,
        tau=tau_interval,
        dopp=dopp_interval,
        cn0_log=cn0_log
    )
    data_sampler.read_noise(noise_i_path, noise_q_path,
                            matrix_shape=(discr_size_fd, scale_code),
                            nb_samples=int(args.nb_samples))
    data_sampler.generate_corr(nb_samples=int(args.nb_samples))
    data_sampler.sum_matr(save_csv=True, save_path=save_path)


if __name__ == '__main__':
    main()
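# A minimal invocation sketch for the generator above; the script file name
# generate_data.py and the flag values are assumptions, not taken from the
# source:
#
#   python generate_data.py --mp 1 --nb_samples 100 --cn0 40 --discr 40
#
# Passing --mp 0 disables the multipath component; any other value enables it.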
import tensorflow as tf

# SOLVERS, LOSS_FUNCS, HIDDEN_SIZES, LEARNING_RATES, MODEL, FLAGS,
# DataSampler and BinaryClassifier are defined elsewhere in this module.


def main():
    # Grid search over solver, loss function, architecture and learning rate.
    for solver in SOLVERS:
        for loss_type in LOSS_FUNCS:
            for h in HIDDEN_SIZES:
                print('Hidden sizes = {}'.format(h))
                hidden_sizes = h
                for lr in LEARNING_RATES:
                    ds = DataSampler()
                    arch = MODEL + '_' + 'x'.join(
                        [str(i) for i in hidden_sizes]) + 'x{}'.format(lr)
                    task = '_'.join([arch, loss_type, solver])
                    print('[TRAIN] Start experiment: {}'.format(task))
                    classifier = BinaryClassifier(data_sampler=ds,
                                                  task_name=task,
                                                  hidden_sizes=hidden_sizes,
                                                  model=MODEL,
                                                  solver_type=solver,
                                                  activation='relu',
                                                  loss_func=loss_type,
                                                  learning_rate=lr)
                    if FLAGS.sanity_check:
                        classifier.overfit_test()
                    else:
                        classifier.train()
                    # Reset the graph so each run starts from a fresh model.
                    tf.reset_default_graph()
                    print('[TRAIN] Done {}, reset network.'.format(task))


if __name__ == '__main__':
    main()
def parse_options(self, options):
    if "sampler" in options:
        self.sampler = options["sampler"]
    else:
        self.sampler = DataSampler()
    for key, value in options.items():
        if key in self.option_desc:
            setattr(self, key, value)
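# A minimal sketch of a host class for parse_options above; the class name
# RecommenderStub and its option_desc entries are hypothetical, chosen only
# to show the behaviour: recognised options are copied onto the instance and
# a default DataSampler is attached when none is supplied.
class RecommenderStub:
    option_desc = {"k": "neighbourhood size"}  # hypothetical option registry

    def parse_options(self, options):
        if "sampler" in options:
            self.sampler = options["sampler"]
        else:
            self.sampler = DataSampler()
        for key, value in options.items():
            if key in self.option_desc:
                setattr(self, key, value)


stub = RecommenderStub()
stub.parse_options({"k": 10})  # 'k' is recognised and copied; a default
                               # DataSampler is attached as stub.sampler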
import os.path as osp

import cv2
import numpy as np
from tensorpack.dataflow import RNGDataFlow

from data_sampler import DataSampler


class TensorpackReader(RNGDataFlow):
    def __init__(self, root, split, k_shot=1, shuffle=False, noise_rate=0.0,
                 mask_dir=None, class_agnostic=False, folds=None):
        self.root = root
        self.k_shot = k_shot
        self.split = split
        self.mask_dir = mask_dir
        self.folds = folds
        # Fixed seed so the unshuffled order is reproducible.
        rng = np.random.RandomState(1357)

        def _rng_fn():
            return rng

        self.sampler = DataSampler(root, split, _rng_fn, k_shot, noise_rate,
                                   class_agnostic, folds=folds)
        self._shuffle = shuffle
        self._is_shuffled = False

    def size(self):
        return self.sampler.cls_reader.nr_images

    def _load_images(self, img_fns):
        imgs = []
        for fn in img_fns:
            img_path = osp.join(self.root, 'Images', fn)
            img = cv2.imread(img_path, cv2.IMREAD_COLOR)
            if self.mask_dir:
                # Zero out the masked region if a mask file exists.
                mask_fn = osp.splitext(fn)[0] + '.png'
                mask_path = osp.join(self.root, self.mask_dir, mask_fn)
                if osp.exists(mask_path):
                    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
                    img[mask > 0, :] = 0
            imgs.append(img)
        return imgs

    def get_data(self):
        if not self._is_shuffled and self._shuffle:
            # self.rng is provided by RNGDataFlow after reset_state();
            # rebuild the sampler once with this per-process RNG.
            def _rng_fn():
                return self.rng

            self.sampler = DataSampler(self.root, self.split, _rng_fn,
                                       self.k_shot, folds=self.folds)
            self._is_shuffled = True
        for _ in range(self.size()):
            img_fns, boxes, classes, target_class = self.sampler.next()
            imgs = self._load_images(img_fns)
            yield [imgs, boxes, classes, target_class]
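# A minimal usage sketch for TensorpackReader; the dataset root and split
# name below are hypothetical. reset_state() is inherited from tensorpack's
# RNGDataFlow and must be called before iteration so that self.rng exists.
reader = TensorpackReader('/data/fewshot_root', 'train', k_shot=5, shuffle=True)
reader.reset_state()
for imgs, boxes, classes, target_class in reader.get_data():
    print(len(imgs), target_class)
    break  # inspect a single episode only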
import json
import os

import tensorflow as tf

from data_sampler import DataSampler


def load_configs(path):
    config_path = os.path.join(path, 'configs.json')
    print('Load configurations from {}'.format(config_path))
    with open(config_path) as file:
        saved = json.load(file)

    # Wrap the saved dict so entries are accessible as attributes.
    class Config:
        def __init__(self, **entries):
            self.__dict__.update(entries)

    return Config(**saved)


""" MAIN PROGRAM """
# FLAGS and gpu_options are defined earlier in the script.
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    data_sampler = DataSampler(FLAGS.DATA_PATH, FLAGS.LABEL_PATH,
                               FLAGS.TRAINSET_RATIO,
                               SINGLE_TIME=FLAGS.single_time_step)
    num_train_data = data_sampler.num_train
    num_test_data = data_sampler.num_test

    logfile = os.path.join('logs', FLAGS.task_name)
    ckptfile = os.path.join('checkpoints', FLAGS.task_name)
    outfile = os.path.join('checkpoints', 'lstmw_' + FLAGS.task_name)
    if not tf.gfile.Exists(logfile):
        tf.gfile.MakeDirs(logfile)
    if not tf.gfile.Exists(ckptfile):
        tf.gfile.MakeDirs(ckptfile)
    if not FLAGS.is_train and not tf.gfile.Exists(outfile):
import json
import os
import sys

import tensorflow as tf

from data_sampler import DataSampler


def load_configs(path):
    config_path = os.path.join(path, 'configs.json')
    print('Load configurations from {}'.format(config_path))
    with open(config_path) as file:
        saved = json.load(file)

    # Wrap the saved dict so entries are accessible as attributes.
    class Config:
        def __init__(self, **entries):
            self.__dict__.update(entries)

    return Config(**saved)


""" MAIN PROGRAM """
# FLAGS and gpu_options are defined earlier in the script.
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    data_sampler = DataSampler(FLAGS.DATA_PATH, FLAGS.LABEL_PATH,
                               FLAGS.TRAINSET_RATIO)
    num_train_data = data_sampler.num_train
    num_test_data = data_sampler.num_test

    ckptfile = os.path.join('checkpoints', FLAGS.model_name)
    if not tf.gfile.Exists(ckptfile):
        sys.exit("Model {} does not exist!".format(FLAGS.model_name))
    if not tf.gfile.Exists(FLAGS.output_name):
        tf.gfile.MakeDirs(FLAGS.output_name)
    CONFIGS = load_configs(ckptfile)

    print('=== LOSS SPECTRUM ANALYSIS ===')
    x = tf.placeholder(tf.float32, [None, 1, data_sampler.x_dim], name='x')
if "use_context" in params: use_context = params["use_context"] evaluator = Evaluator(recommender, sampler) results = [] for n in nvalues: t1 = time.time() hrn = evaluator.hit_ratio_total(n, use_context=use_context) results.append(hrn) print "elapsed time %.2fmin"%(float(time.time()-t1)/60.) print results def do_evaluation(sampler): eval_algorithm(sampler, algorithm_cls=UserCF) params = { "use_dow" : True, "use_tod" : False } eval_algorithm(sampler, algorithm_cls=ReductionUserCF, **params) if __name__=="__main__": sampler = DataSampler(log_perc=[25,25]) sampler.load() do_evaluation(sampler)
import numpy as np

from data_sampler import DataSampler

dir_with_pickles = "./Data/Bitfinex/BTC-USD/"
save_to = "./Data/Bitfinex/Processed/"
timedelta = np.timedelta64(5, "s")

DataSampler.sample_files_from_dir(dir_with_pickles, save_to, timedelta)
import numpy as np

from data_sampler import DataSampler
from feature_manager import FeatureManager

basic_path = "./Data/Poloniex/"

# Resample the raw trade files of each pair onto a 30-second grid.
for pair in ["BTCETH", "BTCGAS", "BTCXRP"]:
    print("Extracting " + pair)
    DataSampler.sample_files_from_dir(
        target_dir=basic_path + pair + "/",
        save_to_dir=basic_path + pair + "/",
        time_delta=np.timedelta64(30, "s"),
        file_type="csv",
        start_date=np.datetime64("2018-01-21T00:00:00"),
        end_date=np.datetime64("2018-02-06T14:21:00"),
        name=pair + "_30")

# Enrich each resampled dataset with technical indicators.
for pair in ["BTCETH", "BTCGAS", "BTCXRP"]:
    feature_manager = FeatureManager()
    feature_manager.read_dataset_from_csv("{}{}/{}_30.csv".format(
        basic_path, pair, pair))
    print(feature_manager.get_trades_df().head())
    feature_manager.add_indicators()
    feature_manager.save_dataset_to_csv("{}{}/all_in_one_30.csv".format(
        basic_path, pair))
#%% Check the fake-noise factor in the data sampler.
# discr_size_fd, scale_code, Tint and the interval variables are defined in
# the cells above; plt is matplotlib.pyplot.
noise_i_path = r'corr_noise_generator/outputs/i_channel/*.csv'
noise_q_path = r'corr_noise_generator/outputs/q_channel/*.csv'

for cn0_log in [20, 30, 40, 50, 60]:
    print('----', cn0_log)
    data_sampler = DataSampler(
        discr_size_fd=discr_size_fd,
        scale_code=scale_code,
        Tint=Tint,
        multipath_option=False,
        delta_tau_interv=delta_tau_interv,
        delta_dopp_interv=delta_dopp_interv,
        delta_phase=delta_phase,
        alpha_att_interv=alpha_att_interv,
        tau=tau_interval,
        dopp=dopp_interval,
        cn0_log=cn0_log
    )
    nb_samples = 1
    data_sampler.read_noise(noise_i_path, noise_q_path,
                            matrix_shape=(discr_size_fd, scale_code),
                            nb_samples=nb_samples)
    data_sampler.generate_corr(nb_samples=nb_samples)
    matr_i, matr_q = data_sampler.sum_matr(save_csv=False)
    plt.imshow(matr_i[0, ...])
if "use_context" in params: use_context = params["use_context"] evaluator = Evaluator(recommender, sampler) results = [] for n in nvalues: t1 = time.time() hrn = evaluator.hit_ratio_total(n, use_context=use_context) results.append(hrn) print "elapsed time %.2fmin" % (float(time.time() - t1) / 60.) print results def do_evaluation(sampler): eval_algorithm(sampler, algorithm_cls=UserCF) params = {"use_dow": True, "use_tod": False} eval_algorithm(sampler, algorithm_cls=ReductionUserCF, **params) if __name__ == "__main__": sampler = DataSampler(log_perc=[25, 25]) sampler.load() do_evaluation(sampler)
import tensorflow as tf

from data_sampler import DataSampler
from democonfig import DemoConfig
from seq2seq import Seq2SeqModel

with tf.Session() as sess:
    config = DemoConfig()
    data = DataSampler()
    model = Seq2SeqModel(config, data)
    model.train(sess)