def run_preprocess(argv=None):
    """Run the retrieval and preprocessing of the data.

    Args:
        argv: command-line arguments passed when submitting the training job.
    """
    logging.info('starting preprocessing of data..')
    args = parse_arguments(sys.argv if argv is None else argv)
    tickers = [
        'snp', 'nyse', 'djia', 'nikkei', 'hangseng', 'ftse', 'dax', 'aord'
    ]
    closing_data = preprocess.load_data(tickers, args.es_address,
                                        args.cutoff_year)
    time_series = preprocess.preprocess_data(closing_data)
    logging.info('preprocessing of data complete..')

    logging.info('starting upload of the preprocessed data to Ceph..')
    temp_folder = 'data'
    if not os.path.exists(temp_folder):
        os.mkdir(temp_folder)
    file_path = os.path.join(temp_folder,
                             'data_{}.csv'.format(args.cutoff_year))
    time_series.to_csv(file_path, index=False)
    storage_helper.upload_to_storage(args.bucket, temp_folder,
                                     args.endpoint_url, args.access_key,
                                     args.secret_key)
    shutil.rmtree(temp_folder)
    if args.kfp:
        # When running as a Kubeflow Pipelines step, expose the output
        # location so downstream pipeline components can pick it up.
        with open("/store_path.txt", "w") as output_file:
            output_file.write(file_path)
    logging.info('upload of the preprocessed data to Ceph completed..')
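
# Usage sketch (illustrative, not part of the original module): the flag
# names below are assumptions inferred from the attributes read off `args`
# above; check parse_arguments() for the real interface, including whether
# it expects argv[0] to be the program name (as sys.argv has it).
def _demo_run_preprocess():
    logging.basicConfig(level=logging.INFO)
    run_preprocess([
        'preprocess.py',  # placeholder argv[0]
        '--es_address', 'elasticsearch:9200',
        '--cutoff_year', '2010',
        '--bucket', 'stock-data',
    ])
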
def send_practical_request(date="2014-08-12"):
    """Obtain the prediction for a certain date in the test set.

    Args:
        date (str): date for which to request a prediction.
    """
    # Create the model input from the request date.
    tickers = [
        'snp', 'nyse', 'djia', 'nikkei', 'hangseng', 'ftse', 'dax', 'aord'
    ]
    closing_data = preprocess.load_data(tickers)
    # Shift the index back by 7 because the first 7 days are dropped from
    # the time series during preprocessing.
    index = closing_data.index.get_loc(date) - 7
    training_test_data = preprocess.preprocess_data(closing_data)
    input_tensor = np.expand_dims(
        training_test_data[training_test_data.columns[2:]].values[index],
        axis=0).astype(np.float32)
    request_helper.send_request(input_tensor)
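
# Usage sketch (hypothetical): request a prediction for a date that falls
# inside the test set. Assumes a serving endpoint is reachable through
# request_helper.send_request().
def _demo_send_practical_request():
    send_practical_request(date="2014-08-12")
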
def fit(params):
    """Fit the polynomial model, which includes the IQ mixer."""
    ##### Load and prepare data #####
    x, y, noise, measured_noise_power = load_data(
        'data/fdTestbedData' + str(params.sampling_freq_MHz) + 'MHz10dBm',
        params)

    # Total number of real parameters to estimate, including the linear ones.
    n_poly = int(params.hsi_len * ((params.max_power + 1) / 2) *
                 ((params.max_power + 1) / 2 + 1))
    print("Total number of real parameters to estimate for polynomial "
          "based canceller: {:d}".format(2 * n_poly))

    # Split into training and test sets.
    training_samples = int(np.floor(x.size * params.training_ratio))
    x_train = x[0:training_samples]
    y_train = y[0:training_samples]
    x_test = x[training_samples:]
    y_test = y[training_samples:]

    # Shrink the training set when training with fewer samples.
    training_samples = int(np.floor(training_samples * params.training_size))
    x_train = x[0:training_samples]
    y_train = y[0:training_samples]

    ##### Training #####
    # Estimate linear and non-linear cancellation parameters.
    h_lin = poly.si_estimation_linear(x_train, y_train, params)
    # The non-linear estimation also covers the linear part, so we get both.
    h_nonlin = poly.si_estimation_nonlinear(x_train, y_train, params)

    ##### Test #####
    # Perform linear and non-linear cancellation.
    y_canc = poly.si_cancellation_linear(x_test, h_lin, params)
    # NB: y_canc_nonlin contains the linear cancellation AND the non-linear
    # cancellation.
    y_canc_nonlin = poly.si_cancellation_nonlinear(x_test, h_nonlin, params)

    ##### Evaluation #####
    # Scale signals according to the known noise power.
    y_test, y_canc, y_canc_nonlin, noise = sp.compute_scaling(
        noise, measured_noise_power, y_test, y_canc, y_canc_nonlin)

    path = "results/results_wlmp" if params.save else "results/tmp_wlmp"
    if not os.path.exists(path):
        os.makedirs(path)

    # Plot the PSD and get the signal powers.
    noise_power, y_test_power, y_test_lin_canc_power, y_test_nonlin_canc_power = sp.plotPSD(
        params,
        y_test[params.hsi_len:],
        y_canc[params.hsi_len:],
        y_canc_nonlin[params.hsi_len:],
        noise,
        y_var=1,
        path=path)

    # The non-linear canceller performs the total cancellation, so model_canc
    # is the gain of the non-linear canceller over the purely linear one.
    model_canc = y_test_lin_canc_power - y_test_nonlin_canc_power
    # Total cancellation of the model.
    total_canc = y_test_power - y_test_nonlin_canc_power

    # Complexity of the non-linear part only: subtract the linear terms here
    # and add them back below.
    n_cadd = n_poly - params.hsi_len - 1
    n_cmult = n_poly - params.hsi_len

    # Convert complex-valued additions and multiplications to total
    # real-valued operations.
    model_add, model_mult = flop_convert(n_cadd, n_cmult,
                                         algo="reduced_cmult")
    model_act = 0
    model_flop = model_add + model_mult

    total_add = model_add
    total_mult = model_mult
    total_act = 0
    lin_add, lin_mult, lin_act = flop_linear_polynomial(params,
                                                        algo="reduced_cmult")
    total_add += lin_add + 2
    total_mult += lin_mult
    total_act += lin_act
    total_flop = total_add + total_mult + total_act

    # Convert to the number of real-valued parameters.
    total_params = 2 * n_poly

    data = OrderedDict([
        ('total_flop', total_flop),
        ('total_add', total_add),
        ('total_mult', total_mult),
        ('total_canc_max', total_canc),
        ('total_params', total_params),
        ('model_flop', model_flop),
        ('model_add', model_add),
        ('model_mult', model_mult),
        ('model_act', model_act),
        ('model_canc', model_canc),
        ('training_size', params.training_size),
        ('min_power', params.min_power),
        ('max_power', params.max_power),
    ])
    file_path = path + os.sep + "wlmp.csv"
    df = pd.DataFrame(data, columns=data.keys(), index=[0])
    if os.path.exists(file_path):
        df_restored = pd.read_csv(file_path)
        # DataFrame.append was removed in pandas 2.0; use pd.concat instead.
        df = pd.concat([df, df_restored])
    df = df.sort_values(by=['total_flop'])
    df.to_csv(file_path, index=False)
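
# Usage sketch (hypothetical): the attribute names below mirror exactly what
# fit() reads from `params`; the values are illustrative only, and the real
# defaults live in the repo's parameter parsing.
def _demo_fit():
    from types import SimpleNamespace
    params = SimpleNamespace(
        sampling_freq_MHz=20,  # selects data/fdTestbedData20MHz10dBm
        hsi_len=13,            # canceller memory length (number of taps)
        min_power=1,           # minimum non-linearity order
        max_power=7,           # maximum non-linearity order
        training_ratio=0.9,    # train/test split ratio
        training_size=1.0,     # fraction of the training set actually used
        save=False,            # write to results/tmp_wlmp, not results_wlmp
    )
    fit(params)
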