def main(is_adv):
    packets = (iv_list, ephid_list, tag_list) = read_packets()  # Read all the packets received
    print('#EphIDs:', len(packets[1]))
    keys = (public_key_list, sk_list) = read_keys()  # Read all the public keys of infected users received
    print('#SK:', len(keys[1]))
    for (public_key, sk) in zip(*keys):
        encryptor = Encryptor(sk)
        for (iv, ephid, tag) in zip(*packets):
            ciphertext = encryptor.encrypt(iv=iv)  # Generate the EphIDs corresponding to each SK
            ephids = ciphertext[IV_SIZE:]
            blocks = split_sequence(ephids, N)
            advtag = tag[:-1] + token_bytes(1)  # The tag computed by an adversary
            # Check if one of the received EphIDs can be generated by SK
            if ephid not in blocks:
                continue
            else:
                # the tag to send is the received one itself if the user is honest
                tag_to_send = advtag if is_adv else tag
                print(ephid.hex())
                retval = verify(public_key, ephid, tag_to_send)  # True if the tag is honest, False otherwise
                print(retval)
                data = public_key + ephid + tag_to_send  # Send <pk,ephid,tag> to server
                send_data_to_server(data)

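# NOTE: main() above and getEphID() further down both call a helper named
# split_sequence(seq, n) on a ciphertext byte string. That helper is not part
# of this collection; judging from the len(ephid_list) == N check in getEphID(),
# the second argument appears to be the number of equal-length blocks to
# produce. A minimal sketch under that assumption (name and semantics are
# inferred, not taken from the original project):
def split_sequence(seq, n):
    """Split ``seq`` into ``n`` equal-length blocks; assumes len(seq) % n == 0."""
    block_size = len(seq) // n
    return [seq[i * block_size:(i + 1) * block_size] for i in range(n)]
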
def find_division(self, elements, clusters, cluster):
    """Find the division with the given cluster as separator.

    Parameters
    ----------
    elements: list
        a list of the HTML elements
    clusters: list
        a list of the cluster id for each element in ``elements``
    cluster: int
        the cluster id used to split the elements

    Returns
    -------
    A list of ``Record``
    """
    assert len(elements) == len(clusters)

    if len(set(clusters)) == len(clusters):
        return None

    records = []
    for group in split_sequence(zip(elements, clusters), lambda x: x[1] == cluster):
        _clusters = [g[1] for g in group]
        if len(_clusters) < len(clusters):
            records.append(Record(*[g[0] for g in group]))
    return records

def predict(training_input, test_input):
    X, y = split_sequence(training_input, n_steps)
    model = fit_model(X, y)
    x_input = array(test_input)
    x_input = x_input.reshape((1, n_steps))
    pred = model.predict(x_input, verbose=0)
    print("MLP prediction: " + str(pred))

def predict(training_input, test_input):
    X, y = split_sequence(training_input, n_steps)
    X = X.reshape((X.shape[0], X.shape[1], n_features))
    model = fit_model(X, y)
    x_input = array(test_input)
    x_input = x_input.reshape((1, n_steps, n_features))
    pred = model.predict(x_input, verbose=0)
    print("GRU prediction: " + str(pred))

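# NOTE: the two predict() variants above (MLP and GRU) assume a
# split_sequence(sequence, n_steps) that frames a univariate series as a
# supervised-learning problem: each sample is n_steps consecutive values and
# the target is the value that follows. The helper is not shown here; a
# minimal sketch under that assumption:
from numpy import array


def split_sequence(sequence, n_steps):
    X, y = list(), list()
    for i in range(len(sequence) - n_steps):
        X.append(sequence[i:i + n_steps])  # input window of n_steps values
        y.append(sequence[i + n_steps])    # single value to predict
    return array(X), array(y)
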
def __init__(self, data_dir, lookahead, tau, k, max_len, n_groups=1, dim=3, normalize=False):
    self.data_dir = data_dir
    self.lookahead = lookahead
    self.tau = tau
    self.k = k
    self.max_len = max_len
    self.data_dim = dim

    all_entire_trajectories = load_h5_data(data_dir)
    for n in range(n_groups):
        entire_trajectory = all_entire_trajectories[n][:, :dim]

        # shorten trajectory if less data is wanted
        if len(entire_trajectory) > self.max_len:
            entire_trajectory = entire_trajectory[:self.max_len]
        else:
            logging.warning("max_len = {} exceeds maximum length of dataset trajectory = {}".format(
                self.max_len, len(entire_trajectory)))

        # split trajectory and write it into histories and futures
        if n == 0:
            self.histories, self.futures = split_sequence(entire_trajectory, self.lookahead, self.tau, self.k)
        else:
            histories, futures = split_sequence(entire_trajectory, self.lookahead, self.tau, self.k)
            self.histories = torch.cat((self.histories, histories), dim=0)
            self.futures = torch.cat((self.futures, futures), dim=0)

    # normalize the data w.r.t. the maximum value
    dims = len(self.histories[0, 0, :])
    for dim in range(dims):
        max_value = torch.max(self.histories[:, :, dim])
        if normalize:
            self.histories[:, :, dim] = self.histories[:, :, dim] / max_value
            self.futures[:, :, dim] = self.futures[:, :, dim] / max_value

    self.n_samples = int(self.histories.size()[0])

    logging.info("\nConstructed 3D Lorenz dataset with: ")
    logging.info("  lookahead: {} | tau: {} | k: {}".format(self.lookahead, self.tau, self.k))
    logging.info("  size of histories: {} | size of futures: {}\n".format(self.histories.size(), self.futures.size()))

def getEphID(sk):
    """Returns the current EphID to broadcast. It changes every L minutes.

    For example, the first EphID will be broadcast for the first L minutes of the day;
    the second EphID will be broadcast for the second L minutes of the day, and so on.

    :param sk: a {Public,Private}SK object storing the information about the user's SK,
        used for deciding where the file containing the EphIDs is supposed to be stored"""
    with open(os.path.join(sk.directory(), CIPHERTEXT_FILE), "rb") as f:
        ciphertext = f.read()
    ephids = ciphertext[IV_SIZE:]
    ephid_list = split_sequence(ephids, N)
    if len(ephid_list) != N:
        raise ValueError('Not enough EphIDs in ciphertext')
    minutes_from_midnight = get_current_minutes()
    ephid = ephid_list[minutes_from_midnight // L]
    return ephid

def find_best_division(self, elements, clusters):
    """Find the best record division.

    Parameters
    ----------
    elements: list
        a list of the HTML elements
    clusters: list
        a list of the cluster id for each element in ``elements``

    Returns
    -------
    A list of ``Record``
    """
    assert len(elements) == len(clusters)

    if len(set(clusters)) == len(clusters):
        return None

    all_records = []
    for c in set(clusters):
        records = []
        for group in split_sequence(zip(elements, clusters), lambda x: x[1] == c):
            _clusters = [g[1] for g in group]
            if len(_clusters) < len(clusters):
                records.append(Record(*[g[0] for g in group]))
        if not records:
            continue
        similarities = [self.calculate_record_similarity(r1, r2)
                        for r1, r2 in itertools.combinations(records, 2)]
        average_sim = sum(similarities) / (len(similarities) + 1)
        all_records.append([average_sim, records])
    return max(all_records, key=operator.itemgetter(0))[1]

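# NOTE: find_division() and find_best_division() above iterate over
# split_sequence(zip(elements, clusters), predicate), treating elements whose
# cluster id matches the predicate as record separators. The real helper is
# not included in this collection; one plausible sketch of that grouping
# behaviour (an assumption, not the original implementation):
def split_sequence(iterable, predicate):
    groups = []
    for item in iterable:
        # a matching item starts a new group; leading non-matching items form
        # their own group so nothing is dropped
        if predicate(item) or not groups:
            groups.append([])
        groups[-1].append(item)
    return groups
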
def __init__(self, data_dir, lookahead, tau, k, max_len, normalize=False):
    super(VanDerPol, self).__init__()
    self.data_dir = data_dir
    self.lookahead = lookahead
    self.tau = tau
    self.k = k
    self.max_len = max_len
    self.data_dim = 3

    # load simulated data
    self.traj = torch.load(self.data_dir)

    # shorten trajectory if less data is wanted
    if len(self.traj) > self.max_len:
        self.traj = self.traj[:self.max_len]
    else:
        logging.warning("max_len = {} exceeds maximum length of dataset trajectory = {}".format(
            self.max_len, len(self.traj)))

    # split the trajectory
    self.histories, self.futures = split_sequence(self.traj, self.lookahead, self.tau, self.k)

    # normalize data if specified
    if normalize:
        dims = len(self.histories[0, 0, :])
        for dim in range(dims):
            max_value = torch.max(self.histories[:, :, dim])
            self.histories[:, :, dim] = self.histories[:, :, dim] / max_value
            self.futures[:, :, dim] = self.futures[:, :, dim] / max_value

    self.n_samples = int(self.histories.size()[0])

    logging.info("\nConstructed Van der Pol dataset with: ")
    logging.info("  lookahead: {} | tau: {} | k: {}".format(self.lookahead, self.tau, self.k))
    logging.info("  size of histories: {} | size of futures: {}\n".format(self.histories.size(), self.futures.size()))

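# NOTE: the Lorenz and Van der Pol dataset constructors above expect a
# split_sequence(trajectory, lookahead, tau, k) that returns two 3-D tensors:
# `histories` of shape (n_samples, k, dim), holding k past states sampled
# every tau steps, and `futures` of shape (n_samples, lookahead, dim), holding
# the lookahead states that follow. That reading is inferred from how the
# tensors are indexed and concatenated, not taken from the original project;
# a sketch under that assumption:
import torch


def split_sequence(trajectory, lookahead, tau, k):
    history_span = (k - 1) * tau
    histories, futures = [], []
    for t in range(history_span, len(trajectory) - lookahead):
        histories.append(torch.as_tensor(trajectory[t - history_span:t + 1:tau]))
        futures.append(torch.as_tensor(trajectory[t + 1:t + 1 + lookahead]))
    return torch.stack(histories), torch.stack(futures)
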
def stacked_lstm_multi_step_forecast(series, validation_series, input_length, horizon,
                                     del_outliers=False, normalize=False, plot=False):
    """
    Perform forecasting of a time series using an LSTM neural network. The network is trained using samples of shape
    input_length (corresponding to the last input_length days) to predict an array of horizon values (corresponding
    to horizon days). In this case, the network predicts horizon days at a time.

    Performance of the trained network is assessed on a validation series. The size of the validation series must be
    horizon.

    :param series:
    :param validation_series:
    :param input_length:
    :param horizon:
    :param del_outliers:
    :param normalize:
    :param plot:
    :return: SMAPE for the validation series, the forecast validation series
    """
    # whether to remove outliers in the training series
    if del_outliers:
        working_series = remove_outliers(series)
    else:
        working_series = series

    # whether to normalize the training series
    if normalize:
        scaler, working_series = normalize_series(working_series)
    else:
        scaler = None

    # input sequence is our data, np.log1p is applied to the data and mae error is used to approximate SMAPE error
    train_series = np.log1p(working_series)

    # we use the last n_steps_in days as input and predict n_steps_out
    n_steps_in, n_steps_out = input_length, horizon

    # split into samples
    train_samples, train_targets = split_sequence(train_series, n_steps_in, n_steps_out)

    # here we work with the original series so only the actual values
    n_features = 1
    train_samples = train_samples.reshape((train_samples.shape[0], train_samples.shape[1], n_features))

    # create the model
    model = Sequential()
    model.add(LSTM(256, activation='relu', input_shape=(n_steps_in, n_features)))
    # we predict n_steps_out values
    model.add(Dense(n_steps_out))
    # we use 'mae' with data transformed with log1p and expm1 to approach SMAPE error
    model.compile(optimizer='adam', loss='mae')

    # fit model
    model.fit(train_samples, train_targets, epochs=200, verbose=0)

    # perform prediction
    # input is the last n_steps_in values of the train series (working_series is not log1p transformed)
    validation_in_sample = np.log1p(np.array(working_series.values[-n_steps_in:]))
    validation_in_sample = validation_in_sample.reshape((1, n_steps_in, n_features))
    validation_forecast = model.predict(validation_in_sample, verbose=0)

    # dataframe which contains the result
    forecast_dataframe = pd.DataFrame(index=validation_series.index)

    # if data was normalized, we need to apply the reverse transform
    if normalize:
        # first reverse log1p using expm1
        validation_forecast = np.expm1(validation_forecast)

        # use scaler to reverse normalizing
        denormalized_forecast = scaler.inverse_transform(validation_forecast.reshape(-1, 1))
        denormalized_forecast = [val[0] for val in denormalized_forecast]

        # save the forecast in the dataframe
        forecast_dataframe['forecast'] = denormalized_forecast
    else:
        # save the forecast in the dataframe (flatten the (1, horizon) prediction into a column)
        forecast_dataframe['forecast'] = np.expm1(validation_forecast.flatten())

    if plot:
        plt.figure(figsize=(10, 6))

        plt.plot(series[-100:], color="blue", linestyle="-")
        plt.plot(validation_series, color="green", linestyle="-")
        plt.plot(forecast_dataframe, color="red", linestyle="--")

        plt.legend(["Train series", "Validation series", "Predicted series"])
        plt.title("Validation of LSTM with input size " + str(n_steps_in) + " output size " + str(n_steps_out))

        plt.show()

    return smape(validation_series, forecast_dataframe['forecast']), forecast_dataframe['forecast']

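# NOTE: stacked_lstm_multi_step_forecast() above and nn_single_step_forecast()
# below rely on a split_sequence(sequence, n_steps_in, n_steps_out) that cuts
# a series into samples of n_steps_in inputs followed by n_steps_out targets,
# the usual multi-step windowing helper. It is not included in this collection;
# a minimal sketch under that assumption (expects something indexable like a
# 1-D array or list):
import numpy as np


def split_sequence(sequence, n_steps_in, n_steps_out):
    X, y = list(), list()
    for i in range(len(sequence) - n_steps_in - n_steps_out + 1):
        X.append(sequence[i:i + n_steps_in])                              # input window
        y.append(sequence[i + n_steps_in:i + n_steps_in + n_steps_out])   # target window
    return np.array(X), np.array(y)
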
def create_fit_model(window, columns_to_predict, columns_indicators, normalize_data=True, exchange="huobi"):
    set_tf_gpu(False)
    exchange_data, columns_equivalences = load_data(exchange=exchange)

    arc = MLArchitect(x_data=exchange_data, y_data=None, is_x_flat=True, save_x_path="saved_data/x.csv",
                      display_indicators_callable=True, data_enhancement=True,
                      columns_equivalences=columns_equivalences,
                      save_normalize_x_model="saved_data/x_norm_model.mod",
                      save_normalize_y_model="saved_data/y_norm_model.mod",
                      save_y_path="saved_data/y.csv", y_restoration_routine="default",
                      index_infer_datetime_format=True, pca_reductions=[('linear', 0.99)],
                      columns_to_predict=columns_to_predict, window_prediction=window,
                      columns_indicators=columns_indicators, test_size=0.01, ml_model=None,
                      save_ml_path="models/ml_model.h5", is_parallel=False, disgard_last=True,
                      window_tuple=(7, 14, 21, 6))

    # arc = MLArchitect(x_data="saved_data/x.csv", y_data="saved_data/y.csv",
    #                   learning_indicators_callable=None, display_indicators_callable=None,
    #                   normalize_x_callable="saved_data/x_norm_model.mod",
    #                   normalize_y_callable="saved_data/y_norm_model.mod",
    #                   index_col='id', pca_reductions=[('linear', 0.99)],
    #                   window_prediction=4, test_size=0.20)

    if normalize_data:
        x, y = arc.get_normalized_data(arc.x, arc.y, None, arc.index_min, arc.index_max)
    else:
        x, y = arc.x.loc[arc.index_min:arc.index_max], arc.y.loc[arc.index_min:arc.index_max]

    n_steps_in, n_steps_out = 3, None
    n_shape, n = x.shape, 10
    x_train, y_train = split_sequence(x.iloc[:-n].values, y.iloc[:-n].values, n_steps_in, n_steps_out)
    x_test, y_test = split_sequence(x.iloc[-n:].values, y.iloc[-n:].values, n_steps_in, n_steps_out)

    # Init the ML model
    n_input, n_features, n_output, n_init_neurons = x.shape[1], x.shape[1], y_train.shape[1], 100

    # n_seq = 2
    # n_steps_in = int(n_steps_in / n_seq)
    # x_train = x_train.reshape((x_train.shape[0], n_seq, n_steps_in, n_features))
    # x_test = x_test.reshape((x_test.shape[0], n_seq, n_steps_in, n_features))

    ml_model = MLArchitect.keras_build_model(0, 0, 0, n_input, n_output, n_steps_in, n_steps_out,
                                             n_features, n_init_neurons)
    ml_model.summary()
    arc.ml_init_model(ml_model)

    # Fit the model
    prefit_sample_data = exchange_data.loc[exchange_data.index[-500:], ['close', 'open', 'high', 'low']]
    history = arc.fit(x=x_train, y=y_train, epochs=10000, batch_size=50, verbose=1, shuffle=True,
                      validation_split=0.2, prefit_sample_data=prefit_sample_data, prefit_simulation_size=10000,
                      callbacks=[keras.callbacks.EarlyStopping('loss', min_delta=1e-5, patience=200, verbose=1,
                                                               restore_best_weights=True),
                                 keras.callbacks.ModelCheckpoint("models/best_model_2.h5",
                                                                 monitor='keras_r2_score', verbose=1,
                                                                 save_best_only=True, mode='max')])

    err_res = dict(zip(ml_model.metrics_names, ml_model.evaluate(x_test, y_test)))

    y_pred = arc.norm_output_inverse_transform(ml_model.predict(x_test)) if normalize_data \
        else ml_model.predict(x_test)
    mde = arc.mean_directional_accuracy(y_test, y_pred.values)
    mae = arc.mean_absolute_error(y_test, y_pred.values)
    err_res.update({'mean_directional_accuracy': mde, 'mean_absolute_error': mae})
    print(err_res)

    return arc, exchange_data

def nn_single_step_forecast(series, validation_series, input_length, horizon,
                            del_outliers=False, normalize=False, plot=False):
    """
    Perform forecasting of a time series using a simple neural network with a single 128-neuron hidden layer. The
    network is trained using samples of shape input_length (corresponding to the last input_length days) to predict
    an array of horizon values (corresponding to horizon days). In this case, the network predicts one day at a time.

    Performance of the trained network is assessed on a validation series. This is computed by repeating one day
    predictions and shifting the input values. The size of the validation series must be horizon.

    :param series:
    :param validation_series:
    :param input_length:
    :param horizon:
    :param del_outliers:
    :param normalize:
    :param plot:
    :return: SMAPE for the validation series, the forecast validation series
    """
    # whether to remove outliers in the training series
    if del_outliers:
        working_series = remove_outliers(series)
    else:
        working_series = series

    # whether to normalize the training series
    if normalize:
        scaler, working_series = normalize_series(working_series)
    else:
        scaler = None

    # input sequence is our data, np.log1p is applied to the data and mae error is used to approximate SMAPE error
    train_series = np.log1p(working_series)

    # we use the last n_steps_in days as input and predict one step
    n_steps_in, n_steps_out = input_length, 1

    # split into samples
    train_samples, train_targets = split_sequence(train_series, n_steps_in, n_steps_out)

    # create the model
    model = Sequential()
    model.add(Dense(128, activation='relu', input_dim=n_steps_in))
    # we predict n_steps_out values
    model.add(Dense(n_steps_out))
    # we use 'mae' with data transformed with log1p and expm1 to approach SMAPE error
    model.compile(optimizer='adam', loss='mae')

    # fit model
    model.fit(train_samples, train_targets, epochs=200, verbose=0)

    # perform prediction
    # we start by transforming the normalized series into log1p, new one day predictions will be added to this series
    # as we predict them and these predictions will be used for the next forecasting step
    working_series_values = np.log1p(working_series.values)

    # perform horizon predictions
    for i in range(horizon):
        validation_in_sample = np.array(working_series_values[-n_steps_in:])
        validation_in_sample = validation_in_sample.reshape((1, n_steps_in))

        validation_forecast = model.predict(validation_in_sample, verbose=0)
        working_series_values = np.append(working_series_values, validation_forecast)

    # take the last horizon values from the series (this is the forecast for the validation series)
    validation_forecast = working_series_values[-horizon:]

    # dataframe which contains the result
    forecast_dataframe = pd.DataFrame(index=validation_series.index)

    # if data was normalized, we need to apply the reverse transform
    if normalize:
        # first reverse log1p using expm1
        validation_forecast = np.expm1(validation_forecast)

        # use scaler to reverse normalizing
        denormalized_forecast = scaler.inverse_transform(validation_forecast.reshape(-1, 1))
        denormalized_forecast = [val[0] for val in denormalized_forecast]

        # save the forecast in the dataframe
        forecast_dataframe['forecast'] = denormalized_forecast
    else:
        # save the forecast in the dataframe
        forecast_dataframe['forecast'] = np.expm1(validation_forecast)

    if plot:
        plt.figure(figsize=(10, 6))

        plt.plot(series[-100:], color="blue", linestyle="-")
        plt.plot(validation_series, color="green", linestyle="-")
        plt.plot(forecast_dataframe, color="red", linestyle="--")

        plt.legend(["Train series", "Validation series", "Predicted series"])
        plt.title("Validation of single step NN with input size " + str(n_steps_in) +
                  " output size " + str(n_steps_out))

        plt.show()

    return smape(validation_series, forecast_dataframe['forecast']), forecast_dataframe['forecast']