Example #1
def main(is_adv):
    # Read all the packets received
    packets = (iv_list, ephid_list, tag_list) = read_packets()
    print('#EphIDs:', len(packets[1]))

    # Read all the public keys of infected users received
    keys = (public_key_list, sk_list) = read_keys()
    print('#SK:', len(keys[1]))

    for (public_key, sk) in zip(*keys):
        encryptor = Encryptor(sk)
        for (iv, ephid, tag) in zip(*packets):
            # Generate the EphIDs corresponding to each SK
            ciphertext = encryptor.encrypt(iv=iv)
            ephids = ciphertext[IV_SIZE:]
            blocks = split_sequence(ephids, N)

            # The tag computed by an adversary (last byte replaced with a random one)
            advtag = tag[:-1] + token_bytes(1)

            # Check if one of the received EphIDs can be generated by SK
            if ephid not in blocks:
                continue

            # The tag to send is the received one itself if the user is honest
            tag_to_send = advtag if is_adv else tag
            print(ephid.hex())

            # True if the tag is honest, False otherwise
            retval = verify(public_key, ephid, tag_to_send)
            print(retval)

            # Send <pk, ephid, tag> to the server
            data = public_key + ephid + tag_to_send
            send_data_to_server(data)
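The split_sequence helper itself is not part of this snippet. From the call split_sequence(ephids, N) it appears to cut the decrypted EphID stream into N fixed-size blocks; a minimal sketch under that assumption (block size and error handling are guesses, not the original helper):

def split_sequence(data, n):
    """Split a byte string into n equal-sized blocks (sketch; assumes len(data) is a multiple of n)."""
    block_size = len(data) // n
    return [data[i * block_size:(i + 1) * block_size] for i in range(n)]

For instance, split_sequence(bytes(range(6)), 3) returns [b'\x00\x01', b'\x02\x03', b'\x04\x05'].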
Example #2
    def find_division(self, elements, clusters, cluster):
        """ find the division with given cluster as seperator

        Parameters
        ----------
        elements: list
            a list of the HTML element
        clusters: list
            a list of the cluster id for each element in ``elements``
        cluster: int
            the cluster id to split the elements.

        Returns
        -------
        A list of ``Record``
        """
        assert len(elements) == len(clusters)

        if len(set(clusters)) == len(clusters):
            return None

        records = []

        for group in split_sequence(zip(elements, clusters), lambda x: x[1] == cluster):
            _clusters = [g[1] for g in group]
            if len(_clusters) < len(clusters):
                records.append(Record(*[g[0] for g in group]))

        return records
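In this example split_sequence receives an iterable of (element, cluster) pairs and a predicate, so it presumably splits the sequence into consecutive groups, starting a new group at every item matching the predicate. A minimal sketch under that assumption (the real helper may treat separators differently):

def split_sequence(iterable, predicate):
    """Split an iterable into consecutive groups, starting a new group whenever
    predicate(item) is true (sketch of the assumed behaviour)."""
    groups, current = [], []
    for item in iterable:
        if predicate(item) and current:
            groups.append(current)
            current = []
        current.append(item)
    if current:
        groups.append(current)
    return groups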
Example #3
def predict(training_input, test_input):
    X, y = split_sequence(training_input, n_steps)

    model = fit_model(X, y)

    x_input = array(test_input)
    x_input = x_input.reshape((1, n_steps))

    pred = model.predict(x_input, verbose=0)
    print("MLP prediction: " + str(pred))
Example #4
def predict(training_input, test_input):
    X, y = split_sequence(training_input, n_steps)

    X = X.reshape((X.shape[0], X.shape[1], n_features))

    model = fit_model(X, y)

    x_input = array(test_input)
    x_input = x_input.reshape((1, n_steps, n_features))

    pred = model.predict(x_input, verbose=0)
    print("GRU prediction: " + str(pred))
Example #5
    def __init__(self, data_dir, lookahead, tau, k, max_len, n_groups=1, dim=3, normalize=False):
        self.data_dir = data_dir
        self.lookahead = lookahead
        self.tau = tau
        self.k = k
        self.max_len = max_len
        self.data_dim = dim

        all_entire_trajectories = load_h5_data(data_dir)
        for n in range(n_groups):
            entire_trajectory = all_entire_trajectories[n][:, :dim]

            # shorten trajectory if less data is wanted
            if len(entire_trajectory) > self.max_len:
                entire_trajectory = entire_trajectory[:self.max_len]
            else:
                logging.warning("max_len = {} exceeds maximum length of dataset trajectory = {}".format(
                    self.max_len, len(entire_trajectory)))

            # split trajectory and write it into futures and histories
            if n == 0:
                self.histories, self.futures = split_sequence(entire_trajectory, self.lookahead, self.tau, self.k)
            else:
                histories, futures = split_sequence(entire_trajectory, self.lookahead, self.tau, self.k)
                self.histories = torch.cat((self.histories, histories), dim=0)
                self.futures = torch.cat((self.futures, futures), dim=0)

        # normalize the data w.r.t. the maximum value along each dimension
        if normalize:
            dims = len(self.histories[0, 0, :])
            for dim in range(dims):
                max_value = torch.max(self.histories[:, :, dim])
                self.histories[:, :, dim] = self.histories[:, :, dim] / max_value
                self.futures[:, :, dim] = self.futures[:, :, dim] / max_value

        self.n_samples = int(self.histories.size()[0])
        logging.info("\nConstructed 3D Lorenz dataset with: ")
        logging.info("     lookahead: {} | tau: {} | k: {}".format(self.lookahead, self.tau, self.k))
        logging.info("     size of histories: {} | size of futures: {}\n".format(self.histories.size(), self.futures.size()))
Example #6
def getEphID(sk):
    """Returns the current EphID to broadcast. It changes every L minutes.
    For example, the first EphID will be broadcasted for the first L minutes of the day;
    the second EphID will be broadcasted for the second L minutes of the day, and so on.
    :param sk: a {Public,Private}SK object storing the information about the user's SK,
        used for deciding where the file containing the EphIDs is supposed to be stored"""
    with open(os.path.join(sk.directory(), CIPHERTEXT_FILE), "rb") as f:
        ciphertext = f.read()
    ephids = ciphertext[IV_SIZE:]
    ephid_list = split_sequence(ephids, N)
    if len(ephid_list) != N:
        raise ValueError('Not enough EphIDs in ciphertext')
    minutes_from_midnight = get_current_minutes()
    ephid = ephid_list[minutes_from_midnight // L]
    return ephid
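The get_current_minutes helper is not shown; a minimal sketch, assuming it returns the number of minutes elapsed since local midnight, together with a worked index example:

from datetime import datetime

def get_current_minutes():
    """Minutes elapsed since local midnight (sketch of the assumed helper)."""
    now = datetime.now()
    return now.hour * 60 + now.minute

# With L = 15 minutes per epoch (an assumption), a call at 09:37 gives
# minutes_from_midnight = 577 and 577 // 15 = 38, so ephid_list[38] is returned.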
Example #7
    def find_best_division(self, elements, clusters):
        """find the best record division

        Parameters
        ----------
        elements: list
            a list of the HTML element
        clusters: list
            a list of the cluster id for each element in ``elements``

        Returns
        -------
        A list of ``Record``
        """
        assert len(elements) == len(clusters)

        if len(set(clusters)) == len(clusters):
            return None

        all_records = []

        for c in set(clusters):
            records = []
            for group in split_sequence(zip(elements, clusters), lambda x: x[1] == c):
                _clusters = [g[1] for g in group]
                if len(_clusters) < len(clusters):
                    records.append(Record(*[g[0] for g in group]))

            if not records:
                continue

            # average pairwise similarity; the +1 in the denominator keeps the division safe
            # when only a single record was found (no pairs, empty similarities list)
            similarities = [self.calculate_record_similarity(r1, r2) for r1, r2 in itertools.combinations(records, 2)]
            average_sim = sum(similarities) / (len(similarities) + 1)

            all_records.append([average_sim, records])

        return max(all_records, key=operator.itemgetter(0))[1]
Example #8
    def __init__(self, data_dir, lookahead, tau, k, max_len, normalize=False):
        super(VanDerPol, self).__init__()
        self.data_dir = data_dir
        self.lookahead = lookahead
        self.tau = tau
        self.k = k
        self.max_len = max_len
        self.data_dim = 3

        # load simulated data
        self.traj = torch.load(self.data_dir)

        # shorten trajectory if less data is wanted
        if len(self.traj) > self.max_len:
            self.traj = self.traj[:self.max_len]
        else:
            logging.warning("max_len = {} exceeds maximum length of dataset trajectory = {}".format(
                self.max_len, len(self.traj)))

        # split the trajectory
        self.histories, self.futures = split_sequence(self.traj, self.lookahead, self.tau, self.k)

        # normalize data if specified
        if normalize:
            dims = len(self.histories[0, 0, :])
            for dim in range(dims):
                max_value = torch.max(self.histories[:, :, dim])
                self.histories[:, :, dim] = self.histories[:, :, dim] / max_value
                self.futures[:, :, dim] = self.futures[:, :, dim] / max_value

        self.n_samples = int(self.histories.size()[0])
        logging.info("\nConstructed Van de Pol dataset with: ")
        logging.info("     lookahead: {} | tau: {} | k: {}".format(self.lookahead, self.tau, self.k))
        logging.info("     size of histories: {} | size of futures: {}\n".format(self.histories.size(), self.futures.size()))
Example #9
def stacked_lstm_multi_step_forecast(series,
                                     validation_series,
                                     input_length,
                                     horizon,
                                     del_outliers=False,
                                     normalize=False,
                                     plot=False):
    """
    Perform forecasting of a time series using an lstm neural network. The network is trained using samples of shape
    input_length (corresponding to the last input_length days) to predict an array of horizon values (corresponding to
    horizon days). In this case, the network predicts horizon days at the time. Performance of the trained network is
    assessed on a validation series. The size of the validation series must be horizon.

    :param series:
    :param validation_series:
    :param input_length:
    :param horizon:
    :param del_outliers:
    :param normalize:
    :param plot:
    :return: SMAPE for the validation series, the forecast validation series
    """

    # whether to remove outliers in the training series
    if del_outliers:
        working_series = remove_outliers(series)

    else:
        working_series = series

    # whether to normalize the training series
    if normalize:
        scaler, working_series = normalize_series(working_series)

    else:
        scaler = None

    # input sequence is our data, np.log1p is applied to the data and mae error is used to approximate SMAPE error
    train_series = np.log1p(working_series)

    # we use the last n_steps_in days as input and predict n_steps_out
    n_steps_in, n_steps_out = input_length, horizon

    # split into samples
    train_samples, train_targets = split_sequence(train_series, n_steps_in,
                                                  n_steps_out)

    # here we work with the original series so only the actual values
    n_features = 1
    train_samples = train_samples.reshape(
        (train_samples.shape[0], train_samples.shape[1], n_features))

    # create the model
    model = Sequential()
    model.add(
        LSTM(256, activation='relu', input_shape=(n_steps_in, n_features)))

    # we predict n_steps_out values
    model.add(Dense(n_steps_out))

    # we use 'mae' with data transformed with log1p and expm1 to approach SMAPE error
    model.compile(optimizer='adam', loss='mae')

    # fit model
    model.fit(train_samples, train_targets, epochs=200, verbose=0)

    # perform prediction

    # input is the last n_steps_in values of the train series (working_series has not been log1p transformed yet,
    # so the transform is applied here)
    validation_in_sample = np.log1p(
        np.array(working_series.values[-n_steps_in:]))
    validation_in_sample = validation_in_sample.reshape(
        (1, n_steps_in, n_features))
    validation_forecast = model.predict(validation_in_sample, verbose=0)

    # dataframe which contains the result
    forecast_dataframe = pd.DataFrame(index=validation_series.index)

    # if data was normalized, we need to apply the reverse transform
    if normalize:

        # first reverse log1p using expm1
        validation_forecast = np.expm1(validation_forecast)

        # use scaler to reverse normalizing
        denormalized_forecast = scaler.inverse_transform(
            validation_forecast.reshape(-1, 1))
        denormalized_forecast = [val[0] for val in denormalized_forecast]

        # save the forecast in the dataframe
        forecast_dataframe['forecast'] = denormalized_forecast

    else:

        # save the forecast in the dataframe (flatten the (1, horizon) prediction to match the index length)
        forecast_dataframe['forecast'] = np.expm1(validation_forecast).flatten()

    if plot:
        plt.figure(figsize=(10, 6))

        plt.plot(series[-100:], color="blue", linestyle="-")
        plt.plot(validation_series, color="green", linestyle="-")
        plt.plot(forecast_dataframe, color="red", linestyle="--")

        plt.legend(["Train series", "Validation series", "Predicted series"])

        plt.title("Validation of LSTM with input size " + str(n_steps_in) +
                  " output size " + str(n_steps_out))

        plt.show()

    return smape(
        validation_series,
        forecast_dataframe['forecast']), forecast_dataframe['forecast']
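Here split_sequence is called with three arguments, split_sequence(train_series, n_steps_in, n_steps_out), the multi-step supervised-learning variant (the single-step forecast in Example #11 below uses the same call with n_steps_out = 1). A minimal sketch of that variant, since the original helper is not shown:

import numpy as np

def split_sequence(sequence, n_steps_in, n_steps_out):
    """Sketch: split a univariate sequence into input windows of n_steps_in values
    and target windows of the following n_steps_out values (assumed behaviour)."""
    sequence = np.asarray(sequence)
    X, y = [], []
    for i in range(len(sequence) - n_steps_in - n_steps_out + 1):
        X.append(sequence[i:i + n_steps_in])
        y.append(sequence[i + n_steps_in:i + n_steps_in + n_steps_out])
    return np.array(X), np.array(y)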
Example #10
def create_fit_model(window,
                     columns_to_predict,
                     columns_indicators,
                     normalize_data=True,
                     exchange="huobi"):
    set_tf_gpu(False)
    exchange_data, columns_equivalences = load_data(exchange=exchange)
    arc = MLArchitect(x_data=exchange_data,
                      y_data=None,
                      is_x_flat=True,
                      save_x_path="saved_data/x.csv",
                      display_indicators_callable=True,
                      data_enhancement=True,
                      columns_equivalences=columns_equivalences,
                      save_normalize_x_model="saved_data/x_norm_model.mod",
                      save_normalize_y_model="saved_data/y_norm_model.mod",
                      save_y_path="saved_data/y.csv",
                      y_restoration_routine="default",
                      index_infer_datetime_format=True,
                      pca_reductions=[('linear', 0.99)],
                      columns_to_predict=columns_to_predict,
                      window_prediction=window,
                      columns_indicators=columns_indicators,
                      test_size=0.01,
                      ml_model=None,
                      save_ml_path="models/ml_model.h5",
                      is_parallel=False,
                      disgard_last=True,
                      window_tuple=(7, 14, 21, 6))

    # arc = MLArchitect(x_data="saved_data/x.csv", y_data="saved_data/y.csv",
    #                   learning_indicators_callable=None, display_indicators_callable=None,
    #                   normalize_x_callable="saved_data/x_norm_model.mod",
    #                   normalize_y_callable="saved_data/y_norm_model.mod",
    #                   index_col='id', pca_reductions=[('linear', 0.99)],
    #                   window_prediction=4, test_size=0.20)

    if normalize_data:
        x, y = arc.get_normalized_data(arc.x, arc.y, None, arc.index_min,
                                       arc.index_max)
    else:
        x = arc.x.loc[arc.index_min:arc.index_max]
        y = arc.y.loc[arc.index_min:arc.index_max]

    n_steps_in, n_steps_out = 3, None
    n_shape, n = x.shape, 10

    x_train, y_train = split_sequence(x.iloc[:-n].values, y.iloc[:-n].values,
                                      n_steps_in, n_steps_out)
    x_test, y_test = split_sequence(x.iloc[-n:].values, y.iloc[-n:].values,
                                    n_steps_in, n_steps_out)

    # Init the ML model
    n_input = n_features = x.shape[1]
    n_output, n_init_neurons = y_train.shape[1], 100

    # n_seq = 2
    # n_steps_in = int(n_steps_in / n_seq)
    # x_train = x_train.reshape((x_train.shape[0], n_seq, n_steps_in, n_features))
    # x_test = x_test.reshape((x_test.shape[0], n_seq, n_steps_in, n_features))

    ml_model = MLArchitect.keras_build_model(0, 0, 0, n_input, n_output,
                                             n_steps_in, n_steps_out,
                                             n_features, n_init_neurons)
    ml_model.summary()

    arc.ml_init_model(ml_model)

    # Fit the model
    prefit_sample_data = exchange_data.loc[exchange_data.index[-500:],
                                           ['close', 'open', 'high', 'low']]
    history = arc.fit(
        x=x_train,
        y=y_train,
        epochs=10000,
        batch_size=50,
        verbose=1,
        shuffle=True,
        validation_split=0.2,
        prefit_sample_data=prefit_sample_data,
        prefit_simulation_size=10000,
        callbacks=[
            keras.callbacks.EarlyStopping('loss',
                                          min_delta=1e-5,
                                          patience=200,
                                          verbose=1,
                                          restore_best_weights=True),
            keras.callbacks.ModelCheckpoint("models/best_model_2.h5",
                                            monitor='keras_r2_score',
                                            verbose=1,
                                            save_best_only=True,
                                            mode='max')
        ])

    err_res = dict(
        zip(ml_model.metrics_names, ml_model.evaluate(x_test, y_test)))
    y_pred = arc.norm_output_inverse_transform(ml_model.predict(
        x_test)) if normalize_data else ml_model.predict(x_test)

    mde = arc.mean_directional_accuracy(y_test, y_pred.values)
    mae = arc.mean_absolute_error(y_test, y_pred.values)
    err_res.update({
        'mean_directional_accuracy': mde,
        'mean_absolute_error': mae
    })
    print(err_res)

    return arc, exchange_data
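In this example split_sequence takes separate feature and target arrays, split_sequence(x_values, y_values, n_steps_in, n_steps_out), and is called with n_steps_out=None. The implementation is not shown; a hedged sketch, assuming that with n_steps_out=None the target is the y row aligned with the last timestep of each input window:

import numpy as np

def split_sequence(x_values, y_values, n_steps_in, n_steps_out=None):
    """Sketch only: build supervised samples from aligned feature and target arrays."""
    X, y = [], []
    for i in range(len(x_values) - n_steps_in + 1):
        end = i + n_steps_in
        if n_steps_out is None:
            # target is the y row at the window's last timestep (assumption)
            X.append(x_values[i:end])
            y.append(y_values[end - 1])
        elif end + n_steps_out <= len(y_values):
            # target is the next n_steps_out y rows
            X.append(x_values[i:end])
            y.append(y_values[end:end + n_steps_out])
    return np.array(X), np.array(y)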
Example #11
def nn_single_step_forecast(series,
                            validation_series,
                            input_length,
                            horizon,
                            del_outliers=False,
                            normalize=False,
                            plot=False):
    """
    Perform forecasting of a time series using a simple neural network with a single 128 neurons hidden layer.
    The network is trained using samples of shape input_length (corresponding to the last input_length days) to predict
    an array of horizon values (corresponding to horizon days). In this case, the network predicts one day at the time.
    Performance of the trained network is assessed on a validation series. This is computed by repeating one day
    predictions and shifting the input values. The size of the validation series must be horizon.

    :param series:
    :param validation_series:
    :param input_length:
    :param horizon:
    :param del_outliers:
    :param normalize:
    :param plot:
    :return: SMAPE for the validation series, the forecast validation series
    """

    # whether to remove outliers in the training series
    if del_outliers:
        working_series = remove_outliers(series)

    else:
        working_series = series

    # whether to normalize the training series
    if normalize:
        scaler, working_series = normalize_series(working_series)

    else:
        scaler = None

    # input sequence is our data, np.log1p is applied to the data and mae error is used to approximate SMAPE error
    train_series = np.log1p(working_series)

    # we use the last n_steps_in days as input and predict one step
    n_steps_in, n_steps_out = input_length, 1

    # split into samples
    train_samples, train_targets = split_sequence(train_series, n_steps_in,
                                                  n_steps_out)

    # create the model
    model = Sequential()
    model.add(Dense(128, activation='relu', input_dim=n_steps_in))

    # we predict n_steps_out values
    model.add(Dense(n_steps_out))

    # we use 'mae' with data transformed with log1p and expm1 to approach SMAPE error
    model.compile(optimizer='adam', loss='mae')

    # fit model
    model.fit(train_samples, train_targets, epochs=200, verbose=0)

    # perform prediction

    # we start by applying log1p to the (possibly normalized) series; each new one-day prediction is appended to this
    # series and used as part of the input for the next forecasting step
    working_series_values = np.log1p(working_series.values)

    # perform horizon predictions
    for i in range(horizon):
        validation_in_sample = np.array(working_series_values[-n_steps_in:])
        validation_in_sample = validation_in_sample.reshape((1, n_steps_in))

        validation_forecast = model.predict(validation_in_sample, verbose=0)

        working_series_values = np.append(working_series_values,
                                          validation_forecast)

    # take the last horizon values from the series (this is the forecast for the validation series)
    validation_forecast = working_series_values[-horizon:]

    # dataframe which contains the result
    forecast_dataframe = pd.DataFrame(index=validation_series.index)

    # if data was normalized, we need to apply the reverse transform
    if normalize:

        # first reverse log1p using expm1
        validation_forecast = np.expm1(validation_forecast)

        # use scaler to reverse normalizing
        denormalized_forecast = scaler.inverse_transform(
            validation_forecast.reshape(-1, 1))
        denormalized_forecast = [val[0] for val in denormalized_forecast]

        # save the forecast in the dataframe
        forecast_dataframe['forecast'] = denormalized_forecast

    else:

        # save the forecast in the dataframe
        forecast_dataframe['forecast'] = np.expm1(validation_forecast)

    if plot:
        plt.figure(figsize=(10, 6))

        plt.plot(series[-100:], color="blue", linestyle="-")
        plt.plot(validation_series, color="green", linestyle="-")
        plt.plot(forecast_dataframe, color="red", linestyle="--")

        plt.legend(["Train series", "Validation series", "Predicted series"])

        plt.title("Validation of single step NN with input size " +
                  str(n_steps_in) + " output size " + str(n_steps_out))

        plt.show()

    return smape(
        validation_series,
        forecast_dataframe['forecast']), forecast_dataframe['forecast']
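The remove_outliers and normalize_series helpers used by Examples #9 and #11 are also defined elsewhere. Since the code later calls scaler.inverse_transform, normalize_series presumably returns a fitted scikit-learn scaler; a minimal sketch under that assumption:

import pandas as pd
from sklearn.preprocessing import MinMaxScaler

def normalize_series(series):
    """Sketch of the assumed helper: scale a pandas Series to [0, 1] and return
    the fitted scaler together with the scaled series."""
    scaler = MinMaxScaler()
    scaled = scaler.fit_transform(series.values.reshape(-1, 1)).flatten()
    return scaler, pd.Series(scaled, index=series.index)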