def train(self, data):
        """
        Fit the MLE attack model that reconstructs the sensitive attribute from the known attributes.

        The sensitive column is split off as the regression target, the remaining
        columns are encoded, centred and extended by an intercept column, and the
        prediction model is fitted on the result.

        :param data: type(DataFrame) A dataset that contains the sensitive attribute as one of its columns
        """
        known_attributes = data.drop(self.sensitiveAttribute, axis=1)
        features = self._encode_data(known_attributes)
        labels = data[self.sensitiveAttribute].values

        n, k = features.shape

        # Centre the independent variables on their column means; the means are kept on the instance
        self.scaleFactor = mean(features, axis=0)
        centred = features - self.scaleFactor

        # Put a column of ones in front so the intercept is part of the coefficient vector
        intercept_column = ones((n, 1))
        design_matrix = concatenate([intercept_column, centred], axis=1)

        # Get MLE for linear coefficients
        self.PredictionModel.fit(design_matrix, labels)
        self.coefficients = self.PredictionModel.coef_

        # Residual sum of squares divided by (n - k)
        # NOTE(review): the design matrix has k + 1 columns - confirm (n - k) is the intended divisor
        residuals = labels - design_matrix.dot(self.coefficients)
        self.sigma = sum(residuals**2) / (n - k)

        LOGGER.debug('Finished training regression model')
        self.trained = True
    async def time_step(self, step_context: WaterfallStepContext) -> DialogTurnResult:
        """
        Store the destination from the previous step, then prompt for a delivery time if none is set.
        :param step_context:
        :return DialogTurnResult:
        """
        LOGGER.debug(msg=f"{CreateDeliveryDialog.__name__}: time step.")

        delivery: Delivery = step_context.values[Keys.DELIVERY_DIALOG_STATE.value]

        # The result of the previous step is the delivery destination.
        delivery.destination = step_context.result

        # A time is already known: pass it straight on to the next step.
        if delivery.time is not None:
            return await step_context.next(delivery.time)

        time_prompt_text = messages.DELIVERY_TIME_PROMPT % (delivery.item, delivery.destination)
        time_prompt = MessageFactory.text(
            time_prompt_text,
            time_prompt_text,
            InputHints.expecting_input
        )
        retry_prompt = MessageFactory.text(messages.VALID_DELIVERY_TIME_PROMPT)
        prompt_options = PromptOptions(prompt=time_prompt, retry_prompt=retry_prompt)
        return await step_context.prompt(DateTimePrompt.__name__, prompt_options)
    async def _create_delivery(self, step_context):
        """
        Append the delivery collected by this dialog to the stored delivery list.

        State is stored as ``{recipient.id: {<delivery list key>: DeliveryList}}``,
        which is the layout ``list_deliveries`` reads back. A storage failure is
        logged and reported to the user instead of being raised.

        :param step_context: waterfall step context holding the delivery in its values
        :return: None
        """
        recipient: ChannelAccount = step_context.context.activity.recipient
        delivery: Delivery = step_context.values[Keys.DELIVERY_DIALOG_STATE.value]
        list_key = Keys.DELIVERY_LIST_STATE.value

        data = await self.storage.read([recipient.id])

        # get or initialize this member's state (the dict stored under the recipient id)
        member_state = data.get(recipient.id) or {}

        delivery_list: DeliveryList = member_state.get(list_key)
        if delivery_list:
            delivery_list.turn_number = delivery_list.turn_number + 1
        else:
            delivery_list = DeliveryList()
            delivery_list.turn_number = 1
            member_state[list_key] = delivery_list
        delivery_list.deliveries.append(delivery)

        try:
            # Always write the state back under the recipient id. Writing the inner
            # dict directly would store it under the delivery-list key instead, where
            # a later read by recipient id cannot find it.
            await self.storage.write({recipient.id: member_state})
            LOGGER.debug(msg=f"Delivery persisted.")
        except Exception as e:
            # Best effort: tell the user something failed rather than aborting the turn.
            LOGGER.error(msg=f"An error='{e}' has occurred while trying to schedule a delivery")
            await step_context.context.send_activity(messages.SOMETHING_WENT_WRONG)
Beispiel #4
0
    async def action_step(
            self, step_context: WaterfallStepContext) -> DialogTurnResult:
        """
        Turn the user's reply into an action and hand it to ``_handle_action``.

        Without a configured LUIS recognizer the value of the previous step's result
        is used as the action; otherwise the recognised intent is mapped to an action,
        falling back to ``Action.UNKNOWN`` for intents that are not mapped.

        :param step_context:
        :return DialogTurnResult:
        """
        LOGGER.debug(msg="Main dialog action step")

        # NOTE(review): intro_step also checks `luis_is_disabled` before using LUIS,
        # this step does not - confirm that is intended.
        if not self.luis_recognizer.is_configured:
            # LUIS is not configured, we just use the choice step
            chosen_action = step_context.result.value
            return await self._handle_action(step_context=step_context,
                                             action=chosen_action)

        # Call LUIS and gather any potential delivery details.
        # (Note the TurnContext has the response to the prompt.)
        intent, _luis_result = await self.luis_recognizer.recognize(
            step_context.context)

        # Checked in order; the first matching intent wins.
        intent_to_action = (
            (Intent.SALUTATION, Action.SALUTATION_ACKNOWLEDGEMENT),
            (Intent.SALUTATION_ACKNOWLEDGEMENT, Action.ACTION_PROMPT),
            (Intent.SCHEDULE_DELIVERY, Action.SCHEDULE_DELIVERY),
            (Intent.LIST_DELIVERIES, Action.LIST_DELIVERIES),
            (Intent.CANCEL, Action.EXIT),
        )

        action: str = Action.UNKNOWN.value
        for known_intent, mapped_action in intent_to_action:
            if intent == known_intent.value:
                action = mapped_action.value
                break

        return await self._handle_action(step_context=step_context,
                                         action=action)
    async def destination_step(self, step_context: WaterfallStepContext) -> DialogTurnResult:
        """
        Store the item from the previous step, then prompt for a destination if none is set.
        :param step_context:
        :return DialogTurnResult:
        """
        LOGGER.debug(msg=f"{CreateDeliveryDialog.__name__}: destination step.")

        delivery: Delivery = step_context.values[Keys.DELIVERY_DIALOG_STATE.value]

        # The result of the previous step is the delivery item.
        delivery.item = step_context.result

        # A destination is already known: pass it straight on to the next step.
        if delivery.destination is not None:
            return await step_context.next(delivery.destination)

        destination_prompt_text = messages.DELIVERY_DESTINATION_PROMPT % delivery.item
        destination_prompt = MessageFactory.text(
            destination_prompt_text,
            destination_prompt_text,
            InputHints.expecting_input
        )
        prompt_options = PromptOptions(prompt=destination_prompt)
        return await step_context.prompt(TextPrompt.__name__, prompt_options)
    def fit(self, data):
        """
        Train the Bayesian network on ``data``.

        Describes the data with a fresh ``DataDescriber``, encodes every attribute
        into bin indices, learns the network structure and finally the conditional
        probabilities. Anything learned by an earlier call is discarded first.

        :param data: training data; must be an instance of ``self.datatype`` with at least 2 columns
        """
        assert isinstance(data, self.datatype), \
            f'{self.__class__.__name__} expects {self.datatype} as input data but got {type(data)}'
        n_attributes = len(list(data))
        assert n_attributes >= 2, "BayesianNet requires at least 2 attributes(i.e., columns) in dataset."
        LOGGER.debug(f'Start training BayesianNet on data of shape {data.shape}...')

        # Reset the state of a previous training run
        if self.trained:
            self.trained = False
            self.DataDescriber = None
            self.bayesian_network = None
            self.conditional_probabilities = None

        describer = DataDescriber(self.metadata, self.histogram_bins, self.infer_ranges)
        self.DataDescriber = describer
        describer.describe(data)

        # One column of bin indices per described attribute
        encoded_df = DataFrame(columns=describer.attr_names)
        for attr_name, column in describer.attr_dict.items():
            encoded_df[attr_name] = column.encode_values_into_bin_idx()

        network = self._greedy_bayes_linear(encoded_df, self.degree)
        self.bayesian_network = network

        self.conditional_probabilities = self._construct_conditional_probabilities(
            network, encoded_df)

        LOGGER.debug(f'Finished training Bayesian net')
        self.trained = True
    async def list_deliveries(
            self, step_context: WaterfallStepContext) -> DialogTurnResult:
        """
        Send one card per stored delivery, or a "no deliveries" message, then end the dialog.

        The state is read from storage under the id of the activity's recipient.

        :param step_context:
        :return DialogTurnResult: the result of ending the dialog
        """
        from copy import deepcopy

        LOGGER.debug(msg=f"{ListDeliveriesDialog.__name__}: list deliveries")

        recipient: ChannelAccount = step_context.context.activity.recipient

        data = await self.storage.read([recipient.id])

        # get this member's state
        member_state = data.get(recipient.id, {})

        delivery_list: DeliveryList = member_state.get(
            Keys.DELIVERY_LIST_STATE.value)
        if not delivery_list:
            await step_context.context.send_activity(messages.NO_DELIVERIES)
            return await step_context.end_dialog()

        for delivery in delivery_list.deliveries:
            # Fill a private copy of the card template. Mutating the shared template
            # would make every outgoing activity reference the same dict and leave
            # the last delivery's details behind in the template.
            card = deepcopy(DeliveryCard)
            card_body = card["body"]
            card_body[0]["text"] = delivery.item
            card_body[1]["text"] = delivery.destination
            card_body[2]["text"] = delivery.time
            message = Activity(
                type=ActivityTypes.message,
                attachments=[CardFactory.adaptive_card(card)],
            )
            await step_context.context.send_activity(message)
        return await step_context.end_dialog()
    async def confirm_step(self, step_context: WaterfallStepContext) -> DialogTurnResult:
        """
        Store the delivery time from the previous step, send a summary card and ask for confirmation.
        :param step_context:
        :return DialogTurnResult:
        """
        LOGGER.debug(msg=f"{CreateDeliveryDialog.__name__}: confirmation step.")

        delivery: Delivery = step_context.values[Keys.DELIVERY_DIALOG_STATE.value]

        # The previous step's result is indexable; the value of its first entry is the time.
        delivery.time = step_context.result[0].value

        scheduled_text = messages.DELIVERY_SCHEDULED % (delivery.item, delivery.destination, delivery.time)
        # The line break and the eight spaces between the two parts belong to the message text.
        message_text = f"{scheduled_text}\n        {messages.IS_THAT_ALL}"

        prompt_options = PromptOptions(prompt=MessageFactory.text(message_text))

        # Fill the shared DeliveryCard template in place.
        card_body = DeliveryCard["body"]
        card_body[0]["text"] = f"Item: {delivery.item}"
        card_body[1]["text"] = f"Destination: {delivery.destination}"
        card_body[2]["text"] = f"Time: {delivery.time}"

        # NOTE(review): `text` receives the object returned by MessageFactory.text rather
        # than the plain message string - confirm this is intended.
        summary = Activity(
            type=ActivityTypes.message,
            text=MessageFactory.text(message_text),
            attachments=[
                CardFactory.adaptive_card(DeliveryCard)
            ],
        )
        await step_context.context.send_activity(summary)

        return await step_context.prompt(ConfirmPrompt.__name__, prompt_options)
Beispiel #9
0
    def generate_samples(self, nsamples):
        """Return ``nsamples`` records sampled from the fitted synthesiser.

        :param nsamples: number of records to sample
        """
        assert self.trained, "Model must first be fitted to some data."

        LOGGER.debug(f'Generate synthetic dataset of size {nsamples}')
        return self.synthesiser.sample(nsamples)
Beispiel #10
0
    async def on_continue_dialog(self,
                                 inner_dc: DialogContext) -> DialogTurnResult:
        """
        Give ``interrupt`` the first chance to handle the turn.

        If ``interrupt`` returns ``None`` the turn is passed on to the parent class.

        :param inner_dc:
        :return DialogTurnResult:
        """
        LOGGER.debug(msg=f"{CancelAndHelpDialog.__name__}: on_continue_dialog")

        interruption = await self.interrupt(inner_dc)
        if interruption is None:
            # Nothing interrupted the dialog: continue as the parent class would.
            return await super(CancelAndHelpDialog,
                               self).on_continue_dialog(inner_dc)

        return interruption
Beispiel #11
0
    def fit(self, data):
        """Train a generative adversarial network on tabular data.

        Input data is assumed to be of shape (n_samples, n_features).
        Training is delegated to ``self.synthesiser.fit`` together with ``self.metadata``.
        See https://github.com/DAI-Lab/SDGym for details

        :param data: training data; must be an instance of ``self.datatype``
        """
        model_name = self.__class__.__name__
        assert isinstance(data, self.datatype), \
            f'{model_name} expects {self.datatype} as input data but got {type(data)}'

        LOGGER.debug(f'Start fitting {model_name} to data of shape {data.shape}...')
        self.synthesiser.fit(data, self.metadata)
        LOGGER.debug(f'Finished fitting')

        self.trained = True
    def generate_samples(self, nsamples):
        """Sample ``nsamples`` values for every described attribute, each attribute on its own.

        :param nsamples: number of records to generate
        :return: DataFrame with one column per attribute of the data describer
        """
        assert self.trained, "Model must be fitted to some data first"

        LOGGER.debug(f'Generate synthetic dataset of size {nsamples}')
        describer = self.DataDescriber
        synthetic_dataset = DataFrame(columns=describer.attr_names)
        for attr_name, column in describer.attr_dict.items():
            # Draw bin indices first, then turn them into attribute values
            bin_idx = column.sample_binning_indices_in_independent_attribute_mode(nsamples)
            synthetic_dataset[attr_name] = column.sample_values_from_binning_indices(bin_idx)

        LOGGER.debug(f'Generated synthetic dataset of size {nsamples}')
        return synthetic_dataset
Beispiel #13
0
 async def on_members_added_activity(self,
                                     members_added: List[ChannelAccount],
                                     turn_context: TurnContext):
     """
     Welcome every added member, then run the dialog for this turn.

     :param members_added: accounts that were added to the conversation
     :param turn_context: context of the current turn
     :return: the result of ``DialogHelper.run_dialog``
     """
     for member in members_added:
         # Skip the member whose id equals the activity's recipient id (presumably the bot itself)
         if member.id == turn_context.activity.recipient.id:
             continue
         greeting = f"{messages.HELLO} {member.name}! {messages.BOT_INTRO_TEXT}."
         await turn_context.send_activity(greeting)
         LOGGER.debug(f"Welcome message sent to member='{member.id}'")

     dialog_state = self.conversation_state.create_property(DIALOG_STATE)
     return await DialogHelper.run_dialog(self.dialog, turn_context, dialog_state)
    async def acknowledgement_step(self, step_context: WaterfallStepContext) -> DialogTurnResult:
        """
        Persist the delivery, then either say goodbye or start this dialog again.

        A truthy result from the previous step ends the dialog; a falsy one begins
        the dialog with this dialog's own id again.

        :param step_context:
        :return DialogTurnResult:
        """
        LOGGER.debug(msg=f"{CreateDeliveryDialog.__name__}: acknowledgement step.")

        await self._create_delivery(step_context)

        if not step_context.result:
            happy_to_help = MessageFactory.text(messages.HAPPY_TO_HELP)
            await step_context.context.send_activity(happy_to_help)
            return await step_context.begin_dialog(self.id)

        goodbye = MessageFactory.text(messages.GOODBYE)
        await step_context.context.send_activity(goodbye)
        return await step_context.end_dialog()
    def fit(self, data):
        """
        Fit the model by describing ``data`` with a fresh ``DataDescriber``.

        A describer left over from an earlier call is dropped first.

        :param data: training data; must be an instance of ``self.datatype``
        """
        assert isinstance(data, self.datatype), \
            f'{self.__class__.__name__} expects {self.datatype} as input data but got {type(data)}'
        LOGGER.debug(f'Start fitting IndependentHistogram model to data of shape {data.shape}...')

        # Reset the state of a previous fit
        if self.trained:
            self.trained = False
            self.DataDescriber = None

        describer = DataDescriber(self.metadata, self.histogram_bins, self.infer_ranges)
        self.DataDescriber = describer
        describer.describe(data)

        LOGGER.debug(f'Finished fitting IndependentHistogram')
        self.trained = True
Beispiel #16
0
    async def intro_step(
            self, step_context: WaterfallStepContext) -> DialogTurnResult:
        """
        Open the main dialog with a prompt.

        When the LUIS recognizer is configured and not disabled a text prompt is used;
        otherwise the step is delegated to ``_handle_luis_not_configured``.

        :param step_context:
        :return DialogTurnResult:
        """
        LOGGER.debug(msg=f"Main dialog intro step")

        action_choices = [
            Choice(Action.SCHEDULE_DELIVERY.value),
            Choice(Action.LIST_DELIVERIES.value),
            Choice(Action.EXIT.value)
        ]
        prompt_options = PromptOptions(prompt=MessageFactory.text(""),
                                       choices=action_choices)

        recognizer = self.luis_recognizer
        luis_usable = recognizer.is_configured and not recognizer.luis_is_disabled
        if luis_usable:
            return await step_context.prompt(TextPrompt.__name__, prompt_options)

        return await self._handle_luis_not_configured(
            step_context, prompt_options)
    def __init__(self, configuration: DefaultConfig):
        """
        Create the LUIS recognizer when the configuration provides everything it needs.

        ``self._recognizer`` stays ``None`` unless the app id, the API key and the API
        host name are all set.

        :param configuration: settings providing the ``LUIS_*`` values
        """
        self._recognizer = None
        self.luis_is_disabled = configuration.LUIS_IS_DISABLED

        # Truthy only when all three settings are truthy (holds the last value, not a bool)
        self.luis_is_configured = (configuration.LUIS_APP_ID
                                   and configuration.LUIS_API_KEY
                                   and configuration.LUIS_API_HOST_NAME)
        if not self.luis_is_configured:
            return

        # Set the recognizer options depending on which endpoint version you want to use e.g
        # v2 or v3.
        endpoint = "https://" + configuration.LUIS_API_HOST_NAME
        luis_application = LuisApplication(
            configuration.LUIS_APP_ID,
            configuration.LUIS_API_KEY,
            endpoint,
        )
        recognizer = LuisRecognizer(luis_application)
        recognizer.luis_trace_label = DeliverySchedulingRecognizer.__name__
        self._recognizer = recognizer
        LOGGER.debug(msg="LUIS application configured and initialized")
    def train(self, data):
        """
        Fit a classifier that reconstructs the sensitive label from the known attributes.

        The sensitive column is mapped through ``self.labels`` to form the target; the
        remaining columns are encoded and centred before the prediction model is fitted.

        :param data: type(DataFrame) A dataset that contains the sensitive attribute as one of its columns
        """
        known_attributes = data.drop(self.sensitiveAttribute, axis=1)
        features = self._encode_data(known_attributes)

        sensitive_column = data[self.sensitiveAttribute]
        labels = sensitive_column.apply(lambda value: self.labels[value]).values

        # Centre the features on their column means; the means are kept on the instance
        self.scaleFactor = mean(features, axis=0)
        centred = features - self.scaleFactor

        # Fit the prediction model on the centred features
        self.PredictionModel.fit(centred, labels)

        LOGGER.debug('Finished training regression model')
        self.trained = True
    def __init__(self, datatype, metadata, nbins=10, quids=None):
        """
        Derive the histogram layout of the feature set from the column metadata.

        Numerical columns get ``nbins`` equal-width bins between their ``min`` and
        ``max``; numerical columns listed in ``quids`` are treated as categorical with
        one category per interval of their ``bins``; categorical and ordinal columns
        use their ``i2s`` categories.

        :param datatype: type of the data the feature set works on; only DataFrame is accepted
        :param metadata: dict with a ``columns`` list of per-column dicts
        :param nbins: number of bins per numerical attribute
        :param quids: names of numerical attributes to treat as categorical (default: none)
        """
        assert datatype in [DataFrame], 'Unknown data type {}'.format(datatype)
        self.datatype = datatype
        self.nfeatures = 0

        self.cat_attributes = []
        self.num_attributes = []

        self.histogram_bins = {}
        self.category_codes = {}

        if quids is None:
            quids = []

        for cdict in metadata['columns']:
            attr_name, dtype = cdict['name'], cdict['type']

            if dtype in (FLOAT, INTEGER):
                if attr_name in quids:
                    # Pre-binned numerical attribute: one category per interval
                    cat_bins = cdict['bins']
                    cat_labels = [
                        f'({lower},{upper}]'
                        for lower, upper in zip(cat_bins[:-1], cat_bins[1:])
                    ]
                    self.cat_attributes.append(attr_name)
                    self.category_codes[attr_name] = cat_labels
                    self.nfeatures += len(cat_labels)
                else:
                    # nbins + 1 edges give nbins bins
                    bin_edges = linspace(cdict['min'], cdict['max'], nbins + 1)
                    self.num_attributes.append(attr_name)
                    self.histogram_bins[attr_name] = bin_edges
                    self.nfeatures += nbins

            elif dtype in (CATEGORICAL, ORDINAL):
                categories = cdict['i2s']
                self.cat_attributes.append(attr_name)
                self.category_codes[attr_name] = categories
                self.nfeatures += len(categories)

        LOGGER.debug(f'Feature set will have length {self.nfeatures}')

        self.__name__ = 'Histogram'
Beispiel #20
0
    async def execute_luis_query(
            luis_recognizer: Recognizer,
            turn_context: TurnContext) -> (Intent, object):
        """
        Run the recognizer on the turn and return ``(intent, result)``.

        ``intent`` is ``None`` when recognition fails; the failure is logged, not raised.
        ``result`` is never assigned here and is always ``None``.
        """
        intent, result = None, None

        try:
            LOGGER.debug(msg="Executing LUIS query")

            recognized = await luis_recognizer.recognize(turn_context)
            intent = get_intent(recognizer_result=recognized)

            LOGGER.debug(msg="LUIS query execution succeeded")
        except Exception as error:
            LOGGER.error(
                msg=f"Executing LUIS query failed with an error={error}")

        return intent, result
Beispiel #21
0
    async def interrupt(self, inner_dc: DialogContext) -> DialogTurnResult:
        """
        Handle the help and cancel keywords before the active dialog sees the turn.

        A help keyword sends the help card and leaves the dialog waiting. A cancel
        keyword sends the cancellation message, cancels all dialogs and replaces the
        dialog with ``self.initial_dialog_id``.

        :param inner_dc:
        :return DialogTurnResult: the interruption result, or None when the turn is not an interruption
        """
        LOGGER.debug(msg=f"{CancelAndHelpDialog.__name__}: interrupt")

        activity = inner_dc.context.activity
        if activity.type != ActivityTypes.message:
            return None

        # A message activity may carry no text at all (for example only attachments
        # or a value); treat that as empty text instead of failing on None.lower().
        text = (activity.text or "").lower()

        if text in (Prompts.HELP.value, Prompts.QUESTION_MARK.value):
            # The help card is only built when it is actually sent.
            help_message = Activity(
                type=ActivityTypes.message,
                attachments=[CardFactory.adaptive_card(HelpCard)])
            await inner_dc.context.send_activity(help_message)
            return DialogTurnResult(DialogTurnStatus.Waiting)

        if text in (Prompts.CANCEL.value, Prompts.END.value,
                    Prompts.QUIT.value):
            cancel_message = MessageFactory.text(messages.CANCELLED,
                                                 messages.CANCELLED,
                                                 InputHints.ignoring_input)
            await inner_dc.context.send_activity(cancel_message)
            await inner_dc.cancel_all_dialogs()
            return await inner_dc.replace_dialog(self.initial_dialog_id)

        return None
    def generate_samples(self, nsamples):
        """Generate ``nsamples`` synthetic records from the fitted Bayesian network.

        Attributes present in the encoded dataset are decoded from its bin indices;
        all other attributes are sampled in independent attribute mode.

        :param nsamples: number of records to generate
        :return: DataFrame with one column per attribute of the data describer
        """
        LOGGER.debug(f'Generate synthetic dataset of size {nsamples}')
        assert self.trained, "Model must be fitted to some real data first"
        synthetic_data = DataFrame(columns=self.DataDescriber.attr_names)

        # Get samples for attributes modelled in Bayesian net
        encoded_dataset = self._generate_encoded_dataset(nsamples)

        for attr in self.DataDescriber.attr_names:
            column = self.DataDescriber.attr_dict[attr]
            if attr in encoded_dataset:
                bin_idx = encoded_dataset[attr]
            else:
                # For attributes not in BN use independent attribute mode
                bin_idx = column.sample_binning_indices_in_independent_attribute_mode(nsamples)
            synthetic_data[attr] = column.sample_values_from_binning_indices(bin_idx)

        return synthetic_data
    async def item_step(self, step_context: WaterfallStepContext) -> DialogTurnResult:
        """
        Start collecting a new delivery and prompt for its item if none is set.
        :param step_context:
        :return DialogTurnResult:
        """
        LOGGER.debug(msg=f"{CreateDeliveryDialog.__name__}: item step.")

        # A fresh Delivery collects the details gathered by the steps of this dialog.
        delivery: Delivery = Delivery()
        step_context.values[Keys.DELIVERY_DIALOG_STATE.value] = delivery

        # An item is already known: pass it straight on to the next step.
        if delivery.item is not None:
            return await step_context.next(delivery.item)

        item_prompt = MessageFactory.text(
            messages.DELIVERY_ITEM_PROMPT,
            messages.DELIVERY_ITEM_PROMPT,
            InputHints.expecting_input
        )
        prompt_options = PromptOptions(prompt=item_prompt)
        return await step_context.prompt(TextPrompt.__name__, prompt_options)
Beispiel #24
0
    async def salute(self,
                     step_context: WaterfallStepContext) -> DialogTurnResult:
        """
        Send the salutation that matches the requested phase and end the dialog.

        The phase is read from the dialog options and defaults to
        ``SalutationPhase.INITIATE``; an unrecognised phase sends an empty message.

        :param step_context:
        :return DialogTurnResult:
        """
        LOGGER.debug(msg=f"{SalutationDialog.__name__}: salute")

        dialog_options: {} = {} if step_context.options is None else step_context.options
        phase: SalutationPhase = dialog_options.get(
            Keys.SALUTATION_PHASE.value, SalutationPhase.INITIATE)

        if phase == SalutationPhase.INITIATE:
            message_text = f"{messages.HELLO}! {messages.HOW_ARE_YOU_DOING}"
        elif phase == SalutationPhase.ACKNOWLEDGE:
            message_text = f"{messages.SALUTATION_ACKNOWLEDGEMENT}. {messages.HOW_CAN_I_HELP}"
        elif phase == SalutationPhase.PROMPT:
            message_text = f"{messages.HOW_CAN_I_HELP}"
        else:
            message_text = f""

        salutation = MessageFactory.text(message_text, message_text,
                                         InputHints.ignoring_input)
        await step_context.context.send_activity(salutation)
        return await step_context.end_dialog(self.id)
Beispiel #25
0
 async def on_message_activity(self, turn_context: TurnContext):
     """
     Run the dialog for an incoming message activity.

     :param turn_context: context of the current turn
     :return: the result of ``DialogHelper.run_dialog``
     """
     LOGGER.debug(f"Message activity received. Context={turn_context}")
     dialog_state = self.conversation_state.create_property(DIALOG_STATE)
     return await DialogHelper.run_dialog(self.dialog, turn_context, dialog_state)
    def fit(self, data):
        """Fit a generative model of the training data distribution.

        Builds the generator/discriminator losses and their Adam solvers, initialises
        all variables and then alternates, for ``self.n_iters`` iterations, between
        ``self.num_teachers`` discriminator updates on noisy real/fake labels and one
        generator update.

        :param data: DataFrame: Training set; must be an instance of ``self.datatype``
        """
        assert isinstance(
            data, self.datatype
        ), f'{self.__class__.__name__} expects {self.datatype} as input data but got {type(data)}'

        # Clean up: when the model was trained before, set up generator and
        # discriminator again and open a new session
        if self.trained:
            self._generator()
            self._discriminator()
            self.sess = tf.Session()
            self.trained = False

        LOGGER.debug(
            f'Start fitting {self.__class__.__name__} to data of shape {data.shape}...'
        )
        nsamples = len(data)
        features_train = self._encode_data(data)

        with tf.device(self.device_spec.to_string()):
            # Generator
            self.GDist = self.gen_out(self.Z)

            # Discriminator
            D_real = self.discriminator_out(self.X)
            D_fake = self.discriminator_out(self.GDist)
            # Real outputs first, fake outputs second - the same order as the labels fed via self.M
            D_entire = tf.concat(axis=0, values=[D_real, D_fake])

            # Replacement of Clipping algorithm to Penalty term
            # 1. Line 6 in Algorithm 1
            # Random interpolation between the real batch and the generated batch
            noisy_vals = tf.random_uniform([self.batch_size, 1],
                                           minval=0.,
                                           maxval=1.)
            X_inter = noisy_vals * self.X + (1. - noisy_vals) * self.GDist

            # 2. Line 7 in Algorithm 1
            # Penalise the discriminator gradient norm at the interpolated points for deviating from 1
            grad = tf.gradients(self.discriminator_out(X_inter), [X_inter])[0]
            grad_norm = tf.sqrt(tf.reduce_sum(grad**2 + ZERO_TOL, axis=1))
            grad_pen = self.num_teachers * tf.reduce_mean((grad_norm - 1)**2)

            # Loss function
            discriminator_loss = tf.reduce_mean(
                (1 - self.M) * D_entire) - tf.reduce_mean(
                    self.M * D_entire) + grad_pen
            generator_loss = -tf.reduce_mean(D_fake)

            # Solver: each optimizer only updates its own variable list
            discriminator_solver = (tf.train.AdamOptimizer(
                learning_rate=self.learning_rate,
                beta1=0.5).minimize(discriminator_loss, var_list=self.theta_D))
            generator_solver = (tf.train.AdamOptimizer(
                learning_rate=self.learning_rate,
                beta1=0.5).minimize(generator_loss, var_list=self.theta_G))

            # Start session
            self.sess.run(tf.global_variables_initializer())

            # Training iterations
            for _ in range(self.n_iters):
                # TODO: Move dataset splitting here
                # For fixed generator weights run teacher training
                for _ in range(self.num_teachers):
                    # Sample latent vars
                    latent_batch = self._sample_latent_z(
                        self.batch_size, self.z_dim)

                    # Sample real
                    train_idx_teach = self._sample_real_x(
                        nsamples, self.batch_size
                    )  # Does this way of sampling satisfy DP? Should be disjoint subsets!
                    features_train_batch = features_train[train_idx_teach, :]

                    # Labels: 1 for the real half of the batch, 0 for the generated half
                    labels_real = np.ones([
                        self.batch_size,
                    ])
                    labels_fake = np.zeros([
                        self.batch_size,
                    ])

                    labels_batch = np.concatenate((labels_real, labels_fake),
                                                  0)

                    # NOTE(review): the scale attribute is named laplace_noise_scale but the
                    # noise is drawn from a normal distribution - confirm which is intended
                    gaussian_noise = np.random.normal(
                        loc=0.0,
                        scale=self.laplace_noise_scale,
                        size=self.batch_size * 2)

                    labels_batch = labels_batch + gaussian_noise

                    # Threshold the noisy labels back to booleans
                    labels_batch = (labels_batch > 0.5)

                    # Column vector of 0./1. with one row per entry of D_entire
                    labels_batch = np.reshape(labels_batch.astype(float),
                                              (2 * self.batch_size, 1))

                    # One discriminator update; the returned loss value is not used afterwards
                    _, discriminator_loss_iter = self.sess.run(
                        [discriminator_solver, discriminator_loss],
                        feed_dict={
                            self.X: features_train_batch,
                            self.Z: latent_batch,
                            self.M: labels_batch
                        })

                # Update generator weights
                latent_batch = self._sample_latent_z(self.batch_size,
                                                     self.z_dim)

                # One generator update; the returned loss value is not used afterwards
                _, generator_loss_iter = self.sess.run(
                    [generator_solver, generator_loss],
                    feed_dict={self.Z: latent_batch})

        self.trained = True