def train(self, data):
    """
    Fit the linear regression attack on a labelled dataset.

    The column named by ``self.sensitiveAttribute`` is the regression target.
    The remaining columns are encoded with ``self._encode_data``, centred on
    their column means and prefixed with a column of ones so that the
    intercept is estimated as part of the coefficient vector.

    Sets ``self.scaleFactor``, ``self.coefficients``, ``self.sigma`` and
    ``self.trained``.

    :param data: type(DataFrame) A dataset of shape (n, k)
    """
    encoded = self._encode_data(data.drop(self.sensitiveAttribute, axis=1))
    target = data[self.sensitiveAttribute].values

    n_rows, n_cols = encoded.shape

    # Centre the independent variables for better regression performance
    self.scaleFactor = mean(encoded, axis=0)
    centred = encoded - self.scaleFactor

    # Leading column of ones so the intercept is included in the beta vector
    intercept_column = ones((n_rows, 1))
    design = concatenate([intercept_column, centred], axis=1)

    # Get MLE for linear coefficients
    model = self.PredictionModel
    model.fit(design, target)
    self.coefficients = model.coef_

    # Residual sum of squares divided by (n - k), k being the number of
    # encoded feature columns before the intercept column was added
    residuals = target - design.dot(self.coefficients)
    self.sigma = sum(residuals**2) / (n_rows - n_cols)

    LOGGER.debug('Finished training regression model')
    self.trained = True
async def time_step(self, step_context: WaterfallStepContext) -> DialogTurnResult:
    """
    Record the destination answer and prompt for a delivery time if none is set.

    :param step_context: waterfall step context; its ``result`` is stored as
        the delivery destination
    :return DialogTurnResult: the date-time prompt turn result, or the result
        of advancing to the next step with the already known time
    """
    LOGGER.debug(msg=f"{CreateDeliveryDialog.__name__}: time step.")

    state_key = Keys.DELIVERY_DIALOG_STATE.value
    delivery: Delivery = step_context.values[state_key]

    # The previous step's result is the answer to the destination prompt
    delivery.destination = step_context.result

    # Nothing to ask when the time is already known
    if delivery.time is not None:
        return await step_context.next(delivery.time)

    question = messages.DELIVERY_TIME_PROMPT % (delivery.item, delivery.destination)
    ask = MessageFactory.text(question, question, InputHints.expecting_input)
    ask_again = MessageFactory.text(messages.VALID_DELIVERY_TIME_PROMPT)
    options = PromptOptions(prompt=ask, retry_prompt=ask_again)

    return await step_context.prompt(DateTimePrompt.__name__, options)
async def _create_delivery(self, step_context):
    """
    Append the delivery collected by this dialog to the recipient's stored list.

    Storage is keyed by ``recipient.id``; the value stored under that key is a
    dict holding the ``DeliveryList`` under ``Keys.DELIVERY_LIST_STATE.value``
    (the same layout ``list_deliveries`` reads).

    If writing to storage fails, the error is logged and the user is sent
    ``messages.SOMETHING_WENT_WRONG``; the exception is not re-raised.

    :param step_context: waterfall step context holding the delivery in its
        ``values`` under ``Keys.DELIVERY_DIALOG_STATE.value``
    """
    recipient: ChannelAccount = step_context.context.activity.recipient
    delivery: Delivery = step_context.values[Keys.DELIVERY_DIALOG_STATE.value]

    stored = await self.storage.read([recipient.id])

    # get or initialize this member's state. member_state is ALWAYS the inner
    # dict; previously it was the inner dict when state existed but an outer
    # {recipient.id: {}} wrapper when it did not, which broke every delivery
    # after the first one.
    member_state = stored.get(recipient.id) or {}

    delivery_list: DeliveryList = member_state.get(Keys.DELIVERY_LIST_STATE.value)
    if delivery_list:
        delivery_list.deliveries.append(delivery)
        delivery_list.turn_number = delivery_list.turn_number + 1
    else:
        delivery_list = DeliveryList()
        delivery_list.deliveries.append(delivery)
        delivery_list.turn_number = 1
    member_state[Keys.DELIVERY_LIST_STATE.value] = delivery_list

    try:
        # Wrap the inner state under the recipient id only at write time
        await self.storage.write({recipient.id: member_state})
        LOGGER.debug(msg="Delivery persisted.")
    except Exception as e:
        LOGGER.error(msg=f"An error='{e}' has occurred while trying to schedule a delivery")
        await step_context.context.send_activity(messages.SOMETHING_WENT_WRONG)
async def action_step(
        self, step_context: WaterfallStepContext) -> DialogTurnResult:
    """
    Turn the user's reply into an action and pass it to ``_handle_action``.

    When LUIS is not configured or is disabled, the ``value`` of the previous
    step's result is used as the action. Otherwise the turn is sent to LUIS
    and the returned intent is mapped to an action; intents without a mapping
    yield ``Action.UNKNOWN``.

    :param step_context: waterfall step context of the main dialog
    :return DialogTurnResult: whatever ``_handle_action`` returns
    """
    LOGGER.debug(msg="Main dialog action step")

    recognizer = self.luis_recognizer
    # intro_step only issues the free-text prompt when LUIS is configured AND
    # not disabled; in every other case it takes the non-LUIS path, so the
    # same condition must be used here instead of calling a disabled LUIS.
    if not recognizer.is_configured or recognizer.luis_is_disabled:
        return await self._handle_action(step_context=step_context,
                                         action=step_context.result.value)

    # Call LUIS and gather any potential delivery details.
    # (Note the TurnContext has the response to the prompt.)
    intent, _ = await recognizer.recognize(step_context.context)

    intent_to_action = {
        Intent.SALUTATION.value: Action.SALUTATION_ACKNOWLEDGEMENT.value,
        Intent.SALUTATION_ACKNOWLEDGEMENT.value: Action.ACTION_PROMPT.value,
        Intent.SCHEDULE_DELIVERY.value: Action.SCHEDULE_DELIVERY.value,
        Intent.LIST_DELIVERIES.value: Action.LIST_DELIVERIES.value,
        Intent.CANCEL.value: Action.EXIT.value,
    }
    action: str = intent_to_action.get(intent, Action.UNKNOWN.value)

    return await self._handle_action(step_context=step_context, action=action)
async def destination_step(self, step_context: WaterfallStepContext) -> DialogTurnResult:
    """
    Record the item answer and prompt for a destination if none is set.

    :param step_context: waterfall step context; its ``result`` is stored as
        the delivery item
    :return DialogTurnResult: the text prompt turn result, or the result of
        advancing to the next step with the already known destination
    """
    LOGGER.debug(msg=f"{CreateDeliveryDialog.__name__}: destination step.")

    state_key = Keys.DELIVERY_DIALOG_STATE.value
    delivery: Delivery = step_context.values[state_key]

    # The previous step's result is the answer to the item prompt
    delivery.item = step_context.result

    # Nothing to ask when the destination is already known
    if delivery.destination is not None:
        return await step_context.next(delivery.destination)

    question = messages.DELIVERY_DESTINATION_PROMPT % delivery.item
    ask = MessageFactory.text(question, question, InputHints.expecting_input)
    options = PromptOptions(prompt=ask)

    return await step_context.prompt(TextPrompt.__name__, options)
def fit(self, data):
    """
    Train the Bayesian network model on a dataset.

    Describes the data with a fresh ``DataDescriber``, encodes every attribute
    into bin indices, learns the network structure with
    ``self._greedy_bayes_linear`` and builds the conditional probabilities.
    State from a previous fit is cleared first.

    :param data: training data; must be an instance of ``self.datatype`` and
        have at least two columns
    :raises AssertionError: if either requirement on ``data`` is violated
    """
    assert isinstance(data, self.datatype), \
        f'{self.__class__.__name__} expects {self.datatype} as input data but got {type(data)}'
    assert len(list(data)) >= 2, \
        "BayesianNet requires at least 2 attributes(i.e., columns) in dataset."

    LOGGER.debug(f'Start training BayesianNet on data of shape {data.shape}...')

    # Forget everything learnt by an earlier call
    if self.trained:
        self.trained = False
        self.DataDescriber = self.bayesian_network = self.conditional_probabilities = None

    describer = DataDescriber(self.metadata, self.histogram_bins, self.infer_ranges)
    self.DataDescriber = describer
    describer.describe(data)

    # One column of bin indices per described attribute
    binned = DataFrame(columns=describer.attr_names)
    for name, attribute in describer.attr_dict.items():
        binned[name] = attribute.encode_values_into_bin_idx()

    network = self._greedy_bayes_linear(binned, self.degree)
    self.bayesian_network = network
    self.conditional_probabilities = self._construct_conditional_probabilities(
        network, binned)

    LOGGER.debug('Finished training Bayesian net')
    self.trained = True
async def list_deliveries(
        self, step_context: WaterfallStepContext) -> DialogTurnResult:
    """
    Send one adaptive card per stored delivery of the activity's recipient.

    The recipient's state is read from ``self.storage``. When no delivery list
    is stored, ``messages.NO_DELIVERIES`` is sent instead. The dialog is ended
    in both cases.

    Note: the ``DeliveryCard`` object (defined outside this method) is mutated
    in place for every delivery before being attached.

    :param step_context: waterfall step context
    :return DialogTurnResult: result of ending the dialog
    """
    LOGGER.debug(msg=f"{ListDeliveriesDialog.__name__}: list deliveries")

    recipient: ChannelAccount = step_context.context.activity.recipient
    stored = await self.storage.read([recipient.id])

    # get this member's state
    member_state = stored.get(recipient.id, {})
    delivery_list: DeliveryList = member_state.get(Keys.DELIVERY_LIST_STATE.value)

    if not delivery_list:
        await step_context.context.send_activity(messages.NO_DELIVERIES)
        return await step_context.end_dialog()

    for entry in delivery_list.deliveries:
        card_body = DeliveryCard["body"]
        card_body[0]["text"] = entry.item
        card_body[1]["text"] = entry.destination
        card_body[2]["text"] = entry.time
        card_message = Activity(
            type=ActivityTypes.message,
            attachments=[CardFactory.adaptive_card(DeliveryCard)],
        )
        await step_context.context.send_activity(card_message)

    return await step_context.end_dialog()
async def confirm_step(self, step_context: WaterfallStepContext) -> DialogTurnResult:
    """
    Record the time answer, show the delivery summary and ask for confirmation.

    The ``value`` of the first element of the previous step's result is stored
    as the delivery time. A message with the summary text and a filled-in
    ``DeliveryCard`` is sent, followed by a confirm prompt with the same text.

    Note: the ``DeliveryCard`` object (defined outside this method) is mutated
    in place before being attached.

    :param step_context: waterfall step context
    :return DialogTurnResult: turn result of the confirm prompt
    """
    LOGGER.debug(msg=f"{CreateDeliveryDialog.__name__}: confirmation step.")

    delivery: Delivery = step_context.values[Keys.DELIVERY_DIALOG_STATE.value]

    # capture the response from the previous (time) step
    delivery.time = step_context.result[0].value

    scheduled_text = messages.DELIVERY_SCHEDULED % (
        delivery.item, delivery.destination, delivery.time)
    message_text = f"{scheduled_text} {messages.IS_THAT_ALL}"
    prompt_options = PromptOptions(prompt=MessageFactory.text(message_text))

    card_body = DeliveryCard["body"]
    card_body[0]["text"] = f"Item: {delivery.item}"
    card_body[1]["text"] = f"Destination: {delivery.destination}"
    card_body[2]["text"] = f"Time: {delivery.time}"

    await step_context.context.send_activity(
        Activity(
            type=ActivityTypes.message,
            # Activity.text is a plain string; wrapping it in
            # MessageFactory.text() would nest an Activity inside the field.
            text=message_text,
            attachments=[CardFactory.adaptive_card(DeliveryCard)],
        )
    )

    return await step_context.prompt(ConfirmPrompt.__name__, prompt_options)
def generate_samples(self, nsamples):
    """
    Draw synthetic records from the fitted synthesiser.

    :param nsamples: number of records requested from ``self.synthesiser.sample``
    :return: whatever ``self.synthesiser.sample`` returns
    :raises AssertionError: if the model has not been fitted yet
    """
    assert self.trained, "Model must first be fitted to some data."

    LOGGER.debug(f'Generate synthetic dataset of size {nsamples}')
    return self.synthesiser.sample(nsamples)
async def on_continue_dialog(self, inner_dc: DialogContext) -> DialogTurnResult:
    """
    Give ``interrupt`` a chance to handle the turn before continuing the dialog.

    :param inner_dc: dialog context of the current turn
    :return DialogTurnResult: the interruption result when there is one,
        otherwise the parent class' ``on_continue_dialog`` result
    """
    LOGGER.debug(msg=f"{CancelAndHelpDialog.__name__}: on_continue_dialog")

    interruption = await self.interrupt(inner_dc)
    if interruption is None:
        # Not interrupted: fall through to the regular dialog handling
        return await super(CancelAndHelpDialog, self).on_continue_dialog(inner_dc)

    return interruption
def fit(self, data):
    """
    Train a generative adversarial network on tabular data.

    Input data is assumed to be of shape (n_samples, n_features).
    See https://github.com/DAI-Lab/SDGym for details.

    :param data: training data; must be an instance of ``self.datatype``
    :raises AssertionError: if ``data`` has the wrong type
    """
    assert isinstance(data, self.datatype), \
        f'{self.__class__.__name__} expects {self.datatype} as input data but got {type(data)}'

    LOGGER.debug(
        f'Start fitting {self.__class__.__name__} to data of shape {data.shape}...')

    # The actual training is delegated to the wrapped synthesiser
    self.synthesiser.fit(data, self.metadata)

    LOGGER.debug('Finished fitting')
    self.trained = True
def generate_samples(self, nsamples):
    """
    Generate a synthetic dataset by sampling every attribute independently.

    :param nsamples: number of records to sample per attribute
    :return: DataFrame with one column per described attribute
    :raises AssertionError: if the model has not been fitted yet
    """
    assert self.trained, "Model must be fitted to some data first"

    LOGGER.debug(f'Generate synthetic dataset of size {nsamples}')

    describer = self.DataDescriber
    samples = DataFrame(columns=describer.attr_names)

    for name, attribute in describer.attr_dict.items():
        # First draw bin indices, then turn them into concrete values
        indices = attribute.sample_binning_indices_in_independent_attribute_mode(nsamples)
        samples[name] = attribute.sample_values_from_binning_indices(indices)

    LOGGER.debug(f'Generated synthetic dataset of size {nsamples}')
    return samples
async def on_members_added_activity(self, members_added: List[ChannelAccount], turn_context: TurnContext):
    """
    Welcome newly added members and run the dialog.

    :param members_added: accounts that were added to the conversation
    :param turn_context: context of the current turn
    :return: the result of ``DialogHelper.run_dialog``
    """
    for member in members_added:
        # Skip the account that is the activity's recipient
        # (presumably the bot itself; verify against the channel payload)
        if member.id != turn_context.activity.recipient.id:
            await turn_context.send_activity(
                f"{messages.HELLO} {member.name}! {messages.BOT_INTRO_TEXT}."
            )
            LOGGER.debug(f"Welcome message sent to member='{member.id}'")
    # Run the dialog using the dialog-state property of the conversation state
    return await DialogHelper.run_dialog(
        self.dialog,
        turn_context,
        self.conversation_state.create_property(DIALOG_STATE),
    )
async def acknowledgement_step(self, step_context: WaterfallStepContext) -> DialogTurnResult:
    """
    Persist the delivery and either end the dialog or start it over.

    After ``_create_delivery`` has run, a truthy previous-step result sends
    ``messages.GOODBYE`` and ends the dialog; otherwise
    ``messages.HAPPY_TO_HELP`` is sent and this dialog is begun again.

    :param step_context: waterfall step context
    :return DialogTurnResult: result of ending or re-beginning the dialog
    """
    LOGGER.debug(msg=f"{CreateDeliveryDialog.__name__}: acknowledgement step.")

    await self._create_delivery(step_context)

    turn_context = step_context.context
    if not step_context.result:
        # The user wants more: acknowledge and restart this dialog
        await turn_context.send_activity(MessageFactory.text(messages.HAPPY_TO_HELP))
        return await step_context.begin_dialog(self.id)

    await turn_context.send_activity(MessageFactory.text(messages.GOODBYE))
    return await step_context.end_dialog()
def fit(self, data):
    """
    Fit the independent histogram model to a dataset.

    A fresh ``DataDescriber`` is created and asked to describe the data; a
    describer left over from an earlier fit is discarded first.

    :param data: training data; must be an instance of ``self.datatype``
    :raises AssertionError: if ``data`` has the wrong type
    """
    assert isinstance(data, self.datatype), \
        f'{self.__class__.__name__} expects {self.datatype} as input data but got {type(data)}'

    LOGGER.debug(
        f'Start fitting IndependentHistogram model to data of shape {data.shape}...')

    # Forget the result of an earlier call
    if self.trained:
        self.trained = False
        self.DataDescriber = None

    describer = DataDescriber(self.metadata, self.histogram_bins, self.infer_ranges)
    self.DataDescriber = describer
    describer.describe(data)

    LOGGER.debug('Finished fitting IndependentHistogram')
    self.trained = True
async def intro_step(
        self, step_context: WaterfallStepContext) -> DialogTurnResult:
    """
    Open the main dialog with either a LUIS text prompt or the fallback path.

    When the recognizer is configured and not disabled, an empty-text
    ``TextPrompt`` is issued. Otherwise the step is delegated to
    ``_handle_luis_not_configured`` together with the prepared prompt options
    (which carry the three action choices).

    :param step_context: waterfall step context
    :return DialogTurnResult: result of the prompt or of the fallback handler
    """
    LOGGER.debug(msg="Main dialog intro step")

    offered_actions = (
        Action.SCHEDULE_DELIVERY,
        Action.LIST_DELIVERIES,
        Action.EXIT,
    )
    options = PromptOptions(
        prompt=MessageFactory.text(""),
        choices=[Choice(action.value) for action in offered_actions],
    )

    recognizer = self.luis_recognizer
    luis_usable = recognizer.is_configured and not recognizer.luis_is_disabled
    if luis_usable:
        return await step_context.prompt(TextPrompt.__name__, options)

    return await self._handle_luis_not_configured(step_context, options)
def __init__(self, configuration: DefaultConfig):
    """
    Set up the LUIS recognizer from the bot configuration.

    ``self.luis_is_configured`` is truthy only when the app id, API key and
    API host name are all set (it holds the result of chaining them with
    ``and``, not a strict bool). Only then is a ``LuisRecognizer`` created;
    otherwise ``self._recognizer`` stays ``None``.

    :param configuration: configuration object providing ``LUIS_IS_DISABLED``,
        ``LUIS_APP_ID``, ``LUIS_API_KEY`` and ``LUIS_API_HOST_NAME``
    """
    self._recognizer = None
    self.luis_is_disabled = configuration.LUIS_IS_DISABLED
    self.luis_is_configured = (
        configuration.LUIS_APP_ID
        and configuration.LUIS_API_KEY
        and configuration.LUIS_API_HOST_NAME
    )

    if not self.luis_is_configured:
        return

    # Set the recognizer options depending on which endpoint version you want
    # to use e.g v2 or v3.
    endpoint = "https://" + configuration.LUIS_API_HOST_NAME
    application = LuisApplication(
        configuration.LUIS_APP_ID,
        configuration.LUIS_API_KEY,
        endpoint,
    )

    recognizer = LuisRecognizer(application)
    self._recognizer = recognizer
    recognizer.luis_trace_label = DeliverySchedulingRecognizer.__name__
    LOGGER.debug(msg="LUIS application configured and initialized")
def train(self, data):
    """
    Fit the classifier attack on a labelled dataset.

    The column named by ``self.sensitiveAttribute`` is translated through
    ``self.labels`` and used as the classification target. The remaining
    columns are encoded with ``self._encode_data`` and centred on their column
    means before being passed to ``self.PredictionModel.fit``.

    Sets ``self.scaleFactor`` and ``self.trained``.

    :param data: type(DataFrame) A dataset of shape (n, k)
    """
    encoded = self._encode_data(data.drop(self.sensitiveAttribute, axis=1))

    sensitive_column = data[self.sensitiveAttribute]
    class_ids = sensitive_column.apply(lambda raw: self.labels[raw]).values

    # Feature normalisation: subtract the per-column mean
    self.scaleFactor = mean(encoded, axis=0)
    centred = encoded - self.scaleFactor

    # Fit the prediction model on the centred features
    self.PredictionModel.fit(centred, class_ids)

    LOGGER.debug('Finished training regression model')
    self.trained = True
def __init__(self, datatype, metadata, nbins=10, quids=None):
    """
    Derive the histogram feature layout from the dataset metadata.

    Numerical columns (FLOAT / INTEGER) that are not quasi-identifiers get
    ``nbins`` equal-width bins between their ``min`` and ``max``. Numerical
    quasi-identifiers are treated as categorical with one ``(low,high]`` label
    per pair of neighbouring entries in their ``bins``. CATEGORICAL / ORDINAL
    columns use their ``i2s`` list as category codes. Columns of any other
    type are ignored.

    :param datatype: expected input data type; must be ``DataFrame``
    :param metadata: dict with a ``'columns'`` list of per-column dicts
    :param nbins: number of histogram bins for numerical attributes
    :param quids: names of quasi-identifier attributes (default: none)
    :raises AssertionError: if ``datatype`` is not supported
    """
    assert datatype in [DataFrame], 'Unknown data type {}'.format(datatype)
    self.datatype = datatype

    self.nfeatures = 0
    self.cat_attributes = []
    self.num_attributes = []
    self.histogram_bins = {}
    self.category_codes = {}

    quasi_identifiers = [] if quids is None else quids

    for column in metadata['columns']:
        name = column['name']
        kind = column['type']

        if kind in (FLOAT, INTEGER):
            if name in quasi_identifiers:
                # Pre-binned numerical attribute: handled like a categorical one
                self.cat_attributes.append(name)
                edges = column['bins']
                interval_labels = [
                    f'({low},{high}]' for low, high in zip(edges, edges[1:])
                ]
                self.category_codes[name] = interval_labels
                self.nfeatures += len(interval_labels)
            else:
                self.num_attributes.append(name)
                self.histogram_bins[name] = linspace(
                    column['min'], column['max'], nbins + 1)
                self.nfeatures += nbins

        elif kind in (CATEGORICAL, ORDINAL):
            self.cat_attributes.append(name)
            self.category_codes[name] = column['i2s']
            self.nfeatures += len(column['i2s'])

    LOGGER.debug(f'Feature set will have length {self.nfeatures}')

    self.__name__ = 'Histogram'
async def execute_luis_query(
        luis_recognizer: Recognizer,
        turn_context: TurnContext) -> (Intent, object):
    """
    Run the recognizer on the current turn and extract the intent.

    Any exception raised while recognising or extracting the intent is logged
    and swallowed; the intent is then ``None``.

    :param luis_recognizer: recognizer whose ``recognize`` coroutine is awaited
    :param turn_context: context of the current turn
    :return: tuple ``(intent, result)``; ``result`` is always ``None`` here
    """
    detected_intent = None

    try:
        LOGGER.debug(msg="Executing LUIS query")
        outcome = await luis_recognizer.recognize(turn_context)
        detected_intent = get_intent(recognizer_result=outcome)
        LOGGER.debug(msg="LUIS query execution succeeded")
    except Exception as error:
        LOGGER.error(
            msg=f"Executing LUIS query failed with an error={error}")

    return detected_intent, None
async def interrupt(self, inner_dc: DialogContext) -> DialogTurnResult:
    """
    Handle "help" and "cancel" style commands before the active dialog runs.

    Only message activities are inspected. The lower-cased text is compared
    with the help prompts (the help card is sent and the dialog keeps
    waiting) and with the cancel prompts (a cancellation message is sent, all
    dialogs are cancelled and the initial dialog is started again).

    :param inner_dc: dialog context of the current turn
    :return DialogTurnResult: the interruption result, or ``None`` when the
        turn was not an interruption
    """
    LOGGER.debug(msg=f"{CancelAndHelpDialog.__name__}: interrupt")

    activity = inner_dc.context.activity
    if activity.type != ActivityTypes.message:
        return None

    # A message activity can arrive without text (e.g. attachment-only or
    # card-submit messages). Treat that as "no command" instead of failing
    # with AttributeError on None.lower().
    text = (activity.text or "").lower()

    if text in (Prompts.HELP.value, Prompts.QUESTION_MARK.value):
        help_message = Activity(
            type=ActivityTypes.message,
            attachments=[CardFactory.adaptive_card(HelpCard)])
        await inner_dc.context.send_activity(help_message)
        return DialogTurnResult(DialogTurnStatus.Waiting)

    if text in (Prompts.CANCEL.value, Prompts.END.value, Prompts.QUIT.value):
        cancel_message = MessageFactory.text(messages.CANCELLED,
                                             messages.CANCELLED,
                                             InputHints.ignoring_input)
        await inner_dc.context.send_activity(cancel_message)
        await inner_dc.cancel_all_dialogs()
        return await inner_dc.replace_dialog(self.initial_dialog_id)

    return None
def generate_samples(self, nsamples):
    """
    Generate a synthetic dataset from the fitted Bayesian network.

    Attributes present in the encoded dataset returned by
    ``self._generate_encoded_dataset`` are decoded from its bin indices; all
    other attributes are sampled in independent attribute mode.

    :param nsamples: number of records to generate
    :return: DataFrame with one column per described attribute
    :raises AssertionError: if the model has not been fitted yet
    """
    LOGGER.debug(f'Generate synthetic dataset of size {nsamples}')
    assert self.trained, "Model must be fitted to some real data first"

    describer = self.DataDescriber
    synthetic = DataFrame(columns=describer.attr_names)

    # Get samples for attributes modelled in Bayesian net
    network_samples = self._generate_encoded_dataset(nsamples)

    for name in describer.attr_names:
        attribute = describer.attr_dict[name]

        if name in network_samples:
            bin_indices = network_samples[name]
        else:
            # For attributes not in BN use independent attribute mode
            bin_indices = attribute.sample_binning_indices_in_independent_attribute_mode(
                nsamples)

        synthetic[name] = attribute.sample_values_from_binning_indices(bin_indices)

    return synthetic
async def item_step(self, step_context: WaterfallStepContext) -> DialogTurnResult:
    """
    Start collecting a delivery and prompt for its item if none is set.

    A new ``Delivery`` is stored in the step values under
    ``Keys.DELIVERY_DIALOG_STATE.value`` for the following steps to fill in.

    :param step_context: waterfall step context
    :return DialogTurnResult: the text prompt turn result, or the result of
        advancing to the next step with the already known item
    """
    LOGGER.debug(msg=f"{CreateDeliveryDialog.__name__}: item step.")

    # Create an object in which to collect the delivery information within the dialog.
    state_key = Keys.DELIVERY_DIALOG_STATE.value
    step_context.values[state_key] = Delivery()
    delivery: Delivery = step_context.values[state_key]

    # Nothing to ask when the item is already known
    if delivery.item is not None:
        return await step_context.next(delivery.item)

    ask = MessageFactory.text(
        messages.DELIVERY_ITEM_PROMPT,
        messages.DELIVERY_ITEM_PROMPT,
        InputHints.expecting_input,
    )
    return await step_context.prompt(TextPrompt.__name__, PromptOptions(prompt=ask))
async def salute(self, step_context: WaterfallStepContext) -> DialogTurnResult:
    """
    Send the salutation that matches the requested phase and end the dialog.

    The phase is read from the dialog options under
    ``Keys.SALUTATION_PHASE.value`` and defaults to
    ``SalutationPhase.INITIATE``. An unrecognised phase results in an empty
    message text.

    :param step_context: waterfall step context
    :return DialogTurnResult: result of ending the dialog with ``self.id``
    """
    LOGGER.debug(msg=f"{SalutationDialog.__name__}: salute")

    options = step_context.options
    if options is None:
        options = {}

    phase: SalutationPhase = options.get(
        Keys.SALUTATION_PHASE.value, SalutationPhase.INITIATE)

    if phase == SalutationPhase.INITIATE:
        text = f"{messages.HELLO}! {messages.HOW_ARE_YOU_DOING}"
    elif phase == SalutationPhase.ACKNOWLEDGE:
        text = f"{messages.SALUTATION_ACKNOWLEDGEMENT}. {messages.HOW_CAN_I_HELP}"
    elif phase == SalutationPhase.PROMPT:
        text = f"{messages.HOW_CAN_I_HELP}"
    else:
        text = ""

    reply = MessageFactory.text(text, text, InputHints.ignoring_input)
    await step_context.context.send_activity(reply)

    return await step_context.end_dialog(self.id)
async def on_message_activity(self, turn_context: TurnContext):
    """
    Run the dialog for an incoming message activity.

    :param turn_context: context of the current turn
    :return: the result of ``DialogHelper.run_dialog``
    """
    LOGGER.debug(f"Message activity received. Context={turn_context}")

    # Dialog state lives in a property of the conversation state
    dialog_state = self.conversation_state.create_property(DIALOG_STATE)
    return await DialogHelper.run_dialog(self.dialog, turn_context, dialog_state)
def fit(self, data):
    """Fit a generative model of the training data distribution.

    Builds the discriminator and generator losses (with a gradient penalty
    term) and their Adam solvers, initialises the TF variables and then, for
    ``self.n_iters`` iterations, runs ``self.num_teachers`` discriminator
    updates on noise-perturbed real/fake labels followed by one generator
    update.

    :param data: DataFrame: Training set
    :raises AssertionError: if ``data`` is not an instance of ``self.datatype``
    """
    assert isinstance(
        data, self.datatype
    ), f'{self.__class__.__name__} expects {self.datatype} as input data but got {type(data)}'

    # Clean up: on a repeated fit, call the generator/discriminator set-up
    # methods again and open a new session
    if self.trained:
        self._generator()
        self._discriminator()
        self.sess = tf.Session()
        self.trained = False

    LOGGER.debug(
        f'Start fitting {self.__class__.__name__} to data of shape {data.shape}...'
    )
    nsamples = len(data)
    features_train = self._encode_data(data)

    with tf.device(self.device_spec.to_string()):
        # Generator
        self.GDist = self.gen_out(self.Z)

        # Discriminator
        D_real = self.discriminator_out(self.X)
        D_fake = self.discriminator_out(self.GDist)
        # Real outputs first, fake outputs second, stacked along axis 0
        D_entire = tf.concat(axis=0, values=[D_real, D_fake])

        # Replacement of Clipping algorithm to Penalty term
        # 1. Line 6 in Algorithm 1
        noisy_vals = tf.random_uniform([self.batch_size, 1],
                                       minval=0.,
                                       maxval=1.)
        # Random interpolation between the real batch and the generated batch
        X_inter = noisy_vals * self.X + (1. - noisy_vals) * self.GDist

        # 2. Line 7 in Algorithm 1
        grad = tf.gradients(self.discriminator_out(X_inter), [X_inter])[0]
        # ZERO_TOL is added inside the sum before taking the square root
        grad_norm = tf.sqrt(tf.reduce_sum(grad**2 + ZERO_TOL, axis=1))
        grad_pen = self.num_teachers * tf.reduce_mean((grad_norm - 1)**2)

        # Loss function
        discriminator_loss = tf.reduce_mean(
            (1 - self.M) * D_entire) - tf.reduce_mean(
                self.M * D_entire) + grad_pen
        generator_loss = -tf.reduce_mean(D_fake)

        # Solver
        discriminator_solver = (tf.train.AdamOptimizer(
            learning_rate=self.learning_rate,
            beta1=0.5).minimize(discriminator_loss, var_list=self.theta_D))
        generator_solver = (tf.train.AdamOptimizer(
            learning_rate=self.learning_rate,
            beta1=0.5).minimize(generator_loss, var_list=self.theta_G))

    # Start session
    self.sess.run(tf.global_variables_initializer())

    # Training iterations
    for _ in range(self.n_iters):
        # TODO: Move dataset splitting here
        # For fixed generator weights run teacher training
        for _ in range(self.num_teachers):
            # Sample latent vars
            latent_batch = self._sample_latent_z(
                self.batch_size, self.z_dim)

            # Sample real
            train_idx_teach = self._sample_real_x(
                nsamples, self.batch_size
            )  # Does this way of sampling satisfy DP? Should be disjoint subsets!
            features_train_batch = features_train[train_idx_teach, :]

            # Labels: ones for the real half, zeros for the fake half
            labels_real = np.ones([
                self.batch_size,
            ])
            labels_fake = np.zeros([
                self.batch_size,
            ])
            labels_batch = np.concatenate((labels_real, labels_fake), 0)

            # NOTE(review): the noise is drawn from a normal distribution
            # although its scale attribute is named laplace_noise_scale -
            # confirm which noise mechanism is intended
            gaussian_noise = np.random.normal(
                loc=0.0,
                scale=self.laplace_noise_scale,
                size=self.batch_size * 2)

            # Perturb the labels, then threshold them back to 0/1 at 0.5
            labels_batch = labels_batch + gaussian_noise
            labels_batch = (labels_batch > 0.5)
            labels_batch = np.reshape(labels_batch.astype(float),
                                      (2 * self.batch_size, 1))

            # One discriminator update; the returned loss value is not used
            _, discriminator_loss_iter = self.sess.run(
                [discriminator_solver, discriminator_loss],
                feed_dict={
                    self.X: features_train_batch,
                    self.Z: latent_batch,
                    self.M: labels_batch
                })

        # Update generator weights
        latent_batch = self._sample_latent_z(self.batch_size, self.z_dim)
        # One generator update; the returned loss value is not used
        _, generator_loss_iter = self.sess.run(
            [generator_solver, generator_loss],
            feed_dict={self.Z: latent_batch})

    self.trained = True