def predict(self, domains, probability=False):
    """Classify domain names and return one prediction per input domain.

    :param domains: Domains to classify.
    :type domains: cudf.Series
    :param probability: If ``True``, return the sigmoid of the model's first
        output column for each domain. If ``False`` (default), return the
        index of the largest model output for each domain, i.e. the
        predicted class label.
    :type probability: bool
    :return: Predictions (column ``preds``), re-sorted by index so that they
        line up with the order of ``domains``.
    :rtype: cudf.Series

    Examples
    --------
    Output values below are illustrative only.

    >>> dd.predict(['nvidia.com', 'dgadomain'], probability=True)
    0    0.010
    1    0.924
    Name: preds, dtype: float64
    """
    df = cudf.DataFrame({"domain": domains})
    domains_len = df["domain"].count()
    temp_df = utils.str2ascii(df, domains_len)
    # Per the original note, str2ascii hands the domains back sorted. Adopting
    # its index here lets the final sort_index() restore the input order.
    df.index = temp_df.index
    df["domain"] = temp_df["domain"]
    temp_df = temp_df.drop("domain", axis=1)
    model_input, seq_lengths = self._create_variables(temp_df)
    # Release the intermediate frame before running the model.
    del temp_df
    # Inference only: nothing downstream needs gradients, so skip building
    # the autograd graph.
    with torch.no_grad():
        model_result = self.model(model_input, seq_lengths)
        if probability:
            preds = torch.sigmoid(model_result[:, 0])
        else:
            # [1] selects the argmax indices from the (values, indices) pair.
            preds = model_result.max(1, keepdim=True)[1]
    df["preds"] = preds.view(-1).tolist()
    return df.sort_index()["preds"]
def predict(self, domains, probability=False, truncate=100):
    """Classify domain names and return one prediction per input domain.

    The model is switched to evaluation mode, each domain is cut to at most
    ``truncate`` characters, and the result is run through the model.

    :param domains: Domains to classify.
    :type domains: cudf.Series
    :param probability: If ``True``, return the sigmoid of the model's first
        output column for each domain. If ``False`` (default), return the
        index of the largest model output for each domain, i.e. the
        predicted class label.
    :type probability: bool
    :param truncate: Truncate string to n number of characters.
    :type truncate: int
    :return: Predictions (column ``preds``), re-sorted by index so that they
        line up with the order of ``domains``.
    :rtype: cudf.Series

    Examples
    --------
    Output values below are illustrative only.

    >>> dd.predict(['nvidia.com', 'dgadomain'], probability=True)
    0    0.010
    1    0.924
    Name: preds, dtype: float64
    """
    log.debug("Initiating model inference ...")
    self.model.eval()
    df = cudf.DataFrame({"domain": domains})
    log.debug('Truncate domains to width: {}'.format(truncate))
    # Replace everything from position `truncate` onwards with '' (truncation).
    df['domain'] = df['domain'].str.slice_replace(truncate, repl='')
    temp_df = utils.str2ascii(df, 'domain')
    # Per the original note, str2ascii hands the domains back sorted. Adopting
    # its index here lets the final sort_index() restore the input order.
    df.index = temp_df.index
    df["domain"] = temp_df["domain"]
    temp_df = temp_df.drop("domain", axis=1)
    model_input, seq_lengths = self._create_variables(temp_df)
    # Release the intermediate frame before running the model.
    del temp_df
    # Inference only: nothing downstream needs gradients, so skip building
    # the autograd graph.
    with torch.no_grad():
        model_result = self.model(model_input, seq_lengths)
        if probability:
            preds = torch.sigmoid(model_result[:, 0])
        else:
            # [1] selects the argmax indices from the (values, indices) pair.
            preds = model_result.max(1, keepdim=True)[1]
    df["preds"] = preds.view(-1).tolist()
    return df.sort_index()["preds"]
def __preprocess(self, df, truncate):
    """Truncate the ``domain`` column, then pass the frame to ``utils.str2ascii``.

    :param df: Frame with a ``domain`` string column. The column is
        overwritten on the passed-in frame with the truncated values.
    :param truncate: Character position from which the remainder of each
        domain is replaced by the empty string.
    :return: Whatever ``utils.str2ascii(df, 'domain')`` returns.
    """
    shortened = df['domain'].str.slice_replace(truncate, repl='')
    df['domain'] = shortened
    return utils.str2ascii(df, 'domain')
def __preprocess(self, df):
    """Pass ``df`` to ``utils.str2ascii`` together with its row count.

    :param df: Frame to convert.
    :return: Whatever ``utils.str2ascii`` returns for ``df``.
    """
    row_count = df.shape[0]
    return utils.str2ascii(df, row_count)
def test_str2ascii():
    """Check ``utils.str2ascii`` on the ``domain`` column against the expected frame."""
    converted_df = utils.str2ascii(test_input_df, 'domain')
    assert converted_df.equals(expected_output_df)
def __preprocess(self, df):
    """Pass ``df`` to ``utils.str2ascii``, naming ``domain`` as the column.

    :param df: Frame with a ``domain`` column.
    :return: Whatever ``utils.str2ascii(df, 'domain')`` returns.
    """
    return utils.str2ascii(df, 'domain')