def predict_phishing_words(model_name, model_store_type, email_subject, email_body,
                           min_text_length, label_threshold, word_threshold, top_word_limit,
                           is_return_error):
    """Predict the phishing label for an email and explain the model's word evidence.

    Loads the stored model, tokenizes subject+body via the WordTokenizerNLP command,
    validates that enough model-known words are present, runs the explainer, maps
    hashed tokens back to plain words when hashing was used, highlights the positive
    words in the text, and returns a Demisto entry with both machine (context) and
    human-readable output.

    :param model_name: name of the stored model to load
    :param model_store_type: where the model is stored (passed to get_model_data)
    :param email_subject: email subject text
    :param email_body: email body text
    :param min_text_length: minimum number of model-known words required in the input
    :param label_threshold: minimum label probability required to return a prediction
    :param word_threshold: word-importance threshold passed to the explainer
    :param top_word_limit: max number of explanation words passed to the explainer
    :param is_return_error: when True, errors abort via return_error; otherwise a
        warning entry is produced (behavior of handle_error)
    :return: a Demisto entry dict with Contents/HumanReadable/EntryContext
    """
    model_data = get_model_data(model_name, model_store_type)
    model = demisto_ml.decode_model(model_data)
    text = "%s %s" % (email_subject, email_body)
    res = demisto.executeCommand('WordTokenizerNLP',
                                 {'value': text,
                                  'hashWordWithSeed': demisto.args().get('hashSeed')})
    if is_error(res[0]):
        return_error(res[0]['Contents'])
    tokenized_text_result = res[0]['Contents']
    # Prefer the hashed token stream when the tokenizer hashed words (hashSeed given).
    input_text = tokenized_text_result['hashedTokenizedText'] if tokenized_text_result.get('hashedTokenizedText') \
        else tokenized_text_result['tokenizedText']
    filtered_text, filtered_text_number_of_words = demisto_ml.filter_model_words(input_text, model)
    if filtered_text_number_of_words == 0:
        # Typo fix: "does not contains" -> "does not contain".
        handle_error("The model does not contain any of the input text words", is_return_error)
    if filtered_text_number_of_words < min_text_length:
        # Typo fix: "less then" -> "less than".
        handle_error("The model contains less than %d words" % min_text_length, is_return_error)
    explain_result = demisto_ml.explain_model_words(model, input_text, 0, word_threshold, top_word_limit)
    if explain_result["Probability"] < label_threshold:
        # BUG FIX: the original passed the literal "%.2f" placeholder without ever
        # interpolating the probability; format it explicitly.
        handle_error("Label probability is %.2f and it's below the input threshold"
                     % explain_result["Probability"], is_return_error)
    if tokenized_text_result.get('hashedTokenizedText'):
        hash_word_to_plain = dict(
            zip(tokenized_text_result['hashedTokenizedText'].split(" "),
                tokenized_text_result['tokenizedText'].split(" ")))
        # BUG FIX: use list comprehensions instead of map(). Under Python 3 a map
        # object is a one-shot iterator: it is joined once for HighlightWords terms
        # and again for the HR table (second join would be empty), and it is not
        # JSON-serializable when explain_result is returned as Contents.
        explain_result['PositiveWords'] = [hash_word_to_plain[x] for x in explain_result['PositiveWords']]
        explain_result['NegativeWords'] = [hash_word_to_plain[x] for x in explain_result['NegativeWords']]
    explain_result['OriginalText'] = tokenized_text_result['originalText']
    explain_result['TextTokensHighlighted'] = tokenized_text_result['tokenizedText']
    res = demisto.executeCommand('HighlightWords',
                                 {'text': tokenized_text_result['tokenizedText'],
                                  'terms': ",".join(explain_result['PositiveWords'])})
    res = res[0]
    if not is_error(res):
        # Best-effort: only replace the plain tokenized text when highlighting succeeded.
        highlighted_text_markdown = res['Contents']
        explain_result['TextTokensHighlighted'] = highlighted_text_markdown
    # Human-readable copy: joined word lists and a formatted probability.
    explain_result_hr = dict(explain_result)
    explain_result_hr['PositiveWords'] = ", ".join(explain_result_hr['PositiveWords'])
    explain_result_hr['NegativeWords'] = ", ".join(explain_result_hr['NegativeWords'])
    explain_result_hr['Probability'] = "%.2f" % explain_result_hr['Probability']
    return {
        'Type': entryTypes['note'],
        'Contents': explain_result,
        'ContentsFormat': formats['json'],
        'HumanReadable': tableToMarkdown('DBot Predict Phishing Words', explain_result_hr,
                                         headers=['TextTokensHighlighted', 'Label', 'Probability',
                                                  'PositiveWords', 'NegativeWords'],
                                         removeNull=True),
        'HumanReadableFormat': formats['markdown'],
        'EntryContext': {
            'DBotPredictPhishingWords': explain_result
        }
    }
def predict_phishing_words(model_name, model_store_type, email_subject, email_body):
    """Predict the phishing label for an email and explain the model's word evidence.

    Loads the stored model, tokenizes subject+body via the WordTokenizerNLP command,
    runs the explainer with thresholds read from demisto.args()
    ('labelProbabilityThreshold', 'wordThreshold', 'topWordsLimit'), maps hashed
    tokens back to plain words when hashing was used, highlights the positive words,
    and returns a Demisto entry.

    :param model_name: name of the stored model to load
    :param model_store_type: where the model is stored (passed to get_model_data)
    :param email_subject: email subject text
    :param email_body: email body text
    :return: a Demisto entry dict with Contents/HumanReadable/EntryContext
    """
    model_data = get_model_data(model_name, model_store_type)
    model = demisto_ml.decode_model(model_data)
    text = "%s %s" % (email_subject, email_body)
    res = demisto.executeCommand('WordTokenizerNLP',
                                 {'value': text,
                                  'hashWordWithSeed': demisto.args().get('hashSeed')})
    if is_error(res[0]):
        return_error(res[0]['Contents'])
    tokenized_text_result = res[0]['Contents']
    # Prefer the hashed token stream when the tokenizer hashed words (hashSeed given).
    input_text = tokenized_text_result['hashedTokenizedText'] if tokenized_text_result.get('hashedTokenizedText') \
        else tokenized_text_result['tokenizedText']
    filtered_text, filtered_text_number_of_words = demisto_ml.filter_model_words(input_text, model)
    if filtered_text_number_of_words == 0:
        # Typo fix: "does not contains" -> "does not contain".
        return_error("The model does not contain any of the input text words")
    explain_result = demisto_ml.explain_model_words(model,
                                                    input_text,
                                                    float(demisto.args().get('labelProbabilityThreshold', 0)),
                                                    float(demisto.args().get('wordThreshold', 0)),
                                                    int(demisto.args()['topWordsLimit']))
    if tokenized_text_result.get('hashedTokenizedText'):
        hash_word_to_plain = dict(
            zip(tokenized_text_result['hashedTokenizedText'].split(" "),
                tokenized_text_result['tokenizedText'].split(" ")))
        # BUG FIX: use list comprehensions instead of map(). Under Python 3 a map
        # object is a one-shot iterator and is not JSON-serializable when
        # explain_result is returned as Contents / rendered by tableToMarkdown.
        explain_result['PositiveWords'] = [hash_word_to_plain[x] for x in explain_result['PositiveWords']]
        explain_result['NegativeWords'] = [hash_word_to_plain[x] for x in explain_result['NegativeWords']]
    explain_result['OriginalText'] = tokenized_text_result['originalText']
    explain_result['TextTokensHighlighted'] = tokenized_text_result['tokenizedText']
    res = demisto.executeCommand('HighlightWords',
                                 {'text': tokenized_text_result['tokenizedText'],
                                  'terms': ",".join(explain_result['PositiveWords'])})
    res = res[0]
    if not is_error(res):
        # Best-effort: only replace the plain tokenized text when highlighting succeeded.
        highlighted_text_markdown = res['Contents']
        explain_result['TextTokensHighlighted'] = highlighted_text_markdown
    return {
        'Type': entryTypes['note'],
        'Contents': explain_result,
        'ContentsFormat': formats['json'],
        # Typo fix: "Phihsing" -> "Phishing" in the table title.
        'HumanReadable': tableToMarkdown('DBot Predict Phishing Words', explain_result,
                                         headers=['TextTokensHighlighted', 'Label', 'Probability',
                                                  'PositiveWords', 'NegativeWords'],
                                         removeNull=True),
        'HumanReadableFormat': formats['markdown'],
        'EntryContext': {
            'DBotPredictPhishingWords': explain_result
        }
    }
def predict_phishing_words(model_name, model_store_type, email_subject, email_body,
                           min_text_length, label_threshold, word_threshold, top_word_limit,
                           is_return_error, set_incidents_fields=False):
    """Predict the phishing label for an email and explain the model's word evidence.

    Loads the stored model, tokenizes subject+body via the WordTokenizerNLP command,
    validates that enough model-known words are present, runs the explainer, maps
    explanation tokens back to the original words (via the tokenizer's
    words-to-tokens maps), highlights the positive words in the original text, and
    optionally writes the prediction onto the incident.

    :param model_name: name of the stored model to load
    :param model_store_type: where the model is stored (passed to get_model_data)
    :param email_subject: email subject text
    :param email_body: email body text
    :param min_text_length: minimum number of model-known words required in the input
    :param label_threshold: minimum label probability required to return a prediction
    :param word_threshold: word-importance threshold passed to the explainer
    :param top_word_limit: max number of explanation words passed to the explainer
    :param is_return_error: when True, errors abort via return_error; otherwise a
        warning entry is produced (behavior of handle_error)
    :param set_incidents_fields: when True (and not in the playground), writes
        dbotprediction/dbotpredictionprobability/dbottextsuggestionhighlighted
        onto the current incident
    :return: a Demisto entry dict with Contents/HumanReadable/EntryContext
    """
    model_data = get_model_data(model_name, model_store_type, is_return_error)
    model = demisto_ml.decode_model(model_data)
    text = "%s %s" % (email_subject, email_body)
    res = demisto.executeCommand(
        'WordTokenizerNLP', {
            'value': text,
            'hashWordWithSeed': demisto.args().get('hashSeed')
        })
    if is_error(res[0]):
        handle_error(res[0]['Contents'], is_return_error)
    tokenized_text_result = res[0]['Contents']
    # Prefer the hashed token stream when the tokenizer hashed words (hashSeed given).
    input_text = tokenized_text_result['hashedTokenizedText'] if tokenized_text_result.get('hashedTokenizedText') \
        else tokenized_text_result['tokenizedText']
    filtered_text, filtered_text_number_of_words = demisto_ml.filter_model_words(
        input_text, model)
    if filtered_text_number_of_words == 0:
        handle_error("The model does not contain any of the input text words",
                     is_return_error)
    if filtered_text_number_of_words < min_text_length:
        handle_error(
            "The model contains fewer than %d words" % min_text_length,
            is_return_error)
    explain_result = demisto_ml.explain_model_words(model, input_text, 0,
                                                    word_threshold,
                                                    top_word_limit)
    predicted_prob = explain_result["Probability"]
    if predicted_prob < label_threshold:
        handle_error(
            "Label probability is {:.2f} and it's below the input confidence threshold"
            .format(predicted_prob), is_return_error)
    # Pick the map that matches the token stream the explainer saw.
    if tokenized_text_result.get('hashedTokenizedText'):
        words_to_token_maps = tokenized_text_result['wordsToHashedTokens']
    else:
        words_to_token_maps = tokenized_text_result['originalWordsToTokens']
    # Strip non-alphanumeric characters so tokens match the words-to-tokens map keys.
    # (Idiom fix: set comprehensions instead of set([<listcomp>]).)
    positive_tokens = {
        ''.join(c for c in word if c.isalnum())
        for word in explain_result['PositiveWords']
    }
    negative_tokens = {
        ''.join(c for c in word if c.isalnum())
        for word in explain_result['NegativeWords']
    }
    positive_words = find_words_contain_tokens(positive_tokens,
                                               words_to_token_maps)
    negative_words = find_words_contain_tokens(negative_tokens,
                                               words_to_token_maps)
    positive_words = [s.strip(punctuation) for s in positive_words]
    negative_words = [s.strip(punctuation) for s in negative_words]
    if len(positive_words) > 0:
        res = demisto.executeCommand(
            'HighlightWords', {
                'text': tokenized_text_result['originalText'],
                'terms': ",".join(positive_words)
            })
        res = res[0]
        if not is_error(res):
            highlighted_text_markdown = res['Contents']
        else:
            # Best-effort: fall back to the plain original text on failure.
            highlighted_text_markdown = tokenized_text_result[
                'originalText'].strip()
    else:
        highlighted_text_markdown = tokenized_text_result[
            'originalText'].strip()
    explain_result['PositiveWords'] = positive_words
    explain_result['NegativeWords'] = negative_words
    explain_result['OriginalText'] = tokenized_text_result[
        'originalText'].strip()
    explain_result['TextTokensHighlighted'] = highlighted_text_markdown
    predicted_label = explain_result["Label"]
    # Human-readable copy: joined word lists and formatted probability/confidence.
    explain_result_hr = dict()
    explain_result_hr['TextTokensHighlighted'] = highlighted_text_markdown
    explain_result_hr['Label'] = predicted_label
    explain_result_hr['Probability'] = "%.2f" % predicted_prob
    explain_result_hr['Confidence'] = "%.2f" % predicted_prob
    explain_result_hr['PositiveWords'] = ", ".join(positive_words)
    explain_result_hr['NegativeWords'] = ", ".join(negative_words)
    incident_context = demisto.incidents()[0]
    # Only persist prediction fields on real incidents, never in the playground.
    if not incident_context['isPlayground'] and set_incidents_fields:
        demisto.executeCommand(
            "setIncident", {
                'dbotprediction': predicted_label,
                'dbotpredictionprobability': predicted_prob,
                'dbottextsuggestionhighlighted': highlighted_text_markdown
            })
    return {
        'Type': entryTypes['note'],
        'Contents': explain_result,
        'ContentsFormat': formats['json'],
        'HumanReadable': tableToMarkdown('DBot Predict Phishing Words',
                                         explain_result_hr,
                                         headers=[
                                             'TextTokensHighlighted', 'Label',
                                             'Confidence', 'PositiveWords',
                                             'NegativeWords'
                                         ],
                                         removeNull=True),
        'HumanReadableFormat': formats['markdown'],
        'EntryContext': {
            'DBotPredictPhishingWords': explain_result
        }
    }
def predict_phishing_words(model_name, model_store_type, email_subject, email_body):
    """Predict the phishing label for an email and explain the model's word evidence.

    Loads the stored model, tokenizes subject+body via the WordTokenizerNLP command,
    runs the explainer with thresholds read from demisto.args()
    ('labelProbabilityThreshold', 'wordThreshold', 'topWordsLimit'), maps hashed
    tokens back to plain words when hashing was used, highlights the positive words,
    and returns a Demisto entry.

    :param model_name: name of the stored model to load
    :param model_store_type: where the model is stored (passed to get_model_data)
    :param email_subject: email subject text
    :param email_body: email body text
    :return: a Demisto entry dict with Contents/HumanReadable/EntryContext
    """
    model_data = get_model_data(model_name, model_store_type)
    model = demisto_ml.decode_model(model_data)
    text = "%s %s" % (email_subject, email_body)
    res = demisto.executeCommand(
        'WordTokenizerNLP', {
            'value': text,
            'hashWordWithSeed': demisto.args().get('hashSeed')
        })
    if is_error(res[0]):
        return_error(res[0]['Contents'])
    tokenized_text_result = res[0]['Contents']
    # Prefer the hashed token stream when the tokenizer hashed words (hashSeed given).
    input_text = tokenized_text_result['hashedTokenizedText'] if tokenized_text_result.get('hashedTokenizedText') \
        else tokenized_text_result['tokenizedText']
    filtered_text, filtered_text_number_of_words = demisto_ml.filter_model_words(
        input_text, model)
    if filtered_text_number_of_words == 0:
        # Typo fix: "does not contains" -> "does not contain".
        return_error("The model does not contain any of the input text words")
    explain_result = demisto_ml.explain_model_words(
        model, input_text,
        float(demisto.args().get('labelProbabilityThreshold', 0)),
        float(demisto.args().get('wordThreshold', 0)),
        int(demisto.args()['topWordsLimit']))
    if tokenized_text_result.get('hashedTokenizedText'):
        hash_word_to_plain = dict(
            zip(tokenized_text_result['hashedTokenizedText'].split(" "),
                tokenized_text_result['tokenizedText'].split(" ")))
        # BUG FIX: use list comprehensions instead of map(). Under Python 3 a map
        # object is a one-shot iterator and is not JSON-serializable when
        # explain_result is returned as Contents / rendered by tableToMarkdown.
        explain_result['PositiveWords'] = [hash_word_to_plain[x]
                                           for x in explain_result['PositiveWords']]
        explain_result['NegativeWords'] = [hash_word_to_plain[x]
                                           for x in explain_result['NegativeWords']]
    explain_result['OriginalText'] = tokenized_text_result['originalText']
    explain_result['TextTokensHighlighted'] = tokenized_text_result[
        'tokenizedText']
    res = demisto.executeCommand(
        'HighlightWords', {
            'text': tokenized_text_result['tokenizedText'],
            'terms': ",".join(explain_result['PositiveWords'])
        })
    res = res[0]
    if not is_error(res):
        # Best-effort: only replace the plain tokenized text when highlighting succeeded.
        highlighted_text_markdown = res['Contents']
        explain_result['TextTokensHighlighted'] = highlighted_text_markdown
    return {
        'Type': entryTypes['note'],
        'Contents': explain_result,
        'ContentsFormat': formats['json'],
        # Typo fix: "Phihsing" -> "Phishing" in the table title.
        'HumanReadable': tableToMarkdown('DBot Predict Phishing Words',
                                         explain_result,
                                         headers=[
                                             'TextTokensHighlighted', 'Label',
                                             'Probability', 'PositiveWords',
                                             'NegativeWords'
                                         ],
                                         removeNull=True),
        'HumanReadableFormat': formats['markdown'],
        'EntryContext': {
            'DBotPredictPhishingWords': explain_result
        }
    }