import datetime
import json
import time

import requests
from googleapiclient import errors as google_api_errors

# `clean` and `perspective` are this project's local modules.
import clean
import perspective


def log_change():
    """Polls recent changes every two minutes, cleans each revision, and
    logs any that contain possible PII or toxicity."""
    apikey_data, toxicity, dlp = perspective.get_client()
    start = datetime.datetime.utcnow() - datetime.timedelta(minutes=2)
    while True:
        end = datetime.datetime.utcnow()
        page = (
            args.mediawiki +
            'api.php?action=query&list=recentchanges&rclimit=500'
            '&rcprop=title%7Cids%7Csizes%7Cflags%7Cuser&rcdir=newer'
            '&rcstart=' + start.isoformat() +
            '&rcend=' + end.isoformat() + '&format=json')
        get_page = requests.get(page)
        response = json.loads(get_page.content)
        start = end
        for change in response['query']['recentchanges']:
            print('new change:')
            revid = str(change['revid'])
            old_revid = str(change['old_revid'])
            compare = (args.mediawiki + 'api.php?action=compare&fromrev=' +
                       old_revid + '&torev=' + revid + '&format=json')
            get_compare = requests.get(compare)
            compare_response = json.loads(get_compare.content.decode('utf-8'))
            if 'compare' not in compare_response:
                continue
            revision = compare_response['compare']['*']
            text = clean.content_clean(revision)
            print(text)
            # Skip empty diffs before calling the APIs.
            if not text:
                continue
            dlp_response = perspective.dlp_request(dlp, apikey_data, text)
            try:
                perspective_response = perspective.perspective_request(
                    toxicity, text)
            # Perspective can't handle language errors at this time, so skip
            # this change rather than stopping the poll loop.
            except google_api_errors.HttpError as err:
                print('Error:', err)
                continue
            has_pii_bool, pii_type = perspective.contains_pii(dlp_response)
            if has_pii_bool:
                header = '==Possible Doxxing Detected: Waiting for review=='
                result = (json.dumps({
                    'comment_text': text,
                    'contains_pii': True,
                    'pii_type': pii_type
                }) + '\n')
                wiki_write(result, header)
            if perspective.contains_toxicity(perspective_response):
                header = '==Possibly Toxic Detected: Waiting for review=='
                result = (json.dumps({
                    'comment_text': text,
                    'contains_toxicity': True,
                    'summaryScore': perspective_response['attributeScores']
                    ['TOXICITY']['summaryScore']['value']
                }) + '\n')
                wiki_write(result, header)
        time.sleep(120)
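# `log_change` relies on a module-level `args` namespace that this section
# never defines. A minimal sketch of that wiring, assuming an argparse flag
# named `--mediawiki` whose value is the wiki's base URL ending in a slash;
# the flag name and default below are assumptions, not part of the original
# code.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Poll a MediaWiki for possibly toxic or PII-bearing edits.')
    parser.add_argument(
        '--mediawiki',
        default='https://en.wikipedia.org/w/',
        help='Base URL of the wiki, up to and including the trailing slash.')
    args = parser.parse_args()
    log_change()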
def log_event(apikey_data, toxicity, dlp, change):
    """Checks a single Wikimedia change event and logs flagged revisions.

    Args:
        apikey_data: API key data returned by perspective.get_client().
        toxicity: the Perspective API client.
        dlp: the Cloud DLP API client.
        change: a json object with the Wikimedia change record.
    """
    from_id = str(change['revision']['old'])
    to_id = str(change['revision']['new'])
    page = ('https://en.wikipedia.org/w/api.php?action=compare&fromrev=' +
            from_id + '&torev=' + to_id + '&format=json')
    get_page = requests.get(page)
    response = json.loads(get_page.content.decode('utf-8'))
    if 'compare' not in response:
        return
    revision = response['compare']['*']
    text = clean.content_clean(revision)
    print(text)
    if not text:
        return
    dlp_response = perspective.dlp_request(dlp, apikey_data, text)
    try:
        perspective_response = perspective.perspective_request(toxicity, text)
    # Perspective can't handle language errors at this time.
    except google_api_errors.HttpError as err:
        print('Error:', err)
        return
    has_pii_bool, pii_type = perspective.contains_pii(dlp_response)
    if has_pii_bool:
        header = '==Possible Doxxing Detected: Waiting for review=='
        result = (json.dumps({
            'user': change['user'],
            'namespace': change['namespace'],
            'bot': change['bot'],
            'comment': change['comment'],
            'title': change['title'],
            'comment_text': text,
            'contains_pii': True,
            'pii_type': pii_type
        }) + '\n')
        wiki_write(result, header)
    if perspective.contains_toxicity(perspective_response):
        header = '==Possibly Toxic Detected: Waiting for review=='
        result = (json.dumps({
            'user': change['user'],
            'namespace': change['namespace'],
            'bot': change['bot'],
            'comment': change['comment'],
            'title': change['title'],
            'comment_text': text,
            'contains_toxicity': True,
            'toxic_score': perspective_response['attributeScores']
            ['TOXICITY']['summaryScore']['value']
        }) + '\n')
        wiki_write(result, header)
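# `wiki_write(result, header)` is called above but not defined in this
# section. A hypothetical stand-in, assuming flagged results are appended
# under their review header to a local log file; the real helper presumably
# posts to a wiki review page instead.
def wiki_write(result, header):
    """Appends a flagged result under its review header to a local log."""
    with open('review_log.txt', 'a', encoding='utf-8') as log_file:
        log_file.write(header + '\n' + result)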
def test_contains_threat(self):
    perspective_response = {
        'attributeScores': {
            'INSULT': {
                'spanScores': [{
                    'begin': 0,
                    'end': 21,
                    'score': {'value': 0.55873775, 'type': 'PROBABILITY'}
                }],
                'summaryScore': {'value': 0.55873775, 'type': 'PROBABILITY'}
            },
            'TOXICITY': {
                'spanScores': [{
                    'begin': 0,
                    'end': 21,
                    'score': {'value': 0.9759337, 'type': 'PROBABILITY'}
                }],
                'summaryScore': {'value': 0.9759337, 'type': 'PROBABILITY'}
            },
            'THREAT': {
                'spanScores': [{
                    'begin': 0,
                    'end': 21,
                    'score': {'value': 0.9980843, 'type': 'PROBABILITY'}
                }],
                'summaryScore': {'value': 0.9980843, 'type': 'PROBABILITY'}
            }
        },
        'languages': ['en'],
        'detectedLanguages': ['en']
    }
    # A threatening comment should also register as toxic.
    is_threat = perspective.contains_toxicity(perspective_response)
    self.assertTrue(is_threat)
def test_contains_toxicity_true(self):
    perspective_response = {
        'attributeScores': {
            'INSULT': {
                'spanScores': [{
                    'begin': 0,
                    'end': 14,
                    'score': {'value': 0.8521307, 'type': 'PROBABILITY'}
                }],
                'summaryScore': {'value': 0.8521307, 'type': 'PROBABILITY'}
            },
            'TOXICITY': {
                'spanScores': [{
                    'begin': 0,
                    'end': 14,
                    'score': {'value': 0.96624386, 'type': 'PROBABILITY'}
                }],
                'summaryScore': {'value': 0.96624386, 'type': 'PROBABILITY'}
            },
            'THREAT': {
                'spanScores': [{
                    'begin': 0,
                    'end': 14,
                    'score': {'value': 0.39998722, 'type': 'PROBABILITY'}
                }],
                'summaryScore': {'value': 0.39998722, 'type': 'PROBABILITY'}
            }
        },
        'languages': ['en'],
        'detectedLanguages': ['en']
    }
    is_toxic = perspective.contains_toxicity(perspective_response)
    self.assertTrue(is_toxic)
def test_contains_toxicity_false(self):
    perspective_response = {
        'attributeScores': {
            'TOXICITY': {
                'spanScores': [{
                    'begin': 0,
                    'end': 25,
                    'score': {'value': 0.9312127, 'type': 'PROBABILITY'}
                }],
                'summaryScore': {'value': 0.9312127, 'type': 'PROBABILITY'}
            },
            'THREAT': {
                'spanScores': [{
                    'begin': 0,
                    'end': 25,
                    'score': {'value': 0.15875438, 'type': 'PROBABILITY'}
                }],
                'summaryScore': {'value': 0.15875438, 'type': 'PROBABILITY'}
            },
            'INSULT': {
                'spanScores': [{
                    'begin': 0,
                    'end': 25,
                    'score': {'value': 0.93682694, 'type': 'PROBABILITY'}
                }],
                'summaryScore': {'value': 0.93682694, 'type': 'PROBABILITY'}
            }
        },
        'languages': ['en'],
        'detectedLanguages': ['en']
    }
    # This summary score is assumed to fall below the toxicity cutoff.
    is_toxic = perspective.contains_toxicity(perspective_response)
    self.assertFalse(is_toxic)
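# The tests above exercise `perspective.contains_toxicity` without showing its
# body. A minimal sketch consistent with both the true and false cases,
# assuming a plain threshold on the TOXICITY summary score; the cutoff of 0.95
# is an assumption that merely sits between the two tested scores (0.9312127
# and 0.96624386).
TOXICITY_THRESHOLD = 0.95  # assumed value, not from the original code


def contains_toxicity(perspective_response):
    """Returns True when the TOXICITY summary score meets the threshold."""
    score = (perspective_response['attributeScores']['TOXICITY']
             ['summaryScore']['value'])
    return score >= TOXICITY_THRESHOLD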