def get_user_chart_data(wiki, db, user):
    """Return chart data for *user* on *wiki*, using the cache when possible.

    On a cache miss the (potentially expensive) analysis is run and the
    result is stored with no expiry (timeout=0).
    """
    cache_key = 'wiki-fi:userdata_{0}_{1}'.format(user['username'].replace(' ', '_'), wiki)
    charts_data = cache.get(cache_key)
    if charts_data is not None:
        return charts_data
    # Cache miss: analyze the user and cache the result indefinitely.
    charts_data = analyze.analyze_user(wiki, db, user)
    cache.set(cache_key, charts_data, timeout=0)
    return charts_data
from analyze import analyze_user
from dataset_scrape import get_posts
from reddit import send_message

# Authors of the 100 most recent posts in the monitored subreddits.
users = [e[0] for e in get_posts(100, ["depression", "suicidewatch"])]

# Message sent to users the classifier flags; fixed the previously garbled
# wording ("knkow", "somebody talk to feel free to talk to, send me a message").
friendly_message = """ Hey there! Just wanted to reach out to you and let you know that you're awesome. Just wanted to let you know that if you ever need somebody to talk to, feel free to send me a message and I'd be happy to chat! If you're ever feeling suicidal please take a look at /r/SWResources. You're an amazing person and I hope you have a great day. """

# Contact every user whose analysis score crosses the 0.5 threshold.
for user in users:
    if analyze_user(None, user) > 0.5:
        send_message(user, "Just checking in!", friendly_message)
def update(wiki):
    """Sync the local MongoDB mirror of *wiki* from its recent-changes feed.

    Fetches all changes newer than the stored rcid high-water mark, replays
    each change (new page, edit, move, upload, delete, restore, ...) into the
    'edits'/'pages'/'files' collections, invalidates affected cache entries,
    and finally records the newest rcid and the update timestamp.

    Fixes applied in review: the two Python-2-only ``print edit`` statements
    were replaced with ``print(edit)`` for consistency with every other print
    in the block, and the unused local ``old_page_title`` was removed.
    """
    db, w_api = load(wiki)
    last_edit = get_last_edit_datetime(db)
    print('Last edit time was: ' + str(last_edit))
    print('Fetching edits from wiki...')
    recent_edits = w_api.get_recent_changes(last_edit)
    print('Successfully fetched edits from wiki')
    expensive_users_updated = []
    datenow = datetime.datetime.now()
    # rcid high-water mark: changes at or below it are already in the db.
    last_seen_rcid_store = db['metadata'].find_one({'key': 'last_seen_rcid'}, fields=['value'])
    if last_seen_rcid_store is None:
        last_seen_rcid = 0
    else:
        last_seen_rcid = last_seen_rcid_store['value']
    for edit in recent_edits:
        # check if edit has already been inserted into db
        if edit['rcid'] <= last_seen_rcid:
            continue
        timestamp = get_date_from_string(edit['timestamp'])
        # NOTE(review): when a key is absent the variable keeps its value from
        # the previous iteration -- presumably the API always supplies these;
        # confirm against the recentchanges response format.
        if 'user' in edit:
            username = edit['user'].encode('utf-8')
        if 'title' in edit:
            title = edit['title'].encode('utf-8')
        if 'ns' in edit:
            ns = edit['ns']
        if 'rcid' in edit:
            rcid = edit['rcid']
        if 'redirect' in edit:
            redirect = True
        else:
            redirect = False
        if edit['type'] == 'new':
            # Brand-new page: record its creating revision.
            print('RCID: {0} - NEWPAGE: {1}'.format(rcid, title))
            user_id = get_user_id(db, wiki, w_api, username)
            page_id = get_page_id(db, wiki, title, ns, redirect)
            output = {'user_id': user_id,
                      'ns': ns,
                      'revid': edit['revid'],
                      'page_id': page_id,
                      'timestamp': timestamp,
                      'new_page': True
                      }
            db['edits'].insert(output)
            cache.delete('wiki-fi:pagedata_{0}_{1}'.format(title.replace(' ', '_'), wiki))
        elif edit['type'] == 'edit':
            print('RCID: {0} - EDIT: {1}'.format(rcid, title))
            user_id = get_user_id(db, wiki, w_api, username)
            page_id = get_page_id(db, wiki, title, ns, redirect)
            output = {'user_id': user_id,
                      'ns': ns,
                      'revid': edit['revid'],
                      'page_id': page_id,
                      'timestamp': timestamp,
                      'new_page': False
                      }
            db['edits'].insert(output)
            # An edit can add or remove a redirect, so refresh the flag.
            db['pages'].update({'_id': page_id}, {'$set': {'redirect': redirect}})
            cache.delete('wiki-fi:pagedata_{0}_{1}'.format(title.replace(' ', '_'), wiki))
        elif edit['type'] == 'log':
            if edit['logtype'] == 'move':
                if edit['logaction'] == 'move_redir':
                    # Move over an existing redirect: target title arrives in
                    # the legacy positional log param '0'.
                    print('RCID: {0} - PAGEMOVE: {1} -> {2}'.format(rcid, title, edit['0'].encode('utf-8')))
                else:
                    print('RCID: {0} - PAGEMOVE: {1} -> {2}'.format(rcid, title, edit['move']['new_title'].encode('utf-8')))
                page_id = get_page_id(db, wiki, title, ns, redirect)
                if edit['logaction'] == 'move_redir':
                    new_page_title = edit['0'].encode('utf-8')
                    # ugly hack because api doesn't return new namespace
                    new_page_ns = get_namespace_from_title(db, new_page_title)
                else:
                    new_page_title = edit['move']['new_title'].encode('utf-8')
                    new_page_ns = edit['move']['new_ns']
                # delete existing target page and any edits that referenced it
                target_page = db['pages'].find_one({'title': new_page_title})
                if target_page:
                    db['pages'].remove({'_id': target_page['_id']})
                    db['edits'].remove({'page_id': target_page['_id']})
                # Language is encoded as a '/xx' title suffix; default 'en'.
                for lang in langArray:
                    if new_page_title.endswith('/' + lang):
                        language = lang
                        break
                else:
                    language = 'en'
                # rename oldpage to newpage
                db['pages'].update({'_id': page_id}, {'$set': {'title': new_page_title, 'ns': new_page_ns, 'lang': language, 'redirect': False}})
                cache.delete('wiki-fi:pagedata_{0}_{1}'.format(title.replace(' ', '_'), wiki))
                if 'suppressedredirect' in edit or ('move' in edit and 'suppressedredirect' not in edit['move']):
                    # left behind a redirect
                    print('RCID: {0} - REDIRECTCREATION: {1}'.format(rcid, title))
                    page_id = get_page_id(db, wiki, title, ns, True)
                    user_id = get_user_id(db, wiki, w_api, username)
                    output = {'user_id': user_id,
                              'ns': ns,
                              'revid': edit['revid'],
                              'page_id': page_id,
                              'timestamp': timestamp,
                              'new_page': True
                              }
                    db['edits'].insert(output)
                    db['pages'].update({'_id': page_id}, {'$set': {'redirect': True}})
                    cache.delete('wiki-fi:pagedata_{0}_{1}'.format(title.replace(' ', '_'), wiki))
            elif edit['logtype'] == 'upload':
                print('RCID: {0} - FILEUPLOAD: {1}'.format(rcid, title))
                user_id = get_user_id(db, wiki, w_api, username)
                page_id = get_page_id(db, wiki, title, ns, redirect)
                output = {'user_id': user_id,
                          'page_id': page_id,
                          'timestamp': timestamp
                          }
                db['files'].insert(output)
                # Only a first-time upload ('upload', not 'overwrite') also
                # creates the file description page's first revision.
                if edit['logaction'] == 'upload':
                    output = {'user_id': user_id,
                              'ns': ns,
                              'revid': edit['revid'],
                              'page_id': page_id,
                              'timestamp': timestamp,
                              'new_page': True
                              }
                    db['edits'].insert(output)
                cache.delete('wiki-fi:pagedata_{0}_{1}'.format(title.replace(' ', '_'), wiki))
            elif edit['logtype'] == 'delete':
                if edit['logaction'] == 'delete':
                    print('RCID: {0} - DELETION: {1}'.format(rcid, title))
                    page_id = get_page_id(db, wiki, edit['title'], ns, redirect)
                    db['edits'].remove({'page_id': page_id})
                    db['pages'].remove({'_id': page_id})
                    db['files'].remove({'page_id': page_id})
                    cache.delete('wiki-fi:pagedata_{0}_{1}'.format(title.replace(' ', '_'), wiki))
                elif edit['logaction'] == 'restore':
                    # Undeletion: re-import the page's full revision history.
                    print('RCID: {0} - RESTORE: {1}'.format(rcid, title))
                    page_id = get_page_id(db, wiki, edit['title'], ns, redirect)
                    revisions = w_api.get_page_revisions(edit['title'])
                    first = True
                    for revision in revisions:
                        username = revision['user'].encode('utf-8')
                        timestamp = get_date_from_string(revision['timestamp'])
                        revid = revision['revid']
                        user_id = get_user_id(db, wiki, w_api, username)
                        output = {'user_id': user_id,
                                  'ns': ns,
                                  'revid': revid,
                                  'page_id': page_id,
                                  'timestamp': timestamp,
                                  'new_page': first
                                  }
                        db['edits'].insert(output)
                        first = False
                    if ns == 6:
                        # File namespace: restore the upload history too.
                        uploads = w_api.get_file_uploads(edit['title'])
                        if uploads is None:
                            # it's a redirect page
                            continue
                        for upload in uploads:
                            username = upload['user'].encode('utf-8')
                            timestamp = get_date_from_string(upload['timestamp'])
                            user_id = get_user_id(db, wiki, w_api, username)
                            output = {'user_id': user_id,
                                      'page_id': page_id,
                                      'timestamp': timestamp
                                      }
                            db['files'].insert(output)
            elif edit['logtype'] == 'newusers':
                print('RCID: {0} - NEWUSER: {1}'.format(rcid, username))
            elif edit['logtype'] == 'block':
                print('RCID: {0} - BLOCK: {1}'.format(rcid, title))
            else:
                print('MISSED')
                print(edit)
        else:
            print('MISSED')
            print(edit)
        last_seen_rcid = edit['rcid']
        # Cheap users: just drop their cached data. Expensive users: queue
        # them for a single re-analysis after the loop.
        if username not in config['wikis'][wiki]['expensive_users']:
            cache.delete('wiki-fi:userdata_{0}_{1}'.format(username.replace(' ', '_'), wiki))
        elif username not in expensive_users_updated:
            expensive_users_updated.append(username)
    # reanalyze expensive users
    print('Recaching results for expensive users...')
    for username in expensive_users_updated:
        charts_data = analyze.analyze_user(wiki, db, db['users'].find_one({'username': username}))
        cache.set('wiki-fi:userdata_{0}_{1}'.format(username.replace(' ', '_'), wiki), charts_data, timeout=0)
    # update last_updated time
    db['metadata'].update({'key': 'last_seen_rcid'}, {'$set': {'value': last_seen_rcid}}, upsert=True)
    db['metadata'].update({'key': 'user_and_pages_last_updated'}, {'$set': {'value': datenow}}, upsert=True)
    cache.set('wiki-fi:user_and_pages_last_updated_' + wiki, datenow, timeout=0)
    cache.delete('wiki-fi:wiki-fi_stats')
def update(wiki):
    """Sync the local MongoDB mirror of *wiki* from its recent-changes feed.

    Variant that reads move targets from ``edit["logparams"]`` rather than
    the legacy ``edit["move"]`` / positional-param shapes. Replays each
    change (new page, edit, move, upload, delete, restore, ...) into the
    'edits'/'pages'/'files' collections, invalidates affected cache entries,
    and finally records the newest rcid and the update timestamp.

    Fixes applied in review: the two Python-2-only ``print edit`` statements
    were replaced with ``print(edit)`` for consistency with every other print
    in the block, and the unused local ``old_page_title`` was removed.
    """
    db, w_api = load(wiki)
    last_edit = get_last_edit_datetime(db)
    print("Last edit time was: " + str(last_edit))
    print("Fetching edits from wiki...")
    recent_edits = w_api.get_recent_changes(last_edit)
    print("Successfully fetched edits from wiki")
    expensive_users_updated = []
    datenow = datetime.datetime.now()
    # rcid high-water mark: changes at or below it are already in the db.
    last_seen_rcid_store = db["metadata"].find_one({"key": "last_seen_rcid"}, fields=["value"])
    if last_seen_rcid_store is None:
        last_seen_rcid = 0
    else:
        last_seen_rcid = last_seen_rcid_store["value"]
    for edit in recent_edits:
        # check if edit has already been inserted into db
        if edit["rcid"] <= last_seen_rcid:
            continue
        timestamp = get_date_from_string(edit["timestamp"])
        # NOTE(review): when a key is absent the variable keeps its value from
        # the previous iteration -- presumably the API always supplies these;
        # confirm against the recentchanges response format.
        if "user" in edit:
            username = edit["user"].encode("utf-8")
        if "title" in edit:
            title = edit["title"].encode("utf-8")
        if "ns" in edit:
            ns = edit["ns"]
        if "rcid" in edit:
            rcid = edit["rcid"]
        if "redirect" in edit:
            redirect = True
        else:
            redirect = False
        if edit["type"] == "new":
            # Brand-new page: record its creating revision.
            print("RCID: {0} - NEWPAGE: {1}".format(rcid, title))
            user_id = get_user_id(db, wiki, w_api, username)
            page_id = get_page_id(db, wiki, title, ns, redirect)
            output = {
                "user_id": user_id,
                "ns": ns,
                "revid": edit["revid"],
                "page_id": page_id,
                "timestamp": timestamp,
                "new_page": True,
            }
            db["edits"].insert(output)
            cache.delete("wiki-fi:pagedata_{0}_{1}".format(title.replace(" ", "_"), wiki))
        elif edit["type"] == "edit":
            print("RCID: {0} - EDIT: {1}".format(rcid, title))
            user_id = get_user_id(db, wiki, w_api, username)
            page_id = get_page_id(db, wiki, title, ns, redirect)
            output = {
                "user_id": user_id,
                "ns": ns,
                "revid": edit["revid"],
                "page_id": page_id,
                "timestamp": timestamp,
                "new_page": False,
            }
            db["edits"].insert(output)
            # An edit can add or remove a redirect, so refresh the flag.
            db["pages"].update({"_id": page_id}, {"$set": {"redirect": redirect}})
            cache.delete("wiki-fi:pagedata_{0}_{1}".format(title.replace(" ", "_"), wiki))
        elif edit["type"] == "log":
            if edit["logtype"] == "move":
                print(
                    "RCID: {0} - PAGEMOVE: {1} -> {2}".format(
                        rcid, title, edit["logparams"]["target_title"].encode("utf-8")
                    )
                )
                page_id = get_page_id(db, wiki, title, ns, redirect)
                new_page_title = edit["logparams"]["target_title"].encode("utf-8")
                new_page_ns = edit["logparams"]["target_ns"]
                # delete existing target page and any edits that referenced it
                target_page = db["pages"].find_one({"title": new_page_title})
                if target_page:
                    db["pages"].remove({"_id": target_page["_id"]})
                    db["edits"].remove({"page_id": target_page["_id"]})
                # Language is encoded as a '/xx' title suffix; default "en".
                for lang in langArray:
                    if new_page_title.endswith("/" + lang):
                        language = lang
                        break
                else:
                    language = "en"
                # rename oldpage to newpage
                db["pages"].update(
                    {"_id": page_id},
                    {"$set": {"title": new_page_title, "ns": new_page_ns, "lang": language, "redirect": False}},
                )
                cache.delete("wiki-fi:pagedata_{0}_{1}".format(title.replace(" ", "_"), wiki))
                if "suppressedredirect" in edit or ("move" in edit and "suppressedredirect" not in edit["move"]):
                    # left behind a redirect
                    print("RCID: {0} - REDIRECTCREATION: {1}".format(rcid, title))
                    page_id = get_page_id(db, wiki, title, ns, True)
                    user_id = get_user_id(db, wiki, w_api, username)
                    output = {
                        "user_id": user_id,
                        "ns": ns,
                        "revid": edit["revid"],
                        "page_id": page_id,
                        "timestamp": timestamp,
                        "new_page": True,
                    }
                    db["edits"].insert(output)
                    db["pages"].update({"_id": page_id}, {"$set": {"redirect": True}})
                    cache.delete("wiki-fi:pagedata_{0}_{1}".format(title.replace(" ", "_"), wiki))
            elif edit["logtype"] == "upload":
                print("RCID: {0} - FILEUPLOAD: {1}".format(rcid, title))
                user_id = get_user_id(db, wiki, w_api, username)
                page_id = get_page_id(db, wiki, title, ns, redirect)
                output = {"user_id": user_id, "page_id": page_id, "timestamp": timestamp}
                db["files"].insert(output)
                # Only a first-time upload ('upload', not 'overwrite') also
                # creates the file description page's first revision.
                if edit["logaction"] == "upload":
                    output = {
                        "user_id": user_id,
                        "ns": ns,
                        "revid": edit["revid"],
                        "page_id": page_id,
                        "timestamp": timestamp,
                        "new_page": True,
                    }
                    db["edits"].insert(output)
                cache.delete("wiki-fi:pagedata_{0}_{1}".format(title.replace(" ", "_"), wiki))
            elif edit["logtype"] == "delete":
                if edit["logaction"] == "delete":
                    print("RCID: {0} - DELETION: {1}".format(rcid, title))
                    page_id = get_page_id(db, wiki, edit["title"], ns, redirect)
                    db["edits"].remove({"page_id": page_id})
                    db["pages"].remove({"_id": page_id})
                    db["files"].remove({"page_id": page_id})
                    cache.delete("wiki-fi:pagedata_{0}_{1}".format(title.replace(" ", "_"), wiki))
                elif edit["logaction"] == "restore":
                    # Undeletion: re-import the page's full revision history.
                    print("RCID: {0} - RESTORE: {1}".format(rcid, title))
                    page_id = get_page_id(db, wiki, edit["title"], ns, redirect)
                    revisions = w_api.get_page_revisions(edit["title"])
                    first = True
                    for revision in revisions:
                        username = revision["user"].encode("utf-8")
                        timestamp = get_date_from_string(revision["timestamp"])
                        revid = revision["revid"]
                        user_id = get_user_id(db, wiki, w_api, username)
                        output = {
                            "user_id": user_id,
                            "ns": ns,
                            "revid": revid,
                            "page_id": page_id,
                            "timestamp": timestamp,
                            "new_page": first,
                        }
                        db["edits"].insert(output)
                        first = False
                    if ns == 6:
                        # File namespace: restore the upload history too.
                        uploads = w_api.get_file_uploads(edit["title"])
                        if uploads is None:
                            # it's a redirect page
                            continue
                        for upload in uploads:
                            username = upload["user"].encode("utf-8")
                            timestamp = get_date_from_string(upload["timestamp"])
                            user_id = get_user_id(db, wiki, w_api, username)
                            output = {"user_id": user_id, "page_id": page_id, "timestamp": timestamp}
                            db["files"].insert(output)
            elif edit["logtype"] == "newusers":
                print("RCID: {0} - NEWUSER: {1}".format(rcid, username))
            elif edit["logtype"] == "block":
                print("RCID: {0} - BLOCK: {1}".format(rcid, title))
            else:
                print("MISSED")
                print(edit)
        else:
            print("MISSED")
            print(edit)
        last_seen_rcid = edit["rcid"]
        # Cheap users: just drop their cached data. Expensive users: queue
        # them for a single re-analysis after the loop.
        if username not in config["wikis"][wiki]["expensive_users"]:
            cache.delete("wiki-fi:userdata_{0}_{1}".format(username.replace(" ", "_"), wiki))
        elif username not in expensive_users_updated:
            expensive_users_updated.append(username)
    # reanalyze expensive users
    print("Recaching results for expensive users...")
    for username in expensive_users_updated:
        charts_data = analyze.analyze_user(wiki, db, db["users"].find_one({"username": username}))
        cache.set("wiki-fi:userdata_{0}_{1}".format(username.replace(" ", "_"), wiki), charts_data, timeout=0)
    # update last_updated time
    db["metadata"].update({"key": "last_seen_rcid"}, {"$set": {"value": last_seen_rcid}}, upsert=True)
    db["metadata"].update({"key": "user_and_pages_last_updated"}, {"$set": {"value": datenow}}, upsert=True)
    cache.set("wiki-fi:user_and_pages_last_updated_" + wiki, datenow, timeout=0)
    cache.delete("wiki-fi:wiki-fi_stats")
def get(self):
    """Handle GET: analyze the supplied twitter/reddit handles and reply JSON."""
    # Pull both handles from the query string, then run the analysis.
    handles = [self.get_argument(arg) for arg in ('twitter_handler', 'reddit_handler')]
    analysis = analyze.analyze_user(handles[0], handles[1])
    self.finish({"results": analysis})