Ejemplo n.º 1
0
def get_user_chart_data(wiki, db, user):
	"""Return the chart data for `user` on `wiki`, going through the cache.

	The cache key is built from the username (spaces replaced by
	underscores) and the wiki name. On a cache miss the data is computed
	with analyze.analyze_user and stored under that key with timeout=0.

	Args:
		wiki: wiki identifier, used in the cache key and passed to the analyzer.
		db: database handle, passed through to analyze.analyze_user.
		user: mapping that has at least a 'username' entry.

	Returns:
		Whatever the cache holds for the key, or the freshly computed result
		of analyze.analyze_user when the cache returned None.
	"""
	underscored_name = user['username'].replace(' ', '_')
	cache_key = 'wiki-fi:userdata_{0}_{1}'.format(underscored_name, wiki)

	cached = cache.get(cache_key)
	if cached is not None:
		return cached

	# cache miss: compute once and remember the result
	computed = analyze.analyze_user(wiki, db, user)
	cache.set(cache_key, computed, timeout=0)
	return computed
Ejemplo n.º 2
0
from analyze import analyze_user
from dataset_scrape import get_posts
from reddit import send_message

# First element of every entry returned by get_posts for the two subreddits.
# NOTE(review): presumably entry[0] is the post author and 100 is a post
# limit -- confirm against dataset_scrape.get_posts.
users = [post[0] for post in get_posts(100, ["depression", "suicidewatch"])]

# Body of the message sent to every user that passes the threshold below.
friendly_message = """
Hey there! Just wanted to reach out to you and let you know that you're awesome.

Just wanted to let you know that if you ever need somebody to talk to, feel free to send me a message and I'd be happy to chat!

If you're ever feeling suicidal please take a look at /r/SWResources.

You're an amazing person and I hope you have a great day.
"""

# Message each user whose analyze_user score is strictly above 0.5.
# NOTE(review): `users` is not de-duplicated, so a user appearing in several
# entries is analyzed and messaged once per entry -- confirm this is intended.
for user in users:
    if analyze_user(None, user) > 0.5:
        send_message(user, "Just checking in!", friendly_message)
Ejemplo n.º 3
0
def update(wiki):
	"""Fetch the recent changes of `wiki` and apply them to the local database.

	Every change whose rcid is greater than the stored 'last_seen_rcid'
	metadata value is applied to the 'edits', 'pages' and 'files'
	collections, and the cached page data for its title is deleted.
	After each change the cached user data of `username` is deleted, unless
	the user is listed under 'expensive_users' in the wiki's config; those
	users are re-analyzed and re-cached once, after the loop. Finally the
	'last_seen_rcid' and 'user_and_pages_last_updated' metadata values are
	upserted and the global stats cache entry is deleted.

	Args:
		wiki: wiki identifier; passed to load(), used in cache keys and as
			the index into config['wikis'].
	"""
	db, w_api = load(wiki)

	last_edit = get_last_edit_datetime(db)
	print('Last edit time was: ' + str(last_edit))

	print('Fetching edits from wiki...')
	recent_edits = w_api.get_recent_changes(last_edit)
	print('Successfully fetched edits from wiki')

	expensive_users_updated = []
	datenow = datetime.datetime.now()
	last_seen_rcid_store = db['metadata'].find_one({'key': 'last_seen_rcid'}, fields=['value'])
	if last_seen_rcid_store is None:
		last_seen_rcid = 0
	else:
		last_seen_rcid = last_seen_rcid_store['value']

	for edit in recent_edits:
		# check if edit has already been inserted into db
		if edit['rcid'] <= last_seen_rcid:
			continue

		timestamp = get_date_from_string(edit['timestamp'])
		# NOTE(review): when one of these keys is absent the variable keeps
		# its value from the previous iteration (or is unbound on the first
		# one) -- confirm the API always supplies them.
		if 'user' in edit:
			username = edit['user'].encode('utf-8')
		if 'title' in edit:
			title = edit['title'].encode('utf-8')
		if 'ns' in edit:
			ns = edit['ns']
		if 'rcid' in edit:
			rcid = edit['rcid']
		redirect = 'redirect' in edit

		if edit['type'] == 'new':
			print('RCID: {0} - NEWPAGE: {1}'.format(rcid, title))

			user_id = get_user_id(db, wiki, w_api, username)
			page_id = get_page_id(db, wiki, title, ns, redirect)
			output = {
				'user_id': user_id,
				'ns': ns,
				'revid': edit['revid'],
				'page_id': page_id,
				'timestamp': timestamp,
				'new_page': True,
			}
			db['edits'].insert(output)
			cache.delete('wiki-fi:pagedata_{0}_{1}'.format(title.replace(' ', '_'), wiki))

		elif edit['type'] == 'edit':
			print('RCID: {0} - EDIT: {1}'.format(rcid, title))

			user_id = get_user_id(db, wiki, w_api, username)
			page_id = get_page_id(db, wiki, title, ns, redirect)
			output = {
				'user_id': user_id,
				'ns': ns,
				'revid': edit['revid'],
				'page_id': page_id,
				'timestamp': timestamp,
				'new_page': False,
			}
			db['edits'].insert(output)
			# an edit can turn a page into (or out of) a redirect
			db['pages'].update({'_id': page_id}, {'$set': {'redirect': redirect}})
			cache.delete('wiki-fi:pagedata_{0}_{1}'.format(title.replace(' ', '_'), wiki))

		elif edit['type'] == 'log':
			if edit['logtype'] == 'move':
				# move_redir entries carry the target title under the key '0',
				# other moves carry it under edit['move']
				if edit['logaction'] == 'move_redir':
					new_page_title = edit['0'].encode('utf-8')
				else:
					new_page_title = edit['move']['new_title'].encode('utf-8')
				print('RCID: {0} - PAGEMOVE: {1} -> {2}'.format(rcid, title, new_page_title))

				page_id = get_page_id(db, wiki, title, ns, redirect)
				if edit['logaction'] == 'move_redir':
					# ugly hack because api doesn't return new namespace
					new_page_ns = get_namespace_from_title(db, new_page_title)
				else:
					new_page_ns = edit['move']['new_ns']

				# delete existing target page and any edits that referenced it
				target_page = db['pages'].find_one({'title': new_page_title})
				if target_page:
					db['pages'].remove({'_id': target_page['_id']})
					db['edits'].remove({'page_id': target_page['_id']})

				# language is taken from a '/<lang>' title suffix, default 'en'
				for lang in langArray:
					if new_page_title.endswith('/' + lang):
						language = lang
						break
				else:
					language = 'en'
				# rename oldpage to newpage
				db['pages'].update({'_id': page_id}, {'$set': {'title': new_page_title, 'ns': new_page_ns, 'lang': language, 'redirect': False}})
				cache.delete('wiki-fi:pagedata_{0}_{1}'.format(title.replace(' ', '_'), wiki))

				if 'suppressedredirect' in edit or ('move' in edit and 'suppressedredirect' not in edit['move']):
					# left behind a redirect
					print('RCID: {0} - REDIRECTCREATION: {1}'.format(rcid, title))

					# the old title now resolves to a separate page record,
					# which is marked as a redirect
					page_id = get_page_id(db, wiki, title, ns, True)
					user_id = get_user_id(db, wiki, w_api, username)
					output = {
						'user_id': user_id,
						'ns': ns,
						'revid': edit['revid'],
						'page_id': page_id,
						'timestamp': timestamp,
						'new_page': True,
					}
					db['edits'].insert(output)
					db['pages'].update({'_id': page_id}, {'$set': {'redirect': True}})
					cache.delete('wiki-fi:pagedata_{0}_{1}'.format(title.replace(' ', '_'), wiki))

			elif edit['logtype'] == 'upload':
				print('RCID: {0} - FILEUPLOAD: {1}'.format(rcid, title))

				user_id = get_user_id(db, wiki, w_api, username)
				page_id = get_page_id(db, wiki, title, ns, redirect)

				output = {
					'user_id': user_id,
					'page_id': page_id,
					'timestamp': timestamp,
				}
				db['files'].insert(output)

				# only the 'upload' action also counts as a page-creating edit
				if edit['logaction'] == 'upload':
					output = {
						'user_id': user_id,
						'ns': ns,
						'revid': edit['revid'],
						'page_id': page_id,
						'timestamp': timestamp,
						'new_page': True,
					}
					db['edits'].insert(output)
					cache.delete('wiki-fi:pagedata_{0}_{1}'.format(title.replace(' ', '_'), wiki))

			elif edit['logtype'] == 'delete':
				if edit['logaction'] == 'delete':
					print('RCID: {0} - DELETION: {1}'.format(rcid, title))

					page_id = get_page_id(db, wiki, edit['title'], ns, redirect)
					db['edits'].remove({'page_id': page_id})
					db['pages'].remove({'_id': page_id})
					db['files'].remove({'page_id': page_id})
					cache.delete('wiki-fi:pagedata_{0}_{1}'.format(title.replace(' ', '_'), wiki))

				elif edit['logaction'] == 'restore':
					print('RCID: {0} - RESTORE: {1}'.format(rcid, title))

					page_id = get_page_id(db, wiki, edit['title'], ns, redirect)
					revisions = w_api.get_page_revisions(edit['title'])

					# NOTE(review): the loops below rebind username/timestamp,
					# so the user-cache bookkeeping at the end of this
					# iteration acts on the last revision's/upload's author,
					# not on edit['user'] -- confirm this is intended.
					# Only the first revision returned is flagged as new_page.
					first = True
					for revision in revisions:
						username = revision['user'].encode('utf-8')
						timestamp = get_date_from_string(revision['timestamp'])
						revid = revision['revid']

						user_id = get_user_id(db, wiki, w_api, username)

						output = {
							'user_id': user_id,
							'ns': ns,
							'revid': revid,
							'page_id': page_id,
							'timestamp': timestamp,
							'new_page': first,
						}
						db['edits'].insert(output)
						first = False

					if ns == 6:
						uploads = w_api.get_file_uploads(edit['title'])
						# uploads is None when it's a redirect page. Skip only
						# the upload records: a `continue` here would also skip
						# the last_seen_rcid / user-cache bookkeeping below, so
						# the restore could be processed again on the next run.
						if uploads is not None:
							for upload in uploads:
								username = upload['user'].encode('utf-8')
								timestamp = get_date_from_string(upload['timestamp'])

								user_id = get_user_id(db, wiki, w_api, username)

								output = {
									'user_id': user_id,
									'page_id': page_id,
									'timestamp': timestamp,
								}
								db['files'].insert(output)

			elif edit['logtype'] == 'newusers':
				print('RCID: {0} - NEWUSER: {1}'.format(rcid, username))

			elif edit['logtype'] == 'block':
				print('RCID: {0} - BLOCK: {1}'.format(rcid, title))

			else:
				print('MISSED')
				print(edit)
		else:
			print('MISSED')
			print(edit)

		last_seen_rcid = edit['rcid']

		# cheap users just lose their cache entry; expensive ones are
		# collected (once each) and recomputed after the loop
		if username not in config['wikis'][wiki]['expensive_users']:
			cache.delete('wiki-fi:userdata_{0}_{1}'.format(username.replace(' ', '_'), wiki))
		elif username not in expensive_users_updated:
			expensive_users_updated.append(username)

	# reanalyze expensive users
	print('Recaching results for expensive users...')
	for username in expensive_users_updated:
		charts_data = analyze.analyze_user(wiki, db, db['users'].find_one({'username': username}))
		cache.set('wiki-fi:userdata_{0}_{1}'.format(username.replace(' ', '_'), wiki), charts_data, timeout=0)

	# update last_updated time
	db['metadata'].update({'key': 'last_seen_rcid'}, {'$set': {'value': last_seen_rcid}}, upsert=True)
	db['metadata'].update({'key': 'user_and_pages_last_updated'}, {'$set': {'value': datenow}}, upsert=True)
	cache.set('wiki-fi:user_and_pages_last_updated_' + wiki, datenow, timeout=0)

	cache.delete('wiki-fi:wiki-fi_stats')
Ejemplo n.º 4
0
def update(wiki):
    """Fetch the recent changes of `wiki` and apply them to the local database.

    Every change whose rcid is greater than the stored "last_seen_rcid"
    metadata value is applied to the "edits", "pages" and "files"
    collections, and the cached page data for its title is deleted.
    After each change the cached user data of `username` is deleted, unless
    the user is listed under "expensive_users" in the wiki's config; those
    users are re-analyzed and re-cached once, after the loop. Finally the
    "last_seen_rcid" and "user_and_pages_last_updated" metadata values are
    upserted and the global stats cache entry is deleted.

    Args:
        wiki: wiki identifier; passed to load(), used in cache keys and as
            the index into config["wikis"].
    """
    db, w_api = load(wiki)

    last_edit = get_last_edit_datetime(db)
    print("Last edit time was: " + str(last_edit))

    print("Fetching edits from wiki...")
    recent_edits = w_api.get_recent_changes(last_edit)
    print("Successfully fetched edits from wiki")

    expensive_users_updated = []
    datenow = datetime.datetime.now()
    last_seen_rcid_store = db["metadata"].find_one({"key": "last_seen_rcid"}, fields=["value"])
    if last_seen_rcid_store is None:
        last_seen_rcid = 0
    else:
        last_seen_rcid = last_seen_rcid_store["value"]

    for edit in recent_edits:
        # check if edit has already been inserted into db
        if edit["rcid"] <= last_seen_rcid:
            continue

        timestamp = get_date_from_string(edit["timestamp"])
        # NOTE(review): when one of these keys is absent the variable keeps
        # its value from the previous iteration (or is unbound on the first
        # one) -- confirm the API always supplies them.
        if "user" in edit:
            username = edit["user"].encode("utf-8")
        if "title" in edit:
            title = edit["title"].encode("utf-8")
        if "ns" in edit:
            ns = edit["ns"]
        if "rcid" in edit:
            rcid = edit["rcid"]
        redirect = "redirect" in edit

        if edit["type"] == "new":
            print("RCID: {0} - NEWPAGE: {1}".format(rcid, title))

            user_id = get_user_id(db, wiki, w_api, username)
            page_id = get_page_id(db, wiki, title, ns, redirect)
            output = {
                "user_id": user_id,
                "ns": ns,
                "revid": edit["revid"],
                "page_id": page_id,
                "timestamp": timestamp,
                "new_page": True,
            }
            db["edits"].insert(output)
            cache.delete("wiki-fi:pagedata_{0}_{1}".format(title.replace(" ", "_"), wiki))

        elif edit["type"] == "edit":
            print("RCID: {0} - EDIT: {1}".format(rcid, title))

            user_id = get_user_id(db, wiki, w_api, username)
            page_id = get_page_id(db, wiki, title, ns, redirect)
            output = {
                "user_id": user_id,
                "ns": ns,
                "revid": edit["revid"],
                "page_id": page_id,
                "timestamp": timestamp,
                "new_page": False,
            }
            db["edits"].insert(output)
            # an edit can turn a page into (or out of) a redirect
            db["pages"].update({"_id": page_id}, {"$set": {"redirect": redirect}})
            cache.delete("wiki-fi:pagedata_{0}_{1}".format(title.replace(" ", "_"), wiki))

        elif edit["type"] == "log":
            if edit["logtype"] == "move":
                # the move target (title and namespace) is read from logparams
                new_page_title = edit["logparams"]["target_title"].encode("utf-8")
                print("RCID: {0} - PAGEMOVE: {1} -> {2}".format(rcid, title, new_page_title))

                page_id = get_page_id(db, wiki, title, ns, redirect)
                new_page_ns = edit["logparams"]["target_ns"]

                # delete existing target page and any edits that referenced it
                target_page = db["pages"].find_one({"title": new_page_title})
                if target_page:
                    db["pages"].remove({"_id": target_page["_id"]})
                    db["edits"].remove({"page_id": target_page["_id"]})

                # language is taken from a "/<lang>" title suffix, default "en"
                for lang in langArray:
                    if new_page_title.endswith("/" + lang):
                        language = lang
                        break
                else:
                    language = "en"
                # rename oldpage to newpage
                db["pages"].update(
                    {"_id": page_id},
                    {"$set": {"title": new_page_title, "ns": new_page_ns, "lang": language, "redirect": False}},
                )
                cache.delete("wiki-fi:pagedata_{0}_{1}".format(title.replace(" ", "_"), wiki))

                # NOTE(review): this test still looks at edit["move"] although
                # the target is read from edit["logparams"] above -- confirm
                # which key the API populates for suppressed redirects.
                if "suppressedredirect" in edit or ("move" in edit and "suppressedredirect" not in edit["move"]):
                    # left behind a redirect
                    print("RCID: {0} - REDIRECTCREATION: {1}".format(rcid, title))

                    # the old title now resolves to a separate page record,
                    # which is marked as a redirect
                    page_id = get_page_id(db, wiki, title, ns, True)
                    user_id = get_user_id(db, wiki, w_api, username)
                    output = {
                        "user_id": user_id,
                        "ns": ns,
                        "revid": edit["revid"],
                        "page_id": page_id,
                        "timestamp": timestamp,
                        "new_page": True,
                    }
                    db["edits"].insert(output)
                    db["pages"].update({"_id": page_id}, {"$set": {"redirect": True}})
                    cache.delete("wiki-fi:pagedata_{0}_{1}".format(title.replace(" ", "_"), wiki))

            elif edit["logtype"] == "upload":
                print("RCID: {0} - FILEUPLOAD: {1}".format(rcid, title))

                user_id = get_user_id(db, wiki, w_api, username)
                page_id = get_page_id(db, wiki, title, ns, redirect)

                output = {"user_id": user_id, "page_id": page_id, "timestamp": timestamp}
                db["files"].insert(output)

                # only the "upload" action also counts as a page-creating edit
                if edit["logaction"] == "upload":
                    output = {
                        "user_id": user_id,
                        "ns": ns,
                        "revid": edit["revid"],
                        "page_id": page_id,
                        "timestamp": timestamp,
                        "new_page": True,
                    }
                    db["edits"].insert(output)
                    cache.delete("wiki-fi:pagedata_{0}_{1}".format(title.replace(" ", "_"), wiki))

            elif edit["logtype"] == "delete":
                if edit["logaction"] == "delete":
                    print("RCID: {0} - DELETION: {1}".format(rcid, title))

                    page_id = get_page_id(db, wiki, edit["title"], ns, redirect)
                    db["edits"].remove({"page_id": page_id})
                    db["pages"].remove({"_id": page_id})
                    db["files"].remove({"page_id": page_id})
                    cache.delete("wiki-fi:pagedata_{0}_{1}".format(title.replace(" ", "_"), wiki))

                elif edit["logaction"] == "restore":
                    print("RCID: {0} - RESTORE: {1}".format(rcid, title))

                    page_id = get_page_id(db, wiki, edit["title"], ns, redirect)
                    revisions = w_api.get_page_revisions(edit["title"])

                    # NOTE(review): the loops below rebind username/timestamp,
                    # so the user-cache bookkeeping at the end of this
                    # iteration acts on the last revision's/upload's author,
                    # not on edit["user"] -- confirm this is intended.
                    # Only the first revision returned is flagged as new_page.
                    first = True
                    for revision in revisions:
                        username = revision["user"].encode("utf-8")
                        timestamp = get_date_from_string(revision["timestamp"])
                        revid = revision["revid"]

                        user_id = get_user_id(db, wiki, w_api, username)

                        output = {
                            "user_id": user_id,
                            "ns": ns,
                            "revid": revid,
                            "page_id": page_id,
                            "timestamp": timestamp,
                            "new_page": first,
                        }
                        db["edits"].insert(output)
                        first = False

                    if ns == 6:
                        uploads = w_api.get_file_uploads(edit["title"])
                        # uploads is None when it's a redirect page. Skip only
                        # the upload records: a `continue` here would also skip
                        # the last_seen_rcid / user-cache bookkeeping below, so
                        # the restore could be processed again on the next run.
                        if uploads is not None:
                            for upload in uploads:
                                username = upload["user"].encode("utf-8")
                                timestamp = get_date_from_string(upload["timestamp"])

                                user_id = get_user_id(db, wiki, w_api, username)

                                output = {"user_id": user_id, "page_id": page_id, "timestamp": timestamp}
                                db["files"].insert(output)

            elif edit["logtype"] == "newusers":
                print("RCID: {0} - NEWUSER: {1}".format(rcid, username))

            elif edit["logtype"] == "block":
                print("RCID: {0} - BLOCK: {1}".format(rcid, title))

            else:
                print("MISSED")
                print(edit)
        else:
            print("MISSED")
            print(edit)

        last_seen_rcid = edit["rcid"]

        # cheap users just lose their cache entry; expensive ones are
        # collected (once each) and recomputed after the loop
        if username not in config["wikis"][wiki]["expensive_users"]:
            cache.delete("wiki-fi:userdata_{0}_{1}".format(username.replace(" ", "_"), wiki))
        elif username not in expensive_users_updated:
            expensive_users_updated.append(username)

    # reanalyze expensive users
    print("Recaching results for expensive users...")
    for username in expensive_users_updated:
        charts_data = analyze.analyze_user(wiki, db, db["users"].find_one({"username": username}))
        cache.set("wiki-fi:userdata_{0}_{1}".format(username.replace(" ", "_"), wiki), charts_data, timeout=0)

    # update last_updated time
    db["metadata"].update({"key": "last_seen_rcid"}, {"$set": {"value": last_seen_rcid}}, upsert=True)
    db["metadata"].update({"key": "user_and_pages_last_updated"}, {"$set": {"value": datenow}}, upsert=True)
    cache.set("wiki-fi:user_and_pages_last_updated_" + wiki, datenow, timeout=0)

    cache.delete("wiki-fi:wiki-fi_stats")
Ejemplo n.º 5
0
 def get(self):
     """Analyze the requested Twitter/Reddit handlers and finish with the result.

     Reads the 'twitter_handler' and 'reddit_handler' arguments via
     self.get_argument, passes them (in that order) to
     analyze.analyze_user, and hands {"results": <analysis>} to self.finish.
     """
     analysis = analyze.analyze_user(
         self.get_argument('twitter_handler'),
         self.get_argument('reddit_handler'),
     )
     payload = {"results": analysis}
     self.finish(payload)