Example #1
def fetch_uids_from_weibo_cn(scheduler):
    # Params.
    global my_uid
    follower_url = 'http://weibo.cn/{}/fans?rightmod=1&wvr=6'.format(my_uid)
    page_num = 101
    find_next_retry = 3

    uids, persist_thresh = [], 100
    url = follower_url
    for i in xrange(page_num):
        with get_pager() as pager:
            pager.get(url)
            try:
                # Note: the following call also updates the 'st' token, which
                # is later used to remove followers.
                uids_in_page = get_follower_uids_in_a_page(pager)
                if uids_in_page:
                    uids.extend(uids_in_page)

                if len(uids) >= persist_thresh:
                    Follower.save_uids(uids)
                    logging.info('Persisted %d uids' % len(uids))
                    uids = []
            except RemoveZombieException as e:
                logging.warn(unicode(e))

            for _ in range(find_next_retry):
                try:
                    links = pager.find_elements_by_link_text(u'下页')
                    if not links:
                        raise Exception("Couldn't find next page link")
                    next_page = links[0]
                    # Set next page URL.
                    url = next_page.get_attribute('href')
                except Exception as e:
                    logging.warn(unicode(e))
                    logging.info('Retry after a second')
                    time.sleep(1)
                else:
                    logging.info('Go to next page: %d' % (i + 1))
                    break
            else:
                msg = 'Failed to find next page link %d times, exiting' \
                      % find_next_retry
                logging.warn(msg)
                break

    # Persist remaining list of UIDs.
    if uids:
        Follower.save_uids(uids)
        logging.info('Persisted %d uids' % len(uids))

    # Schedule next task.
    logging.info('Schedule next fetching')
    scheduler.enter(SCHEDULE_INTERVAL, 1, fetch_uids_from_weibo_cn,
                    (scheduler, ))
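
`get_pager` is not shown in any of these examples, but `find_elements_by_link_text` above is a Selenium WebDriver method, so it presumably yields a browser driver. A minimal sketch of what such a helper might look like, assuming Chrome and the classic Selenium API used above (the implementation here is a guess, not the project's code):

from contextlib import contextmanager

from selenium import webdriver


@contextmanager
def get_pager():
    # Hypothetical helper: open a fresh browser per page visit and always
    # clean it up, matching the `with get_pager() as pager` usage above.
    driver = webdriver.Chrome()
    try:
        yield driver
    finally:
        driver.quit()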
Example #2
def fetch_follower_info(scheduler):
    uids = Follower.get_unfilled_uids()
    info = namedtuple('Info', ['uid', 'weibo_count', 'follower_count'])
    if uids:
        follower_info_list, persist_thresh = [], 100
        for i in range(0, len(uids), CONCURRENT_CONN):
            sub_uid_list = uids[i:i + CONCURRENT_CONN]
            concurrent_reqs = [
                grequests.get(info_url(uid), headers=HEADERS)
                for uid in sub_uid_list
            ]
            resp_list = grequests.map(concurrent_reqs)

            try:
                for uid, resp in zip(sub_uid_list, resp_list):
                    matches = re.findall(
                        u'%s.*%s' % (WEIBO_PATTERN, FOLLOWER_PATTERN), resp.text)
                    if matches:
                        weibo_cnt, follower_cnt = map(int, matches[0])

                        follower_info_list.append(info(
                            uid, weibo_cnt, follower_cnt))
                        logging.info(
                            '%s Weibo Count: %d, Follower Count: %d' % (
                                uid, weibo_cnt, follower_cnt))

                        if len(follower_info_list) > persist_thresh:
                            Follower.save_follower_info(follower_info_list)
                            logging.info('Persisted %d follower info entries' %
                                         len(follower_info_list))
                            follower_info_list = []
                    else:
                        logging.warn('Failed to get info of user %s' % uid)
            except Exception:
                logging.exception('Exception for retrieving user info')
                continue

        # Persist remaining list of info.
        if follower_info_list:
            Follower.save_follower_info(follower_info_list)
            logging.info('Persisted %d follower info entries' %
                         len(follower_info_list))
    else:
        logging.error('No unfilled UIDs found; waiting for the next run')

    # Schedule next task.
    logging.info('Schedule next fetching')
    scheduler.enter(
        SCHEDULE_INTERVAL, 1, fetch_follower_info, (scheduler,))
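
The parsing step above relies on `re.findall` returning one tuple per match when the pattern contains several capture groups. A small illustration with made-up input; the module header in Example #10 defines WEIBO_PATTERN, but FOLLOWER_PATTERN is truncated out of these snippets, so the 粉丝[N] pattern here is an assumption for demonstration only:

import re

text = u'微博[42] 关注[10] 粉丝[7]'
matches = re.findall(ur'微博\[(\d+)\].*粉丝\[(\d+)\]', text)
# With two groups, each match is a tuple of captured strings:
# matches[0] == (u'42', u'7'), so map(int, matches[0]) gives [42, 7].
weibo_cnt, follower_cnt = map(int, matches[0])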
Example #3
File: crud.py Project: nbgal/frame
def add_follower(user, follower):
    exists = Follower.query.filter(Follower.user_id == user,
                                   Follower.follower_id == follower).first()
    if exists is None:
        follower = Follower(follower_id=follower, user_id=user)
        db.session.add(follower)
        db.session.commit()
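
The `Follower` model this helper writes to is not included in these examples. A minimal Flask-SQLAlchemy sketch consistent with the queries above (the table name, foreign-key targets, and constraint are assumptions):

class Follower(db.Model):
    __tablename__ = 'followers'  # hypothetical name

    id = db.Column(db.Integer, primary_key=True)
    user_id = db.Column(db.Integer, db.ForeignKey('users.id'), nullable=False)
    follower_id = db.Column(db.Integer, db.ForeignKey('users.id'),
                            nullable=False)

    # Reject duplicate (user, follower) pairs at the database level; the
    # check-then-insert in add_follower is racy under concurrent requests.
    __table_args__ = (db.UniqueConstraint('user_id', 'follower_id'),)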
Example #4
def follow_user(user_id):
    """Follow the user identified by `user_id`."""

    follower_id = request.form.get('uid')  # uid: the user making the request
    if not follower_id:
        abort(400)  # Bad request

    session_id = session_check(session['id'], request.remote_addr,
                               request.user_agent, int(follower_id))
    if session_id == '':
        session['id'] = ''
        abort(401)  # Unauthorized
    else:
        session['id'] = session_id

    user = User.query.filter(User.id == user_id).one_or_none()
    if user is None:
        abort(404)  # No such user
    if user.deleted:
        abort(403, 'Cannot follow a deleted user.')
    elif Follower.query.filter(Follower.user_id == user_id,
                               Follower.follower_id == follower_id).first():
        abort(403, 'User already followed.')
    else:
        new_follow = Follower(user_id=user_id, follower_id=follower_id)

        db.session.add(new_follow)
        db.session.commit()

        return ('', 204)  # status 204: success, no content
Example #5
def kill_zombies(scheduler):
    st = get_st()
    uids = Follower.get_zombie_uids(limit=ZOMBIE_KILL_LIMIT)
    if uids and st:
        logging.info('Trying to delete %d zombie followers' % len(uids))
        for i in range(0, len(uids), CONCURRENT_CONN):
            sub_uid_list = uids[i:i + CONCURRENT_CONN]
            concurrent_reqs = [
                grequests.get(
                    remove_url(uid, st), headers=HEADERS,
                    allow_redirects=False)
                for uid in sub_uid_list
            ]
            resp_list = grequests.map(concurrent_reqs)

            deleted_uids = []
            for uid, resp in zip(sub_uid_list, resp_list):
                if not resp:
                    logging.warn('None response, ignore')
                    continue
                if resp.status_code == 302:
                    deleted_uids.append(uid)
                    logging.info('Deleted %s' % uid)
                else:
                    logging.warn('Failed to delete %s because of %s' %
                                 (uid, resp.status_code))

            # Record back to DB about deleted UIDs.
            if deleted_uids:
                Follower.confirm_uid_deleted(deleted_uids)
                logging.info('Confirmed deletion of %d uids' % len(deleted_uids))
    else:
        logging.error(
            'Failed to get necessary data, st: %s, uid count: %d' %
            (st, len(uids)))

    # Schedule next task.
    logging.info('Schedule next fetching')
    scheduler.enter(
        SCHEDULE_INTERVAL, 1, kill_zombies, (scheduler,))
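
`grequests.map` returns None in place of any response whose request failed at the transport level, which is why the loop above has to guard against empty responses. Besides skipping them, an `exception_handler` can surface the underlying error; a sketch (the handler name is made up):

def log_exception(request, exception):
    # Called by grequests.map for each request that raised, instead of
    # silently mapping it to None.
    logging.warn('Request to %s failed: %s' % (request.url, exception))

resp_list = grequests.map(concurrent_reqs, exception_handler=log_exception)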
Example #6
def db_test_data():
    """Create sample data for test database."""

    # Add sample users
    u1 = User(uname='lemongrab')
    u2 = User(uname='bubblegum')
    u3 = User(uname='marceline')
    u4 = User(uname='simon')

    # Add sample posts
    p1 = Post(title="One million years dungeon",
              content="One million years dungeon",
              user=u1,
              created=datetime.utcnow())
    p2 = Post(title="Poor Lemongrab",
              content="You try your best",
              user=u2,
              references=[p1],
              created=datetime.utcnow())
    p3 = Post(title="Candy Kingdom",
              content="It's my favorite kingdom!",
              user=u2,
              created=datetime.utcnow())

    db.session.add_all([u1, u2, u3, u4, p1, p2, p3])
    db.session.commit()

    # Add bookmarks
    b1 = Bookmark(user=u3, post_id=p1.id)

    # Add followers
    f1 = Follower(user_id=u1.id, follower_id=u2.id)
    f2 = Follower(user_id=u2.id, follower_id=u3.id)
    f3 = Follower(user_id=u2.id, follower_id=u4.id)

    db.session.add_all([b1, f1, f2, f3])
    db.session.commit()
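
For context, a typical way to run a seed function like this against a fresh test database, assuming the usual Flask-SQLAlchemy wiring (the `server` module name is hypothetical):

if __name__ == '__main__':
    from server import app  # hypothetical app module

    with app.app_context():
        db.drop_all()
        db.create_all()
        db_test_data()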
Example #7
def follower():
    """Add followers to a user, display user followers."""

    if request.method == "GET":
        # GET: displaying the user's followers is not implemented in this
        # snippet.
        pass

    if request.method == "POST":
        # The user in the current session (the one doing the following).
        current_user = request.form.get('follower')
        # The user_id of the account being followed.
        followed = request.form.get('follow')
        followed_user = request.form.get('followed-user')

        new_follower = Follower(user_id=followed, follower=current_user)

        db.session.add(new_follower)
        db.session.commit()

        return redirect('/profile/' + str(followed_user))
Example #8
def example_data():
    """Create example data for the test database."""
    Dislike.query.delete()
    Stargazer.query.delete()
    Watcher.query.delete()
    Follower.query.delete()
    Contributor.query.delete()
    RepoLanguage.query.delete()
    Language.query.delete()
    Repo.query.delete()
    Account.query.delete()
    User.query.delete()

    jane = User(user_id="1",
                login="******",
                name="Jane",
                last_crawled=datetime.datetime.now(),
                last_crawled_depth=2)
    alex = User(user_id="2",
                login="******",
                name="Alex",
                last_crawled=(datetime.datetime.now() -
                              datetime.timedelta(weeks=6)),
                last_crawled_depth=2)
    kelly = User(user_id="3", login="******", name="Kelly")
    db.session.add_all([jane, alex, kelly])
    db.session.commit()

    jane_account = Account(user_id="1", access_token="abc123")
    db.session.add(jane_account)
    db.session.commit()

    py_repo = Repo(repo_id="1",
                   name="python-repo",
                   description="A Python repository",
                   owner_id="1",
                   last_crawled=datetime.datetime.now(),
                   last_crawled_depth=2,
                   url="https://github.com/jhacks/python-repo",
                   stargazers_count=2)
    js_repo = Repo(repo_id="2",
                   name="js-repo",
                   description="A Javascript repository",
                   owner_id="1",
                   last_crawled=(datetime.datetime.now() -
                                 datetime.timedelta(weeks=6)),
                   last_crawled_depth=1,
                   url="https://github.com/jhacks/js-repo",
                   stargazers_count=1)
    db.session.add_all([py_repo, js_repo])
    db.session.commit()

    astar = Stargazer(repo_id="1", user_id="2")
    kstar = Stargazer(repo_id="1", user_id="3")
    kstar_js = Stargazer(repo_id="2", user_id="3")
    a_dislike_js = Dislike(repo_id="2", user_id="2")
    # k_dislike_js = Dislike(repo_id="2", user_id="3")
    db.session.add_all([astar, kstar, kstar_js, a_dislike_js])
    db.session.commit()

    kwatch = Watcher(repo_id="1", user_id="3")
    a_j_follow = Follower(user_id="1", follower_id="2")
    k_j_follow = Follower(user_id="1", follower_id="3")
    j_a_follow = Follower(user_id="2", follower_id="1")
    db.session.add_all([kwatch, a_j_follow, k_j_follow, j_a_follow])
    db.session.commit()

    jcon = Contributor(repo_id="1", user_id="1")
    kcon = Contributor(repo_id="1", user_id="3")
    db.session.add_all([jcon, kcon])
    db.session.commit()

    # python = Topic(topic_id="1", topic_name="python")
    # api = Topic(topic_id="2", topic_name="api")
    # db.session.add_all([python, api])
    # db.session.commit()

    # py_rep1 = RepoTopic(topic_id="1", repo_id="1")
    # api_rep1 = RepoTopic(topic_id="2", repo_id="1")
    # db.session.add_all([py_rep1, api_rep1])
    # db.session.commit()

    py_lang = Language(language_id="1", language_name="python")
    c_lang = Language(language_id="2", language_name="c")
    db.session.add_all([py_lang, c_lang])
    db.session.commit()

    py_lang_rep1 = RepoLanguage(language_id="1",
                                repo_id="1",
                                language_bytes=5000)
    c_lang_rep1 = RepoLanguage(language_id="2",
                               repo_id="1",
                               language_bytes=100)
    db.session.add_all([py_lang_rep1, c_lang_rep1])
    db.session.commit()
Example #9
# coding=utf-8
import logging
import pickle
import sched
import time

import grequests

from model import Follower

if not Follower.table_exists():
    Follower.create_table()

FORMAT = '%(asctime)-15s %(message)s'
logging.basicConfig(filename='log/killer.log',
                    level=logging.INFO,
                    format=FORMAT)
# Suppress other logging.
for k in logging.Logger.manager.loggerDict:
    logging.getLogger(k).setLevel(logging.WARNING)

# Load cookies.
with open('data/cookies.pkl', 'rb') as f:
    cookies = pickle.load(f)
cookies_str = ';'.join('%s=%s' % (name, val) for name, val in cookies.items())
remove_url = 'http://weibo.cn/attention/remove?act=removec&uid={}&st={}'.format

HEADERS = {'Cookie': cookies_str}
SCHEDULE_INTERVAL = 60  # 1 min.
ZOMBIE_KILL_LIMIT = 5000
CONCURRENT_CONN = 15
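
The snippet stops before the entry point, so the `sched` loop that drives `kill_zombies` is not shown. A minimal driver consistent with how the task re-arms itself via `scheduler.enter` (see Example #5); this is an assumed sketch, not the project's actual main block:

if __name__ == '__main__':
    scheduler = sched.scheduler(time.time, time.sleep)
    # Run the first pass immediately; kill_zombies schedules each
    # following pass SCHEDULE_INTERVAL seconds later.
    scheduler.enter(0, 1, kill_zombies, (scheduler,))
    scheduler.run()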
Example #10
# coding=utf-8
import logging
import pickle
import re
import sched
import time
from collections import namedtuple

import grequests

from model import Follower

if not Follower.table_exists():
    Follower.create_table()

FORMAT = '%(asctime)-15s %(message)s'
logging.basicConfig(
    filename='log/info_fetcher.log', level=logging.INFO, format=FORMAT)
# Suppress other logging.
for k in logging.Logger.manager.loggerDict:
    logging.getLogger(k).setLevel(logging.WARNING)

# Load cookies.
with open('data/cookies.pkl', 'rb') as f:
    cookies = pickle.load(f)
cookies_str = ';'.join('%s=%s' % (name, val) for name, val in cookies.items())
info_url = 'http://weibo.cn/u/{}'.format

HEADERS = {
    'Cookie': cookies_str
}
WEIBO_PATTERN = ur'微博\[(\d+)\]'  # raw string keeps the regex escapes intact
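
One note on the URL-builder idiom used in these module headers: binding the template string's `format` method gives a one-argument callable, so `info_url(uid)` expands the placeholder directly:

info_url = 'http://weibo.cn/u/{}'.format
print info_url('1234567890')  # http://weibo.cn/u/1234567890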