Example #1
0
def gather(database, source, config):
    """Store a Twitter user's recent statuses in the 'activity' table.

    :param database: mapping-like object; ``database['activity']`` must
        provide ``writerow(data, unique_columns=...)``.
    :param source: object with ``url`` and ``type`` attributes. ``url`` may
        be a profile URL such as "http(s)://twitter.com/okfn" (a trailing
        slash is tolerated) or simply the screen name, e.g. "okfn".
    :param config: not used by this function.
    """
    # Keep only the text after the last '/'. Trailing slashes are stripped
    # first so "http://twitter.com/okfn/" does not yield an empty username.
    # For a bare name rfind() returns -1, so the slice starts at 0.
    trimmed_url = source.url.rstrip('/')
    username = trimmed_url[trimmed_url.rfind('/') + 1:]
    user = tweepy.api.get_user(username)
    user_realname = user.name
    statuses = tweepy.api.user_timeline(user.id, count=20)

    log.info("%s: %s" % (source.type, username))

    table = database['activity']
    for s in statuses:
        text = s.text
        url = "http://twitter.com/#!/%s/statuses/%d" % (username, s.id)
        # The status text is used as both title and description.
        data = {
            'author': user_realname,
            'title': text,
            'source_url': url,
            'description': text
            }
        data = make_activity(data, s.created_at, source)
        table.writerow(data,
            unique_columns=['author', 'title', 'source_url'])
Example #2
0
def gather_pipermail(database, source, how_many_months=1):
    '''Gather mailman archives info.

    Every message yielded by ``get_messages`` is converted with
    ``make_activity`` and written to ``database['activity']``. Messages that
    carry no ``Date`` header are logged and skipped.

    :param database: mapping-like object providing the 'activity' table.
    :param source: object whose ``url`` points at the list; a
        'mailman/listinfo' URL is rewritten to its 'pipermail' counterpart.
    :param how_many_months: how many months back to go in the archives. Set to
        <= 0 for unlimited.
    :raises ValueError: if a Date header is present but does not match
        '%a, %d %b %Y %H:%M:%S' once the UTC offset has been removed.
    '''
    url = source.url
    log.info(url)
    if 'mailman/listinfo' in url:
        url = url.replace('mailman/listinfo', 'pipermail')
    table = database['activity']
    for message in get_messages(url, how_many_months):
        subjects = message.get_all('Subject')
        subject = subjects[-1] if subjects else '(No Subject)'

        dates = message.get_all('Date')
        if not dates:
            # Without a Date header there is nothing to parse; feeding a
            # placeholder to strptime would abort the whole run.
            log.warning('Skipping message without Date header: %s' % subject)
            continue
        # Cut off a trailing numeric UTC offset (" +0100" / " -0500"). The
        # offset is discarded, not applied, so the result is a naive datetime.
        date = dates[-1].rsplit(' +', 1)[0].rsplit(' -', 1)[0].strip()
        date = datetime.strptime(date, '%a, %d %b %Y %H:%M:%S')
        data = {
            'author': message.get_from().split('  ')[0],
            'title': subject,
            # The message payload is deliberately not stored: it is large.
            'description': None,
            'source_url': url
            }
        data = make_activity(data, date, source)
        table.writerow(data, unique_columns=['author', 'title', 'datetime'])
Example #3
0
def _entry_author(entry):
    # Structured author name first, then the plain author field, else ''.
    try:
        return entry.author_detail.name
    except AttributeError:
        pass
    try:
        return entry.author
    except AttributeError:
        return ''


def _entry_description(entry):
    # Summary first, then the value of the first content item, else ''.
    try:
        return entry.summary
    except AttributeError:
        pass
    try:
        return entry.content[0].value
    except AttributeError:
        return ''


def gather(database, source):
    """Write every entry of the feed at ``source.feed_url`` to 'activity'.

    :param database: mapping-like object providing the 'activity' table.
    :param source: object with ``feed_url`` and ``type`` attributes.
    :returns: the number of feed entries processed.
    """
    feed = feedparser.parse(source.feed_url)
    try:
        log.info("%s: %s" % (source.type, feed.feed.title))
    except AttributeError:
        log.error('Failed to retrieve: %s' % source.feed_url)

    table = database['activity']
    count = 0
    for count, entry in enumerate(feed.entries, 1):
        author = _entry_author(entry)
        description = _entry_description(entry)
        # HACKy: for mediawiki sources keep only the first line.
        if source.type == 'mediawiki':
            description = description.partition('\n')[0]

        timestamp = mktime(entry.updated_parsed)
        date = datetime.fromtimestamp(timestamp)
        data = make_activity(
            {
                'author': author,
                'title': entry.title,
                'source_url': entry.link,
                'description': description
            },
            date,
            source,
        )
        table.writerow(data, unique_columns=['author', 'title', 'source_url'])
    return count
Example #4
0
def unique_place_id(start=1):
    """Yield an endless sequence of consecutive integer place ids.

    :param start: first id to yield. Defaults to 1, the value that was
        previously hard-coded, so existing no-argument callers are unaffected.
    """
    place_id = start
    while True:
        yield place_id
        place_id += 1


# Script entry point: build activities for every configured city, then issue
# one PUT per activity through a FuturesSession and collect the futures.
if __name__ == '__main__':
    place_id_iter = unique_place_id()
    activities = []

    # Generate activities for each city
    for city in config.ROUTER_NAMES:
        router = common.OtpRouter(city)
        # ACTIVITY_NUM_PER_CITY activities are generated per router.
        for i in range(0, config.ACTIVITY_NUM_PER_CITY):
            activities.append(common.make_activity(router))

    # max workers set to 10, default is 2
    session = FuturesSession(max_workers=10)
    # The token is set on the session's headers, so it does not have to be
    # passed with each put() call below.
    # headers = {'Authorization': 'Token {}'.format(config.AUTH_TOKEN)}
    session.headers['Authorization'] = 'Token {}'.format(config.AUTH_TOKEN)
    futures = []
    for activity in activities:
        # Timestamp taken just before the request is submitted; it is stored
        # next to the future (presumably for timing -- the consumer is not
        # visible in this excerpt).
        start = datetime.now()
        # Each activity consumes two consecutive ids, used as the from/to
        # path segments of the URL.
        from_id = next(place_id_iter)
        to_id = next(place_id_iter)
        url = config.LEADGEN_URL + 'activity/{}/{}/'.format(from_id, to_id)
        # NOTE(review): bg_cb is not defined in the visible part of the file.
        f = session.put(url, json=activity, background_callback=bg_cb)
        futures.append((f, start))

    # wait for requests to complete
Example #5
0
import requests

import config
import common


def unique_place_id(start=337):
    """Yield an endless sequence of consecutive integer place ids.

    :param start: first id to yield. Defaults to 337, the value that was
        previously hard-coded, so existing no-argument callers are unaffected.
    """
    place_id = start
    while True:
        yield place_id
        place_id += 1


if __name__ == '__main__':
    # For every configured city, generate ACTIVITY_NUM_PER_CITY activities
    # and PUT each one, one request at a time, printing the status code.
    activities = []
    place_id_iter = unique_place_id()

    auth_value = 'Token {}'.format(config.AUTH_TOKEN)
    headers = {'Authorization': auth_value}

    for city in config.ROUTER_NAMES:
        router = common.OtpRouter(city)
        for _ in range(config.ACTIVITY_NUM_PER_CITY):
            # Two consecutive ids per activity: the from/to URL segments.
            from_id, to_id = next(place_id_iter), next(place_id_iter)
            path = 'activity/{}/{}/'.format(from_id, to_id)
            url = config.LEADGEN_URL + path
            activity = common.make_activity(router)
            response = requests.put(url, json=activity, headers=headers)
            print(response.status_code)