예제 #1
0
 def map_func(status):
     """Return a normalized copy of ``status``.

     The copy has its ``'id'`` converted with ``unicode`` (Python 2), its
     ``'datetime'`` passed through ``datetimes.extract_datetime``, and the
     ``'query'`` and ``'platform'`` keys set from the free variables
     ``query`` and ``platform`` (the latter title-cased). The input mapping
     itself is not modified.
     """
     normalized = dict(status)
     raw_id = normalized['id']
     normalized['id'] = unicode(raw_id)
     raw_datetime = normalized['datetime']
     normalized['datetime'] = datetimes.extract_datetime(raw_datetime)
     normalized['query'] = query
     normalized['platform'] = platform.title()
     return normalized
예제 #2
0
 def map_func(status):
     """Return a normalized copy of ``status``.

     The copy has its ``'id'`` converted with ``unicode`` (Python 2), its
     ``'datetime'`` passed through ``datetimes.extract_datetime``, and the
     ``'query'`` key set from the free variable ``query``. The input mapping
     itself is not modified.
     """
     normalized = dict(status)
     raw_id = normalized['id']
     normalized['id'] = unicode(raw_id)
     raw_datetime = normalized['datetime']
     normalized['datetime'] = datetimes.extract_datetime(raw_datetime)
     normalized['query'] = query
     return normalized
예제 #3
0
 def get_posts(self):
     """Yield ``(keyword, post)`` pairs for newly added statuses.

     Iterates over ``self._get_post()`` and skips every item whose text type
     is ``'comment'`` or whose event is not ``'add'``. Each remaining status
     is flattened into a dict; a ``'location'`` entry is added only when
     ``self._get_location`` does not raise ``IndexError``.
     """
     for post in self._get_post():
         payload = post['text']
         if payload['type'] == 'comment' or payload['event'] != 'add':
             # TODO: parse comments
             skipped_keyword = post['match_info']['keyword']
             logger.debug(
                 "Discard a comment for keyword <%s>" % skipped_keyword)
             continue

         keyword = post['match_info']['keyword']
         status = payload['status']

         # Build the record field by field, in the same lookup order the
         # status is read, so a missing key surfaces at the same point.
         record = {}
         record['id'] = status['mid']
         record['datetime'] = datetimes.extract_datetime(
             status['created_at'])
         author = status['user']
         record['username'] = author['name']
         record['uid'] = author['id']
         record['text'] = status['text']
         record['shares'] = status['reposts_count']
         record['replies'] = status['comments_count']
         record['uri'] = status['statusurl'].replace("http://", "")
         record['reach'] = author['followers_count']
         record['gender'] = author['gender']
         record['platform'] = 'sina weibo'

         try:
             place = self._get_location(author['city_name'],
                                        author['province_name'],
                                        author['city_coordinates'])
         except IndexError:
             # Empty coordinates: leave the record without a location.
             pass
         else:
             record['location'] = place

         message = "Got 1 post for keyword <%s>" % keyword
         logger.debug(message)
         yield keyword, record
예제 #4
0
def fetch_historic_platform(query, platform):
    """Fetch historic results for ``query`` from ``platform`` and push them.

    Does nothing (beyond logging) when the subscription for ``query`` is
    falsy or when it already reports historic data for ``platform``.
    Otherwise the earliest datetime is first set to ``datetimes.now()`` as a
    lock, the platform is searched with ``historic=True``, the results are
    handed to ``push``, and the earliest datetime is updated to the smallest
    ``'datetime'`` found in the results (if any).

    Args:
        query: Identifier used to look up the subscription.
        platform: Key into ``active_platforms``.
    """
    subscription = Subscription(query)
    manager = SubscriptionManager()

    if not subscription:
        logger.warning(
            u"%s not found in the subscription list. (Maybe have been deleted.)"
            % query)
        return

    if subscription.has_historic_data(platform):
        logger.debug(u"Already fetched historic from %s for %s. Skipping..." %
                     (platform, query))
        return

    manager.mark_earliest_datetime(query, datetimes.now(),
                                   platform)  # lock the subscription
    results = active_platforms[platform]().search(
        subscription.get_query_obj(),
        historic=True,
        age_filter=get_age_filter(subscription.get_created_datetime(), True))
    logger.info("Pushing results to SQS...")
    push(results)

    logger.info("Updating earlist_datetime...")
    # NOTE(review): results is iterated again after push(); this assumes
    # search() returns a re-iterable collection -- confirm.
    # Falsy datetimes are ignored so a missing value can neither displace a
    # real minimum nor take part in an ordering comparison.
    known_datetimes = [d for d in (r['datetime'] for r in results) if d]
    if known_datetimes:
        manager.mark_earliest_datetime(
            query, datetimes.extract_datetime(min(known_datetimes)),
            platform)

    logger.info(u"Finished fetching historic %s from %s" % (query, platform))
예제 #5
0
def fetch_platform(query, platform):
    """Fetch new results for ``query`` from ``platform`` and push them.

    Returns early when the subscription is not ready for its next fetch on
    ``platform``. Otherwise the next-query datetime is marked as a lock, the
    platform is searched with ``historic=False``, the results are handed to
    ``push``, the latest datetime is updated to the largest ``'datetime'``
    found in the results (if any), and the next-query datetime is set from
    the platform's ``next_query_time(len(results))``.

    Args:
        query: Identifier used to look up the subscription.
        platform: Key into ``active_platforms``.
    """
    subscription = Subscription(query)
    manager = SubscriptionManager()

    logger.debug(u"Received request to fetch new posts for %s" % query)
    # NOTE(review): unlike fetch_historic_platform there is no check that the
    # subscription still exists -- confirm ready_for_next covers that case.
    if not subscription.ready_for_next(platform):
        logger.debug(u"Not ready for next fetch(%s, %s). Skipping..." %
                     (query, platform))
        return
    manager.mark_next_query_datetime(query, platform)  # lock the subscription
    results = active_platforms[platform]().search(
        subscription.get_query_obj(),
        historic=False,
        age_filter=get_age_filter(subscription.get_latest_datetime(platform),
                                  historic=False))
    logger.debug("Pushing results to SQS...")
    push(results)

    logger.debug("Updating latest_datetime...")
    # Falsy datetimes are ignored so a missing value never takes part in an
    # ordering comparison; the builtin replaces the hand-rolled maximum.
    known_datetimes = [d for d in (r['datetime'] for r in results) if d]
    if known_datetimes:
        manager.mark_latest_datetime(
            query, datetimes.extract_datetime(max(known_datetimes)),
            platform)

    logger.debug("Updating next_query_datetime...")
    manager.mark_next_query_datetime(
        query, platform,
        active_platforms[platform]().next_query_time(len(results)))

    logger.info(u"Finished fetching %s from %s" % (query, platform))
예제 #6
0
 def is_older(p_datetime_str):
     """Return True when the parsed datetime precedes the baseline cutoff.

     The cutoff is ``baseline_datetime`` minus five minutes. Returns False
     when ``datetimes.extract_datetime`` yields a falsy value.
     """
     parsed = datetimes.extract_datetime(p_datetime_str)
     if not parsed:
         return False
     cutoff = baseline_datetime - datetime.timedelta(minutes=5)
     return parsed < cutoff
예제 #7
0
 def is_newer(p_datetime_str):
     """Return True when the parsed datetime is at or after the baseline.

     Returns False when ``datetimes.extract_datetime`` yields a falsy value.
     """
     parsed = datetimes.extract_datetime(p_datetime_str)
     if not parsed:
         return False
     return parsed >= baseline_datetime