Example #1
def search_term(harvester, snh_search):
    if debugging: dLogger.log("search_term()")

    new_statuses_list = []
    last_harvested_status = snh_search.latest_status_harvested

    max_id = None
    if last_harvested_status:
        max_id = int(last_harvested_status.fid) - 1
        if debugging:
            dLogger.log('    Latest status harvested date: %s' %
                        last_harvested_status.created_at)

    while len(new_statuses_list) < 80:
        status_id_list = collect_tweets_from_html(harvester, snh_search,
                                                  max_id)

        if len(status_id_list) > 0:
            new_statuses_list += status_id_list
            max_id = int(status_id_list[-1]) - 1
        else:
            break

    if len(new_statuses_list) == 0:
        return None
    return new_statuses_list
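A minimal sketch of the backwards-paging pattern used above, assuming a hypothetical fetch_page(max_id) that returns status IDs sorted newest-first; each pass resumes one ID below the oldest ID already collected, so pages never overlap:

def collect_backwards(fetch_page, target=80):
    # fetch_page is a hypothetical stand-in for collect_tweets_from_html.
    collected = []
    max_id = None
    while len(collected) < target:
        page = fetch_page(max_id)
        if not page:
            break
        collected += page
        max_id = int(page[-1]) - 1  # resume strictly below the oldest ID seen
    return collected or None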
Example #2
def collect_tweets_from_html(harvester, snh_search, max_id=None):
    if debugging:
        dLogger.log('collect_tweets_from_html()')
        #dLogger.log('    snh_search: %s'%snh_search)
        #dLogger.log('    max_id: %s'%max_id)

    since = datetime.strftime(harvester.harvest_window_from, '%Y-%m-%d')
    until = datetime.strftime(harvester.harvest_window_to, '%Y-%m-%d')

    query = snh_search.term.encode('utf-8')
    params = '%s since:%s until:%s' % (query, since, until)
    if max_id: params += ' max_id:%s' % max_id
    strUrl = 'https://twitter.com/hashtag/' + urllib.quote(params)

    if debugging: dLogger.log('    URL: %s' % strUrl)
    url = urllib2.Request(strUrl, headers={'User-Agent': 'Mozilla/5.0'})
    try:
        data = urllib2.urlopen(url, timeout=5)
    except urllib2.URLError:
        # Transient network failure: wait a second and retry once.
        time.sleep(1)
        data = urllib2.urlopen(url, timeout=5)
    page = bs(data, "html.parser")
    tweetBox = page.find('ol', id='stream-items-id')
    tweets = tweetBox.findAll('li')
    status_id_list = []
    for tweet in tweets:
        if tweet.has_attr('data-item-id'):
            status_id_list.append(tweet['data-item-id'])
    return status_id_list
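A self-contained check of the selectors above against a static fragment shaped like Twitter's old stream markup (an assumption; the live markup has changed repeatedly, and the scraper breaks silently when it does):

from bs4 import BeautifulSoup as bs

html = """
<ol id="stream-items-id">
  <li data-item-id="1001"></li>
  <li data-item-id="1000"></li>
  <li class="stream-separator"></li>
</ol>
"""
page = bs(html, "html.parser")
tweetBox = page.find('ol', id='stream-items-id')
ids = [li['data-item-id'] for li in tweetBox.findAll('li')
       if li.has_attr('data-item-id')]
assert ids == ['1001', '1000']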
    def update_comment_status(self, comment, post):
        if debugging:
            dLogger.log("<ThreadComment#%s>::update_comment_status()" %
                        self.ident)
            #dLogger.log("    comment: %s"%comment)
            #dLogger.log("    message: %s"%unicode(comment['message']))

        fbcomment = None
        try:
            try:
                fbcomment = FBComment.objects.get(fid=comment["id"])
            except ObjectDoesNotExist:
                fbcomment = FBComment(post=post)
                fbcomment.save()
            fbcomment.update_from_facebook(comment, post)
        except IntegrityError:
            try:
                fbcomment = FBComment.objects.get(fid=comment["id"])
                fbcomment.update_from_facebook(comment, post)
            except ObjectDoesNotExist:
                msg = u"ERROR! Comments already exist but not found%s for %s" % (
                    unicode(comment), post.fid if post.fid else "0")
                logger.exception(msg)
                if debugging: dLogger.exception(msg)
        except:
            msg = u"<p style='red'>Cannot update comment %s for %s</p>" % (
                unicode(comment), post.fid if post.fid else "0")
            logger.exception(msg)
            if debugging: dLogger.exception(msg)
        return fbcomment
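The nested try/except above is a hand-rolled, race-safe get-or-create keyed on the unique fid. Django's built-in helper covers the common path; a sketch, assuming post is the only other field FBComment needs at creation time:

fbcomment, created = FBComment.objects.get_or_create(
    fid=comment["id"], defaults={"post": post})
fbcomment.update_from_facebook(comment, post)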
def update_user_batch(harvester):
    if debugging: dLogger.log("update_user_batch()")

    # Select the next window of up to 10000 FBUsers that still need updating.
    all_users = harvester.fbusers_to_harvest.filter(
        pk__gt=harvester.dont_harvest_further_than)
    if all_users.count() == 0:
        harvester.dont_harvest_further_than = 0
        harvester.save()
        all_users = harvester.fbusers_to_harvest.filter(
            pk__gt=harvester.dont_harvest_further_than)
    all_users = all_users.filter(pk__lt=harvester.dont_harvest_further_than +
                                 10000)

    batch_man = []
    for snhuser in all_users:
        if not snhuser.error_triggered:
            uid = snhuser.fid if snhuser.fid else snhuser.username
            #if debugging: dLogger.log("    uid: %s"%uid)
            d = {"method": "GET", "relative_url": str(uid)}
            #if debugging: dLogger.log("    d: %s"%d)
            batch_man.append({
                "snh_obj": snhuser,
                "retry": 0,
                "request": d,
                "callback": update_user_from_batch
            })
        else:
            logger.info(
                u"Skipping user update: %s(%s) because the user has triggered the error flag."
                % (unicode(snhuser), snhuser.fid if snhuser.fid else "0"))
    generic_batch_processor_v2(harvester, batch_man)
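Each batch_man entry pairs one Graph API sub-request with the callback that will consume its response. generic_batch_processor_v2 itself is not among these examples, so the loop below is only a sketch of the contract implied by the entries built above and the next_bman lists the callbacks return:

def process_batch(harvester, batch_man, fetch):
    # fetch(request_dict) -> response dict; a hypothetical stand-in for the
    # real batched Graph API call.
    while batch_man:
        follow_ups = []
        for entry in batch_man:
            response = fetch(entry["request"])
            queued = entry["callback"](harvester, entry["snh_obj"], response)
            if queued:
                follow_ups += queued  # e.g. requests for further comment pages
        batch_man = follow_ups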
def update_user_statuses_batch(harvester):
    if debugging:
        dLogger.log("update_user_statuses_batch()")

    all_users = harvester.fbusers_to_harvest.all()
    batch_man = []

    for snhuser in all_users:
        if not snhuser.error_triggered:
            uid = snhuser.fid if snhuser.fid else snhuser.username
            fields = ("comments.limit(0).summary(true),"
                      "likes.limit(0).summary(true),"
                      "shares,message,message_tags,name,caption,description,"
                      "properties,privacy,type,place,story,story_tags,"
                      "object_id,application,updated_time,picture,link,"
                      "source,icon,from")
            d = {
                "method": "GET",
                "relative_url": "%s/feed?limit=250&fields=%s" % (str(uid), fields),
            }
            #if debugging: dLogger.log("    d: %s"%d)
            batch_man.append({
                "snh_obj": snhuser,
                "retry": 0,
                "request": d,
                "callback": update_user_feed_from_batch
            })
        else:
            logger.info(
                u"Skipping status update: %s(%s) because the user has triggered the error flag."
                % (unicode(snhuser), snhuser.fid if snhuser.fid else "0"))

    #usage = psutil.virtual_memory()
    logger.info(u"Will harvest statuses for %s" % (harvester))
    generic_batch_processor_v2(harvester, batch_man)
Example #6
    def update_user_fk(self, self_prop, face_prop, facebook_model):
        #if debugging: dLogger.log("<FBComment: %s>::update_user_fk()"%self.fid)

        model_changed = False
        if face_prop in facebook_model:
            prop_val = facebook_model[face_prop]
            if prop_val and (self_prop is None or self_prop.fid != prop_val["id"]):
                user = None
                user = self.get_existing_user({"fid__exact":prop_val["id"]})

                if not user:
                    try:
                        user = FBUser()
                        user.update_from_facebook(prop_val)
                        if debugging: dLogger.log("    new user created: %s"%user)
                    except IntegrityError:
                        user = self.get_existing_user({"fid__exact":prop_val["id"]})
                        if user:
                            user.update_from_facebook(prop_val)
                        else:
                            logger.debug(u">>>>CRITICAL CANT UPDATED DUPLICATED USER %s" % prop_val["id"])

                self_prop = user
                model_changed = True

        return model_changed, self_prop
Example #7
def custom_migration():
    params = [
        'harvester_type',
        'client',
        'tt_client',
        'consumer_key',
        'consumer_secret',
        'access_token_key',
        'access_token_secret',
        'remaining_search_hits',
        'remaining_user_timeline_hits',
        'remaining_user_lookup_hits',
        'reset_time_in_seconds',
        'hourly_limit',
        'reset_time',
        #'twusers_to_harvest',
        #'twsearch_to_harvest',
        'last_harvested_user',
        'current_harvested_user',
        'last_updated_user',
        'current_updated_user',
    ]
    for harv2 in TwitterHarvester2.objects.all():
        harv = TwitterHarvester.objects.create()
        for param in params:
            dLogger.log('param: %s' % param)
            setattr(harv, param, getattr(harv2, param))
        harv.save()
        if debugging: dLogger.log('copied %s' % harv)
Example #8
def sort_harvesters_by_priority(all_harvesters):
    if debugging: dLogger.log("sort_harvesters_by_priority()")

    new_harvesters = [
        harv for harv in all_harvesters if harv.last_harvest_start_time is None
    ]
    aborted_harvesters = [
        harv for harv in all_harvesters
        if harv.current_harvest_start_time is not None
        and harv not in new_harvesters
    ]
    clean_harvesters = [
        harv for harv in all_harvesters
        if harv not in aborted_harvesters and harv not in new_harvesters
    ]

    sorted_harvester_list = new_harvesters
    sorted_harvester_list += sorted(
        clean_harvesters,
        key=lambda harvester: harvester.last_harvest_start_time)
    sorted_harvester_list += sorted(
        aborted_harvesters,
        key=lambda harvester: harvester.current_harvest_start_time)
    if debugging:
        dLogger.log('    sorted_harvester_list: %s' % sorted_harvester_list)
    return sorted_harvester_list
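A worked example with stand-in objects (integers replace datetimes, since only ordering matters, and the module's debugging flag is assumed off): never-started harvesters come first, then clean ones by oldest last start, then aborted ones by oldest current start:

class FakeHarvester(object):
    def __init__(self, name, last=None, current=None):
        self.name = name
        self.last_harvest_start_time = last
        self.current_harvest_start_time = current
    def __repr__(self):
        return self.name

fresh = FakeHarvester('fresh')
clean = FakeHarvester('clean', last=1)
aborted = FakeHarvester('aborted', last=2, current=3)
assert sort_harvesters_by_priority([aborted, clean, fresh]) == \
    [fresh, clean, aborted]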
    def get_stats(self):
        if debugging:
            dLogger.log("get_stats()")
            dLogger.log(
                "    remaining_hits (search)(timeline)(user): (%s)(%s)(%s)" %
                (self.remaining_search_hits, self.remaining_user_timeline_hits,
                 self.remaining_user_lookup_hits))
            dLogger.log("    reset_time: %s" % self.reset_time)
            dLogger.log("    last_harvested_user: %s" %
                        self.last_harvested_user)
            dLogger.log("    current_harvested_user: %s" %
                        self.current_harvested_user)
        parent_stats = super(TwitterHarvester, self).get_stats()
        parent_stats["concrete"] = {
            "remaining_hits (search)(timeline)(user)":
            (self.remaining_search_hits, self.remaining_user_timeline_hits,
             self.remaining_user_lookup_hits),
            "reset_time_in_seconds": self.reset_time_in_seconds,
            "hourly_limit": self.hourly_limit,
            "reset_time": self.reset_time,
            "last_harvested_user": unicode(self.last_harvested_user),
            "current_harvested_user": unicode(self.current_harvested_user),
        }
        return parent_stats
def get_tw_harvester_status_list(request, call_type, harvester_id):
    dLogger.log('get_tw_harvester_status_list()')
    querySet = None
    columnIndexNameMap = {
        0: u'created_at',
        1: u'fid',
        2: u'text',
        3: u'retweet_count',
        4: u'retweeted',
        5: u'source',
    }
    if harvester_id == '0':
        querySet = TWStatus.objects.all()
    else:
        harvester = get_list_or_404(TwitterHarvester, pmk_id=harvester_id)[0]
        if harvester.twusers_to_harvest.count() > 100:
            return twUserAjaxTableError("Too many items to display")

        # Merge the two sets of filter conditions into a single queryset:
        conditionList = [Q(user=user) for user in harvester.twusers_to_harvest.all()]
        conditionList += [Q(TWSearch_hit=search) for search in harvester.twsearch_to_harvest.all()]
        querySet = TWStatus.objects.filter(reduce(lambda x, y: x | y, conditionList)).distinct()

    #call to generic function from utils
    return get_datatables_records(request, querySet, columnIndexNameMap, call_type)
    def update_client_stats(self):
        """updates the remaining api calls the instance is allowed to do before annoying Twitter
        """

        if debugging: dLogger.log("update_client_stats()")

        c = self.get_client()

        rates = c.GetRateLimitStatus("search,statuses,users")["resources"]
        searchRates = rates["search"]["/search/tweets"]
        userLookupRates = rates["users"]["/users/lookup"]
        statusTimelineRates = rates["statuses"]["/statuses/user_timeline"]

        self.remaining_search_hits = searchRates["remaining"]
        self.remaining_user_timeline_hits = statusTimelineRates["remaining"]
        self.remaining_user_lookup_hits = userLookupRates["remaining"]

        self.reset_time_in_seconds = max(
            searchRates["reset"],
            userLookupRates["reset"],
            statusTimelineRates["reset"],
        )
        self.hourly_limit = searchRates["limit"]  # All three endpoints share the same limit.
        self.reset_time = time.strftime(
            '%Y-%m-%d %H:%M:%S', time.localtime(self.reset_time_in_seconds))
        self.save()
Example #12
    def start_new_harvest(self):
        if debugging: dLogger.log('start_new_harvest()')

        self.current_harvest_start_time = datetime.now()
        self.current_harvest_call_count = 0
        self.harvest_in_progress = True
        self.save()
    def get_latest_status(self):
        if debugging: dLogger.log("get_latest_status()")

        # Order newest first so the first row is the most recent status.
        statuses = TWStatus.objects.filter(user=self).order_by("-created_at")[:1]
        return statuses[0] if statuses else None
def gbp_core(harvester, bman_chunk, error_map, next_bman_list, failed_list):
    if debugging:
        dLogger.log("gbp_core()")
        #dLogger.log("    harvester: %s"%harvester)
        #dLogger.log("    bman_chunk: %s"%bman_chunk)
        #dLogger.log("    error_map: %s"%error_map)
        #dLogger.log("    next_bman_list: %s"%next_bman_list)
        #dLogger.log("    failed_list: %s"%failed_list)

    error = False

    try:
        urlized_batch = [entry["request"] for entry in bman_chunk]
        #if debugging: dLogger.log("    urlized_batch: %s"%urlized_batch)
        batch_result = harvester.api_call("request", {
            'path': '',
            'post_args': {
                "batch": urlized_batch
            }
        })

        #dLogger.pretty(batch_result)

        for (counter, fbobj) in enumerate(batch_result):
            bman_obj = bman_chunk[counter]

            if isinstance(fbobj, dict):
                follow_ups = bman_obj["callback"](harvester, bman_obj["snh_obj"],
                                                  fbobj)
                if follow_ups:
                    next_bman_list += follow_ups
            else:
                e_code = gbp_error_man(bman_obj, fbobj)
                if e_code == E_UNEX:
                    error = True
                error_map[e_code] = error_map.get(e_code, 0) + 1

                if e_code in E_CRITICALS:
                    failed_list.append(bman_obj)
                else:
                    next_bman_list.append(bman_obj)

    except FacepyError, fex:
        e_code = gbp_facepyerror_man(fex, {"bman_chunk": bman_chunk})
        if e_code == E_UNEX:
            error = True
        error_map[e_code] = error_map.get(e_code, 0) + 1

        if e_code in E_CRITICALS:
            msg = u"CRITICAL gbp_core: Unmanaged FacepyError error:%s. Aborting a full bman_chunk." % (
                e_code)
            logger.exception(msg)
            failed_list += bman_chunk
        else:
            next_bman_list += bman_chunk
    def get_tt_client(self):
        if debugging: dLogger.log("get_tt_client()")

        if not self.tt_client:
            self.tt_client = Twython(self.consumer_key, self.consumer_secret,
                                     self.access_token_key,
                                     self.access_token_secret)

        return self.tt_client
Example #16
def run_twitter_harvester():
    if debugging: dLogger.log("run_twitter_harvester()")

    #custom_export()
    #return

    harvester_list = sort_harvesters_by_priority(
        TwitterHarvester.objects.all())
    for harvester in harvester_list:
        harvester.harvest_in_progress = False
        harvester.save()

    logger.info('Will run in order: %s' % harvester_list)

    try:
        for harvester in harvester_list:
            logger.info(u"The harvester %s is %s" %
                        (unicode(harvester),
                         "active" if harvester.is_active else "inactive"))

            if harvester.is_active:
                harvester.start_new_harvest()
                harvester.update_client_stats()

                if harvester.remaining_user_lookup_hits <= 0:
                    warn = u"The harvester %s has exceeded the user lookup rate limit. Need to wait? %s" % (
                        unicode(harvester), harvester.get_stats())
                    logger.warning(warn)
                else:
                    run_users_update(harvester)
                    harvester.update_client_stats()

                if harvester.remaining_user_timeline_hits <= 0 and harvester.remaining_user_lookup_hits <= 0:
                    warn = u"The harvester %s has exceeded the status rate limits. Need to wait? %s" % (
                        unicode(harvester), harvester.get_stats())
                    logger.warning(warn)
                else:
                    run_harvester_timeline(harvester)
                    harvester.update_client_stats()

                if harvester.remaining_search_hits <= 0:
                    warn = u"The harvester %s has exceeded the search rate limit. Need to wait? %s" % (
                        unicode(harvester), harvester.get_stats())
                    logger.warning(warn)
                else:
                    run_harvester_search(harvester)
                    harvester.update_client_stats()

                harvester.end_current_harvest()
        if debugging: dLogger.log('Harvest has ended for all harvesters')
    except:
        for harvester in harvester_list:
            harvester.harvest_in_progress = False
            harvester.save()
        raise
Example #17
    def update_from_youtube(self, snh_video, snh_user, yt_comment):  #Comment
        if debugging:
            dLogger.log("<YTComment: '%s'>::update_from_youtube()" % self)
            #dLogger.pretty(yt_comment)

        model_changed = False

        fid = yt_comment['id']

        if self.fid != fid:
            self.fid = fid
            model_changed = True

        snippet = yt_comment['snippet']

        if self.video != snh_video:
            self.video = snh_video
            model_changed = True

        if self.user != snh_user:
            self.user = snh_user
            model_changed = True

        yt_published = snippet['publishedAt']
        date_val = datetime.strptime(yt_published[:-5], '%Y-%m-%dT%H:%M:%S')
        if self.published != date_val:
            self.published = date_val
            model_changed = True

        yt_updated = snippet['updatedAt']
        date_val = datetime.strptime(yt_updated[:-5], '%Y-%m-%dT%H:%M:%S')
        if self.updated != date_val:
            self.updated = date_val
            model_changed = True

        content = snippet['textDisplay'].encode('unicode_escape')
        content = re.sub(r'\\\\x..', '', content)
        if self.message != content:
            self.message = content
            model_changed = True

        like_count = snippet['likeCount']
        if self.like_count != like_count:
            self.like_count = like_count
            model_changed = True

        if model_changed:
            self.model_update_date = datetime.utcnow()
            try:
                self.save()
            except Exception, e:
                dLogger.log('    Error while saving:')
                dLogger.exception(e)
                dLogger.pretty(str(yt_comment).encode('unicode_escape'))
Example #18
def manage_exception(retry_count, harvester, user):
    if debugging:
        dLogger.log(
            "manage_exception(retry_count: %s, harvester: %s, user: %s)" %
            (retry_count, harvester, user))
    msg = u"Exception for the harvester %s for %s. Retry:%d" % (
        harvester, unicode(user), retry_count)
    logger.exception(msg)
    if debugging: dLogger.exception(msg)
    retry_count += 1
    return (retry_count, retry_count > harvester.max_retry_on_fail)
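How the returned (retry_count, give_up) pair is typically consumed; a sketch, with do_call standing in for the real harvest request:

retry_count, give_up = 0, False
while not give_up:
    try:
        do_call()
        break
    except Exception:
        retry_count, give_up = manage_exception(retry_count, harvester, user)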
Example #19
    def end_current_harvest(self):
        if debugging: dLogger.log('end_current_harvest()')

        self.last_harvest_start_time = self.current_harvest_start_time
        self.last_harvest_end_time = datetime.now()
        self.current_harvest_start_time = None
        self.last_harvest_call_count = self.current_harvest_call_count
        self.last_user_harvest_was_aborted = bool(
            self.get_current_harvested_user())
        self.harvest_in_progress = False
        self.save()
def update_user_comments_from_batch(harvester, statusid, fbcomments_page):
    #if debugging:
    #dLogger.log("update_user_comments_from_batch(statusid: %s)"%statusid)

    next_bman = []

    #if "data" not in fbcomments_page:
    #logger.debug("DEVED: %s: %s" % (statusid, fbcomments_page))
    comment_count = None
    fbcomments_page = json.loads(fbcomments_page['body'])
    if 'error' not in fbcomments_page:
        comment_count = len(fbcomments_page["data"])
    else:
        logger.debug('ERROR: status %s could not be harvested: %s' %
                     (statusid, fbcomments_page['error']))

    if comment_count:

        waitCount = 0
        for comment in fbcomments_page["data"]:
            res = FBResult()
            res.harvester = harvester
            res.result = comment
            res.ftype = "FBComment"
            res.fid = comment["id"]
            res.parent = statusid
            res.save()
            waitCount += 1

        if debugging:
            dLogger.log("    %s more comments in waiting..." % waitCount)

        paging, new_page = get_comment_paging(fbcomments_page)

        #usage = psutil.virtual_memory()
        #logger.debug(u"Updating %d comments. New: %s Paging: %s Mem:%s MB" % (comment_count, new_page, paging, int(usage[4])/(1024.0)))

        if new_page:
            d = {
                "method": "GET",
                "relative_url":
                str("%s/comments?limit=250%s" % (statusid, paging))
            }
            next_bman.append({
                "snh_obj": statusid,
                "retry": 0,
                "request": d,
                "callback": update_user_comments_from_batch
            })
    #else:
    #    logger.debug("Empty comment page!! %s" % fbcomments_page)

    return next_bman
Example #21
def update_user_status(status, user, keepRaw):
    #if debugging: dLogger.log( "update_user_status(status: '%s...', user: %s)"%(status.text[:60], user.screen_name))
    try:
        tw_status = TWStatus.objects.get(fid__exact=status.id)
    except ObjectDoesNotExist:
        tw_status = TWStatus(user=user)
        tw_status.save()
        if debugging:
            dLogger.log("    New <TWStatus> created('%s...')" % (tw_status))
    tw_status.update_from_twitter(status, user, keepRaw)
    user.last_harvested_status = tw_status
    user.save()
def generate_csv_stream(request, dataLength, data, filename='output.csv'):
    dLogger.log("generate_csv_stream()")

    def stream():
        # Use a name other than 'data' so the generator can still read the
        # 'data' argument instead of shadowing it.
        for i in xrange(dataLength):
            csvfile = StringIO.StringIO()
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow(data[i])
            yield csvfile.getvalue()

    response = HttpResponse(stream(), mimetype="text/csv")
    response["Content-Disposition"] = "attachment; filename=%s" % filename
    return response
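A usage sketch, assuming rows is a list of row sequences already in memory; each row is serialized lazily as the response streams out:

rows = [['fid', 'text'], ['123', 'hello']]
response = generate_csv_stream(request, len(rows), rows, filename='tweets.csv')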
Example #23
def run_harvester_search(harvester):
    if debugging:
        dLogger.log("run_harvester_search(harvester: %s)" % (harvester))

    logger.info(u"START SEARCH: %s Stats:%s" %
                (harvester, unicode(harvester.get_stats())))
    try:
        all_twsearch = harvester.twsearch_to_harvest.all()
        search_all_terms(harvester, all_twsearch)
    except twitter.TwitterError, e:
        msg = u"ERROR for %s" % harvester
        logger.exception(msg)
        if debugging: dLogger.exception(msg)
def compute_results(harvester):
    if debugging:
        dLogger.log("compute_results()")
        dLogger.log("    %s items to analyze" % FBResult.objects.count())

    if FBResult.objects.filter(harvester=harvester).count() != 0:
        start = time.time()
        logger.info(u"Starting results computation")
        compute_new_post(harvester)
        compute_new_comment(harvester)
        FBResult.objects.filter(harvester=harvester).delete()
        logger.info(u"Results computation complete in %ss" %
                    (time.time() - start))
    def build_harvester_sequence(self):
        if debugging: dLogger.log("build_harvester_sequence()")
        self.haverst_deque = deque()
        all_users = list(self.twusers_to_harvest.all())

        if self.last_harvested_user:
            startIndex = all_users.index(self.last_harvested_user)
            retry_last_on_fail = 1 if self.retry_user_after_abortion and self.last_user_harvest_was_aborted else 0
            self.haverst_deque.extend(all_users[startIndex +
                                                retry_last_on_fail:])
            self.haverst_deque.extend(all_users[:startIndex +
                                                retry_last_on_fail])
        else:
            self.haverst_deque.extend(all_users)
def update_user_from_batch(harvester, snhuser, fbuser):
    if debugging:
        dLogger.log("update_user_from_batch()")
        #dLogger.log("fbuser: %s"%fbuser)
    try:
        snhuser.update_from_facebook(fbuser)
    except BaseException:
        logger.info('update failed for user %s' % snhuser)
        snhuser.error_triggered = True
        snhuser.save()
    #Recycling an unused field to store the last updated user. Not a good solution. To be revised.
    harvester.dont_harvest_further_than = snhuser.pk
    harvester.save()
    return None
Example #27
    def get_next_user_to_harvest(self):
        if debugging: dLogger.log("%s::get_next_user_to_harvest()" % self)
        if self.current_harvested_user:
            self.last_harvested_user = self.current_harvested_user

        if self.haverst_deque is None:
            self.build_harvester_sequence()

        try:
            self.current_harvested_user = self.haverst_deque.pop()
        except IndexError:
            self.current_harvested_user = None

        self.update_client_stats()
        return self.current_harvested_user
Example #28
def run_users_update(harvester):
    if debugging: dLogger.log("run_users_update(harvester: %s)" % (harvester))

    logger.info(u"START user update: %s Stats:%s" %
                (harvester, unicode(harvester.get_stats())))
    while harvester.remaining_user_lookup_hits > 0:
        logger.debug(u"New user batch to update. User lookup hits to go: %s" %
                     (harvester.remaining_user_lookup_hits))
        user_batch = harvester.get_next_user_batch_to_update()
        if user_batch:
            update_user_batch(harvester, user_batch)
        else:
            break
    logger.info(u"End user update for %s Stats:%s" %
                (harvester, unicode(harvester.get_stats())))
def fb(request, harvester_id):
    facebook_harvesters = FacebookHarvester.objects.all()
    dLogger.log('facebook_harvesters: %s' % facebook_harvesters)
    return render_to_response(
        u'snh/facebook.html', {
            u'fb_selected': True,
            u'all_harvesters': facebook_harvesters,
            u'harvester_id': harvester_id,
            'status_fields': izip_longest(*fb_posts_fields),
            'comment_fields': izip_longest(*fb_comments_fields),
            'years': choiceYears,
            'months': choiceMonths,
            'days': choiceDays,
            "now": now,
        })
Example #30
    def build_harvester_sequence(self):
        if debugging: dLogger.log("%s::build_harvester_sequence()" % self)
        self.haverst_deque = deque()
        all_users = self.ytusers_to_harvest.all()

        if self.last_harvested_user:
            count = 0
            for user in all_users:
                if user == self.last_harvested_user:
                    break
                count = count + 1
            retry_last_on_fail = 1 if self.retry_user_after_abortion and self.last_user_harvest_was_aborted else 0
            self.haverst_deque.extend(all_users[count + retry_last_on_fail:])
            self.haverst_deque.extend(all_users[:count + retry_last_on_fail])
        else:
            self.haverst_deque.extend(all_users)
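A worked trace of the wrap-around slicing above, using plain strings for users and the retry-after-abortion case:

from collections import deque

all_users = ['A', 'B', 'C', 'D']
startIndex = all_users.index('B')   # 'B' was the last harvested user
retry_last_on_fail = 1              # its harvest aborted, so retry it

d = deque()
d.extend(all_users[startIndex + retry_last_on_fail:])  # ['C', 'D']
d.extend(all_users[:startIndex + retry_last_on_fail])  # ['A', 'B']
assert list(d) == ['C', 'D', 'A', 'B']
# get_next_user_to_harvest() (Example #27) pops from the right, so 'B' is retried first.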