Example #1
0
def _addRecrawlTime(pend_id, recrawl_time, crawlfail=False):
    """Store an incremented recrawl counter on a pending record.

    Args:
        pend_id: Value written to the record's ``id`` field.
        recrawl_time: Number to increment (presumably the current recrawl
            count); ``recrawl_time + 1`` is stored under ``recrawl_times``.
        crawlfail: When true a ``models.CrawlFailPending`` is used,
            otherwise a ``models.CrawlPending``.
    """
    fields = {
        'id': pend_id,
        'recrawl_times': recrawl_time + 1,
    }
    model_cls = models.CrawlFailPending if crawlfail else models.CrawlPending
    record = model_cls()
    record.update(fields)
    # NOTE(review): update=True presumably makes save() modify the existing
    # row matching 'id' instead of inserting a new one -- confirm in models.
    record.save(update=True)
Example #2
0
def _updateScheduleTime(pend_id, crawlfail=False):
    """Stamp a pending record with the current UTC timestamp.

    Args:
        pend_id: Value written to the record's ``id`` field.
        crawlfail: When true a ``models.CrawlFailPending`` is used,
            otherwise a ``models.CrawlPending``.
    """
    fields = {
        'id': pend_id,
        # Truncated to an int before being stored.
        'schedule_time': int(timeutils.utcnow_ts()),
    }
    model_cls = models.CrawlFailPending if crawlfail else models.CrawlPending
    record = model_cls()
    record.update(fields)
    # NOTE(review): update=True presumably makes save() modify the existing
    # row matching 'id' instead of inserting a new one -- confirm in models.
    record.save(update=True)
Example #3
0
def _updateCrawlStatus(pend_id, crawl_status, crawlfail=False):
    """Write a new crawl status onto a pending record.

    Args:
        pend_id: Value written to the record's ``id`` field.
        crawl_status: Value stored under ``crawl_status``.
        crawlfail: When true a ``models.CrawlFailPending`` is used,
            otherwise a ``models.CrawlPending``.
    """
    fields = {
        'id': pend_id,
        'crawl_status': crawl_status,
    }
    model_cls = models.CrawlFailPending if crawlfail else models.CrawlPending
    record = model_cls()
    record.update(fields)
    # NOTE(review): update=True presumably makes save() modify the existing
    # row matching 'id' instead of inserting a new one -- confirm in models.
    record.save(update=True)
Example #4
0
def _updateScheduleDoc(pend_id, recrawl_time, crawl_status, crawlfail=False):
    """Update status, schedule time and recrawl counter in a single save.

    Args:
        pend_id: Value written to the record's ``id`` field.
        recrawl_time: Number to increment (presumably the current recrawl
            count); ``recrawl_time + 1`` is stored under ``recrawl_times``.
        crawl_status: Value stored under ``crawl_status``.
        crawlfail: When true a ``models.CrawlFailPending`` is used,
            otherwise a ``models.CrawlPending``.
    """
    fields = {
        'id': pend_id,
        'crawl_status': crawl_status,
        # Current UTC timestamp, truncated to an int.
        'schedule_time': int(timeutils.utcnow_ts()),
        'recrawl_times': recrawl_time + 1,
    }
    model_cls = models.CrawlFailPending if crawlfail else models.CrawlPending
    record = model_cls()
    record.update(fields)
    # NOTE(review): update=True presumably makes save() modify the existing
    # row matching 'id' instead of inserting a new one -- confirm in models.
    record.save(update=True)
Example #5
0
def saveFailCrawlDoc(crawldoc):
    """Persist a failed crawl and queue its URL in the fail-pending table.

    Steps, in order:
      1. Save the crawl document as a ``models.CrawlFailResult``.
      2. Set the ``crawl_status`` of the originating ``CrawlPending``
         record (``crawldoc.pending_id``) to ``'crawled'``.
      3. Save a ``models.CrawlFailPending`` record with status ``'fresh'``
         and ``recrawl_times`` of 0 for the same request URL.

    Args:
        crawldoc: Crawl document providing ``convert``, ``reservation_dict``,
            ``pending_id``, ``request_url``, ``level``, ``detect_time`` and
            ``parent_docid``.
    """
    # Step 1: record the failure itself.
    # NOTE(review): ``convert`` is read as an attribute, not called --
    # presumably a property returning a dict-like object; confirm.
    result_fields = crawldoc.convert
    result_fields['reservation_dict'] = str(crawldoc.reservation_dict)
    result_fields['created_at'] = timeutils.utcnow_ts()
    utils.convert_datetimes(
        result_fields, 'created_at', 'deleted_at', 'updated_at')
    fail_result = models.CrawlFailResult()
    fail_result.update(result_fields)
    fail_result.save()

    # Step 2: flag the original pending record as crawled.
    _updateCrawlStatus(crawldoc.pending_id, 'crawled', crawlfail=False)

    # Step 3: queue the URL in the fail-pending table.
    # NOTE(review): save() is called without update=True, so this looks
    # like insert-only -- confirm whether an update path is also expected.
    pending_fields = {
        'request_url': crawldoc.request_url,
        'reservation_dict': str(crawldoc.reservation_dict),
        'level': crawldoc.level,
        'detect_time': crawldoc.detect_time,
        'crawl_status': 'fresh',
        'parent_docid': crawldoc.parent_docid,
        'recrawl_times': 0,
    }
    fail_pending = models.CrawlFailPending()
    fail_pending.update(pending_fields)
    fail_pending.save()