Beispiel #1
0
def monitor_items_usa():
    """Publish the number of USD-priced items as the ``cfg.ITEMS_USA`` gauge.

    Sends a search request to the local ``timcho_item_v3`` index, filtered on
    ``price.currency.name:"USD"``, and reports the ``hits.total`` value of the
    response. If the response carries no ``hits``/``total`` entry the gauge
    is set to ``0`` instead.
    """
    response = requests.get(
        'http://localhost:9200/timcho_item_v3/_search?q=price.currency.name:%22USD%22&fields='
    )
    payload = json.loads(response.text)

    # Only trust the count when both levels of the expected structure exist.
    has_total = 'hits' in payload and 'total' in payload['hits']
    usd_item_count = payload['hits']['total'] if has_total else 0

    metric.gauge(cfg.ITEMS_USA, usd_item_count)
Beispiel #2
0
def compare_items_crawled():
    """Publish crawled-item totals, overall and restricted to ``sincedays=20``.

    Two ``/crawled_products`` queries are issued against ``HOST`` in order;
    for each one the ``total_items`` field of the JSON response is sent as a
    gauge. A gauge is skipped when its ``total_items`` value is falsy
    (for example ``0``).
    """
    # (gauge name, query path) pairs, processed in this order.
    gauge_queries = (
        ('total_items_crawled',
         '/crawled_products?missing=expired'),
        ('total_items_crawled_20day',
         '/crawled_products?sincedays=20&limit=0&missing=expired'),
    )
    for gauge_name, query_path in gauge_queries:
        payload = requests.get(HOST + query_path).json()
        item_count = payload['total_items']
        if item_count:
            metric.gauge(gauge_name, item_count)
Beispiel #3
0
def top_spiders():
    """Publish the ``top_spiders`` and ``total_spiders`` gauges.

    ``top_spiders`` is the number of entries in the ``sources`` list of the
    ``sincedays=20`` query whose ``count`` exceeds 100. ``total_spiders`` is
    the length of the ``sources`` list of the query without ``sincedays``.
    Each gauge is skipped when its ``sources`` value is falsy.
    """
    recent = requests.get(
        HOST +
        '/crawled_products?sincedays=20&limit=0&missing=expired,feed&facetsize=1000'
    ).json()
    recent_sources = recent['sources']
    if recent_sources:
        # ``count`` is passed through int() before comparing, as it may
        # arrive in a non-integer form.
        busy_count = sum(
            1 for entry in recent_sources if int(entry['count']) > 100)
        metric.gauge('top_spiders', busy_count)

    overall = requests.get(
        HOST + '/crawled_products?missing=expired,feed&limit=0&facetsize=5000'
    ).json()
    all_sources = overall['sources']
    if all_sources:
        metric.gauge('total_spiders', len(all_sources))
Beispiel #4
0
 def log(self, spider):
     """Log crawl progress for ``spider`` and stop it when it stalls.

     Reads the scraped-item and received-response counters from
     ``self.stats``, derives rates from the change since the previous call
     (scaled by ``self.multiplier``), optionally forwards the numbers to
     Datadog, and writes a progress line to the logger.

     A call counts as "stalled" when the item counter has not moved since
     the previous call and the item rate is zero. After 15 consecutive
     stalled calls a warning event is raised (when Datadog is enabled) and
     the local crawler service is asked to stop the spider.
     """
     item_total = self.stats.get_value('item_scraped_count', 0)
     page_total = self.stats.get_value('response_received_count', 0)
     item_rate = (item_total - self.itemsprev) * self.multiplier
     page_rate = (page_total - self.pagesprev) * self.multiplier
     self.pagesprev = page_total
     self.itemsprev = item_total

     # The explicit ``== True`` comparison is kept on purpose so the flag is
     # interpreted exactly as before.
     if config.DATADOG_USED == True:
         samples = (
             ("pages_count", page_total),
             ("pages_speed", page_rate),
             ("items_count", item_total),
             ("items_speed", item_rate),
         )
         for suffix, value in samples:
             # A fresh tag list is built for every gauge call.
             metr.gauge(cfg.TRACKING_MERCHANT + suffix,
                        value,
                        tags=['source:%s' % spider.name])

     msg = ("Crawled %(pages)d pages (at %(pagerate)d pages/min), "
            "scraped %(items)d items (at %(itemrate)d items/min)")
     logger.info(
         msg,
         {
             'pages': page_total,
             'pagerate': page_rate,
             'items': item_total,
             'itemrate': item_rate,
         },
         extra={'spider': spider})

     # Track how many calls in a row produced no new items.
     stalled = item_total - self.items_crawled == 0 and item_rate == 0
     self.count_zero_sequent = self.count_zero_sequent + 1 if stalled else 0

     if self.count_zero_sequent >= 15:
         if config.DATADOG_USED == True:
             metr.event(
                 "SPIDER DON'T PARSE ITEMS IN LONG TIME!",
                 "[" + spider.name +
                 "]Spider error: Don't parse items in long time, please check rule.",
                 "warning", util.get_name_server())
             metr.incr(cfg.SPIDER_WARNING, 1)
         # Imported here, as in the original, rather than at module level.
         import requests
         stop_url = ("http://localhost:6082/crawler/stopcrawl?spider=" +
                     spider.name)
         requests.get(stop_url)

     # Remember the counter for the next stall check.
     self.items_crawled = item_total
Beispiel #5
0
def fresh_items():
    """Publish the ``fresh_items`` gauge from the ``sincedays=20`` query.

    The gauge value is the ``total_items`` field of the JSON response; no
    gauge is sent when that value is falsy (for example ``0``).
    """
    url = HOST + '/crawled_products?sincedays=20&missing=expired'
    payload = requests.get(url).json()
    item_count = payload['total_items']
    if not item_count:
        return
    metric.gauge('fresh_items', item_count)
Beispiel #6
0
def items_crawled_perday():
    """Publish the ``items_crawled_perday`` gauge from the ``sincedays=1`` query.

    The gauge value is the ``total_items`` field of the JSON response; no
    gauge is sent when that value is falsy (for example ``0``).
    """
    url = HOST + '/crawled_products?sincedays=1&missing=expired,feed'
    payload = requests.get(url).json()
    item_count = payload['total_items']
    if not item_count:
        return
    metric.gauge('items_crawled_perday', item_count)
def push_datadog():
    """Send the current worker and spider counters to Datadog as gauges.

    Calls ``setStats()`` first (presumably this refreshes the module-level
    ``stats`` mapping -- confirm against its definition), then publishes one
    gauge per counter read from ``stats``.
    """
    setStats()
    # Number of worker threads.
    metr.gauge(cfg.WORKER, stats['nThreadsWorker'])
    # NOTE(review): gauges for additional workers are currently disabled.
#     metr.gauge(cfg.WORKER_2, thread_w2)
#     metr.gauge(cfg.WORKER_3, thread_w3)
    # Spider counters: total, pending, running, not running and missing.
    metr.gauge(cfg.TOTAL_SPIDERS, stats['totalSpds'])
    metr.gauge(cfg.PEDING_SPIDERS, stats['pendingSpds'])
    metr.gauge(cfg.RUNNING_SPIDERS, stats['runningSpds'])
    metr.gauge(cfg.NOT_RUNNING_SPIDERS, stats['nSpdsNotRun'])
    metr.gauge(cfg.MISSING_SPIDERS, stats['nSpdsMiss'])