def get_job_state_handler(url, spider_host="localhost", spider_port="6800"):
    """Return the scrapyd state of the job previously recorded for *url*.

    NOTE(review): this definition is shadowed by later redefinitions of
    ``get_job_state_handler`` in this file; only the last one is live.
    """
    seed_doc = MemexMongoUtils().get_seed_doc(url)
    checker = ScrapydJob(spider_host, spider_port)
    return checker.get_state(seed_doc["job_id"])
def schedule_spider_handler(seed, spider_host="localhost", spider_port="6800"):
    """Schedule a crawl for *seed* and record the resulting job id in Mongo.

    Always returns True.

    NOTE(review): shadowed by later redefinitions of
    ``schedule_spider_handler`` in this file; only the last one is live.
    """
    store = MemexMongoUtils()
    job = ScrapydJob(spider_host, spider_port, screenshot_dir=SCREENSHOT_DIR)
    new_job_id = job.schedule(seed)
    store.add_job(seed, new_job_id)
    return True
def schedule_spider_handler(seed, spider_host="localhost", spider_port="6800"):
    """Schedule the ``topical_finder`` spider (discovery-project) for *seed*.

    Records the scheduled job in Mongo and returns True.

    NOTE(review): shadowed by a later redefinition of
    ``schedule_spider_handler`` in this file; only the last one is live.
    """
    project_name = "discovery-project"
    spider_name = "topical_finder"
    job = ScrapydJob(
        spider_host,
        spider_port,
        project=project_name,
        spider=spider_name,
        screenshot_dir=SCREENSHOT_DIR,
    )
    MemexMongoUtils().add_job(
        seed, job.schedule(seed), project=project_name, spider=spider_name
    )
    return True
def get_job_state_handler(url, spider_host="localhost", spider_port="6800"):
    """Look up the job recorded for *url* and return its scrapyd state.

    Reads both the job id and the scrapyd project name from the seed doc.

    NOTE(review): shadowed by a later redefinition of
    ``get_job_state_handler`` in this file; only the last one is live.
    """
    doc = MemexMongoUtils().get_seed_doc(url)
    checker = ScrapydJob(spider_host, spider_port, project=doc["project"])
    return checker.get_state(doc["job_id"])
def get_job_state_handler(url, spider_host="localhost", spider_port="6800"):
    """Return the scrapyd state for the job associated with *url*.

    The seed document stored in Mongo supplies both the job id and the
    scrapyd project the job was scheduled under.
    """
    mongo = MemexMongoUtils()
    seed = mongo.get_seed_doc(url)
    state_reader = ScrapydJob(spider_host, spider_port, project=seed["project"])
    return state_reader.get_state(seed["job_id"])
def schedule_spider_searchengine_handler(search_terms, spider_host="localhost", spider_port="6800"):
    """Schedule a ``google.com`` search-engine crawl for *search_terms*.

    Schedules the keyword crawl via scrapyd under ``searchengine-project``
    and records the resulting job in Mongo.

    Returns True on success, matching the convention of the other
    ``schedule_spider_*`` handlers in this module (the original version
    implicitly returned None, which was inconsistent with its siblings).
    """
    mmu = MemexMongoUtils()
    project = "searchengine-project"
    spider = "google.com"
    scrapyd_util = ScrapydJob(
        scrapyd_host=spider_host,
        scrapyd_port=spider_port,
        project=project,
        spider=spider,
        screenshot_dir=SCREENSHOT_DIR,
    )
    job_id = scrapyd_util.schedule_keywords(search_terms)
    mmu.add_job(search_terms, job_id, project=project, spider=spider)
    # Fix: explicit success return, consistent with schedule_spider_handler.
    return True
def schedule_spider_handler(seed, spider_host="localhost", spider_port="6800"):
    """Kick off the ``topical_finder`` spider for *seed* and log the job.

    The crawl runs under the ``discovery-project`` scrapyd project; the
    scheduled job id is persisted in Mongo alongside the seed. Returns True.
    """
    project = "discovery-project"
    spider = "topical_finder"
    launcher = ScrapydJob(
        spider_host,
        spider_port,
        project=project,
        spider=spider,
        screenshot_dir=SCREENSHOT_DIR,
    )
    scheduled_id = launcher.schedule(seed)
    mongo = MemexMongoUtils()
    mongo.add_job(seed, scheduled_id, project=project, spider=spider)
    return True