def wget_download():
    """Enqueue a full-site wget download job for every unique hospital URL.

    Fetches all hospitals, de-duplicates their URLs, and enqueues one
    long-running download task per distinct URL.
    """
    results = get_all_hospitals()
    # One-pass de-duplication; no need to materialize a list first and
    # then round-trip through set() back to a list just to iterate.
    unique_urls = {h['url'] for h in results}
    for url in unique_urls:
        # ttl=-1: job never expires while waiting in the queue.
        # timeout of 24 hours to grab whole site.
        q.enqueue(task_wget_download_hospital, {'url': url},
                  ttl=-1, timeout=86400)
def _output_hospital(self, hospital_data):
    """Hand an extracted hospital off to the duplicate-detection queue.

    Args:
        hospital_data -- Dictionary containing the hospital data.
    """
    # Imported lazily to avoid a circular import at module load time
    # (presumably — TODO confirm against queue_helper).
    from queue_helper import q

    logging.info('Add hospital to task queue')
    q.enqueue(task_hospital_duplicate_detector, hospital_data)
def foursquare_seeder():
    """Seed the Foursquare crawler with its target bounding box.

    The crawl task receives the NE/SW corners (as "lat, lon" strings)
    and a grid step size, then walks the area itself.
    """
    crawl_params = {
        "targetSquare": {
            'NE': "40.797480, -73.858479",
            'SW': "40.645527, -74.144426",
        },
        "step": 0.05,
    }
    # ttl=-1: the seed job never expires while queued.
    q.enqueue(task_crawl_foursquare, crawl_params, ttl=-1)
def pubmed_all():
    """Queue a PubMed crawl task for every known hospital."""
    for record in get_hospital_as_list():
        q.enqueue(task_pubmed_crawler, record)
def clinical_trials():
    """Queue a clinical-trials lookup task for every known hospital."""
    for record in get_hospital_as_list():
        q.enqueue(task_find_clinical_trials, record)
def hospital_smart_names():
    """Queue smart name extraction for every known hospital.

    Each job gets a 15-minute timeout since extraction can be slow.
    """
    fifteen_minutes = 15 * 60
    for record in get_hospital_as_list():
        q.enqueue(task_hospital_extract_names_smart, record,
                  timeout=fifteen_minutes)
def hospital_match_keywords():
    """Queue clinical-trials keyword graph matching for every hospital.

    Each job gets a 15-minute timeout since matching can be slow.
    """
    fifteen_minutes = 15 * 60
    for record in get_hospital_as_list():
        q.enqueue(task_clinicaltrials_graph_keywords, record,
                  timeout=fifteen_minutes)