Ejemplo n.º 1
0
def wget_download():
    """Queue one wget-download job per unique hospital URL.

    Fetches every known hospital, de-duplicates their URLs, and enqueues
    a download task for each distinct site.
    """
    hospitals = get_all_hospitals()
    # Set comprehension drops duplicate URLs before queuing.
    unique_urls = list({hospital['url'] for hospital in hospitals})

    for target_url in unique_urls:
        # ttl=-1 keeps the job from expiring while it waits in the queue;
        # the 24-hour timeout allows grabbing an entire site.
        q.enqueue(task_wget_download_hospital, {'url': target_url}, ttl=-1,
                  timeout=86400)
Ejemplo n.º 2
0
    def _output_hospital(self, hospital_data):
        """Place one extracted hospital onto the task queue.

        Args:
            hospital_data -- Dictionary containing the hospital data.
        """
        # Local import — presumably deferred to avoid an import cycle at
        # module load time; TODO confirm against queue_helper.
        from queue_helper import q

        logging.info('Add hospital to task queue')
        q.enqueue(task_hospital_duplicate_detector, hospital_data)
Ejemplo n.º 3
0
def foursquare_seeder():
    """Enqueue a single Foursquare crawl over a fixed bounding box.

    The metadata passed to the task defines the NE/SW corners of the
    search area and the grid step used by the crawler.
    """
    crawl_config = {
        "targetSquare": {
            'NE': "40.797480, -73.858479",
            'SW': "40.645527, -74.144426",
        },
        "step": 0.05,
    }

    # ttl=-1: the queued job never expires while waiting to run.
    q.enqueue(task_crawl_foursquare, crawl_config, ttl=-1)
Ejemplo n.º 4
0
def pubmed_all():
    """Queue a PubMed crawl task for every known hospital."""
    for entry in get_hospital_as_list():
        q.enqueue(task_pubmed_crawler, entry)
Ejemplo n.º 5
0
def clinical_trials():
    """Queue a clinical-trials lookup task for every known hospital."""
    for entry in get_hospital_as_list():
        q.enqueue(task_find_clinical_trials, entry)
Ejemplo n.º 6
0
def hospital_smart_names():
    """Queue smart name extraction for every known hospital."""
    # 15-minute per-job timeout for the extraction task.
    per_job_timeout = 15 * 60

    for entry in get_hospital_as_list():
        q.enqueue(task_hospital_extract_names_smart, entry,
                  timeout=per_job_timeout)
Ejemplo n.º 7
0
def hospital_match_keywords():
    """Queue keyword graph matching for every known hospital."""
    # 15-minute per-job timeout for the keyword-matching task.
    per_job_timeout = 15 * 60

    for entry in get_hospital_as_list():
        q.enqueue(task_clinicaltrials_graph_keywords, entry,
                  timeout=per_job_timeout)