Ejemplo n.º 1
0
def handle_url(url):
    """Dispatch ``url`` to the handler of the first site whose regex matches.

    Walks ``sites_dict`` (site name -> regex) in its iteration order and
    tests each pattern against the beginning of ``url`` with ``re.match``.
    For the first hit, ``handle_link`` is looked up through ``import_from``
    in ``seleniumcrawler.sites.<site>.<site>`` and called with ``url``.

    Returns:
        The object returned by the handler, after the matching site name
        has been stored under its ``'handler'`` key.

    Raises:
        HandlerError: if no pattern in ``sites_dict`` matches ``url``.
    """
    for site_name, pattern in sites_dict.items():
        if not re.match(pattern, url):
            continue
        # Module and package share the site's name, hence the repeated value.
        module_path = 'seleniumcrawler.sites.%s.%s' % (site_name, site_name)
        handle_link = import_from(module_path, 'handle_link')
        outcome = handle_link(url)
        # Record which site produced the result.
        outcome['handler'] = site_name
        return outcome
    raise HandlerError('Handler for URL not defined')
Ejemplo n.º 2
0
def handle_url(url):
    """Dispatch ``url`` to the handler of the first site whose regex matches.

    Walks ``sites_dict`` (site name -> regex) in its iteration order and
    tests each pattern against the beginning of ``url`` with ``re.match``.
    For the first hit, ``handle_link`` is looked up through ``import_from``
    in ``seleniumcrawler.sites.<site>.<site>`` and called with ``url``.

    Returns:
        The object returned by the handler, after the matching site name
        has been stored under its ``"handler"`` key.

    Raises:
        HandlerError: if no pattern in ``sites_dict`` matches ``url``.
    """
    for site_name, pattern in sites_dict.items():
        if not re.match(pattern, url):
            continue
        # Module and package share the site's name, hence the repeated value.
        module_path = "seleniumcrawler.sites.%s.%s" % (site_name, site_name)
        handle_link = import_from(module_path, "handle_link")
        outcome = handle_link(url)
        # Record which site produced the result.
        outcome["handler"] = site_name
        return outcome
    raise HandlerError("Handler for URL not defined")
Ejemplo n.º 3
0
def locate_sites():
    """Compose the expected file locations for every site in ``sites_dict``.

    Paths are only built with ``os.path.join`` under ``SITES_DIR``; this
    function does not check that any of them exist.

    Returns:
        A list with one dict per site, in ``sites_dict`` iteration order,
        holding the keys ``'name'``, ``'script_path'``, ``'config_path'``
        and ``'site_dir'``.
    """
    location_list = []
    # Only the site names are needed, so iterate the keys directly rather
    # than unpacking (and discarding) each site's regex.
    for site in sites_dict:
        this_site_dir = os.path.join(SITES_DIR, site)
        # This is only the EXPECTED script name.
        # All scripts should follow this convention.
        script_name = site + '_raw.py'
        location_list.append({
            'name': site,
            'script_path': os.path.join(this_site_dir, script_name),
            'config_path': os.path.join(this_site_dir, 'config.py'),
            'site_dir': this_site_dir,
        })

    return location_list
Ejemplo n.º 4
0
def locate_sites():
    """Compose the expected file locations for every site in ``sites_dict``.

    Paths are only built with ``os.path.join`` under ``SITES_DIR``; this
    function does not check that any of them exist.

    Returns:
        A list with one dict per site, in ``sites_dict`` iteration order,
        holding the keys ``'name'``, ``'script_path'``, ``'config_path'``
        and ``'site_dir'``.
    """
    location_list = []
    # Only the site names are needed, so iterate the keys directly rather
    # than unpacking (and discarding) each site's regex.
    for site in sites_dict:
        this_site_dir = os.path.join(SITES_DIR, site)
        # This is only the EXPECTED script name.
        # All scripts should follow this convention.
        script_name = site + '_raw.py'
        location_list.append({
            'name': site,
            'script_path': os.path.join(this_site_dir, script_name),
            'config_path': os.path.join(this_site_dir, 'config.py'),
            'site_dir': this_site_dir,
        })

    return location_list