예제 #1
0
    def __init__(self):
        """Load ``bbb_config.yaml`` and initialise the scraper state.

        Reads the YAML settings file (path is relative to the current working
        directory), starts a PhantomJS webdriver with a 1024x768 window, and
        copies the settings this object needs onto the instance.

        Raises:
            OSError: if ``bbb_config.yaml`` cannot be opened.
            KeyError: if a required settings key is missing (assumes the
                top level of the YAML document is a mapping).
        """
        with open("bbb_config.yaml", "r") as f:
            # safe_load instead of yaml.load(f): calling load() without a
            # Loader is deprecated and fails on PyYAML >= 6.0, and the full
            # loader can construct arbitrary Python objects.
            # NOTE(review): assumes the config holds only plain YAML types.
            settings = yaml.safe_load(f)

        # `dcap` (desired capabilities) is defined outside this method.
        self.driver = webdriver.PhantomJS(
            desired_capabilities=dcap,
            service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any'],
        )
        self.driver.set_window_size(1024, 768)

        self.outfile = settings['output']
        self.fieldnames = ('name', 'url', 'address', 'city', 'state', 'zip',
                           'phone', 'type', 'email', 'contact')
        self.url_cats = ''
        self.site_url = settings['site_url']
        self.page_url = settings['page_url']
        self.change_urls = settings['change_url']
        self.base_url = strip_final_slash(get_base_url(self.site_url))
        self.pc = 0  # presumably a page counter; confirm against callers
예제 #2
0
파일: scraper.py 프로젝트: smehan/py-webetl
    def __init__(self):
        """Enable logging, load ``config.yml`` and initialise the scraper state.

        The settings file is looked up next to this source file. A PhantomJS
        webdriver is started with a 1024x768 window and an ``AZ`` helper
        object is created.

        Raises:
            OSError: if ``config.yml`` cannot be opened.
            KeyError: if a required settings key is missing (assumes the
                top level of the YAML document is a mapping).
        """
        init_logging()
        self.logger = logging.getLogger(__name__)
        self.logger.info("Job started and logging enabled")

        config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "config.yml")
        with open(config_path, "r") as fh:
            # safe_load instead of yaml.load(fh): calling load() without a
            # Loader is deprecated and fails on PyYAML >= 6.0, and the full
            # loader can construct arbitrary Python objects.
            # NOTE(review): assumes the config holds only plain YAML types.
            settings = yaml.safe_load(fh)

        # `dcap` (desired capabilities) is defined outside this method.
        self.driver = webdriver.PhantomJS(
            desired_capabilities=dcap,
            service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any'],
        )
        self.driver.set_window_size(1024, 768)

        self.shipping_rate = 0.75  # $rate/lb  # TODO: shift this to AZ class
        self.outfile = "../data/test.csv"
        self.fieldnames = ('net', 'roi', 'name', 'price', 'az_price', 'weight',
                           'az_sales_rank', 'az_match', 'url', 'img', 'az_url', 'az_asin')
        self.url_cats = settings['toys']
        self.site_url = settings['site_url']
        self.page_url = settings['page_url']
        self.base_url = strip_final_slash(get_base_url(self.site_url))
        self.az = AZ()
        self.depth_limit = settings['depth_limit']
예제 #3
0
    def __init__(self):
        """Enable logging, load ``wiki_config.yml`` and initialise the state.

        Logging is configured from ``../loggerUtils/logging.yml``. The
        settings file is looked up next to this source file, and a PhantomJS
        webdriver is started with a 1024x768 window.

        Raises:
            OSError: if ``wiki_config.yml`` cannot be opened.
            KeyError: if a required settings key is missing (assumes the
                top level of the YAML document is a mapping).
        """
        init_logging(default_path='../loggerUtils/logging.yml')
        self.logger = logging.getLogger(__name__)
        self.logger.info("Wiki Geo object initialized and logging enabled")

        config_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "wiki_config.yml")
        with open(config_path, "r") as fh:
            # safe_load instead of yaml.load(fh): calling load() without a
            # Loader is deprecated and fails on PyYAML >= 6.0, and the full
            # loader can construct arbitrary Python objects.
            # NOTE(review): assumes the config holds only plain YAML types.
            settings = yaml.safe_load(fh)

        # `dcap` (desired capabilities) is defined outside this method.
        self.driver = webdriver.PhantomJS(
            desired_capabilities=dcap,
            service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any'],
        )
        self.driver.set_window_size(1024, 768)

        self.outfile = settings['output']
        self.depth_limit = settings['depth_limit']
        self.debug = settings['debug']
        self.reuse = settings['reuse']
        # Names of the output fields, kept in the original order.
        self.fieldnames = ('FIPS', 'GNIS', 'area-codes', 'county', 'county-url', 'density-2010-sqkm',
                           'density-2010-sqmi', 'elevation-ft', 'elevation-m', 'geohack-url',
                           'land-area', 'lat', 'location-img', 'census-map',
                           'long', 'place-name', 'place-type', 'place-url',
                           'place-www', 'pop-2010', 'pop-estimate', 'state',
                           'state-url', 'total-area', 'water-area', 'zips')
        self.top_url = settings['top_url']
        self.base_url = strip_final_slash(get_base_url(self.top_url))