def __init__(self):
    """Load the scraper configuration and start a headless PhantomJS driver.

    Settings are read from ``bbb_config.yaml``, resolved against the current
    working directory. The keys ``output``, ``site_url``, ``page_url`` and
    ``change_url`` are required.

    Raises:
        IOError/OSError: if ``bbb_config.yaml`` cannot be opened.
        KeyError: if a required key is missing from the configuration.
    """
    # safe_load instead of yaml.load: a bare yaml.load() can construct
    # arbitrary Python objects and, since PyYAML 6, fails outright because
    # the Loader argument is mandatory.
    with open("bbb_config.yaml", "r") as f:
        settings = yaml.safe_load(f)

    # Read every required setting *before* spawning the browser so that an
    # incomplete config fails fast without leaving a PhantomJS process behind.
    self.outfile = settings['output']
    self.fieldnames = ('name', 'url', 'address', 'city', 'state', 'zip',
                       'phone', 'type', 'email', 'contact')
    self.url_cats = ''
    self.site_url = settings['site_url']
    self.page_url = settings['page_url']
    self.change_urls = settings['change_url']
    self.base_url = strip_final_slash(get_base_url(self.site_url))
    self.pc = 0  # NOTE(review): presumably a page counter -- confirm against callers

    # `dcap` (desired capabilities) is defined outside this method.
    self.driver = webdriver.PhantomJS(
        desired_capabilities=dcap,
        service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any'])
    self.driver.set_window_size(1024, 768)
def __init__(self):
    """Set up logging, load ``config.yml`` and start a headless PhantomJS driver.

    ``config.yml`` is looked up next to this source file. The keys ``toys``,
    ``site_url``, ``page_url`` and ``depth_limit`` are required.

    Raises:
        IOError/OSError: if ``config.yml`` cannot be opened.
        KeyError: if a required key is missing from the configuration.
    """
    init_logging()
    self.logger = logging.getLogger(__name__)
    self.logger.info("Job started and logging enabled")

    config_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "config.yml")
    # safe_load instead of yaml.load: a bare yaml.load() can construct
    # arbitrary Python objects and, since PyYAML 6, fails outright because
    # the Loader argument is mandatory.
    with open(config_path, "r") as fh:
        settings = yaml.safe_load(fh)

    # Read every required setting *before* spawning the browser so that an
    # incomplete config fails fast without leaving a PhantomJS process behind.
    self.shipping_rate = 0.75  # $rate/lb  # TODO: shift this to AZ class
    self.outfile = "../data/test.csv"
    self.fieldnames = ('net', 'roi', 'name', 'price', 'az_price', 'weight',
                       'az_sales_rank', 'az_match', 'url', 'img', 'az_url',
                       'az_asin')
    self.url_cats = settings['toys']
    self.site_url = settings['site_url']
    self.page_url = settings['page_url']
    self.base_url = strip_final_slash(get_base_url(self.site_url))
    self.depth_limit = settings['depth_limit']

    # `dcap` (desired capabilities) is defined outside this method.
    self.driver = webdriver.PhantomJS(
        desired_capabilities=dcap,
        service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any'])
    self.driver.set_window_size(1024, 768)

    # Created after the driver, matching the original ordering of the two
    # external set-up steps.
    self.az = AZ()
def __init__(self):
    """Set up logging, load ``wiki_config.yml`` and start a PhantomJS driver.

    ``wiki_config.yml`` is looked up next to this source file. The keys
    ``output``, ``depth_limit``, ``debug``, ``reuse`` and ``top_url`` are
    required.

    Raises:
        IOError/OSError: if ``wiki_config.yml`` cannot be opened.
        KeyError: if a required key is missing from the configuration.
    """
    init_logging(default_path='../loggerUtils/logging.yml')
    self.logger = logging.getLogger(__name__)
    self.logger.info("Wiki Geo object initialized and logging enabled")

    config_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "wiki_config.yml")
    # safe_load instead of yaml.load: a bare yaml.load() can construct
    # arbitrary Python objects and, since PyYAML 6, fails outright because
    # the Loader argument is mandatory.
    with open(config_path, "r") as fh:
        settings = yaml.safe_load(fh)

    # Read every required setting *before* spawning the browser so that an
    # incomplete config fails fast without leaving a PhantomJS process behind.
    self.outfile = settings['output']
    self.depth_limit = settings['depth_limit']
    self.debug = settings['debug']
    self.reuse = settings['reuse']
    self.fieldnames = (
        'FIPS', 'GNIS', 'area-codes', 'county', 'county-url',
        'density-2010-sqkm', 'density-2010-sqmi', 'elevation-ft',
        'elevation-m', 'geohack-url', 'land-area', 'lat', 'location-img',
        'census-map', 'long', 'place-name', 'place-type', 'place-url',
        'place-www', 'pop-2010', 'pop-estimate', 'state', 'state-url',
        'total-area', 'water-area', 'zips')
    self.top_url = settings['top_url']
    self.base_url = strip_final_slash(get_base_url(self.top_url))

    # `dcap` (desired capabilities) is defined outside this method.
    self.driver = webdriver.PhantomJS(
        desired_capabilities=dcap,
        service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any'])
    self.driver.set_window_size(1024, 768)