def directExtract(args):
    """Fetch a KBB page for the given vehicle and extract its price.

    Builds the listing URL from the positional vehicle fields on *args*,
    forwards any optional query parameters that were supplied, fetches the
    page, and runs the debug price extractor over the response body.

    :param args: argparse.Namespace carrying make/model/year/style plus the
                 optional intent/pricetype/mileage attributes.
    :return: the value produced by modelkbb_v1_debug.extractPricekbb for the
             fetched page.  (BUG FIX: previously this was assigned to a local
             and silently discarded, so the function always returned None.)
    """
    url = construct_base_url(args.make, args.model, args.year, args.style)
    optional_parameters = ['intent', 'pricetype', 'mileage']
    # vars() converts the argparse.Namespace into a plain dict so
    # construct_parameters can pick out the optional keys.
    parameters = construct_parameters(vars(args), optional_parameters)
    url_fetched = url_fetch(url, parameters)
    # NOTE(review): url_fetch may return a falsy value on failure (the two
    # main() functions in this file check for that) — confirm whether a
    # guard is needed here before dereferencing .text.
    return modelkbb_v1_debug.extractPricekbb(url_fetched.text)
def main(): description = """Scrape data from KBB""" parser = argparse.ArgumentParser(description=description) # Positional (pass them in proper order, and yes they are required) parser.add_argument('make', help='Make') parser.add_argument('model', help='Model') parser.add_argument('year', help='Year') parser.add_argument('style', help='Style') # Optional optional_parameters = ['intent', 'pricetype', 'mileage'] parser.add_argument('-i', '--intent', dest='intent', help='Intent') parser.add_argument('-p', '--pricetype', dest='pricetype', help='Price Type') parser.add_argument('-m', '--mileage', dest='mileage', help='Mileage') args = parser.parse_args() # Construct the base url url = construct_base_url(args.make, args.model, args.year, args.style) # we use the built in command 'vars' to convert the argparse.Namespace # to a standard python dictionary... what even is an argparse.Namespace? parameters = construct_parameters(vars(args), optional_parameters) url_fetched = url_fetch(url, parameters) if url_fetched: """ Not super elegant, nice to make more than one comparison per conditional statement. Need to think through: - output formats - using intent also, because pricetype ins't enough: - - check out: http://www.kbb.com/kia/optima/2013-kia-optima/ex/ http://www.kbb.com/kia/optima/2013-kia-optima/ex-sedan-4d/ Neither of these things use an intent or pricetype and they are the same platform with the same trim level.... """ if not args.pricetype: print parse_single(url_fetched.text) if args.pricetype == 'cpo': print parse_single(url_fetched.text) if args.pricetype == 'retail': print parse_single(url_fetched.text) if args.pricetype == 'trade-in': print parse_list(url_fetched.text) if args.pricetype == 'private-party': print parse_list(url_fetched.text)
def main(): description = """Parse a site and download selected media if the proper domain specific scraping mechanism is written""" parser = argparse.ArgumentParser(description=description) parser.add_argument('site', help='The site to be scraped, supplied as url') parser.add_argument('-o', '--output', help='output to a file') args = parser.parse_args() site = args.site module_name = parse_module_name(site) url_fetched = url_fetch(site) if url_fetched: try: full_module_name = 'scrape.' + module_name # - Import Module or fail parser = __import__(full_module_name, globals(), [], -1) except ImportError: print 'Not valid parser for: ' + site return # - Get the Title title = parser.get_title(url_fetched.text) # - Make the output folder if not args.output: folder = create_folder(title) else: folder = create_folder(title, args.output) # - Get the Text text = parser.get_text(url_fetched.text) # - Write the Background.txt (title + text + date scraped) # (I likely need to do something better to check folder creation) write_background(site, text, folder) # - Get the Images images = parser.get_images(url_fetched.text) # - Download the Images write_image_list(images, folder) else: print 'Not a valid site: ' + site return