Example #1
0
def directExtract(args):
    """Fetch the KBB page described by ``args`` and run the price extractor.

    ``args`` must expose ``make``, ``model``, ``year`` and ``style``
    attributes and be usable with ``vars()`` (an ``argparse.Namespace``
    satisfies both).  The value produced by
    ``modelkbb_v1_debug.extractPricekbb`` is only bound to a local name;
    nothing is returned by the code visible in this function.
    """
    # Keys handed to construct_parameters as the optional ones.
    optional_keys = ['intent', 'pricetype', 'mileage']

    base_url = construct_base_url(
        args.make, args.model, args.year, args.style)

    # vars() turns the namespace object into a plain dictionary so that
    # construct_parameters can look the optional keys up by name.
    query = construct_parameters(vars(args), optional_keys)

    response = url_fetch(base_url, query)
    price = modelkbb_v1_debug.extractPricekbb(response.text)
Example #2
0
def main():
    """Command-line entry point: fetch a KBB page and print the parsed price.

    The positional arguments ``make``, ``model``, ``year`` and ``style``
    are passed to ``construct_base_url``; ``--intent``, ``--pricetype`` and
    ``--mileage`` are the optional keys given to ``construct_parameters``.
    When ``url_fetch`` returns a truthy result, the page text is parsed
    with ``parse_single`` or ``parse_list`` depending on ``--pricetype``
    and the parsed value is printed.  Any other ``--pricetype`` value, or a
    falsy fetch result, produces no output.
    """
    cli = argparse.ArgumentParser(description="""Scrape data from KBB""")

    # Positional (pass them in proper order, and yes they are required)
    for name, label in (('make', 'Make'),
                        ('model', 'Model'),
                        ('year', 'Year'),
                        ('style', 'Style')):
        cli.add_argument(name, help=label)

    # Optional
    optional_parameters = ['intent', 'pricetype', 'mileage']
    for short_flag, key, label in (('-i', 'intent', 'Intent'),
                                   ('-p', 'pricetype', 'Price Type'),
                                   ('-m', 'mileage', 'Mileage')):
        cli.add_argument(short_flag, '--' + key, dest=key, help=label)

    args = cli.parse_args()

    # Construct the base url
    base_url = construct_base_url(
        args.make, args.model, args.year, args.style)

    # vars() converts the argparse.Namespace into a standard dictionary.
    query = construct_parameters(vars(args), optional_parameters)

    page = url_fetch(base_url, query)
    if not page:
        return

    # Need to think through:
    #  - output formats
    #  - using intent also, because pricetype isn't enough; check out:
    #      http://www.kbb.com/kia/optima/2013-kia-optima/ex/
    #      http://www.kbb.com/kia/optima/2013-kia-optima/ex-sedan-4d/
    #    Neither of these uses an intent or pricetype and they are the same
    #    platform with the same trim level.
    #
    # Single-argument print(...) behaves the same as the print statement.
    pricetype = args.pricetype
    if not pricetype or pricetype in ('cpo', 'retail'):
        print(parse_single(page.text))
    elif pricetype in ('trade-in', 'private-party'):
        print(parse_list(page.text))
Example #3
0
def main():
    """Command-line entry point: scrape one site with its domain module.

    Reads the positional ``site`` url and the optional ``-o/--output``
    value, fetches the site with ``url_fetch`` and imports the module
    ``scrape.<module_name>``, where ``module_name`` comes from
    ``parse_module_name(site)``.  The imported module's ``get_title``,
    ``get_text`` and ``get_images`` functions are applied to the fetched
    text; the results are passed to ``create_folder``, ``write_background``
    and ``write_image_list``.

    Prints ``'Not a valid site: <site>'`` and returns when the fetch result
    is falsy, and ``'Not valid parser for: <site>'`` when the scraping
    module cannot be imported.
    """
    # Local import: the module-level import block is outside this function.
    import importlib

    description = """Parse a site and download selected media if the proper
    domain specific scraping mechanism is written"""

    parser = argparse.ArgumentParser(description=description)
    parser.add_argument('site', help='The site to be scraped, supplied as url')
    parser.add_argument('-o', '--output', help='output to a file')

    args = parser.parse_args()
    site = args.site

    module_name = parse_module_name(site)
    url_fetched = url_fetch(site)

    if not url_fetched:
        print('Not a valid site: ' + site)
        return

    # - Import Module or fail
    #   import_module returns the leaf module (scrape.<name>) itself; a
    #   plain __import__ of a dotted name returns the top-level package
    #   unless a non-empty fromlist is supplied.
    full_module_name = 'scrape.' + module_name
    try:
        scraper = importlib.import_module(full_module_name)
    except ImportError:
        print('Not valid parser for: ' + site)
        return

    page_text = url_fetched.text

    # - Get the Title
    title = scraper.get_title(page_text)

    # - Make the output folder
    if not args.output:
        folder = create_folder(title)
    else:
        folder = create_folder(title, args.output)

    # - Get the Text
    text = scraper.get_text(page_text)

    # - Write the Background.txt (title + text + date scraped)
    #   (I likely need to do something better to check folder creation)
    write_background(site, text, folder)

    # - Get the Images
    images = scraper.get_images(page_text)

    # - Download the Images
    write_image_list(images, folder)