def main():
    """Crawl the site Make -> Year -> Model and report the trims of each.

    For every Make link on the "Cars" page, every non-"Used" Year link of
    that Make is opened.  When the Year page has a model table
    (id ``rtn_content_models``), each of its rows is passed to
    ``getModelInfo`` and the resulting trim page is handed to
    ``getTrimInfo``.  Otherwise the Year page itself is handed to
    ``getTrimInfo`` (older years list Model-Trim info directly).

    ``tools.randomSleep()`` is called before each page fetch, presumably to
    throttle requests -- confirm against the ``tools`` module.
    """
    # NOTE(review): the original used ``base_url`` for this first fetch and
    # ``baseurl`` everywhere else; unified on ``baseurl``.  Confirm which
    # name the module actually defines.
    make_soup = tools.openWebPage(baseurl + "Cars")
    make_list = make_soup.find(id='makelist')

    # loops through each vehicle Make
    for make in make_list.find_all('a'):
        # Single-argument print(...) behaves like the print statement on
        # Python 2 and is also valid on Python 3.
        print(make.string)

        # loops through each Make-Year
        tools.randomSleep()
        year_soup = tools.openWebPage(baseurl + make.get('href'))
        year_list = year_soup.find(id='rtn_content_modelyear_yearlist')
        for year in year_list.find_all('a'):
            year_href = year.get('href')
            # Ignores Make-Years that specify Used (the same info is captured
            # in the "new" page); anchors without an href cannot be followed.
            if not year_href or 'Used' in year_href:
                continue
            print(year.string)

            # loops through each Make-Year-Model
            tools.randomSleep()
            year_url = baseurl + year_href
            model_soup = tools.openWebPage(year_url)

            # Older years do not have Model pages and just display Model-Trim
            # info right away, so the Year page doubles as the trim page.
            # (The original passed ``trim_href`` here, which is unbound or
            # stale outside the model loop.)
            model_list = model_soup.find(id='rtn_content_models')
            if model_list is None:
                getTrimInfo(year_url, year.string, make.string)
                continue

            for model in model_list.find_all("tr"):
                # captures the Make-Year-Model info as well as the Model
                # description (the description is currently unused)
                (model_info_string, trim_href,
                 _model_info_desc, img_string) = getModelInfo(model)
                print("Got " + model_info_string)
                getTrimInfo(baseurl + trim_href, year.string, make.string,
                            model_info_string, img_string)
def getTrimInfo(trim_url, year, make, model_string='', img_string=''):
    """Fetch a trim page and print one line per trim found on it.

    Args:
        trim_url: Full URL of the page holding the trims table
            (id ``rtn_content_trims``).
        year: Model year text; printed and used to strip the
            "year make model" prefix from trim names.
        make: Make text; used the same way as ``year``.
        model_string: Model text used for rows that carry no model name of
            their own.  Defaults to ''.
        img_string: Image text printed with every trim.  Defaults to ''.

    Each printed line is ``year make model trim img_string`` separated by
    single spaces.  Nothing is returned.
    """
    tools.randomSleep()
    trim_soup = tools.openWebPage(trim_url)
    trim_list = trim_soup.find(id='rtn_content_trims')
    if trim_list is None:
        # Page has no trims table: nothing to report.
        return

    row_format = "%s %s %s %s %s"
    for trim in trim_list.find_all("tr"):
        # NOTE(review): find("rtn_content_trims_modelname") and find("href")
        # match *tag names*; an id/class lookup and ``a`` tags may have been
        # intended.  Left unchanged -- confirm against the page markup.
        name_tag = trim.find("rtn_content_trims_modelname")
        if name_tag:
            # Row names its own model as "<model>-<sub>-<model>..."
            name_parts = name_tag.string.strip().split("-")
            model = name_parts[0]
            sub_model = " ".join(name_parts[1:])
            for types in trim.find_all("href"):
                trim_string = sub_model + types.string
                # This is what goes in the database
                print(row_format % (year, make, model, trim_string,
                                    img_string))
        else:
            link = trim.find("href")
            if link is None or link.string is None:
                # Row without link text (e.g. a header row): skip it.
                continue
            # The link text repeats "year make model"; strip that prefix so
            # only the trim name remains.
            remove_string = year + " " + make + " " + model_string
            trim_string = link.string.strip().replace(remove_string, '')
            # This is what goes in the database.  The caller-supplied model
            # is used here; the original printed ``model``, which is unbound
            # (or left over from an earlier row) in this branch.
            print(row_format % (year, make, model_string, trim_string,
                                img_string))