def scrape(manual_run, input_street_name=None, input_arrondissement=None, input_street_type=None, input_address=None, input_suite=None, input_direction=None): '''Function to perform the interaction with the website like a human user''' AP.reset_dict() if not input_street_name: print "Input street name (partial names work best):" input_street = raw_input("> ") else: input_street = input_street_name print "Input street name (partial names work best):" print "> " + input_street input_street = input_street.upper() # Specifically try for case with "de la" or "des" in street name which # has tendency to screw things up input_street_list = [input_street] if input_street[:6] == "DE LA ": input_street_list.append(input_street[6:]) elif input_street[:5] == "DE L ": input_street_list.append(input_street[5:]) elif input_street[:4] == "DES ": input_street_list.append(input_street[4:]) elif input_street[:3] == "DE ": input_street_list.append(input_street[3:]) elif input_street[:3] == "DU ": input_street_list.append(input_street[3:]) selected_nbhood = None # try best match with full string first and break on success, # otherwise attempt next best with "de la" or "des" removed for street_name in input_street_list: if not selected_nbhood: print 'Street search: "' + street_name + '"...' # Emulate web browse instance with Mechanize BrowserInstance = Browser() Scraper = BrowserInstance.br #### Log-in Sequence #### # Log into Role d'Evaluation Fonciere main page (to set browser # ASP.NET cookie) init_url = 'http://evalweb.ville.montreal.qc.ca/default.asp' Scraper.open(init_url, timeout=TIMEOUT) # Get list of neighborhoods for entered street name Scraper.select_form(name='Formulaire') Scraper.form['text1'] = street_name street_name = urllib.quote(street_name) BrowserInstance.mimic_cookie_js(street_name) response = Scraper.submit().read() # Look for select_field containing the list of neighboorhoods # from the returned webpage soup = BeautifulSoup(response) neighborhoods_html = soup.find('select', {'id': 'select1'}) try: nbhood_choices = neighborhoods_html.findAll('option') nbhood_choices.pop(0) # Remove junk header entry # Iterate through the displayed options matching them to the # site URL_ID nbhd_selection_list = [] for choice in nbhood_choices: # url id embedded in attributes in a tuple pair nbhood_url_id = choice.attrs[0][1] # extract neighborhood name address_list = AP.parse_nbhoods(choice.text) address_list.append(nbhood_url_id) nbhd_selection_list.append(address_list) if manual_run: # display input selection prompt if run manually selected_nbhood = selection_prompt(nbhd_selection_list) selected_nbhood = selected_nbhood - 1 else: print "Looking for arrondissement: " + input_arrondissement selected_nbhood = nbhd_search( nbhd_selection_list, input_arrondissement, input_street, input_street_type, input_direction) except Exception, e: print "Warning: No match found on assessment roll." time.sleep(1)