def respond(bot, update):
    """Reply to a Telegram message containing a Singapore postal code with
    the matching GRC/SMC name and its MP(s).

    Args:
        bot: Telegram bot instance used to send the reply.
        update: Incoming Telegram update; ``update.message.text`` is read
            as the postal code.

    Side effects:
        Sends exactly one message back to the originating chat.
    """
    postal_code = update.message.text
    # Deflect election-related questions before any lookup.
    if ("win" in postal_code) or ("election" in postal_code):
        msg = "Sorry there's no public information on this. Democracy is uncertain. :/"
    elif isValid(postal_code):
        # All postal codes are 6-digit numbers, but not necessarily exist.
        url = "https://sggrc.herokuapp.com/postcode/" + postal_code
        js = get_json_from_url(url)
        grc = js["grc"]
        if grc:
            members = scraping(grc)
            if not members:
                # FIX: previously an empty member list fell into the else
                # branch and raised IndexError on members[0]; treat it the
                # same as an unknown postal code.
                msg = "Postal code is not yet in database/ does not exist!"
            elif len(members) > 1:
                # Multiple MPs => Group Representation Constituency.
                names = ', '.join(members)
                msg = "Your GRC is " + grc + " and your MPs are: " + names
            else:
                # Single MP => Single Member Constituency.
                msg = "Your SMC is " + grc + " and your MP is: " + members[0]
        else:
            msg = "Postal code is not yet in database/ does not exist!"
    else:
        msg = "Invalid postal code!"
    bot.send_message(chat_id=update.message.chat_id, text=msg)
def update_db(self):
    """Synchronize the local database with the latest scraper output.

    For every (data, currency, city) tuple yielded by scraping():
      * insert the city if it is not already stored,
      * insert the currency if it is not already stored,
      * insert the apartment, replacing an existing row whose
        ``update_date`` has changed.

    Commits once at the end and closes the session.
    """
    for data, currency, city in scraping():
        # Normalize the raw city value into the dict shape add_city expects.
        city = create_city_dict(city)
        # Insert the city only if no row with this name exists yet.
        if not self.session.query(City).filter(
                City.name == city['name']).one_or_none():
            self.add_city(city)
        # Insert the currency only if it is not already stored.
        if not self.session.query(Currency).filter(
                Currency.currency_name == currency).one_or_none():
            self.add_currency(currency)
        apartment_in_db = self.session.query(Apartment).filter(
            Apartment.id == data["id"]).one_or_none()
        if apartment_in_db:
            # Listing already stored: refresh it only when the feed carries
            # an update_date that differs from the stored one — delete the
            # stale row and re-add the new data.
            if 'update_date' in data.keys(
            ) and apartment_in_db.update_date != data['update_date']:
                self.session.delete(apartment_in_db)
                self.add_apartment(data, currency, city)
        else:
            # New listing: add it directly.
            self.add_apartment(data, currency, city)
    # Single commit for the whole batch, then release the session.
    self.session.commit()
    self.session.close()
def test_scraping(url=url1, scraper=scraper):
    """Scraping the known article URL must return exactly its headline."""
    expected = ['¿Cómo funcionan los nuevos test covid de las farmacias?']
    headlines = scraping(url, scraper)
    assert headlines == expected
# Script: train an auto-scraper on one article page, then reuse it on another.
from scraper import prepare_scraper, scraping
import logging

# Log DEBUG and above to app.log, overwriting it on every run (filemode='w').
logging.basicConfig(level=logging.DEBUG,
                    filename='app.log',
                    filemode='w',
                    format='%(asctime)s - %(levelname)s - %(message)s')
logging.debug('Starting the script')

# Article used to train the scraper (its headline is in wanted_list below).
url_train = 'https://www.elperiodicoextremadura.com/noticias/extremadura/' \
            'claves-saber-si-terraza-invierno-debe-estar-' \
            '40-50-aforo_1260164.html'
# Second article from the same site, used to test the trained scraper.
url1 = 'https://www.elperiodicoextremadura.com/noticias/extremadura/' \
       'como-funcionan-nuevos-test-covid-farmaciaseuros_1260171.html'
# Known headline of url_train; prepare_scraper learns to extract elements
# like this one.
wanted_list = [
    "Claves para saber si una terraza de invierno debe estar al"
    " 40% o al 50% del aforo"
]

if __name__ == '__main__':
    logging.debug('Starting "prepare_scraper"')
    scraper = prepare_scraper(url_train, wanted_list)
    logging.debug('Starting "scraping"')
    # Apply the trained scraper to the second article.
    result = scraping(url1, scraper)
    print(result)
    logging.debug('Script finished well')
def save_progress(scraped_data_frame):
    """Write an Excel checkpoint: source workbook columns + scraped columns."""
    input_data_frame = pd.read_excel(file_name)
    combined = pd.concat([input_data_frame, scraped_data_frame], axis=1)
    createExcel(combined)
    print("Progress Saved")


id_iterator = id_counter_generator(file_name)

# Accumulator for one scraped row per idea id.
result_columns = [
    "Status", "PM_Response", "Date", "Solution", "Details",
    "Comment1", "Comment2", "Comment3", "Comment4", "Comment5",
    "Comment6", "Comment7", "Comment8", "Comment9", "Comment10",
]
results = pd.DataFrame(columns=result_columns)

counter = 0
# Scrape every idea and fold its fields into the results frame.
for counter, idea_id in enumerate(id_iterator, start=1):
    print("------------------------------")
    results = addToDataFrame(results, *scraping(idea_id))
    print(counter)
    # Checkpoint every 100 entries so a crash loses at most 100 rows.
    if counter % 100 == 0:
        save_progress(results)

# Flush whatever remains after the last full hundred.
save_progress(results)