Python scraping Beispiele

Programmiersprache: Python

Namespace / Paketname: scraper

Methode / Funktion: scraping

Beispiele auf hotexamples.com: 5

Python scraping – 5 Beispiele gefunden. Dies sind die am besten bewerteten Python-Beispiele für die Funktion scraper.scraping, die aus Open-Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um deren Qualität zu verbessern.

Beispiel #1

Datei anzeigen

def respond(bot, update):
    """Answer an incoming message with the MP(s) for the postal code it holds.

    The message text is treated as a postal code.  Text containing "win" or
    "election" gets a canned reply, text rejected by ``isValid`` gets an
    error reply, and anything else is looked up through the postcode
    service; the members returned by ``scraping`` for the resulting GRC are
    then listed in the reply.

    Args:
        bot: Object exposing ``send_message(chat_id=..., text=...)``.
        update: Incoming update; ``update.message.text`` and
            ``update.message.chat_id`` are read.
    """
    text = update.message.text

    if any(word in text for word in ("win", "election")):
        reply = "Sorry there's no public information on this. Democracy is uncertain. :/"
    elif not isValid(text):
        reply = "Invalid postal code!"
    else:
        # A well-formed postal code does not necessarily exist, so the
        # lookup result still has to be checked.
        lookup = get_json_from_url(
            "https://sggrc.herokuapp.com/postcode/" + text)
        grc = lookup["grc"]
        if not grc:
            reply = "Postal code is not yet in database/ does not exist!"
        else:
            members = scraping(grc)
            if len(members) > 1:
                reply = ("Your GRC is " + grc + " and your MPs are: "
                         + ', '.join(members))
            else:
                reply = "Your SMC is " + grc + " and your MP is: " + members[0]

    bot.send_message(chat_id=update.message.chat_id, text=reply)

Beispiel #2

Datei anzeigen

Datei: real_estate_db.py Projekt: RubenBejanyan/real_estate_app

    def update_db(self):
        """Bring the database in line with the records yielded by ``scraping()``.

        For every ``(data, currency, city)`` triple:

        * the city (after ``create_city_dict``) is added if no row with the
          same name exists;
        * the currency is added if no row with the same name exists;
        * the apartment is added if its id is unknown, or deleted and
          re-added when ``data`` carries an ``update_date`` that differs
          from the stored one.

        Each record is committed on its own.  The session is closed when the
        method finishes, including when it exits with an exception.
        """
        # NOTE(review): self.session is presumably a SQLAlchemy Session --
        # confirm against the class constructor.
        try:
            for data, currency, city in scraping():
                city = create_city_dict(city)

                known_city = self.session.query(City).filter(
                    City.name == city['name']).one_or_none()
                if not known_city:
                    self.add_city(city)

                known_currency = self.session.query(Currency).filter(
                    Currency.currency_name == currency).one_or_none()
                if not known_currency:
                    self.add_currency(currency)

                apartment_in_db = self.session.query(Apartment).filter(
                    Apartment.id == data["id"]).one_or_none()

                if not apartment_in_db:
                    self.add_apartment(data, currency, city)
                elif ('update_date' in data
                      and apartment_in_db.update_date != data['update_date']):
                    # The listing changed since it was stored: replace it.
                    self.session.delete(apartment_in_db)
                    self.add_apartment(data, currency, city)

                self.session.commit()
        finally:
            # Close the session even if scraping, a query or a commit raised,
            # so a failed run does not leave it open.
            self.session.close()

Beispiel #3

Datei anzeigen

def test_scraping(url=url1, scraper=scraper):
    """Check that ``scraping(url, scraper)`` returns the expected one-item list.

    The defaults are the module-level ``url1`` and ``scraper``, bound when
    this function is defined.
    """
    expected = ['¿Cómo funcionan los nuevos test covid de las farmacias?']
    assert scraping(url, scraper) == expected

Beispiel #4

Datei anzeigen

Datei: main.py Projekt: camorales197/News_Scraping

from scraper import prepare_scraper, scraping
import logging

# Log everything from DEBUG upwards to app.log; filemode 'w' truncates the
# file on every run.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    filemode='w',
    filename='app.log',
    level=logging.DEBUG,
)
logging.debug('Starting the script')

# Both article URLs share this prefix.
_SECTION_URL = 'https://www.elperiodicoextremadura.com/noticias/extremadura/'

# Page handed to prepare_scraper together with wanted_list.
url_train = (
    _SECTION_URL
    + 'claves-saber-si-terraza-invierno-debe-estar-40-50-aforo_1260164.html'
)

# Page handed to scraping together with the prepared scraper.
url1 = (
    _SECTION_URL
    + 'como-funcionan-nuevos-test-covid-farmaciaseuros_1260171.html'
)

# Text handed to prepare_scraper alongside url_train.
wanted_list = [
    "Claves para saber si una terraza de invierno debe estar al "
    "40% o al 50% del aforo",
]

if __name__ == '__main__':
    logging.debug('Starting "prepare_scraper"')
    scraper = prepare_scraper(url_train, wanted_list)

    logging.debug('Starting "scraping"')
    result = scraping(url1, scraper)
    print(result)

    logging.debug('Script finished well')

Beispiel #5

Datei anzeigen

Datei: main.py Projekt: SalesforceSFDC/Salesforce-Web-Scraper

# Creates the Excel file of scraped data
def save_progress(scraped_data_frame):
    """Write the input sheet plus the scraped columns out via ``createExcel``.

    Reads the Excel file named by the module-level ``file_name``, places
    ``scraped_data_frame`` next to it column-wise (``axis=1``), passes the
    combined frame to ``createExcel`` and prints a confirmation.

    Args:
        scraped_data_frame: Frame holding the scraped results so far.
    """
    side_by_side = [pd.read_excel(file_name), scraped_data_frame]
    createExcel(pd.concat(side_by_side, axis=1))
    print("Progress Saved")


# Ids to scrape, as produced by id_counter_generator for file_name.
id_iterator = id_counter_generator(file_name)

# Result frame: five fixed columns followed by Comment1 .. Comment10.
_fixed_columns = ["Status", "PM_Response", "Date", "Solution", "Details"]
_comment_columns = ["Comment" + str(n) for n in range(1, 11)]
results = pd.DataFrame(columns=_fixed_columns + _comment_columns)
counter = 0

# Scrape each idea and pass it through addToDataFrame
for idea_id in id_iterator:
    print("------------------------------")
    scraped_idea = scraping(idea_id)
    results = addToDataFrame(results, *scraped_idea)
    counter += 1
    print(counter)
    # Checkpoint: save progress every 100 entries
    if not counter % 100:
        save_progress(results)

# Save the final state
save_progress(results)