def scoreboardByDate(date):
    # browsers, get_browser, and the ESPN URL constants are module-level helpers defined elsewhere
    if date not in browsers:
        # lazily open a browser session for a date we have not loaded yet
        print('loading again for ' + date)
        browsers[date] = get_browser()
        browsers[date].get(ESPN_BASE + SCOREBOARD_ENDPOINT + DATE_EXTENSION.format(date))
    soup = get_soup(browsers[date].page_source)
    return jsonify(get_all_games(soup))
def record_hour(records, trys):
    # database, get_soup, location_info, DateDecoder, gather_data, and
    # list_of_locations are module-level helpers defined elsewhere
    last_date = database.get_last_date().strftime('%Y-%m-%d %H:%M:%S')
    soup = get_soup(location_info("central-western")[1])
    date = soup.select("#cltNormal table tbody tr td.H24C_ColDateTime")[0].text
    comp_date = str(DateDecoder(date, timezone='Asia/Hong_Kong').sqlTimestamp())
    print(last_date)
    print(comp_date)
    trys += 1
    # keep retrying every five minutes, for up to 55 minutes in total
    if last_date == comp_date:
        print("TRIED %s TIMES" % trys)
        if trys <= 11:
            print("THE SAME")
            # wait five minutes and try again
            sleep(300)
            record_hour(records, trys)
        else:
            print("NO UPDATE, ALL DONE")
    else:
        print("NOT SAME")
        database.insert_records(gather_data(list_of_locations))
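# Hedged usage sketch, not part of the original module: record_hour retries
# recursively, so a caller would presumably seed it with an empty record list
# and a zero retry count. The seed values below are assumptions.
if __name__ == '__main__':
    record_hour([], 0)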
import scrape
import analyze as az
from plot import plot
import db as db
from debug import print_cases
from log import logger_setup
from all_covid_data import all_covid_data

logger = logger_setup("main.py")
logger.info("*************** begin script ***************")

# create soup object from html
url = 'https://eblanding.com/covid-19-case-report-summary/'
soup = scrape.get_soup(url, print_flag=False)
logger.info('create soup object from html')

# parse all <pre> html tags
covid_data_pre = scrape.parse_html('pre', soup)
logger.info("parse all <pre> html tags")

# parse all <p> html tags
covid_data_p = scrape.parse_html('p', soup)  # October 19th & 23rd
logger.info("parse all <p> html tags")

# merge <pre> and <p> lists
covid_data = az.merge_day_list(covid_data_p, covid_data_pre)
logger.info("merge <pre> and <p> lists")

# print_cases(covid_data, 3)
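# Hedged sketch, not from the project source: scrape.get_soup presumably wraps
# requests + BeautifulSoup roughly like this; print_flag is assumed to control
# debug output of the fetched HTML, and the parser choice is an assumption.
import requests
from bs4 import BeautifulSoup


def get_soup(url, print_flag=False):
    # fetch the page and fail loudly on HTTP errors
    response = requests.get(url)
    response.raise_for_status()
    if print_flag:
        print(response.text)
    # parse the raw HTML into a BeautifulSoup tree
    return BeautifulSoup(response.text, 'html.parser')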
def boxscore(game_id):
    # parses a locally saved page; game_id is currently unused
    with open('index.html', 'r') as file:
        soup = get_soup(file.read())
    return jsonify(get_boxscore(soup))
def scoreboard():
    # refresh today's cached browser session and re-parse the scoreboard
    browsers['today'].refresh()
    soup = get_soup(browsers['today'].page_source)
    return jsonify(get_all_games(soup))
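# Hedged sketch, not part of the original app: scoreboard, scoreboardByDate,
# and boxscore return flask.jsonify responses, so they are presumably
# registered as Flask routes roughly like this. The route paths here are
# assumptions, not the project's actual URL scheme.
from flask import Flask

app = Flask(__name__)
app.add_url_rule('/scoreboard', view_func=scoreboard)
app.add_url_rule('/scoreboard/<date>', view_func=scoreboardByDate)
app.add_url_rule('/boxscore/<game_id>', view_func=boxscore)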
import pandas as pd
import numpy as np
from os.path import abspath
# sc and st below are project-specific modules imported elsewhere in the original file


def main():
    '''
    'math', 'phys', 'chem', 'earth', 'geo', 'eco', 'mechEng', 'electric',
    'automation', 'telecom', 'bioMed', 'compSci', 'civil', 'chemEng',
    'materialSci', 'nano', 'energy', 'enviro', 'water', 'biotech',
    'aerospace', 'marineEng', 'transport', 'remoteSensing', 'bio',
    'humanBio', 'clinicalMed', 'pubHlth', 'medTech', 'pharma', 'econ',
    'stats', 'poliSci',
    '''
    subjects = ('sociology', 'edu', 'psych', 'finance', 'mngmnt')
    db = pd.HDFStore('database.h5')
    print(db)
    school_data = db['store']

    def get_user_action(msg):
        # show the current table, report what happened, and let the user save or quit
        print(school_data.to_string())
        print(msg)
        saved = False
        if input('Save data? ') != 'n':
            db['store'] = school_data
            saved = True
        if input('Quit? ') == 'y':
            if not saved:
                if input('Sure you don\'t want to save? ') != 'y':
                    db['store'] = school_data
            if input('Really quit? ') == 'y':
                db.close()
                exit()

    for sub in subjects:
        try:
            print('\nLoading Shanghai %s data' % sub)
            shanghai = sc.get_soup(r'file:\\' + abspath('Shanghai Rankings\%s.html' % sub))
            table = shanghai.find('table', id='UniversityRanking')('tr', limit=50)[1:]
        except:
            get_user_action('Failed.')
            # skip this subject if its rankings page could not be loaded or parsed
            continue
        for row in table:
            data = row('td')
            country = data[2].img['title']
            if country in ('United States', 'Canada', 'United Kingdom'):
                if data[1].a:
                    name = data[1].a.string
                else:
                    name = data[1].string
                if country != 'United States':
                    # adds/overwrites a blank row for non-US schools
                    school_data.loc[name] = [np.nan] * len(school_data.columns)
                    print('%s (%s) added.' % (name, country))
                elif name not in school_data.index:
                    soup = sc.find_school_princeton_page(name)
                    if soup:
                        st.store(soup, name, school_data)
                    else:
                        get_user_action('Match for %s not found.' % name)
                try:
                    # .loc[name, sub] avoids chained assignment, which can silently drop the value
                    school_data.loc[name, sub] = float(data[3].string)
                except ValueError:
                    get_user_action('Failed to store subject score for %s.' % name)
        get_user_action('End of category reached.')
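# Hedged usage note (assumption, not shown in the original source): main() is
# presumably invoked when the module is run directly as a script.
if __name__ == '__main__':
    main()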