def buildO2CMCompTable(comp_ids, quick):
    """Scrape the o2cm results site and store each competition in the DB.

    Walks backwards one month at a time from the current month until the
    year counter reaches 2004 (2004 itself is not searched). For every
    competition link found on the results page, a row is inserted into the
    ``competitions`` table unless its id is already in ``comp_ids``.

    Args:
        comp_ids: Set of competition ids that are already stored. Newly
            inserted ids are added to it in place.
        quick: If truthy, stop as soon as an already-known competition is
            encountered instead of scanning the remaining history.

    Returns:
        None.
    """
    print("\nScraping competitions from o2cm")

    # Connect to dreamhost db and initialize cursor
    mydb = database.getDB()
    cursor = mydb.cursor()

    # try/finally so the cursor is also released on the `quick` early return
    # and when scraping or an insert raises.
    try:
        # Create the competitions table if it does not exist yet
        cursor.execute("CREATE TABLE IF NOT EXISTS competitions "
                       + "(comp_id varchar(255) NOT NULL, "
                       + "comp_name varchar(255), "
                       + "is_nqe varchar(255), "
                       + "date varchar(255), "
                       + "PRIMARY KEY (comp_id))")

        # Parameterized so competition names containing quotes (e.g.
        # "Dancer's Cup") cannot break or inject into the statement.
        insert = ("INSERT INTO competitions "
                  + "(comp_id, comp_name, is_nqe, date) "
                  + "VALUES (%s, %s, %s, %s)")

        # Initialize date variables
        current_date = datetime.datetime.now()
        year = current_date.year
        month = current_date.month

        while year != 2004:
            print("searching", year, month)

            allCompResultsPage = BeautifulSoup(
                _fetchO2CMResultsHTML(year, month), 'html.parser')
            compHTMLTable = allCompResultsPage.select('table[id=main_tbl]')[0]
            rows = compHTMLTable.select('tr')

            # Countdown to help sort results from same date
            counter = len(rows)

            # Iterate through table of competitions
            for row in rows:
                # An anchor element with href attribute indicates a link to
                # a competition results page.
                links = row.select('a[href]')
                if links:
                    link = links[0]

                    # Comp ID: the value of the "event" query parameter
                    comp_id = re.search(
                        r'(?<=event=).*?(?=&)', link['href']).group(0)
                    print(comp_id)

                    if comp_id in comp_ids:
                        print("competition id '" + comp_id
                              + "' already present")
                        if quick:
                            return
                    else:
                        comp_ids.add(comp_id)

                        # Date: first cell holds "<Mon> <day>"
                        day_string = row.select('td')[0].get_text(strip=True)
                        month_string = scraper_utils.numericalMonth(
                            day_string[0:3])
                        day = day_string[4:]

                        # Competition name
                        comp_name = link.get_text(strip=True)

                        # NQE? If a competition's name includes the string
                        # 'nqe' after cleaning, we assume it's an NQE.
                        is_NQE = bool(re.search(
                            "nqe", scraper_utils.cleanText(comp_name)))

                        date = (str(year) + '-' + month_string + "-" + day
                                + "-" + str(counter))

                        # Add comp to db
                        values = (comp_id, comp_name, str(is_NQE), date)
                        print(insert, values)
                        cursor.execute(insert, values)
                        mydb.commit()

                # Decrement counter.
                # NOTE(review): the source's indentation was lost; one
                # decrement per table row is assumed because the counter
                # starts at the total row count - confirm.
                counter = counter - 1

            # Update date variables
            month = month - 1
            if month == 0:
                month = 12
                year = year - 1
    finally:
        # Close db cursor
        cursor.close()


def _fetchO2CMResultsHTML(year, month):
    """Return the o2cm results page HTML filtered to one year and month."""
    # Initialize web driver
    options = webdriver.ChromeOptions()
    options.add_argument('headless')
    driver = webdriver.Chrome(options=options)
    try:
        # GET request to competition results page
        driver.get('http://results.o2cm.com/')

        # Find form and filter by year and month
        month_element = driver.find_element_by_id("inmonth")
        month_element.clear()
        month_element.send_keys(month)
        year_element = driver.find_element_by_id("inyear")
        year_element.clear()
        year_element.send_keys(year)
        driver.find_element_by_name("Go").click()

        return driver.page_source
    finally:
        # One browser is started per month searched; quit it so headless
        # Chrome processes do not pile up.
        driver.quit()
import mysql.connector

import lib.utils.database as database

# Open the database connection and a cursor for the checks below.
mydb = database.getDB()
cursor = mydb.cursor()

# Checks still to be written:
#
#   Database
#     - the DB exists
#
#   Competitions
#     - the competitions table exists
#     - certain records exist and have correct info
#
#   Events
#     - the events table exists
#     - certain records exist and have correct info
#
#   Placements
#     - the placements table exists
#     - certain records exist and have correct info
def buildBCECompTable(comp_ids, quick):
    """Pull competitions from Ballroom Comp Express and store them in the DB.

    Fetches the competition list from the Ballroom Comp Express API and
    inserts one row into the ``competitions`` table for every competition
    whose id is not already in ``comp_ids``.

    Args:
        comp_ids: Set of competition ids that are already stored. Newly
            inserted ids are added to it in place.
        quick: If truthy, stop as soon as an already-known competition is
            encountered instead of processing the rest of the list.

    Returns:
        None.

    Raises:
        requests.HTTPError: If the API responds with an error status.
    """
    print("\nPulling competitions from Ballroom Comp Express")

    # Connect to dreamhost db and initialize cursor
    mydb = database.getDB()
    cursor = mydb.cursor()

    # try/finally so the cursor is also released on the `quick` early return
    # and when the request or an insert raises.
    try:
        # Create the competitions table if it does not exist yet
        cursor.execute("CREATE TABLE IF NOT EXISTS competitions"
                       + " (comp_id varchar(255) NOT NULL,"
                       + " comp_name varchar(255),"
                       + " is_nqe varchar(255),"
                       + " date varchar(255),"
                       + " PRIMARY KEY (comp_id))")

        # Call Ballroom Comp Express API.
        # NOTE(review): the API key is hard-coded here; consider moving it
        # to configuration.
        apikey = {"apikey": "8OGUr7i7bxohfo16"}
        r = requests.get("https://ballroomcompexpress.com/api/competitions",
                         params=apikey)
        # Fail loudly on an HTTP error rather than trying to parse an error
        # body as a competition list.
        r.raise_for_status()
        competitions = r.json()

        # The response is a JSON list of comp objects:
        # [
        #   {
        #     "compid": "13",
        #     "name": "U Dance Fest",
        #     "startdate": "2020-03-07",
        #     "enddate": "2020-03-08",
        #     "email": "*****@*****.**",
        #     "website": "http://udancefest.com",
        #     "city": "St Paul",
        #     "state": "MN",
        #     "type": "1"
        #   },
        #   ...
        # ]

        # Parameterized so names containing quotes cannot break or inject
        # into the statement.
        insert = ("INSERT INTO competitions "
                  + "(comp_id, comp_name, is_nqe, date) "
                  + "VALUES (%s, %s, %s, %s)")

        for competition in competitions:
            comp_id = competition["compid"]

            if comp_id in comp_ids:
                print(comp_id, competition["name"], "already present in DB")
                if quick:
                    return
                continue

            comp_ids.add(comp_id)

            # "type" arrives as a string in the sample payload above, so
            # comparing it to the int 2 could never be true; compare as text.
            is_NQE = (str(competition["type"]) == "2")

            # Add comp to db
            values = (comp_id, competition["name"], str(is_NQE),
                      competition["startdate"])
            print(insert, values)
            cursor.execute(insert, values)
            mydb.commit()
    finally:
        # Close db cursor
        cursor.close()
def addToDB():
    """Load scraped placements from output/placements.txt into the database.

    Each line of the file is parsed with ``ast.literal_eval`` and must yield
    a sequence of at least nine items, in this order: event_id, placement,
    competitor_number, lead_name, lead_id, follow_name, follow_id, location,
    raw_text.

    Rows are first staged in ``temp_placements``. Staged rows whose event_id
    exists in ``events`` are then copied into ``placements``, and staged rows
    matched by the final DELETE are removed from ``temp_placements``.

    Returns:
        None. Prints a message and does nothing further when the placements
        file does not exist.
    """
    # Connect to db and initialize cursor
    mydb = database.getDB()
    cursor = mydb.cursor()

    # Column list shared by the staging insert and the promotion insert.
    columns = ("event_id, "
               + "placement, "
               + "competitor_number, "
               + "lead_name, "
               + "lead_id, "
               + "follow_name, "
               + "follow_id, "
               + "location, "
               + "raw_text")

    # try/finally so the cursor is released on the early return and when a
    # statement raises.
    try:
        # Create the placements tables if they do not exist yet
        cursor.execute(
            "CREATE TABLE IF NOT EXISTS placements ("
            + " event_id varchar(255) NOT NULL,"
            + " placement float,"
            + " competitor_number int,"
            + " lead_name varchar(255),"
            + " lead_id varchar(255),"
            + " follow_name varchar(255),"
            + " follow_id varchar(255),"
            + " location varchar(50),"
            + " raw_text varchar(255),"
            + " CONSTRAINT event_entry PRIMARY KEY (event_id,competitor_number),"
            + " FOREIGN KEY (event_id) REFERENCES events (event_id))")
        cursor.execute(
            "CREATE TABLE IF NOT EXISTS temp_placements ("
            + " event_id varchar(255) NOT NULL,"
            + " placement float,"
            + " competitor_number int,"
            + " lead_name varchar(255),"
            + " lead_id varchar(255),"
            + " follow_name varchar(255),"
            + " follow_id varchar(255),"
            + " location varchar(50),"
            + " raw_text varchar(255),"
            + " CONSTRAINT event_entry PRIMARY KEY (event_id,competitor_number))")

        if not os.path.exists("output/placements.txt"):
            print("No placements to add to DB.")
            return

        # Parameterized: names and raw text containing quotes or backslashes
        # are passed as data. This replaces the earlier use of re.escape,
        # which escapes regex metacharacters and is not an SQL escaper.
        stage = ("INSERT IGNORE INTO temp_placements (" + columns + ") "
                 + "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)")

        with open("output/placements.txt", "r") as placements_file:
            for line in placements_file:
                # Tolerate blank lines (e.g. a trailing newline at EOF)
                if not line.strip():
                    continue
                placement_summary = ast.literal_eval(line)

                # Stage the placement summary. placement and
                # competitor_number are sent as text, as before, and the
                # database coerces them to the column types.
                cursor.execute(stage, (
                    placement_summary[0],
                    str(placement_summary[1]),
                    str(placement_summary[2]),
                    placement_summary[3],
                    placement_summary[4],
                    placement_summary[5],
                    placement_summary[6],
                    placement_summary[7],
                    placement_summary[8]))
                mydb.commit()

        # Promote staged rows whose event is known. Columns are named
        # explicitly instead of SELECT * so the copy does not depend on the
        # physical column order of temp_placements.
        cursor.execute(
            "INSERT IGNORE INTO placements (" + columns + ") "
            + "SELECT " + columns + " from temp_placements "
            + "WHERE event_id IN (SELECT event_id FROM events)")
        mydb.commit()

        # Delete rows from temp placements that have been successfully
        # inserted into placements.
        # NOTE(review): event_id and placement are matched independently
        # here rather than as a pair - confirm this is intended.
        delete = (
            "DELETE FROM temp_placements WHERE event_id IN (SELECT event_id FROM placements) "
            + "AND placement IN (SELECT placement FROM placements)")
        cursor.execute(delete)
        mydb.commit()
    finally:
        # Close DB cursor
        cursor.close()