# Scraper entry script for the Northern Michigan staff directory.
from pyquery import PyQuery as pq
import scraper
import constants

# Section label -> sport constant(s). A list value assigns one section to
# several sports. Presumably each key must match the header text on the
# scraped page exactly -- confirm against scraper.scrape_roster_row_site.
# NOTE(review): "Nordic Skiiing" looks misspelled; it is left untouched in
# case it mirrors the spelling used on the site itself -- verify.
sports = {
    "Football": constants.FOOTBALL,
    "Men's Basketball": constants.MENS_BASKETBALL,
    "Men's Golf": constants.MENS_GOLF,
    "Men's Ice Hockey": constants.MENS_ICE_HOCKEY,
    "Nordic Skiiing": [
        constants.MENS_NORDIC_SKIING,
        constants.WOMENS_NORDIC_SKIING,
    ],
    "Women's Basketball": constants.WOMENS_BASKETBALL,
    "Women's Soccer": constants.WOMENS_SOCCER,
    "Women's Swimming & Diving": constants.WOMENS_SWIMMING_DIVING,
    "Women's Track & Field": constants.WOMENS_TRACK_FIELD,
    "Women's Volleyball": constants.WOMENS_VOLLEYBALL,
    "Women's Cross Country": constants.WOMENS_CROSS_COUNTRY,
}

print("Scraping Northern Michigan")

# Arguments: school name, label mapping, header tag, per-row field names.
scraper.scrape_roster_row_site(
    "Northern Michigan",
    sports,
    "h2",
    ["name", "title", "phone", "email"],
)
# Scraper entry script for the Yale University staff directory.
from pyquery import PyQuery as pq
import scraper
import constants

# Yale-specific section labels added on top of the shared mapping.
# NOTE(review): these assignments mutate scraper.sports in place, so any
# other user of scraper.sports in the same process sees them too --
# confirm that this is intended.
scraper.sports["Women's Sailing"] = constants.SAILING
scraper.sports["Coed Sailing"] = constants.SAILING
scraper.sports["Heavyweight Crew"] = constants.MENS_ROWING
scraper.sports["Lightweight Crew"] = constants.MENS_ROWING


def get_table(header):
    """Return the element two siblings after the parent of *header*."""
    container = header.parent()
    first_sibling = container.next()
    return first_sibling.next()


def get_finder(header_tag, key):
    """Build the selector ``<header_tag>:contains("Yale <key>")``."""
    contains_clause = ':contains("Yale ' + key + '")'
    return header_tag + contains_clause


print("Scraping Yale")

scraper.scrape_roster_row_site(
    "Yale University",
    scraper.sports,
    "b",
    fields=['name', 'title', 'phone', 'email'],
    get_table=get_table,
    get_finder=get_finder,
)
# Scraper entry script for the Holy Cross staff directory.
from pyquery import PyQuery as pq
import scraper
import constants

# Section label -> sport constant(s). A list value assigns one combined
# section to every sport it covers. Presumably each key must match the
# header text on the scraped page -- confirm against the scraper module.
sports = {
    "Cheerleading": constants.CHEERLEADING,
    "Baseball": constants.BASEBALL,
    "Men's Basketball": constants.MENS_BASKETBALL,
    "Women's Basketball": constants.WOMENS_BASKETBALL,
    "Field Hockey": constants.FIELD_HOCKEY,
    "Football": constants.FOOTBALL,
    "Men's and Women's Golf": [
        constants.MENS_GOLF,
        constants.WOMENS_GOLF,
    ],
    "Men's Ice Hockey": constants.MENS_ICE_HOCKEY,
    "Women's Ice Hockey": constants.WOMENS_ICE_HOCKEY,
    "Men's Lacrosse": constants.MENS_LACROSSE,
    "Women's Lacrosse": constants.WOMENS_LACROSSE,
    "Men's & Women's Rowing": [
        constants.MENS_ROWING,
        constants.WOMENS_ROWING,
    ],
    "Men's Soccer": constants.MENS_SOCCER,
    "Women's Soccer": constants.WOMENS_SOCCER,
    "Softball": constants.SOFTBALL,
    "Men's & Women's Swimming & Diving": [
        constants.MENS_SWIMMING_DIVING,
        constants.WOMENS_SWIMMING_DIVING,
    ],
    "Men's & Women's Tennis": [
        constants.MENS_TENNIS,
        constants.WOMENS_TENNIS,
    ],
    "Men's & Women's Track & Field / Cross Country": [
        constants.MENS_CROSS_COUNTRY,
        constants.WOMENS_CROSS_COUNTRY,
        constants.MENS_TRACK_FIELD,
        constants.WOMENS_TRACK_FIELD,
    ],
    "Volleyball": constants.WOMENS_VOLLEYBALL,
}


def get_table(header):
    """Return the element immediately following the parent of *header*."""
    container = header.parent()
    return container.next()


print("Scraping Holy Cross")

# Arguments: school name, label mapping, header tag, per-row field names,
# plus the custom header-to-table locator defined above.
scraper.scrape_roster_row_site(
    "Holy Cross",
    sports,
    "h3",
    ["name", "title", "phone", "email"],
    get_table=get_table,
)
# Scraper entry script for the Michigan Tech staff directory.
from pyquery import PyQuery as pq
import scraper
import constants

# Section label -> sport constant(s). A list value assigns one shared
# section to both the men's and women's sport. Presumably each key must
# match the header text on the scraped page -- confirm against scraper.
sports = {
    "Cross Country": [
        constants.MENS_CROSS_COUNTRY,
        constants.WOMENS_CROSS_COUNTRY,
    ],
    "Football": constants.FOOTBALL,
    "Men's Basketball": constants.MENS_BASKETBALL,
    "Men's Ice Hockey": constants.MENS_ICE_HOCKEY,
    "Men's Tennis": constants.MENS_TENNIS,
    "Nordic Skiing": [
        constants.MENS_NORDIC_SKIING,
        constants.WOMENS_NORDIC_SKIING,
    ],
    "Track & Field": [
        constants.MENS_TRACK_FIELD,
        constants.WOMENS_TRACK_FIELD,
    ],
    "Women's Basketball": constants.WOMENS_BASKETBALL,
    "Women's Soccer": constants.WOMENS_SOCCER,
    "Women's Tennis": constants.WOMENS_TENNIS,
    "Women's Volleyball": constants.WOMENS_VOLLEYBALL,
}

print("Scraping Michigan Tech")

# Arguments: school name, label mapping, header tag, per-row field names.
scraper.scrape_roster_row_site(
    "Michigan Tech",
    sports,
    "h2",
    ["name", "title", "phone", "email"],
)