class WikipediaService(object):
    def __init__(self, country=None, url=None):
        if country is not None:
            self.country = country
        if url is not None:
            self.scraper = Scraper(url=url)

    def get_global_stats(self):
        print("Fetching global stats.")
        self.scraper = Scraper(
            url="https://en.m.wikipedia.org/wiki/2019%E2%80%9320_coronavirus_pandemic_by_country_and_territory"
        )
        res = self.process_table("Countries and territories", "2", "-2", "2", "3", "4", "5")
        return res

    def search_table(self, text, index=0):
        table = self.scraper.find_table_with_text(text, index)
        lst = self.scraper.table_to_2d_list(table)
        return lst

    def process_table(self, table_name, start_row, end_row, region_col,
                      infected_col, death_col, recovered_col, table_index=None):
        if isinstance(table_index, str) and table_index.isnumeric():
            table = self.search_table(table_name, int(table_index) - 1)
        else:
            table = self.search_table(table_name)
        if start_row.isnumeric():
            table = table[int(start_row) - 1:]
        if end_row.lstrip('-+').isnumeric():
            table = table[:int(end_row)]
        result = []
        for row in table:
            region = row[int(region_col) - 1]
            # Default to "0" for any column that is absent or blank
            infected = deceased = recovered = "0"
            if infected_col.isnumeric():
                infected = row[int(infected_col) - 1]
                infected = infected if infected.strip() else "0"
            if death_col.isnumeric():
                deceased = row[int(death_col) - 1]
                deceased = deceased if deceased.strip() else "0"
            if recovered_col.isnumeric():
                recovered = row[int(recovered_col) - 1]
                recovered = recovered if recovered.strip() else "0"
            d = dict(
                region=region,
                infected=infected,
                deaths=deceased,
                recoveries=recovered,
            )
            result.append(d)
        return result
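# The service above assumes a Scraper exposing find_table_with_text() and
# table_to_2d_list(). A minimal sketch of those two helpers, assuming a
# requests/BeautifulSoup backend -- an illustration of the expected
# interface, not the project's actual implementation.
import requests
from bs4 import BeautifulSoup


class Scraper:
    def __init__(self, url=None):
        self.soup = BeautifulSoup(requests.get(url).text, "html.parser") if url else None

    def find_table_with_text(self, text, index=0):
        # Return the index-th <table> whose text contains the given caption.
        matches = [t for t in self.soup.find_all("table") if text in t.get_text()]
        return matches[index]

    def table_to_2d_list(self, table):
        # Flatten header and body rows into a list of lists of cell text.
        return [[cell.get_text(strip=True) for cell in row.find_all(["th", "td"])]
                for row in table.find_all("tr")]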
def scrape(self):
    with open(self.FILENAME, 'r', encoding='utf-8-sig') as f:
        url_list = f.read().splitlines()
    app = Scraper()
    for i, url in enumerate(url_list, start=1):
        now = datetime.now().strftime("%Y%m%d_%H%M")
        self.progress_label.config(text=f"{i}/{len(url_list)}")
        EXTRACTED = []
        p = 1
        while p <= self.page.get():
            self.progress['value'] = (p / self.page.get()) * 100
            root.update_idletasks()
            data = app.extract_main_url(url, p)
            EXTRACTED.extend(data)
            p += 1
        df = pd.DataFrame(EXTRACTED)
        df = df.dropna(axis=0, how='all')
        df.to_csv(f'{now}.csv', encoding='utf-8-sig', index=False)
        self.progress['value'] = 0
        root.update_idletasks()
    messagebox.showinfo('info', message="Crawling complete!")
    return
def test_scrape_price(self):
    urls = [
        'https://geizhals.de/hisense-55a7100f-a2286242.html?hloc=at&hloc=de',
        'https://geizhals.de/gigabyte-g27qc-a2304341.html?hloc=at&hloc=de',
        'https://geizhals.de/apple-iphone-se-2020-64gb-schwarz-a2273374.html',
        'https://geizhals.de/apple-macbook-air-space-gray-mwtj2d-a-a2255044.html',
        'https://geizhals.de/apple-ipad-10-2-128gb-mw772fd-a-mw772ll-a-a2132800.html',
    ]
    for url in urls:
        with self.subTest(url=url):
            self.assertIsInstance(Scraper.scrape_price(url), float,
                                  msg='price should be float')
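# Scraper.scrape_price is not shown in this test; a minimal sketch that would
# satisfy the assertions, assuming a euro price in German number format
# appears in the page text (the regex and headers are illustrative guesses,
# not the project's actual parsing).
import re
import requests
from bs4 import BeautifulSoup


class Scraper:
    @staticmethod
    def scrape_price(url):
        html = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}).text
        text = BeautifulSoup(html, 'html.parser').get_text(" ", strip=True)
        match = re.search(r'€\s*([\d.]+,\d{2})', text)
        if match is None:
            raise ValueError('no price found on ' + url)
        # German number format: '.' as thousands separator, ',' as decimal point.
        return float(match.group(1).replace('.', '').replace(',', '.'))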
def get_session_results(url, race_country, session_name, year):
    # Obtain cached results from database
    entry = DBManager.get_session_results_entry(race_country, session_name)
    # Check the cache entry exists and is from the requested year/season.
    # If valid, return it.
    if entry:
        cached_session_results = entry[0][0]
        if cached_session_results:
            json_year = cached_session_results['year']
            if json_year == year:
                print(session_name + " results obtained from cache")
                return cached_session_results
    # Otherwise, scrape
    if session_name.startswith('fp'):
        session_results = Scraper.scrape_practice_results(url)
    elif session_name.startswith('q'):
        session_results = Scraper.scrape_qualifying_results(url)
    else:
        session_results = Scraper.scrape_race_results(url)
    # Add year to the results data to mark the season
    session_results['year'] = year
    # Update the cached results entry in the database
    DBManager.update_session_results_entry(race_country, session_name,
                                           session_results)
    print(session_name + " results obtained from website")
    return session_results
def downloadJobs(keyword, sql):
    scraper = Scraper(keyword)
    scraper.sql = sql
    print("Jobs count: " + str(scraper.jobCount))
    scraper.searchForKeyword()
    print("Finished downloading")
def traverse(node):
    children = []
    for href in node.children_href:
        # initialize Scraper for this page
        scrap = Scraper(href)
        # grab features from the soup
        dest = scrap.create_destination()
        # find children in the soup if any
        dest.children_href = scrap.get_children()
        # recursively deeper down the tree if this is an area
        if dest.children_href is not None:
            traverse(dest)
        # inner traverse call has returned with a populated destination object
        print dest.href
        children.append(dest)
    node.children = children
    return node
def get_us_stats(self):
    self.scraper = Scraper(
        url="https://www.worldometers.info/coronavirus/country/us/")
    table = self.search_table_by_id('usa_table_countries_today')
    res = self.process_country_table(table, "USA", "3", "-11", "1", "2", "4", "", "6")
    return res
def scrape():
    engine = create_engine('sqlite:///events_db.sqlite')
    Base.metadata.bind = engine
    DBSession = sessionmaker(bind=engine)
    session = DBSession()
    scraper = Scraper()
    events = scraper.get_data()
    for event in events:
        ev = session.query(Event).filter_by(
            identifier=event['identifier']).first()
        if not ev:
            session.add(
                Event(name=event['name'],
                      location=event['location'],
                      link=event['link'],
                      short=event['short'],
                      date=event['date'],
                      source=event['source'],
                      identifier=event['identifier']))
    session.commit()
    session.close()
def fetchGamelogs(self, player):
    # Delete all existing gamelogs for the player before re-fetching
    for gamelog in player.gamelogs:
        session.delete(gamelog)
    scraper = Scraper()
    logs = scraper.getGamelogs(player.id)
    gamelogs = []
    for log in logs:
        gamelog = Gamelog()
        gamelog.player = player
        gamelog.game_id = log['game_id']
        gamelog.MIN = log['MIN']
        gamelog.FGM = log['FGM']
        gamelog.FGA = log['FGA']
        gamelog.FG_PCT = log['FG_PCT']
        gamelog.FG3M = log['FG3M']
        gamelog.FG3A = log['FG3A']
        gamelog.FG3_PCT = log['FG3_PCT']
        gamelog.FTM = log['FTM']
        gamelog.FTA = log['FTA']
        gamelog.FT_PCT = log['FT_PCT']
        gamelog.OREB = log['OREB']
        gamelog.DREB = log['DREB']
        gamelog.REB = log['REB']
        gamelog.AST = log['AST']
        gamelog.STL = log['STL']
        gamelog.BLK = log['BLK']
        gamelog.TOV = log['TOV']
        gamelog.PTS = log['PTS']
        gamelog.DK = self.calcDK(log)
        gamelogs.append(gamelog)
    session.add_all(gamelogs)
    session.commit()
def test_scraper_results(self):
    # A simple scraping test, comparing the results to actual scores from December 5th, 2019
    scraper = Scraper("https://www.espn.com/nba/scoreboard/_/date/20191205")
    scraper.scrape()
    results = [
        {'name': 'Philadelphia 76ers at Washington Wizards', 'date': '2019-12-06', 'time': '00:00',
         'teams': {'teamA': 'Washington Wizards', 'teamAScore': '119',
                   'teamALogo': 'https://a.espncdn.com/i/teamlogos/nba/500/scoreboard/wsh.png',
                   'teamB': 'Philadelphia 76ers', 'teamBScore': '113',
                   'teamBLogo': 'https://a.espncdn.com/i/teamlogos/nba/500/scoreboard/phi.png'}},
        {'name': 'Houston Rockets at Toronto Raptors', 'date': '2019-12-06', 'time': '00:30',
         'teams': {'teamA': 'Toronto Raptors', 'teamAScore': '109',
                   'teamALogo': 'https://a.espncdn.com/i/teamlogos/nba/500/scoreboard/tor.png',
                   'teamB': 'Houston Rockets', 'teamBScore': '119',
                   'teamBLogo': 'https://a.espncdn.com/i/teamlogos/nba/500/scoreboard/hou.png'}},
        {'name': 'Denver Nuggets at New York Knicks', 'date': '2019-12-06', 'time': '00:30',
         'teams': {'teamA': 'New York Knicks', 'teamAScore': '92',
                   'teamALogo': 'https://a.espncdn.com/i/teamlogos/nba/500/scoreboard/ny.png',
                   'teamB': 'Denver Nuggets', 'teamBScore': '129',
                   'teamBLogo': 'https://a.espncdn.com/i/teamlogos/nba/500/scoreboard/den.png'}},
        {'name': 'Phoenix Suns at New Orleans Pelicans', 'date': '2019-12-06', 'time': '01:00',
         'teams': {'teamA': 'New Orleans Pelicans', 'teamAScore': '132',
                   'teamALogo': 'https://a.espncdn.com/i/teamlogos/nba/500/scoreboard/no.png',
                   'teamB': 'Phoenix Suns', 'teamBScore': '139',
                   'teamBLogo': 'https://a.espncdn.com/i/teamlogos/nba/500/scoreboard/phx.png'}},
    ]
    self.assertEqual(scraper.data, results)
def __init__(self, url, file_name):
    """
    A constructor of a MoviesScraper object.

    :param url: the url address to scrape from.
    :param file_name: the name of the file where the output should be redirected to.
    """
    Scraper.__init__(self, url, file_name, "movies")
def scrapeProduct(name=None):
    if request.method == 'GET':
        return render_template('scrape.html', name=name)
    elif request.method == 'POST':
        sc = Scraper()
        product = sc.scrape(request.form["product-url"])
        return redirect('/product-details/' + str(product.id) + "/")
def get_global_stats(self):
    print("Fetching global stats.")
    self.scraper = Scraper(
        url="https://www.worldometers.info/coronavirus/#countries")
    table = self.search_table_by_id('main_table_countries_today')
    res = self.process_table(table, "9", "-8", "2", "3", "5", "7", "8",
                             "10", "11", "12", "4", "6", "14")
    return res
def test_scraper(self):
    sc = Scraper()
    sc.scrape()
    self.assertTrue(os.path.isfile('spoilers.json'))
    self.assertTrue(os.path.isfile('not_spoilers.json'))
    self.assertTrue(os.path.isfile('credentials.json'))
def scrape():
    site = request.args.get('site', None)
    obj = Scraper()
    keyword_dict = obj.keywords(site)
    # list = []
    # for key in keyword_dict:
    #     list.append(key + ': ' + str(keyword_dict[key]))
    # keywords = "".join(list)
    return render_template('Display.html', site=site, keywords=keyword_dict)
def __init__(self):
    self.setup_logging()
    self.game_terms = load_search_terms("search-terms.json")
    self.log.debug("Game terms loaded.")
    self.scraper = Scraper()
    self.log.debug("Scraper initialized.")
    self.db = SqliteInterface()
    self.log.debug("Database interface initialized.")
    self.update_games()
def post(self):
    account = db.get(db.Key(self.request.get('key')))
    if account.source.name == 'Spore':
        UpdatePage.merge_achievements(account,
                                      Scraper.scrape_spore(account.credentials))
    elif account.source.name == 'Steam':
        UpdatePage.merge_sources(account,
                                 Scraper.scrape_steam(account.credentials))
    elif account.source.created_by is not None and account.source.created_by.name == 'Steam':
        UpdatePage.merge_achievements(account,
                                      Scraper.scrape_steam_game(account.credentials,
                                                                account.source.url))
    self.redirect('/')
def write_to_db(self, header, entities):
    """
    Calls the parent class function. Before that, enriches the movies by
    adding their ratings.

    :param header: the header to use in the CSV file.
    :param entities: a list of IMDB movies.
    """
    for movie in entities:
        ratings_dict = MoviesScraper.get_ratings(movie.name, movie.year)
        if ratings_dict:
            movie.set_ratings(ratings_dict)
    Scraper.write_to_db(self, header, entities)
def send_exploits_to_emails(exploits, emails, DB_NAME):
    print('Checking the emails to be sent started...')
    db = DB()
    conn = db.create_connection(DB_NAME)
    email2exploits = dict()
    links2info = dict()
    for email in emails:
        exploits_of_email = []
        for exploit in exploits:
            if not db.link_was_sent_to_email(conn, exploit['Link'], email):
                exploits_of_email.append(exploit['Link'])
                links2info[exploit['Link']] = None
        if len(exploits_of_email) > 0:
            email2exploits[email] = exploits_of_email
    emails_count = len(email2exploits)
    print('{0} email(s) must be sent!'.format(emails_count))
    if emails_count != 0:
        scraper = Scraper()
        print('Scraping exploits information started! Please wait...')
        # Iterate over a snapshot of the keys: entries with no scraped info
        # are dropped from links2info inside the loop.
        pending_links = list(links2info.keys())
        for i, link in enumerate(pending_links, start=1):
            print('{0}/{1}'.format(i, len(pending_links)))
            link_info = scraper.get_exploit_info(link)
            if link_info != {}:
                links2info[link] = link_info
            else:
                links2info.pop(link, None)
        print('Scraping exploits information done!')
        try:
            print('Sending emails started! Please wait...')
            context = ssl.create_default_context()
            with smtplib.SMTP_SSL("smtp.gmail.com", 465, context=context) as server:
                server.login(Email, Password)
                for email, links in email2exploits.items():
                    infos = [value for key, value in links2info.items() if key in links]
                    if send_mail(server, Email, 'New exploits of 0day.today', email, infos):
                        emails_to_db = [[link, email, time.strftime('%d-%m-%Y')]
                                        for link in links]
                        db.insert_emails(conn, emails_to_db)
            print('Sending emails done!')
        except Exception:
            print("Can't login to mail server! Please check your mail server authentication policy!")
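# The send_mail helper called above is not shown; a minimal sketch of a
# compatible implementation, assuming the (server, sender, subject,
# recipient, infos) signature inferred from the call site and a plain-text
# digest body (illustrative only).
import smtplib
from email.message import EmailMessage


def send_mail(server, sender, subject, recipient, infos):
    # Build a plain-text digest of the exploit-info dicts and send it over
    # the already-authenticated SMTP connection; report success to the caller.
    msg = EmailMessage()
    msg['From'] = sender
    msg['To'] = recipient
    msg['Subject'] = subject
    msg.set_content('\n\n'.join(str(info) for info in infos))
    try:
        server.send_message(msg)
        return True
    except smtplib.SMTPException:
        return False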
def main():
    # formatted_time = datetime.datetime.now().strftime("%Y-%m-%d")
    # logging.basicConfig(
    #     format='%(asctime)s--%(levelname)s:%(message)s',
    #     datefmt='%m/%d/%Y %I:%M:%S %p',
    #     level=logging.INFO)
    scraper = Scraper(headless=HEADLESS)
    # course = scraper.scrape_specific_course('M', 32, deep=DEEP)
    LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    for letter in LETTERS:
        print("LETTER", letter)
        scraper.select_letter(letter)
def __init__(self, config):
    """
    Parameters
    ----------
    config : Configuration
        Configuration settings for the web scraper
    """
    Scraper.__init__(self, config.chromedriver_path, config.headless)
    self.open_url('https://calendar.buffalo.edu/', 'list-event')
    self.config = config
    self.event_list = []
def search(self, msg):
    """
    Search the web for a certain request

    :param msg: request to search the internet
    :return: None
    """
    web = Scraper()
    res = web.search(msg)
    if res is None:
        self.respond("Sorry, I could not find an answer to that")
    else:
        self.respond("I found this on the web. " + str(res))
def main(self):
    scrape = Scraper(self.from_ct, self.date, self.time)
    data = scrape.fetch_full()
    nj = NJParser(data[0]).parse_data()
    ct = CTParser(data[1]).parse_data()
    if self.from_ct:
        schedule = Scheduler(ct, nj, self.low, self.high).generate()
    else:
        schedule = Scheduler(nj, ct, self.low, self.high).generate()
    message = "Train schedules for " + self.date.strftime('%Y-%m-%d')
    Emailer(self.password, self.email, self.file).send_email(
        message + ":\n" + schedule, message)
def fetchPlayers(self):
    scraper = Scraper()
    players = scraper.getPlayerIds()
    instances = []
    for player in players:
        if player['id'] == 299:
            continue
        player_inst = Player(name=player['name'], id=player['id'])
        instances.append(player_inst)
    session.add_all(instances)
    session.commit()
def crawl(self, link, depth, keyword, posDict):
    print("Depth of " + str(depth) + " : " + link)
    if link in self.linksCrawled:
        return
    for item in self.linksCrawled:
        if link[:100] == item[:100]:
            return
    if depth >= self.depth:
        return
    self.linksCrawled.append(link)
    scraper = Scraper(link, keyword)
    hyperLinkList = scraper.scrapeLinks(link)
    articleDictionary = scraper.scrape('p')
    print(articleDictionary)
    for item in articleDictionary:
        if item not in self.crawlContents:
            self.crawlContents.append(item)
            if isinstance(item, str):
                tempDictForContents = POS.POS(item)
                for i in tempDictForContents.keys():
                    listUrl = (i, link)
                    if listUrl not in posDict:
                        posDict[listUrl] = tempDictForContents[i]
                    else:
                        posDict[listUrl] = posDict[listUrl] + tempDictForContents[i]
            print(item)
    if (len(self.crawlContents) + len(self.contents) <= self.sampleSize
            and len(articleDictionary) != 0
            and len(self.linksCrawled) <= self.sampleSize):
        self.linksCrawled.append(link)
        if hyperLinkList is not None:
            # crawl each child link on its own thread; the loop variable is
            # renamed so it does not shadow the link parameter
            for child_link in hyperLinkList:
                if child_link not in self.linksCrawled and "ad" not in child_link:
                    _thread.start_new_thread(
                        self.crawl, (child_link, depth + 1, self.keyword, posDict,))
def main():
    # formatted_time = datetime.datetime.now().strftime("%Y-%m-%d")
    # logging.basicConfig(
    #     format='%(asctime)s--%(levelname)s:%(message)s',
    #     datefmt='%m/%d/%Y %I:%M:%S %p',
    #     level=logging.INFO)
    scraper = Scraper(headless=HEADLESS)
    course = scraper.scrape_specific_course('A', 1, deep=DEEP)
    # LETTERS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    # for letter in LETTERS:
    #     print("LETTER", letter)
    #     scraper.select_letter(letter)
    with open('data_dump/test.json', 'w') as out:
        json.dump(course, out)
def fetchPickablePlayers(self):
    for player in session.query(Player):
        player.pickable = False
    scraper = Scraper()
    pickable_players = scraper.getPickablePlayers()
    for i, obj in enumerate(pickable_players):
        player = Player.fromName(obj['name'])
        print player.name + " (%s/%s)" % (i, len(pickable_players))
        player.pickable = True
        player.salary = obj['salary']
        player.pos = obj['pos']
        self.fetchGamelogs(player)
    session.commit()
def scrape_and_crawl(input_page: str,
                     file_path: str,
                     link_status_report: dict = None,
                     all_checked_links: dict = None,
                     is_local_file: bool = False):
    # Mutable default arguments are shared across calls, so create fresh
    # dicts here instead of in the signature.
    if link_status_report is None:
        link_status_report = {}
    if all_checked_links is None:
        all_checked_links = {}
    scraper = Scraper()
    if is_local_file:
        links = list(scraper.extract_links(input_page, ""))
    else:
        links = list(scraper.extract_links(input_page, file_path))
    crawler = Crawler(urls=links, checked=all_checked_links)
    crawler.crawl()
    checked_links = crawler.get_responses()
    link_status_report[file_path] = checked_links
    return checked_links, crawler.get_checked()
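# A short usage sketch for scrape_and_crawl; the file name is hypothetical,
# and get_responses() is assumed to return a mapping of url to status.
statuses, checked = scrape_and_crawl("docs/index.html", "docs/index.html",
                                     is_local_file=True)
for url, status in statuses.items():
    print(url, status)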
def message_response_f(bot, update):
    # Bot was directly referenced here
    if update.message.text[:len(BOT_NAME)] == BOT_NAME:
        query = update.message.text[len(BOT_NAME) + 1:]
        query_l = query.split(" ")[0].lower()
        msg = update.message.text
        bot.send_message(chat_id=update.message.chat_id,
                         text="Please hold on and let me check on that..")
        msg = msg.replace("SaucePlzBot ", "").replace("@", "")
        try:
            originalMessage = msg
            for item in singlish.singlish_l:
                msg = msg.replace(item[0], item[1])
            entry = Entry(msg)
            item = entry.start()
            scraper = Scraper(item, "")
            dictionary = scraper.scrape('p')
            textList = []
            for tempItem in dictionary:
                if isinstance(tempItem, list):
                    for item2 in tempItem:
                        item2 = item2.replace(" ", "").replace("\n", " ").replace("\r", " ")
                        textList.append(item2)
                else:
                    textList.append(tempItem)
            for tempItem in textList:
                print(tempItem)
            # Greetings
            if query_l in greetings_l and len(query_l) < 2:
                msg = "Hi there, I'm here for all your sauce-related needs"
            for tempItem in textList:
                bot.send_message(chat_id=update.message.chat_id, text=tempItem)
        except ValueError:
            bot.send_message(
                chat_id=update.message.chat_id,
                text="Sorry I could not find anything that is related to : " + originalMessage)
def get_showtimes(season, url, race_country):
    # Obtain cached showtimes data from the database
    entry = DBManager.get_showtimes_entry(race_country)
    # Check the cached data exists and is from the current season.
    # If valid, return it.
    if entry:
        cached_showtimes_data = entry[0][0]
        if cached_showtimes_data:
            json_year = cached_showtimes_data['year']
            if json_year == season:
                print("Showtimes obtained from cache")
                return cached_showtimes_data
    # Scrape showtimes from the url website
    showtimes_data = Scraper.scrape_showtimes(season, url)
    if showtimes_data == {}:
        print("Showtimes unavailable as session has elapsed")
        return showtimes_data
    # Add year to showtimes data to mark the season
    showtimes_data['year'] = season
    # Update the cached showtimes entry in the database
    DBManager.update_showtimes_entry(race_country, showtimes_data)
    print("Showtimes obtained from website")
    return showtimes_data
def run(self):
    # create the tor connection here
    self.opener = UseTor(
        'mypasswordAndy', self.socks_proxy_port,
        self.base_control_port, pass_function)
    while True:
        try:
            self.zip_code = self.queue.get()
            # Look for the end sentinel
            if self.zip_code is None:
                # Notify the next worker
                self.queue.put(None)
                break  # end the while True loop
            # from here pass the opener connection to Scraper,
            # which will update the housing database
            Scraper(self.opener, self.zip_code)
            self.coll.update({'zip_code': self.zip_code},
                             {'$set': {'finished': 1}})
        except:
            print "Unexpected error:", sys.exc_info()[0]
            pass  # leave this element for the next cycle
    # when the process is finished, close the database connection
    self.conn.close()
def main(args):
    CONFIG = getCommandLineConfig(args)
    if not CONFIG:
        CONFIG = getConfigFromFile()
    print("_________________________________")
    print("       Marktplaats Scraper       ")
    print("                                 ")
    print("         by jaspercardol         ")
    print("_________________________________")
    print(" ")
    print("Configuration:")
    print("Scanning interval: {}".format(CONFIG['scanning_interval']))
    print("Pushover API Token: {}".format(CONFIG['pushover_api_token']))
    print("Pushover User Key: {}".format(CONFIG['pushover_user_key']))
    print(" ")
    notifier = NotificationService(CONFIG['pushover_api_token'],
                                   CONFIG['pushover_user_key'])
    fileWatcher = FileWatcher()
    print("Initial file scan found {} queries.".format(len(fileWatcher.getFiles())))
    scraper = Scraper(CONFIG['webdriverpath'])
    try:
        while True:
            files = fileWatcher.getFiles()
            for file in files:
                query = files[file]['query']
                filepath = files[file]['listing_filepath']
                check_for_updates(filepath, query, scraper, notifier)
            time.sleep(CONFIG['scanning_interval'])
    except KeyboardInterrupt:
        exit()
def main():
    arguments = Arguments()
    outputFile = CSVFile(arguments.outputFile)
    scraper = Scraper(arguments.name, arguments.location, arguments.street)
    done = False
    while not done:
        try:
            scraper.getPage()
            scraper.parsePage()
            scraper.getPeopleInfo(outputFile)
            scraper.changePageNumber()
        except Exception as error:
            done = True
            print("Done.")
            print(error)
def __init__(self, username, linkLevel, driver=None):
    # Fall back to a local chromedriver when no driver is supplied;
    # otherwise keep the one passed in.
    if driver is None:
        self.driver = webdriver.Chrome('./chromedriver')
    else:
        self.driver = driver
    self.username = username
    self.usernameLink = Scraper.getUserLink(username)
    self._linkLevel = self.linkLevel = linkLevel
    self.images = {}
def main():
    parser = argparse.ArgumentParser(
        description='Fetches and parses data from House.gov for twosixtynine')
    parser.add_argument('-t', '--threads', help='Number of threads for scraper',
                        required=False)
    parser.add_argument('-y', '--year', help='Year of Congress', required=True)
    parser.add_argument('-li', '--limit',
                        help='Inclusive limit on last vote number to be crawled in series (001 to {limit incl.})',
                        required=True)
    args = parser.parse_args()
    limit = int(args.limit)
    year = args.year or '2011'
    threads = int(args.threads or 10)
    scraper = Scraper(threads)
    for i in xrange(1, limit + 1):
        # zero-pad the roll-call number to three digits, e.g. 7 -> '007'
        j = str(i).zfill(3)
        scraper.add_to_queue('http://clerk.house.gov/evs/{0}/roll{1}.xml'.format(year, j))
    scraper.start_process_queue()
def update(self, year, season):
    print("Starting update...")
    uia_courses = UIA(1)
    uia_subject = UIA(2)
    scraper = Scraper(season, year)
    scraper.scrape(uia_courses)
    scraper = Scraper(season, year)
    scraper.scrape(uia_subject)
    return "Done!"
def get_profile_urls_repin_rate(path_to_proxies, profile_url):
    headers = {'User-agent': 'Mozilla'}
    resp, hdrs = service_fetcher.ServiceFetcher().fetchURL(profile_url)
    pin_urls = get_pin_urls(resp)
    data, num_failures, num_success = Scraper.scrape(pin_urls, DummyScraperClass,
                                                     path_to_proxies, headers)
    print "%s failures, %s successes" % (num_failures, num_success)
    # Merge result data into a single hash
    summary_data = {}
    for datum in data:
        summary_data.update(datum)
    is_repin_values = map(lambda x: x['is_repin'],
                          filter(lambda x: x is not None, summary_data.values()))
    num_repins = is_repin_values.count(True)
    num_reg_pins = is_repin_values.count(False)
    print "Number pins for %s : %s" % (profile_url, len(pin_urls))
    print " Number repins: %s" % num_repins
    print " Number regular pins: %s" % num_reg_pins
    print "%% Repins: %s" % (num_repins / float(num_repins + num_reg_pins))
def main():
    scraper = Scraper(threshold=3)
    scraper.run()
    for k, v in scraper.tree.out().items():
        print k, v
from Scraper import Scraper

s = Scraper('test')
s.run()
def start_scraper(uia_object):
    # season and year are module-level globals here
    scraper = Scraper(season, year)
    scraper.scrape(uia_object)
def main():
    scraper = Scraper("*****@*****.**")
    with open('gids.txt') as f:
        lines = f.read().splitlines()
    entries = [scraper.getEntry(gid, Entry.GENOME) for gid in lines]
    write_entries(entries, "genome")
if __name__ == '__main__':
    scrap = Scraper(ROOT_HREF)
    # iterate through areas immediately below
    for state_href in scrap.get_children():
        # initialize root destination with children
        scrap = Scraper(state_href)
        dest = scrap.create_destination()
        dest.children_href = scrap.get_children()
        # check if we have already crawled this area
        OBJECT_OUTFILE = DATA_DIR + dest.nickname + '.pickle'
        if os.path.exists(OBJECT_OUTFILE):
            print dest.nickname + ' has already been crawled'
            pass
        else:
from Scraper import Scraper

deptsURL = "http://ninjacourses.com/explore/4/"
deptUrlStub = deptsURL + "department/"
ts = Scraper(deptsURL)
deptList = ts.getDeptStubs()
for dept in deptList:
    print dept
    courseList = ts.getCourses(dept)
    courseTimes = ts.getCourseTimes(courseList, dept)
    for course in courseTimes:
        i = 0
        print "\n" + course + " " + courseTimes[course][2][0]
        for time in courseTimes[course][0]:
            if ("M" in time and "W" in time) or ("T" in time and "R" in time) \
                    or (len(courseTimes[course]) == 1):
                print courseTimes[course][1][i] + " " + time
                i = i + 1
            else:
                print time

# courseList = ts.getCourses()
# courseTimes = ts.getCourseTimes(courseDict)
# courseUnitsAndProfessor = ts.getCourseUnitsAndProfessors(courseDict)
import time


class DummyScraperClass:
    @staticmethod
    def scrape(html):
        """ Scraping code goes here. """
        return {'html': html, 'some_var': 'some_val'}


class Timer:
    def __enter__(self):
        self.start = time.clock()
        return self

    def __exit__(self, *args):
        self.end = time.clock()
        self.interval = self.end - self.start


if __name__ == '__main__':
    # Run proxy_zip_downloader.py first
    path_to_proxies = raw_input("Abs path to folder with proxy zip files")
    path_to_proxies += "" if path_to_proxies.endswith("/") else "/"
    headers = {'User-agent': 'Mozilla'}
    urls = ['http://madmenandmayhem.tumblr.com','http://sinaisatevcls.tumblr.com','http://emmph.tumblr.com','http://luuuaaan.tumblr.com','http://ungkumariam.tumblr.com','http://santanawolrd.tumblr.com','http://trashcook-m**********r.tumblr.com','http://moddolly.tumblr.com','http://surimillion.tumblr.com','http://texas-lilly.tumblr.com','http://doce-ilusion.tumblr.com','http://lovegemstudio.tumblr.com','http://shaneinspain.tumblr.com','http://lam-is-a-panda.tumblr.com','http://nonolimitemusic.tumblr.com','http://thosethreadsdotcom.tumblr.com','http://mrs--schlierenzauer.tumblr.com','http://chloeandisabel-byjeanine.tumblr.com','http://shatteruslikeglass.tumblr.com','http://andreadellamore.tumblr.com','http://fdrake88.tumblr.com','http://nuas-e-cruas.tumblr.com','http://leahjihaechoi.tumblr.com','http://rodrigohudgens.tumblr.com','http://crazycoolvintage.tumblr.com','http://evilboyforlifeyebo.tumblr.com','http://kawaii-ph.tumblr.com','http://curvily.tumblr.com','http://ilvzvhg.tumblr.com','http://melodychauv.tumblr.com','http://hypsta4513.tumblr.com','http://kattancock.tumblr.com','http://t3stingth3mes.tumblr.com','http://mydreamsmyuniqueworld.tumblr.com','http://oli-worlds.tumblr.com','http://gap.tumblr.com','http://zethie.tumblr.com','http://codkwassa.tumblr.com','http://heysnapshot.tumblr.com','http://isaacdoodles.tumblr.com','http://mossmarchen.tumblr.com','http://lizryabinina.tumblr.com','http://forever-a-zayngel.tumblr.com','http://sofyajr.tumblr.com','http://andreafindyway.tumblr.com','http://fashioninfographics.com','http://hello-sexy-babes.tumblr.com','http://dollyale.tumblr.com','http://loveglam.tumblr.com','http://acutestyle.tumblr.com','http://tuesdaysoff.tumblr.com','http://hooperphotography.tumblr.com','http://brushwoodbarbie.tumblr.com','http://mydadshotfilm.tumblr.com','http://atthepinkhouse.tumblr.com','http://dapper-as-fck.tumblr.com','http://dependentonthewinds.tumblr.com','http://heartofwildwolf.tumblr.com','http://omgkymsketchposts.tumblr.com','http://jordanhomvstheworld.tumblr.com','http://goodnight-lenin.tumblr.com','http://vuitton-versace-and-chanel.tumblr.com','http://jsarloutte.tumblr.com','http://jennacandy.tumblr.com','http://camillepomme.tumblr.com','http://apieceof-kay-ke.tumblr.com','http://alicematsumotochan.tumblr.com','http://alain-convard.tumblr.com','http://wtfbelleville.tumblr.com','http://xxfactorygirlxx.tumblr.com','http://ishadinio.tumblr.com','http://butdemfeelsdoh.tumblr.com','http://parmavintage.tumblr.com','http://backslashgallery.tumblr.com','http://johnnythehorse.tumblr.com','http://pisuilnano.tumblr.com','http://pizzadiloretta.tumblr.com','http://deadhole.tumblr.com','http://marine-illustration.tumblr.com','http://itslouuisee.tumblr.com','http://classyandbeautyfor.tumblr.com','http://youcantmakehomesfrompeople.tumblr.com','http://vasilyt.tumblr.com','http://collabdummie.tumblr.com','http://sloanparker.tumblr.com','http://snakesofliz.tumblr.com','http://faithhopechairty.tumblr.com','http://ladylondonthecorgi.tumblr.com','http://iz
iaa.tumblr.com','http://blakewhittemore.tumblr.com','http://muellfees-nature.tumblr.com','http://odinsmusings.tumblr.com','http://decemberdork.tumblr.com','http://neepaakther.tumblr.com','http://vidztalive.tumblr.com','http://melha.tumblr.com','http://roomtobreathe-promo.tumblr.com','http://humangorilla.tumblr.com','http://runningwithdreamsx.tumblr.com','http://kelinha18.tumblr.com','http://mhthagreat.tumblr.com','http://mr-micks-muses.tumblr.com','http://courtneyaxfordemp.tumblr.com','http://korikora.tumblr.com','http://blackybek.tumblr.com','http://workliveloveplaymusic.tumblr.com','http://caribemonster.tumblr.com','http://pandadoggy04.tumblr.com','http://patrikbosen.tumblr.com','http://danooshaaa.tumblr.com','http://bioview.tumblr.com','http://amphaaron.tumblr.com','http://miihcarvalho.tumblr.com','http://thenomadicjourney.tumblr.com','http://burjir.tumblr.com','http://fuckyeahdash.tumblr.com','http://delicious-designs.tumblr.com','http://dranaerys.tumblr.com','http://secondsocialss.tumblr.com','http://solaliv.tumblr.com','http://drethebarber.com','http://thenataschamalinka.tumblr.com','http://friendorfauxclothing.tumblr.com','http://wanderlustchild09.tumblr.com','http://mcjustinx.tumblr.com','http://schoolesshit.tumblr.com','http://perfectfitthings.tumblr.com','http://ravenclwesome.tumblr.com','http://biriuct.tumblr.com','http://rambrreezy.tumblr.com','http://georgerascon.tumblr.com','http://janine201222.tumblr.com','http://miawaldecker.tumblr.com','http://what-id-wear.com','http://anitafilotas.tumblr.com','http://smilenow-crytomorrow.tumblr.com','http://madamebluprint.tumblr.com','http://holla-for-a-dollar-hoe.tumblr.com','http://sixbysixbyseven.tumblr.com','http://catalinamaradona.tumblr.com','http://vigran.tumblr.com','http://dimostheniskapa.tumblr.com','http://if-you-leave.tumblr.com','http://remisson.tumblr.com','http://dirtypairofjeans.tumblr.com','http://hlmrzl.tumblr.com','http://cameralenscomputerxperiment.tumblr.com','http://krsmf.tumblr.com','http://with-spicy.tumblr.com','http://yumology.tumblr.com','http://paleetasummer.tumblr.com','http://youarebeautyxx.tumblr.com','http://frankie-ismyname.tumblr.com','http://eclip--se.tumblr.com','http://hi-lovelaay.tumblr.com','http://laraverheijden.tumblr.com','http://opentoesandhose.tumblr.com','http://rblackwell.tumblr.com','http://boys-in--bands.tumblr.com','http://blackskull98.tumblr.com','http://youaremyworld99.tumblr.com','http://scale-gods-punish-me.tumblr.com','http://phipvonkorsakow.tumblr.com','http://sneakers.wantering.com','http://0rom0.tumblr.com','http://heelsandfashion.tumblr.com','http://celiapap.tumblr.com','http://lolabcd.tumblr.com','http://bringmemorefat.tumblr.com','http://royalungs.tumblr.com','http://whatmeredithwore.tumblr.com',' 
http://zelmarl.tumblr.com','http://napoleonfour.com','http://your-dream-bedroom.tumblr.com','http://liveinpaaradise.tumblr.com','http://wearethesecretcity.tumblr.com','http://driedfish.tumblr.com','http://bramos-doritos.tumblr.com','http://freetoworshiphim.tumblr.com','http://gildafuentes.tumblr.com','http://intoxicated-kisses.tumblr.com','http://dakilanggerlpren.tumblr.com','http://whatheheckbeck.tumblr.com','http://solozus.tumblr.com','http://toothless4835.tumblr.com','http://tecdemty.tumblr.com','http://beautifuladybug.tumblr.com','http://fizzbee.tumblr.com','http://w0lves-start-to-sing.tumblr.com','http://h3y-f**k-you.tumblr.com','http://cinefamily.tumblr.com','http://westcoastpov.com','http://totallydeathbats.tumblr.com','http://talktosantanalopez.tumblr.com','http://thatwritererinoriordan.tumblr.com','http://kickassvideo.tumblr.com','http://micaparris.tumblr.com','http://jgomespalmeirense.tumblr.com','http://sskin88ts.tumblr.com','http://santosfcmusings.tumblr.com','http://borgcast.tumblr.com','http://jhoshuex.tumblr.com','http://kdamaceno.tumblr.com','http://leobolfarini.tumblr.com','http://andiscamilo.tumblr.com','http://deckexpress.tumblr.com','http://todas-amam.tumblr.com','http://schemingangels.tumblr.com','http://pornossoalviverde.tumblr.com','http://lalalandstore.tumblr.com','http://cristopherjonas.tumblr.com','http://mayfurtado.tumblr.com','http://spromo.tumblr.com','http://thamyroocha.tumblr.com','http://carlos-costa.tumblr.com','http://volcanoink.tumblr.com','http://chanelbagsandcigarettedrags.com','http://playerpopimp.tumblr.com','http://whatupshann.tumblr.com','http://emmamaritte.tumblr.com','http://7832point5milesaway.tumblr.com','http://wonderscent.tumblr.com','http://faithbabe.tumblr.com','http://lovelyinla.tumblr.com','http://thunderbirdrobot.tumblr.com','http://acheloi-s.tumblr.com','http://sporadicthought.tumblr.com','http://ohchanapha.tumblr.com','http://hellboywearshotpants.tumblr.com','http://scotianostra.tumblr.com','http://gettyimages.andresruffo.com','http://ukskinnygirl.tumblr.com','http://mettelor.tumblr.com','http://bibros.tumblr.com','http://imakittenforsir.tumblr.com','http://hampton-emma.tumblr.com','http://gabrielucifer.tumblr.com','http://partyupson.tumblr.com','http://brendacdsissy.tumblr.com','http://sfarjal.com','http://itsinfinitelove.tumblr.com','http://adrianaxo13.tumblr.com','http://angelmonsanto.tumblr.com','http://erremin.tumblr.com','http://stephanhauptmann.tumblr.com','http://knockknockknockkinsy.tumblr.com','http://xalrightx.tumblr.com','http://doctorspantry.com','http://noesdieta.tumblr.com','http://fashion-ramblings.tumblr.com','http://translatable.tumblr.com','http://gtrancefamily.tumblr.com','http://the-dark-side-of-sun.tumblr.com','http://gabbomx.tumblr.com','http://joel-courtney.tumblr.com','http://healthcarebiz.tumblr.com','http://theclarkknight.tumblr.com','http://crudeoilfacilitators.tumblr.com','http://wirhabenkeinezeit.tumblr.com','http://whothafuckarethearcticmonkeys.tumblr.com','http://nhimted.tumblr.com','http://aolchatroom.tumblr.com','http://poppascrew.tumblr.com','http://weirdoclubbcn.tumblr.com','http://blackcottoncandy.tumblr.com','http://runnin-from-cops.tumblr.com','http://alexanderthesalamander.tumblr.com','http://gaserranos.tumblr.com','http://jettgamer.tumblr.com','http://imaginationly.tumblr.com','http://careterrazzo.tumblr.com','http://emmyland.tumblr.com','http://malevolentjosh.tumblr.com','http://leahcecilia.tumblr.com','http://kelmichellez.tumblr.com','http://joalavez.tumblr.com','http://musicmelbourneandme.tumbl
r.com','http://burmecias-protector.tumblr.com','http://movietitleinferno.tumblr.com','http://blvck-laced.tumblr.com','http://smuttydirection.tumblr.com','http://fadingserotonin.tumblr.com','http://nintendogamersblog.tumblr.com','http://lookitslilydepp.tumblr.com','http://emma-phelps.tumblr.com','http://tearscanbeblue.tumblr.com','http://myworldswaggy.tumblr.com','http://villegas-bizzle-angel.tumblr.com','http://overthemoons.tumblr.com','http://praisejelena.tumblr.com','http://misscirclenyc.tumblr.com','http://homobaeddelicus.tumblr.com','http://thelesbian401.tumblr.com','http://party-music.tumblr.com','http://fwsjuice.tumblr.com','http://latenightparty.tumblr.com','http://minadesadefatale.tumblr.com','http://3bigmamas.com','http://shaylarochellesoring.tumblr.com','http://ninehundredfour.tumblr.com','http://hotbabespictures.tumblr.com','http://teenagerfagay.tumblr.com','http://jjjaninaaa.tumblr.com','http://justindz.tumblr.com','http://daniellegrand.tumblr.com','http://phillipenover.tumblr.com','http://rhiannonwilliamsceramics.tumblr.com','http://sexgamesandrocknroll.tumblr.com','http://maidenofeddie.tumblr.com','http://themegawattsradioshow.tumblr.com','http://errmahgerdbailey.tumblr.com','http://tna-promo.tumblr.com','http://talamasi.tumblr.com','http://cafecmoda.tumblr.com','http://srsfunny.tumblr.com','http://mildlyoffensivemusings.tumblr.com','http://catscean.tumblr.com','http://charleyhyde.tumblr.com','http://vedo.tumblr.com','http://theblazedraver.tumblr.com','http://keepingthebluesalive.tumblr.com','http://curvy-snow.tumblr.com','http://pammktgnut.tumblr.com','http://caty-santos.tumblr.com','http://wjsginternetradio.tumblr.com','http://son-of-winter-and-stars.tumblr.com','http://perfectlypretzeled.tumblr.com','http://wrestlinggreats.tumblr.com','http://abbyghost.tumblr.com','http://mirrorneurons.tumblr.com','http://lonelymelodiess.tumblr.com','http://pluots-in-april.tumblr.com','http://mauhimei.tumblr.com','http://musicmasses.tumblr.com','http://chunkychick.tumblr.com','http://rocket-queen-98.tumblr.com','http://newwyorks.tumblr.com','http://justinsforever.tumblr.com','http://ilovetheswagstyle.tumblr.com','http://papuboart.tumblr.com','http://jelenademigomezbieberlover.tumblr.com','http://bieber-space.tumblr.com','http://ohaiiikakaaa.tumblr.com','http://gokidrauhlsavedme.tumblr.com','http://mccanard-kenny.tumblr.com','http://acelyakartal.tumblr.com','http://healthy-lifeeestyle.tumblr.com','http://michaelbahm.tumblr.com','http://vasilis-p.tumblr.com','http://c-giada.tumblr.com','http://sonnenkindx3.tumblr.com','http://minimoose218.tumblr.com','http://ohlookatthesea.tumblr.com','http://tabithagale1688.tumblr.com','http://thejapanblr.tumblr.com','http://blakevjones.tumblr.com','http://darksidelightlydarkly.tumblr.com','http://kjarta.tumblr.com','http://bestoffates.tumblr.com','http://hairitagehydration.tumblr.com','http://whammos.tumblr.com','http://moviegifsthatrock.tumblr.com','http://www.rdubaton.com','http://pinalshah.tumblr.com','http://dressmeupcostumes.tumblr.com','http://vickhiz.tumblr.com','http://nwufochaser.tumblr.com','http://dontletitgotoyourheadd.tumblr.com','http://agentcharliedeltah.tumblr.com','http://mollybeachmurphy.tumblr.com','http://brielle375.tumblr.com','http://directionergirl04.tumblr.com','http://theshuttsy.tumblr.com','http://itscomplicated837.tumblr.com','http://shelleygodfrey.tumblr.com','http://endlessmike420.tumblr.com','http://kyutebunny.tumblr.com','http://luckystrikev3.tumblr.com','http://eleternoaspirante.tumblr.com','http://naomilovesme.tumblr.com','h
ttp://punkbtm.tumblr.com','http://rckbands4life.tumblr.com','http://tomhardydotorg.tumblr.com','http://progtracks.tumblr.com','http://ourimagefest.tumblr.com','http://naghisa.tumblr.com','http://crawlerted.tumblr.com','http://notquitewicked.tumblr.com','http://julietsroses.tumblr.com','http://good-foor-yoou.tumblr.com','http://leedonghae.iloveeyou.com','http://madmag1c.tumblr.com','http://summerswithoutwar.tumblr.com','http://not-punk-enough.tumblr.com','http://jordire.tumblr.com','http://darebear-love.tumblr.com','http://kookie-tree.tumblr.com','http://cshew.tumblr.com','http://coordinate-butt-to-mouth.tumblr.com','http://urgrunge.tumblr.com','http://kaitlynabroad.tumblr.com','http://sweetnessandspicy.tumblr.com','http://pinkwhiskeystains.tumblr.com','http://hearts-exo.tumblr.com','http://modusmo.tumblr.com','http://bruhhmansur.tumblr.com','http://jenniferpena93.tumblr.com','http://www.horroroftruant.com','http://w-justone-shoe.tumblr.com','http://mauri-jackson.tumblr.com','http://merchantshipsinker.tumblr.com','http://kierongillen.tumblr.com','http://davidlavieri.tumblr.com','http://kammiekay.tumblr.com','http://behindthebrandmedia.tumblr.com','http://3maul333.tumblr.com','http://ferociafatale.tumblr.com','http://jordanstarbaby.tumblr.com','http://harpoonandpen.tumblr.com','http://asubssoul2013.tumblr.com','http://ballerinka.tumblr.com','http://lucytaylorlove.tumblr.com','http://luv2watchher.tumblr.com','http://farrahfawcettfanclub.tumblr.com','http://deanbelievesinwholockholmes.tumblr.com','http://bl0wjobqueen.tumblr.com','http://zeppling.tumblr.com','http://enigmafantasy.tumblr.com','http://wtwht.tumblr.com','http://marshchocpieimagines.tumblr.com','http://tpcshkr.tumblr.com','http://abottleoffinewine.tumblr.com','http://grey-fields.tumblr.com','http://harryxzayn.tumblr.com','http://askforgottenrainbowdash.tumblr.com','http://ceramicplates.tumblr.com','http://starred.lamelas.org',' http://ghadshaw.tumblr.com','http://anonymousmilitant.tumblr.com','http://othisredding.tumblr.com','http://xlousmiles.tumblr.com','http://smallerbenz.tumblr.com','http://revelationrevealed.tumblr.com','http://hugsandkissesdreamer.tumblr.com','http://scenesfrommybackyard.tumblr.com','http://vcherryvanillav.tumblr.com','http://ninquellote.tumblr.com','http://fggam.tumblr.com','http://futurewasme.tumblr.com','http://alexeybokov.tumblr.com','http://imillernyc.tumblr.com','http://jesscover.tumblr.com','http://lifensoho.tumblr.com','http://sugaryviolet.tumblr.com','http://malloryrunsthis.tumblr.com','http://telerafairlyreie.tumblr.com','http://amberlonsdales.tumblr.com','http://snowsport.tumblr.com','http://slimbonescreatiwear.tumblr.com','http://mynameishapiness.tumblr.com','http://sandshrewvv.tumblr.com','http://skinnysperfection.tumblr.com','http://danienunes.tumblr.com','http://blincmagazine.tumblr.com','http://sweet-meandyou.tumblr.com','http://alfrocks.tumblr.com','http://xshatteredsky.tumblr.com','http://idiotsvernacular.tumblr.com','http://bete67.tumblr.com','http://anthropology0.tumblr.com','http://mattroberge.tumblr.com','http://rnhott.tumblr.com','http://milledacetateinternational.com','http://checkthefeed.com','http://themissq.com','http://shreddingslopes.tumblr.com','http://itsemmadott.tumblr.com','http://hereafterforever.tumblr.com','http://ge-n-ki.tumblr.com','http://t-w-a-m-p.tumblr.com','http://i-am-momentum.tumblr.com','http://fashionexplosion.tumblr.com','http://laurenisland.tumblr.com','http://jungkks.tumblr.com', 
'http://dastardlyconclusions.tumblr.com','http://sorbetshawty.tumblr.com','http://shotaforever.tumblr.com','http://idriserba.tumblr.com','http://nemithine.tumblr.com','http://whiskeyandbutterflies.tumblr.com','http://loukkhs.tumblr.com','http://jvlivs.tumblr.com','http://laughingalonewithensalada.tumblr.com','http://mkstyllinski.tumblr.com','http://amster-damn.tumblr.com','http://red-lips-slit-wrists.tumblr.com','http://susydalgesso.tumblr.com','http://orquestre.tumblr.com','http://hepsiyaralarsonuncusuoldurur.tumblr.com','http://mrcik.tumblr.com','http://askaosolimpianos.tumblr.com','http://linasimsek.tumblr.com','http://aceitado.tumblr.com','http://viniciusluparelli.tumblr.com','http://mythemoon.tumblr.com','http://miguelamancio.com','http://edzed.tumblr.com','http://therooftopblog.tumblr.com','http://e102.tumblr.com','http://siirdehayatvar.tumblr.com','http://katrin-zotchev.tumblr.com','http://jerseypicker.tumblr.com','http://incorpora.tumblr.com','http://emmaparaelmundo.tumblr.com','http://diegobonfim.tumblr.com','http://onlyygirlinthewrld.tumblr.com','http://priinc-e.tumblr.com','http://okpyl.tumblr.com','http://gifshuntoffamous.tumblr.com','http://rock-n-roll-baby7.tumblr.com','http://v-odk-a.tumblr.com','http://paznativa.tumblr.com','http://nickneyime.tumblr.com','http://day-goncalves.tumblr.com','http://sakanaguitar.tumblr.com','http://m-gc-nb-ys.tumblr.com','http://thebestrealitytvquotesever.tumblr.com','http://tatiana-the-iron-woman.tumblr.com','http://hotbarrels.tumblr.com','http://thelionneverfearsthejackal.tumblr.com','http://dylanoutsidetheframe.tumblr.com','http://mango2kw.tumblr.com','http://grimmble.tumblr.com','http://ipostfun.tumblr.com','http://v0yages.tumblr.com','http://anichipashvili.tumblr.com','http://retro-beauty.tumblr.com','http://bestretirement.gangsaway.com','http://carlalexandersson.tumblr.com','http://innerlightsurf.tumblr.com','http://wreckatsea.tumblr.com','http://charlsteenkamp.tumblr.com','http://taylorkinneyismyfire.tumblr.com','http://markwickens.tumblr.com','http://bandz-a-make-her-danceee.tumblr.com','http://colleensarahbean.tumblr.com','http://amethystsadachbia.tumblr.com','http://inspiredlawblog.com','http://rebecatr.tumblr.com','http://justlikewales.tumblr.com','http://queenofthekooks.tumblr.com','http://owncentsoftime.tumblr.com','http://iwish-i-wasbritish.tumblr.com','http://itshiddlesbaby.tumblr.com','http://www.hissizadam.com','http://mavipandora.tumblr.com','http://chukulata.tumblr.com','http://azuldecobalt.tumblr.com','http://kiyidan.tumblr.com','http://thingstoseeinflorida.tumblr.com','http://dtknuppe.tumblr.com','http://crazypopperlover.tumblr.com','http://ialways-haveaplan.tumblr.com','http://coolmemefriend2002.tumblr.com','http://nevadatoc.tumblr.com','http://uma-panda-bem-ciumenta.tumblr.com','http://yourflowerswillbloom.tumblr.com','http://laelapsneverbegs.tumblr.com','http://detectiveinspectordonut.tumblr.com','http://koyomi-japan.tumblr.com','http://relic-ario.tumblr.com','http://piriguetando.tumblr.com','http://adventurres.tumblr.com','http://dianamxd.tumblr.com','http://jiqkunii.tumblr.com','http://gomez-perfection.tumblr.com','http://fuking-bob.tumblr.com','http://crazynp.tumblr.com','http://moparandmarlboros.tumblr.com','http://o-nlinediary.tumblr.com','http://marvacu.tumblr.com','http://titanicos.tumblr.com','http://fringeycurlymess.tumblr.com','http://dramatacado.tumblr.com','http://askatruscum.tumblr.com','http://obscessor.tumblr.com','http://fuckifyourdream.tumblr.com','http://apromptripost.com','http://x3b.tumblr.com','http:/
/hereisjonny.tumblr.com','http://toshio-fukawa.tumblr.com','http://camlaan.tumblr.com','http://diarymdstudent.tumblr.com','http://six-shooter-shirogane.tumblr.com','http://un-presentable.tumblr.com','http://foreversparklingg.tumblr.com','http://tryinghuman.tumblr.com','http://lotte8794.tumblr.com','http://martammcsandrade2002.tumblr.com','http://thepoisonus-winchester.tumblr.com','http://meantukin.tumblr.com','http://mariamoliveira.tumblr.com','http://buddhaspalm.tumblr.com','http://thewibbler.tumblr.com','http://chupacabrasmustdie.tumblr.com','http://grammomsblog.tumblr.com','http://moeatthemovies.tumblr.com','http://stillthedoctor.tumblr.com','http://oh-yeezus.tumblr.com','http://disney-barbie.tumblr.com','http://allbandary.tumblr.com','http://fuuhaizu.tumblr.com','http://healthy-marberry.tumblr.com','http://anadebecerra.tumblr.com','http://brazil-healthylife.tumblr.com','http://paradisefitness.tumblr.com','http://usasheeran.tumblr.com','http://suite-vi-samsaric-overture.tumblr.com','http://easier-way.tumblr.com','http://roxy8971.tumblr.com','http://1889-1945.tumblr.com','http://sasakidesign.tumblr.com','http://tolamvienkhoa.tumblr.com','http://masonasilvers.tumblr.com','http://gilagain69.tumblr.com','http://focalli-ze.tumblr.com','http://oatmeal-ape.tumblr.com','http://mauuxd.tumblr.com','http://sunbeeeam.tumblr.com','http://andreanathalia-hautecouture.tumblr.com','http://barhaun.tumblr.com','http://sad-sam.tumblr.com','http://neptunesxxlove.tumblr.com','http://instafashionistas.tumblr.com','http://jjubeiichigokn1me.tumblr.com','http://foreverdreamer33.tumblr.com','http://youngfitandhappy.tumblr.com','http://andrecaula.tumblr.com','http://dimakur.tumblr.com','http://sunlightcomescreepingin.tumblr.com','http://zayiflamaarkadasim.tumblr.com','http://vielartuzx.tumblr.com','http://sonyshock.tumblr.com','http://collecting-fallen-stars.tumblr.com','http://cerezobiyori.tumblr.com','http://nelsonreche.tumblr.com','http://bookslgw.tumblr.com','http://javier-perea.tumblr.com','http://decorarencasa.tumblr.com','http://creandopalabras.tumblr.com','http://violetap-rpura.tumblr.com','http://nefilimdesorientada.tumblr.com','http://soy-un-don-nadie.tumblr.com','http://someonelikeyous.tumblr.com','http://konyasaboy.tumblr.com','http://2552milesaway.tumblr.com','http://buscandoletras.tumblr.com','http://joseheinz.tumblr.com','http://everibodylies.tumblr.com','http://wilbertpinzon.tumblr.com','http://the-prinzessin-21-things.tumblr.com','http://solo-una-persona-mas-snaj.tumblr.com','http://welcometomylittletwistedworld.tumblr.com','http://cotamacarena.tumblr.com','http://sky--woman.tumblr.com','http://searching-for-a-wish.tumblr.com','http://nosinmibarba.tumblr.com','http://infiniterstolondon.tumblr.com','http://estupidotumbler.tumblr.com','http://russiathegreat.tumblr.com','http://xxxweigeexxx.tumblr.com','http://fsugatepost.tumblr.com','http://glamfades.tumblr.com','http://moegyoku.tumblr.com','http://kill4fornia.tumblr.com','http://fitghter.tumblr.com','http://erickerbyonline.tumblr.com','http://theflorencediaries.tumblr.com','http://sweetpea1970.tumblr.com','http://thewalkingoxymoron.tumblr.com','http://calliopesmuse.tumblr.com','http://symmetrycity.tumblr.com','http://jessiegogo.tumblr.com','http://karundercover.tumblr.com','http://crisspine.tumblr.com','http://kardashian-jenner-clan.tumblr.com','http://close-your-eyes-and-dreamx.tumblr.com','http://hogwarts-newgeneration.tumblr.com','http://zaynmeetsworld.tumblr.com','http://thedragonsire.tumblr.com','http://jeet84.tumblr.com','http://theoryandpraxi
s20.tumblr.com','http://sirius-black-padfoot.tumblr.com','http://havetorun.tumblr.com','http://lifeinthethirdworld.tumblr.com','http://brasileirosnotinder.tumblr.com','http://heldermota.tumblr.com','http://nighthawkok.tumblr.com','http://kris-ilda.tumblr.com','http://sassygetslean.tumblr.com','http://swaggerwassup.tumblr.com','http://lyricmpregcentral.tumblr.com','http://iwanttofitinmypants.tumblr.com','http://news.adamgnade.com','http://eye86.tumblr.com','http://bad-bitch-queen.tumblr.com','http://christiancarrera.com','http://liftheavyrunstrong.tumblr.com','http://iamyourhorrors.tumblr.com','http://www.junebabyproductions.com','http://wolfperson1.tumblr.com','http://ufavolkov.tumblr.com','http://gleamjust.tumblr.com','http://gymjargonfitness.tumblr.com','http://sagapogiapanta.tumblr.com','http://eatclean-behappy.tumblr.com','http://smylepritty.tumblr.com','http://aosterrr.tumblr.com','http://neonsighhs.tumblr.com','http://buffettinfo.tumblr.com','http://rapsodio.tumblr.com','http://cborders26.tumblr.com','http://alessmileever.tumblr.com','http://eliodora.tumblr.com','http://highhopesandcheekbones.tumblr.com','http://mutedcom.tumblr.com','http://thekicksgirl.tumblr.com','http://isikaaa.tumblr.com','http://tsvety-k.tumblr.com','http://workingitwithwendy.tumblr.com','http://gogogoldfish.tumblr.com','http://sp4ce-cat.tumblr.com','http://katesong20.tumblr.com','http://kriziaskloset.tumblr.com','http://remiee07.tumblr.com','http://mermaidsinmyass.tumblr.com','http://kaylankicksass.tumblr.com','http://emmasoderquist.tumblr.com','http://fujiberryfitness.tumblr.com','http://bieberdelcarter.tumblr.com','http://beautifully--imperfekt.tumblr.com','http://j-don-t-stop.tumblr.com','http://levfrommars.tumblr.com','http://chickenuqqet.tumblr.com','http://sprinklesontacos.tumblr.com','http://londontravelss.tumblr.com','http://meublant.tumblr.com','http://saundersfinearts.tumblr.com','http://mylifeunwasted.tumblr.com','http://lifeisuselesswithoutpizza.tumblr.com','http://rundrinkiowa.tumblr.com','http://finfinityd.tumblr.com','http://aboutuntappd.tumblr.com','http://stacheftl.tumblr.com','http://cheriserozexxx.tumblr.com','http://dragonwings1567.tumblr.com','http://stonebrewingco.tumblr.com','http://moda-venacava.tumblr.com','http://littleduck24.tumblr.com','http://randommomentsinhistory.tumblr.com','http://mimsolent.tumblr.com','http://rcelha.tumblr.com','http://troytwerner.tumblr.com','http://makeupbyhollywood.tumblr.com','http://pugsfromouterspace.com','http://brewstravelers365.tumblr.com','http://elf.achivy.com','http://corpseman.tumblr.com','http://tattooprinces.tumblr.com','http://dead-end-art.tumblr.com','http://theradioactiveangel.tumblr.com','http://tombllr.tumblr.com','http://dragonfire-dive.tumblr.com','http://la-gente-apesta.tumblr.com','http://kareesxxx.tumblr.com','http://thequeeninmyworld.tumblr.com','http://amp-a.tumblr.com','http://cokopagno.tumblr.com','http://stella-noelle.tumblr.com','http://translugia.tumblr.com','http://riri-taught-me.tumblr.com','http://sunserveyouth.tumblr.com','http://champeo.tumblr.com','http://what-ok.tumblr.com','http://universo-de-sentimentos.tumblr.com','http://officerstilinskihale.tumblr.com','http://kitttttttttttten.tumblr.com','http://chocolatesandwafflesandbeerohmy.tumblr.com','http://swanbeanies.tumblr.com','http://thesunandtheseasonslove.tumblr.com','http://awkwardworldbypaooliiciiouus.tumblr.com','http://speedcake33.tumblr.com','http://lovely-ladiez.tumblr.com','http://some-good-stuff.tumblr.com','http://wearasmileandshowrespect.tumblr.com','http://fall
intopassion.tumblr.com','http://mtmixer.tumblr.com','http://fjskinny.tumblr.com','http://divinefitness.tumblr.com','http://orandzinessvajones.tumblr.com','http://afraidofcupcakes.tumblr.com','http://crickycarella.tumblr.com','http://sonisykes.tumblr.com','http://naughteebits.tumblr.com','http://mistressnightingale1956.tumblr.com','http://leiladeibele.tumblr.com','http://hootersman98.tumblr.com','http://kalosservices.tumblr.com','http://baby-jlee.tumblr.com','http://weonlylive4ever.tumblr.com','http://cryingcausetateandviolet.tumblr.com','http://paucity-plumage.tumblr.com','http://satanwantsmysoul.tumblr.com','http://bulletmatt.tumblr.com','http://away-with-you-kid.tumblr.com','http://prettysimpletattoos.tumblr.com','http://robinandre.tumblr.com','http://svenjalicious.tumblr.com','http://anambersmile.tumblr.com','http://purpleinparis.tumblr.com','http://soeppen.tumblr.com','http://siamocomeunpuzzle.tumblr.com','http://tlistedblog.tumblr.com','http://thepinkdocmartens.tumblr.com','http://feiyueshoesaustralia.tumblr.com','http://cocaine-and-shitty-pills.tumblr.com','http://joana-d-arte.tumblr.com','http://lovepink-bieber.tumblr.com','http://missdayday15.tumblr.com','http://daintyyetdangerous.tumblr.com','http://forthe-loveof-life.tumblr.com','http://oknope.tumblr.com','http://kukmor.tumblr.com','http://mariseca.tumblr.com','http://petricopter.tumblr.com','http://ohmyjulievu.tumblr.com','http://darriel-love.tumblr.com','http://perfectimmelmannturn.tumblr.com','http://crazylove-stupidlove.tumblr.com','http://sonsofsound.tumblr.com','http://binkscloset.tumblr.com','http://thingsbylindsy.tumblr.com','http://thegreatpuskas.tumblr.com','http://bakanohealthy.tumblr.com','http://omgnashville.tumblr.com','http://w-ineandroses.tumblr.com','http://orbis-pictus.tumblr.com','http://perfectlymeblr.tumblr.com','http://vodopadom.tumblr.com','http://lasaronemesis.tumblr.com','http://ipest.tumblr.com','http://pbloverandrunner.tumblr.com','http://thestoryofasapphicmuse.tumblr.com','http://xyurax.tumblr.com','http://aantwerp.tumblr.com','http://pred0minantt.tumblr.com','http://kitsunehales.tumblr.com','http://averagespatula.tumblr.com','http://porno-live.tumblr.com','http://ninkesouplit.tumblr.com','http://fuck1ng-wh0re.tumblr.com','http://secret-01.tumblr.com','http://btrceee.tumblr.com','http://dollienews.tumblr.com','http://bbwasses.tumblr.com','http://sketchbook-everyday.tumblr.com','http://amandarlongmuir.tumblr.com','http://iamsweetgloss.tumblr.com','http://lellavictoria.tumblr.com','http://cozyafternoons.tumblr.com','http://etxrnalis.tumblr.com','http://blissislost.tumblr.com','http://muto-shizuka.tumblr.com','http://dragonjammydodger.tumblr.com','http://neu-li.tumblr.com','http://anesthezea.tumblr.com','http://tallmanphoto.tumblr.com','http://perpetualloves.tumblr.com','http://birdmasterco.tumblr.com','http://ladyvybz.tumblr.com','http://andrew-halverson.tumblr.com','http://juanryan916.tumblr.com','http://tommyzh91.tumblr.com','http://cleokim.tumblr.com','http://oohsheila.tumblr.com','http://averyaster.tumblr.com','http://speaking-gently.tumblr.com','http://motors-nsk.ru','http://sail0nsilvergirl.tumblr.com','http://ralphturchiano.tumblr.com','http://super-princess-of-nerdvana.tumblr.com','http://borealisincognita.tumblr.com','http://uhohalybeupinthisshytxp.tumblr.com','http://batangchadia.tumblr.com','http://randomlynoriel.tumblr.com','http://theliterateblonde.tumblr.com','http://belette-libertine.tumblr.com','http://coombsgurl.tumblr.com','http://yzelglen.tumblr.com','http://hipsterlibertarian.com','htt
p://gitabuga.tumblr.com','http://buzzofabee.tumblr.com','http://inkupthedarren.tumblr.com','http://buro247ru.tumblr.com','http://paulinepatootie.tumblr.com', 'http://shopghost.tumblr.com','http://littlemisskym.tumblr.com','http://chivalryneverdies.tumblr.com','http://clarifyingclarity.tumblr.com','http://runaway-fantasy.tumblr.com','http://wet-chrome.tumblr.com','http://chicgorgeous7.tumblr.com','http://bowtiesarefuckingc00l.tumblr.com','http://thaiibritto.tumblr.com','http://appleandtangerine.tumblr.com','http://paige-follows-the-sun.tumblr.com','http://eviedagger.tumblr.com','http://whatrachellikes.tumblr.com','http://alveenvaw.tumblr.com','http://duckshmoolife.tumblr.com','http://emmasdilemmasblog.tumblr.com','http://cashcratelifejourney.tumblr.com','http://punx-files.tumblr.com','http://vonstervdub.tumblr.com','http://rougeskinnies.tumblr.com','http://italia92.tumblr.com','http://iiihorrible.tumblr.com','http://iloveitwhenyoutalkdancetome.tumblr.com','http://chocoberries28.tumblr.com','http://derpycats.com','http://sarahs-adventures-in-baskerville.tumblr.com','http://quackyfuck.tumblr.com','http://fuckmeifyouloveme.tumblr.com','http://furefuwan.tumblr.com','http://sunshinee-dreams.tumblr.com','http://catleecious.tumblr.com','http://fistingofficial.tumblr.com','http://crissamore.tumblr.com','http://catasters.tumblr.com','http://leannaconley.tumblr.com','http://amylameee.tumblr.com','http://charlotteandbailey.tumblr.com','http://disnydreamr.tumblr.com','http://kriz-world.tumblr.com','http://89cats.tumblr.com','http://daemonium-ignis.tumblr.com','http://patpatthefatcat.tumblr.com','http://shasha-little-scorpio.tumblr.com','http://shavedgirlsonly.tumblr.com','http://prumpra.tumblr.com','http://ohmy-olivia.tumblr.com','http://katiewhy.tumblr.com','http://hamletitsnow.tumblr.com','http://letspiritsfly.tumblr.com','http://kevinp1992.tumblr.com','http://s9rah.tumblr.com','http://snowradio.tumblr.com','http://nathanteamoscar.tumblr.com','http://eroticphotographscollector.tumblr.com','http://beholdherecomesthedream.tumblr.com','http://the-fury-of-a-time-lord.tumblr.com','http://www.porngifs.pornonlock.com','http://hothardcoresexxx.tumblr.com','http://nielsenent.tumblr.com','http://graphic-digital.tumblr.com','http://wearesaviours.tumblr.com','http://jimmy3bigolin.tumblr.com','http://psycollective.tumblr.com','http://crazyshoppingdeals.tumblr.com','http://raychelanneee.tumblr.com','http://iamrickyhoover.tumblr.com','http://samprincesschester.tumblr.com','http://kittyandherlife.tumblr.com','http://wintersonnenengel.tumblr.com','http://lewinlight.tumblr.com','http://mystic-revelations.tumblr.com','http://charlieengelman.tumblr.com','http://krystalclara.tumblr.com','http://succulentbliss.tumblr.com','http://psychoeosreisdolixo.tumblr.com','http://hipriestess.tumblr.com','http://floralgreen-colorscheme.tumblr.com','http://ithinkyoulllikethissong.tumblr.com','http://profundocartel.tumblr.com','http://we-are-the-sickness.tumblr.com','http://justcreativelittleme.tumblr.com','http://inemjanuari.tumblr.com','http://soignefashion.tumblr.com','http://hatfullofwhy.tumblr.com']
    print "Fetching %s urls" % len(urls)
    with Timer() as t:
        data, num_failures, num_success = Scraper.scrape(urls, DummyScraperClass,
                                                         path_to_proxies, headers)
    print "Fetched %s urls in %.03f" % (len(urls), t.interval)
    print " %s failed, %s succeeded" % (num_failures, num_success)
# import codecs

SCRAPER_CACHE_DIR = '/home/bobbruno/workspace/BeerApp/dumps/'
accounts = [('Eo8xsuVxwG1V', '*****@*****.**', 'Eo8xsuVxwG1V01'),
            ('928vr9z45T7b', '*****@*****.**', '928vr9z45T7b01'),
            ('sYprtRIeu2GF', '*****@*****.**', 'sYprtRIeu2GF01')]
uaS = ['Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36',
       'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20120101 Firefox/29.0',
       'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36',
       'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1985.67 Safari/537.36']
beerScraper = Scraper(sleepTime=3, cacheDir=SCRAPER_CACHE_DIR, verbose=True, uaString=uaS[1])
beerParser = Parser(scraper=beerScraper, loc=SCRAPER_CACHE_DIR)
beerParser.limitCountries(set([u'England', u'Ireland', u'Other United Kingdom',
                               u'Belgium', u'Germany', u'United States']))
beerScraper.login(accounts[0][0], accounts[0][2],
                  'http://www.ratebeer.com/login.asp', 'signin',
                  'http://www.ratebeer.com')
beerData = beerParser.parseContinents('http://www.ratebeer.com/breweries/')
# Now let's get some beers from a brewery. From the brewery page I can get:
#   - Associated place: <span class="beerfoot"></span>;
#     nextSibling.nextSibling.firstChild: get href and text
#   - website: from previous last sibling, nextSibling.firstChild: get href and text
#   - facebook,
import time

print "Amazon Scraper: Where all your frugal dreams come true.\n"
url = "http://www.amazon.com/gp/product/"
db = MongoDBHelper.setupDB()
database = db.ItemCollection
cursor = database.items.find()
if cursor.count() > 0:
    for document in cursor:
        print "Updating price data for the day... please wait...\n"
        temp_product_code = document['product_code']
        updateScrape = Scraper.amazonScrape(url, temp_product_code)
        # If scraping succeeded, update data for the current item; else continue
        if updateScrape != -1:
            tempItem = Item(updateScrape['name'], updateScrape['price_history'],
                            temp_product_code)
            MongoDBHelper.updatePriceHistory(tempItem, database)
        time.sleep(60)
choice = 0
while choice != 4:
    print "1. Define new item\n"
    print "2. Check current items\n"
    print "3. Delete an item\n"
def main():
    scraper = Scraper("")
    ventries = scraper.getVirusEntries(20, 0)
    gentries = scraper.getGenomeEntries(60, 0)
    write_entries(ventries, "virus")
    write_entries(gentries, "genome")
from Scraper import Scraper

# This is a simple application that uses the Scraper class from scraper.py.
# It was used to gather all of the course information from ninjacourses.com
# and the UCSB official course catalog website.
# Keep in mind that a new instance of Scraper must be used every time you
# scrape information from a new page, except when using the getAllCourses()
# method, as that method takes care of the url attribute change for you.

deptsURL = "http://ninjacourses.com/explore/4/"
deptUrlStub = deptsURL + "department/"
deptScraper = Scraper(deptsURL)
courseScraper = Scraper(deptUrlStub + "ANTH")
# deptList = deptScraper.getDepts()
deptCodes = deptScraper.getDeptStubs()
# anthCourses = courseScraper.getCourses()
courseDict = deptScraper.getAllCourses(deptCodes)
# for item in deptList:
#     print item
# for item in deptCodes:
#     print item
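# A short sketch of the instance-per-page rule described in the comments
# above; the department code "MATH" is just an illustrative value.

# Reusing deptScraper for a department page would re-scrape the departments
# listing, because its url attribute never changed. Create a fresh instance
# per page instead:
mathScraper = Scraper(deptUrlStub + "MATH")
mathCourses = mathScraper.getCourses()

# getAllCourses() is the exception: it re-points the url internally, so one
# instance can walk every department page.
allCourses = deptScraper.getAllCourses(deptCodes)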