def test_refreshShowIndex():
    """Refresh a small window of recent shows and check the processed count.

    Fetches the live index, backs off `window` shows from the newest one,
    and expects refreshShowIndex to report exactly that many processed.
    """
    index = ShowScrape.parseShowIndexPage(ShowScrape.fetchPage(ShowScrape.INDEX_URL))
    assert len(index["shows"]) > 0
    window = 3
    stop_at = int(index["shows"][0]["number"]) - window
    processed = ShowScrape.refreshShowIndex(until=stop_at)
    assert processed == window
def test_refreshShowIndexIdempotent():
    """Refresh shows down to #650, then verify a second refresh adds nothing.

    NOTE(review): this function was originally also named
    test_refreshShowIndex, which redefined (and silently disabled) the
    earlier test of the same name in this module; renamed so both run.
    """
    show_cnt = ShowBusiness.getShowCount()
    newnum = ShowScrape.refreshShowIndex(650)
    assert ShowBusiness.getShowCount() == newnum + show_cnt
    ## do it twice and make sure it doesn't repeat
    ShowScrape.refreshShowIndex(600)
    assert ShowBusiness.getShowCount() == newnum + show_cnt
def test_fetchPage():
    """For now just check that it actually gets a webpage."""
    page = ShowScrape.fetchPage(ShowScrape.BASE_URL)
    assert len(page) > 0
    # equivalent to page.find("DOCTYPE") > -1
    assert "DOCTYPE" in page
def test_parseTrackJson():
    """Validate the structure of the track JSON extracted from the page fixture."""
    # Open the fixture read-only (was "r+", which needlessly demanded write
    # access) and close the handle deterministically.
    with open(PAGE_HTML, "r") as f:
        meta = ShowScrape.extractTrackJson(f.read())
    assert type(meta) == DictType
    assert meta.has_key("trackextra")
    assert meta.has_key("tracks")
    assert meta.has_key("favorited")
    assert type(meta["trackextra"]) == ListType
    # NOTE(review): loop starts at 1 — presumably trackextra[0] has a
    # different shape; confirm before including index 0.
    for i in range(1, len(meta["trackextra"])):
        assert type(meta["trackextra"][i]) == ListType
        assert len(meta["trackextra"][i]) > 0
        assert meta["trackextra"][i][0].has_key("a")
        assert meta["trackextra"][i][0].has_key("t")
    assert type(meta["tracks"]) == DictType
    assert meta["tracks"].has_key("tracks")
    assert meta["tracks"].has_key("showdate")
    assert meta["tracks"].has_key("mstotal")
    assert meta["tracks"].has_key("shownum")
    assert type(meta["tracks"]["tracks"]) == ListType
    for tr in meta["tracks"]["tracks"]:
        assert type(tr) == DictType
        assert tr.has_key("mspos")
    # one favorited flag per track
    assert type(meta["favorited"]) == ListType
    assert len(meta["favorited"]) == len(meta["tracks"]["tracks"])
def test_extractShowNumFromIndexNode():
    """Show number extracted from an index node matches the fixture and is numeric."""
    node = BeautifulSoup(INDEX_SHOW_ENTRY["text"])
    num = ShowScrape._extractShowNumFromIndexNode(node)
    assert num == INDEX_SHOW_ENTRY["number"]
    # Narrowed from a bare `except:` so unrelated errors (KeyboardInterrupt,
    # NameError, ...) no longer masquerade as "not numeric".
    try:
        int(num)
        works = True
    except (TypeError, ValueError):
        works = False
    assert works == True
def test_scrapeAndStore():
    """Parse a show-page fixture, store it, and verify the counts update."""
    # Use a context manager so the fixture handle is closed (was leaked).
    with open(SHOW_PAGE, "r") as f:
        show_info = ShowScrape.parseShowPage(f.read())
    assert len(show_info["tracks"]) > 0
    sh_cnt = ShowBusiness.getShowCount()
    # return value was bound to an unused local `show`; dropped
    ShowBusiness.addShow(**show_info)
    assert ShowBusiness.getShowCount() == sh_cnt + 1
    assert ShowBusiness.getShow(show_info["number"]) != None
    assert ShowBusiness.getShowTrackCount(show_info["number"]) > 0
def test_parseShowPage():
    """Validate the structure and field types of a parsed show page fixture."""
    # Context manager closes the fixture handle (was leaked).
    with open(PAGE_HTML, "r") as f:
        meta = ShowScrape.parseShowPage(f.read())
    assert type(meta) == DictType
    ## test the structure
    attrs = (
        "number", "date", "desc", "tracks", "mstotal", "credits",
        "nextshow_url", "prevshow_url", "audio_url", "page_url",
        "img_url", "name",
    )
    for attr in attrs:
        assert meta.has_key(attr)
    assert len(meta["desc"]) > 0
    assert len(meta["credits"]) > 0
    assert type(meta["date"]) == type(date.today())
    assert type(meta["tracks"]) == ListType
    assert len(meta["tracks"]) > 0
    assert type(meta["number"]) == IntType
    assert type(meta["mstotal"]) == IntType
    for track in meta["tracks"]:
        assert type(track) == DictType
        assert track.has_key("start_mspos")
        assert track.has_key("artist")
        assert track.has_key("title")
def test_parseIndexPage():
    """Validate the structure of the parsed show-index page fixture."""
    # Context manager closes the fixture handle (was leaked).
    with open(INDEX_HTML, "r") as f:
        meta = ShowScrape.parseShowIndexPage(f.read())
    assert type(meta) == DictType
    assert meta.has_key("shows")
    assert meta.has_key("newest")
    assert meta.has_key("oldest")
    ## data integrity: every show number lies between oldest and newest
    assert type(meta["shows"]) == ListType
    assert len(meta["shows"]) > 0
    newest = int(meta["newest"]["number"])
    oldest = int(meta["oldest"]["number"])
    # single pass replaces two separate loops over the same list
    for s in meta["shows"]:
        assert oldest <= int(s["number"]) <= newest
    ## individual show attrs
    str_attrs = ("name", "href", "number")
    for show in meta["shows"]:
        for attr in str_attrs:
            assert show.has_key(attr)
            if type(show[attr]) == StringType:
                assert len(show[attr]) > 0
def update_shows():
    """Trigger a full index refresh and report how many shows were queued."""
    scheduled = ShowScrape.refreshShowIndex()
    return "%s shows scheduled for update" % (scheduled,)
def update_one_show(show_url):
    """Scrape the show page at show_url and add it to the store.

    Returns an empty string as the (empty) response body.
    """
    ShowScrape.addShowFromPage(show_url)
    return ''
def getIndex():
    """Return the string form of the current shows list."""
    # Removed the unreachable `return "done"` that followed this return.
    return str(ShowScrape.getShowsList())
def test_extractNameFromIndexNode():
    """Name extracted from an index node equals the fixture name (as unicode)."""
    entry_node = BeautifulSoup(INDEX_SHOW_ENTRY["text"])
    extracted = ShowScrape._extractNameFromIndexNode(entry_node)
    expected = unicode(INDEX_SHOW_ENTRY["name"])
    assert extracted == expected
def test_scrapeAndStoreUnicode(): show_info = ShowScrape.parseShowPage(urlopen(SHOW_PAGE_UNICODE).read())