def scrape(html, filter=r".*"):
    """Scrape the talk table in *html* and write one record per talk.

    The page is parsed with BeautifulSoup and the first ``<table>`` with
    class ``data`` is located.  Its first row is skipped; the remaining rows
    are read in pairs: the first row of a pair supplies the date and speaker
    cells, the second supplies the cell holding the talk text and its link.
    For every pair that passes the checks below, a ``TalkRecord`` with
    ``series`` set to ``"mit-amc"`` is filled in and ``rec.writes()`` is
    called.

    A pair is skipped when:
      * either row has fewer than two ``<td>`` cells,
      * ``rec.date()`` does not match *filter*, or
      * the extracted title is empty.

    Args:
        html: Markup of the page to scrape, passed to ``BeautifulSoup``.
        filter: Regular expression applied with ``re.match`` (i.e. anchored
            at the start) to ``rec.date()``; only matching talks are written.
            The default matches every date.

    Returns:
        None.  Results are emitted through ``TalkRecord.writes()``.
    """
    # NOTE(review): no parser is named here, so bs4 picks whichever is
    # installed; left unchanged to avoid altering how pages are parsed.
    soup = BeautifulSoup(html)
    tbl = soup.find('table', class_='data')
    if tbl is None:
        # find() returns None when the page has no such table; there is
        # nothing to scrape rather than an AttributeError to raise.
        return

    # Compile once instead of re-resolving the pattern for every row pair.
    date_pattern = re.compile(filter)

    trs = tbl.find_all('tr')
    # Row 0 is skipped; rows (1, 2), (3, 4), ... are consumed as pairs.  A
    # trailing unpaired row is ignored because zip stops at the shorter slice.
    for head_row, body_row in zip(trs[1::2], trs[2::2]):
        tds1 = head_row.find_all('td')  # Date and speaker
        tds2 = body_row.find_all('td')  # Blank and abstract
        if len(tds1) < 2 or len(tds2) < 2:
            # Malformed or filler rows: skip before doing any parsing work.
            continue

        rec = TalkRecord()
        rec.series = "mit-amc"
        rec.datetime = scrape_date(tds1[0])
        rec.speaker = tds1[1].get_text().encode("utf8")
        if not date_pattern.match(rec.date()):
            continue

        rec.title = pack_ws(tds2[1].get_text().encode("utf8"))
        if rec.title == '':
            # Untitled entries are never written, so decide that before
            # looking for a link that such rows may not have.
            continue

        link = tds2[1].find('a')
        href = link.get('href') if link is not None else None
        # Without a usable href, fall back to the bare base URL instead of
        # crashing or producing a URL that ends in the text "None".
        rec.url = "{0}{1}".format(URL, href) if href else URL
        rec.writes()