def test_in_the_news(self):
    # Two raw clip lines are parsed and appended to a single Dataset; the
    # dataset must then hold two rows, exactly one of which is returned by
    # the 'in-the-news' tag filter.
    # NOTE(review): presumably the " - NPRI in the News" suffix on the second
    # line is what makes Mention tag that row -- confirm against Mention.
    from tablib import Dataset, Databook

    clip_lines = (
        "http://www.lvbusinesspress.com/articles/2011/11/28/news/iq_49033368.txt",
        "http://www.lasvegasgleaner.com/las_vegas_gleaner/2011/12/unwitting-local-tools-of-corporate-overlords-hire-a-lawyer.html - NPRI in the News",
    )

    data = Dataset()
    for clip_line in clip_lines:
        Mention(clip_line).append(data)

    self.assertEqual(len(data), 2)
    tagged_rows = data.filter(['in-the-news'])
    self.assertEqual(len(tagged_rows), 1)
from core import HEADERS
from core.parsers.mention import Mention

# Script body: read one mention per line from INPUT, collect the parsed
# mentions into a tablib Dataset, and write an XLS workbook with two sheets:
# the full dataset and the subset filtered on the "in-the-news" tag.

# The default output file name is prefixed with today's date (YYYYMMDD).
date_prefix = datetime.date.today().strftime("%Y%m%d")

parser = ArgumentParser()
# type=open makes argparse open INPUT itself, so args.input is a file object.
parser.add_argument("input", metavar="INPUT", type=open)
parser.add_argument("-o", "--output", default="%s-newsclips.xls" % date_prefix)
args = parser.parse_args()

logging.basicConfig(level=logging.DEBUG)
log = logging.getLogger("newsclips.main")

data = tablib.Dataset(headers=HEADERS)

# argparse opened the input file but will not close it; the `with` block
# releases the handle as soon as every line has been consumed.
with args.input:
    for line in args.input:
        line = line.strip()
        if not line:
            # Blank lines carry no mention; skip them.
            continue
        mention = Mention(line)
        mention.append(data)

# First sheet: every mention. Second sheet: only rows tagged "in-the-news".
book = tablib.Databook((data, data.filter(["in-the-news"])))

# book.xls is written to a file opened in binary mode.
with open(args.output, "wb") as fp:
    fp.write(book.xls)