def main():
    """Find which of the 20 most populous cities are named in briefings.

    Loads the city records from ``CITIES_FILE``, ranks them by their
    ``population`` field (largest first) and keeps the ``city`` names of
    the top 20.  Each record read from ``BRIEFINGS_FILE`` then has the
    text of its ``.pane-node-field-forall-body`` element scanned for a
    plain substring match on each of those names; every hit is printed
    as it is found.

    Returns:
        The ``(key, count)`` items of ``OMD.counts()`` for the collected
        matches, sorted by count in descending order.
    """
    with open(CITIES_FILE) as cities_file:
        cities_json = json.load(cities_file)

    # The ranking function must be passed as ``key=``: the second
    # positional argument of ``sorted`` is ``cmp`` on Python 2, which
    # would treat the population as a comparison result.
    pop_cities = sorted(cities_json,
                        key=lambda o: int(o['population']),
                        reverse=True)
    cities = [o['city'] for o in pop_cities[:20]]

    res = defaultdict(list)
    with open(BRIEFINGS_FILE) as briefings_file:
        for obj in JSONLIterator(briefings_file):
            title = obj['title']
            content_tree = soupparser.fromstring(obj['content'])
            pane_tree = content_tree.cssselect('.pane-node-field-forall-body')
            if not pane_tree:
                # No body pane in this briefing, so there is nothing to
                # scan; skip it instead of failing on the [0] lookup.
                continue
            briefing_text = pane_tree[0].text_content()
            for city in cities:
                if city in briefing_text:
                    res[city].append(title)
                    print('found %r in %r' % (city, title))

    omd = OMD()
    for city, titles in res.items():
        omd.addlist(city, titles)
    top_items = sorted(omd.counts().items(),
                       key=lambda x: x[1],
                       reverse=True)

    # NOTE(review): the breakpoint is kept on purpose -- it is how the
    # results have been inspected so far.  Remove it before running this
    # non-interactively.
    import pdb
    pdb.set_trace()
    return top_items
def test_setdefault():
    """Exercise ``OMD.setdefault`` and ``popall`` after an empty ``addlist``."""
    multi = OMD()
    marker = []

    # The object handed in as the default is the one handed back.
    stored = multi.setdefault('1', marker)
    assert stored is marker

    # With no default given, the call yields None.
    implicit = multi.setdefault('2')
    assert implicit is None

    # A key that is already present keeps its original value.
    assert multi.setdefault('1', None) is marker

    # After addlist with an empty list, popall falls back to the supplied
    # default and the container reports zero length.
    blank = OMD()
    blank.addlist(1, [])
    assert blank.popall(1, None) is None
    assert len(blank) == 0
def test_addlist():
    """Exercise ``OMD.addlist`` with populated and empty value lists."""
    filled = OMD()
    filled.addlist('a', [1, 2, 3])
    filled.addlist('b', [4, 5])

    # Keys come back in insertion order; the multi view has all 5 pairs.
    assert filled.keys() == ['a', 'b']
    all_pairs = list(filled.iteritems(multi=True))
    assert len(all_pairs) == 5

    # An empty value list contributes neither a key nor any pairs.
    blank = OMD()
    blank.addlist('a', [])
    assert blank.keys() == []
    no_pairs = list(blank.iteritems(multi=True))
    assert len(no_pairs) == 0