Esempio n. 1
0
def main():
    """Report which of the 20 most populous cities appear in each briefing.

    Loads the city records from ``CITIES_FILE``, ranks them by their integer
    ``population`` (largest first) and keeps the ``city`` names of the first
    20.  Each record yielded from ``BRIEFINGS_FILE`` is parsed as HTML, the
    text of its first ``.pane-node-field-forall-body`` element is extracted,
    and every city name occurring in that text is printed and recorded
    against the briefing's title.  The per-city title lists are then loaded
    into an ``OMD`` and ranked by count, and a debugger session is opened so
    the results can be inspected.
    """
    with open(CITIES_FILE) as cities_file:
        cities_json = json.load(cities_file)

    # The ranking function must be passed as ``key=``: a callable given
    # positionally is interpreted as a two-argument ``cmp`` function on
    # Python 2 (and is rejected on Python 3), so returning a population from
    # it does not sort by population.
    pop_cities = sorted(cities_json,
                        key=lambda o: int(o['population']),
                        reverse=True)
    cities = [o['city'] for o in pop_cities[:20]]

    res = defaultdict(list)

    # Keep the briefings file open only for as long as it is being iterated.
    with open(BRIEFINGS_FILE) as briefings_file:
        for obj in JSONLIterator(briefings_file):
            title = obj['title']
            briefing_html = obj['content']

            content_tree = soupparser.fromstring(briefing_html)
            pane_tree = content_tree.cssselect('.pane-node-field-forall-body')
            briefing_text = pane_tree[0].text_content()

            for city in cities:
                # Plain substring test against the extracted body text.
                if city in briefing_text:
                    res[city].append(title)
                    # Single formatted string: identical output to the old
                    # multi-argument print statement, and valid on both
                    # Python 2 and Python 3.
                    print('found %r in %r' % (city, title))

    omd = OMD()
    for k in res:
        omd.addlist(k, res[k])
    top_items = sorted(omd.counts().items(), key=lambda x: x[1], reverse=True)

    # Deliberate stop: ``top_items`` and ``res`` are meant to be examined
    # interactively from the debugger prompt.
    import pdb
    pdb.set_trace()
Esempio n. 2
0
def test_setdefault():
    """Check ``OMD.setdefault`` return values and popping an empty addlist.

    ``setdefault`` on a missing key must hand back the very default object
    supplied (or ``None`` when no default is given), and on an existing key
    must hand back the stored value rather than the new default.  Separately,
    after ``addlist`` with an empty list, ``popall`` with a ``None`` default
    must return ``None`` and the mapping must have length zero.
    """
    multi = OMD()
    sentinel = []

    first = multi.setdefault('1', sentinel)
    assert first is sentinel

    second = multi.setdefault('2')
    assert second is None

    # The key already exists, so the stored object wins over the new default.
    assert multi.setdefault('1', None) is sentinel

    empty_multi = OMD()
    empty_multi.addlist(1, [])
    assert empty_multi.popall(1, None) is None
    assert len(empty_multi) == 0
Esempio n. 3
0
def test_addlist():
    """Check ``OMD.addlist`` with populated and empty value lists.

    Adding three values under ``'a'`` and two under ``'b'`` must give the
    keys ``['a', 'b']`` and five multi-items in total.  Adding an empty list
    under a key must leave the mapping with no keys and no multi-items.
    """
    filled = OMD()
    filled.addlist('a', [1, 2, 3])
    filled.addlist('b', [4, 5])

    assert filled.keys() == ['a', 'b']
    filled_pairs = list(filled.iteritems(multi=True))
    assert len(filled_pairs) == 5

    blank = OMD()
    blank.addlist('a', [])

    assert blank.keys() == []
    blank_pairs = list(blank.iteritems(multi=True))
    assert len(blank_pairs) == 0