예제 #1
0
    def test_string_column_labelling(self):
        """Feed every item of the sample table to ``GeoTagger.string_column``.

        Smoke test: there are no assertions, so it passes as long as parsing
        ``testdata/AdressenJHB.csv`` and labelling do not raise.

        NOTE(review): the tagger is built with ``'localhost', 27017`` --
        presumably it needs a reachable MongoDB there; confirm before
        running in CI.
        """
        tagger = GeoTagger('localhost', 27017)

        yacp = YACParser(filename='testdata/AdressenJHB.csv', sample_size=1800)
        tables = parseDataTables(yacp)
        first_table = tables[0]
        # The enumerate() index was never used, so iterate directly. Items
        # come from columnIter() (presumably columns, hence the name).
        for column in first_table.columnIter():
            tagger.string_column(column)
예제 #2
0
    def test_csv(self):
        """Label the sample table's items against a set of geonames regions.

        Steps:
          1. Query ``geostore.geonames`` for admin-level-6 entries under the
             fixed parent/country URIs, then for each of those the
             admin-level-8 entries whose ``parent`` is that region id.
          2. Parse ``testdata/AdressenJHB.csv`` and pass every item yielded
             by ``columnIter()`` of the first table, together with the
             collected region ids, to ``OSMTagger.label_values``.

        Smoke test: there are no assertions; it passes when nothing raises.
        Requires a MongoDB instance on ``localhost:27017``.
        """
        country = "http://sws.geonames.org/2782113/"
        client = MongoClient('localhost', 27017)
        try:
            tagger = OSMTagger(client)
            geonames = client.geostore.geonames

            level6 = geonames.find({'admin_level': 6, 'parent': "http://sws.geonames.org/2769848/", "country": country})
            # Materialise the ids first so the outer cursor is fully consumed
            # before the per-region queries are issued.
            level6_ids = [get_geonames_id(doc['_id']) for doc in level6]

            regions = []
            for region_id in level6_ids:
                # Each level-6 region is followed directly by its level-8 children.
                regions.append(region_id)
                level8 = geonames.find({'admin_level': 8, 'parent': region_id,
                                        "country": country})
                regions.extend(get_geonames_id(doc['_id']) for doc in level8)

            yacp = YACParser(filename='testdata/AdressenJHB.csv', sample_size=1800)
            tables = parseDataTables(yacp)
            # The enumerate() index was unused; iterate the items directly.
            for column in tables[0].columnIter():
                tagger.label_values(column, regions)
        finally:
            # Release the connection even if a query or the tagger raises.
            client.close()
예제 #3
0
def csvclean_service(url):
    """Construct a ``YACParser`` for *url* and return it.

    The parser is created with a fixed ``sample_size`` of 100; the
    resulting parser object is handed back to the caller unchanged.
    """
    return YACParser(url=url, sample_size=100)