예제 #1
0
def match():
    """Assign SIMC entries to OSM places in three name-matching passes.

    OSM places that already carry a ``simc_place`` are treated as
    pre-assigned.  The remaining places go through ``match_names`` three
    times: pass 1 without a grid, pass 2 with ``Grid(assigned, 31, 31)`` and
    pass 3 with ``Grid(assigned, 43, 43)``, where ``assigned`` is everything
    assigned up to that point.  The places matched by the three passes are
    then passed through ``refine`` and merged with the pre-assigned ones.

    Returns:
        A set containing the pre-assigned OSM places plus the refined
        matches.
    """
    # Bind the reporter locally, the same way match_names() does, so this
    # function does not depend on a `reporting` name existing elsewhere.
    reporting = Reporting()

    # Split the OSM places once instead of scanning OSM_Place.all() twice.
    all_places = list(OSM_Place.all())
    preassigned = set(p for p in all_places if p.simc_place)
    places_to_match = set(p for p in all_places if not p.simc_place)
    reporting.output_msg("start", u"%i wstępnie (w danych OSM) przypisanych miejscowości" % (len(preassigned),))

    # `assigned` grows in place after every pass; each grid is built from
    # the assignments known at that moment.  match_names() removes the
    # places it handled from `places_to_match`.
    assigned = set(preassigned)

    osm_matched1, _ = match_names(1, places_to_match)
    assigned |= osm_matched1

    grid = Grid(assigned, 31, 31)
    osm_matched2, _ = match_names(2, places_to_match, grid)
    assigned |= osm_matched2

    grid = Grid(assigned, 43, 43)
    osm_matched3, _ = match_names(3, places_to_match, grid)
    assigned |= osm_matched3

    matched = osm_matched1 | osm_matched2 | osm_matched3
    matched = refine(matched, assigned)

    # Only the refined matches are kept on top of the pre-assigned places.
    return preassigned.union(matched)
예제 #2
0
def match_names(pass_no, places_to_match, grid=None):
    """Match OSM places against SIMC entries by name.

    For every place in ``places_to_match`` the SIMC entries with the same
    name are looked up and narrowed to those of the matching type that have
    no OSM place assigned yet.  When a ``grid`` is given and it knows a cell
    for the place, candidates are further narrowed by the cell's ``powiaty``
    and, if still ambiguous, by its ``gminy``.  A match is accepted only when
    exactly one SIMC candidate remains and no other OSM place of the same
    name competes for it.

    Args:
        pass_no: Number of the matching pass; used in progress and
            statistics messages and in the "ambigous<N>" report channel.
        places_to_match: Collection of OSM places still to be matched (must
            support ``remove``).  It is modified in place: matched places,
            and places whose name is not found in SIMC at all, are removed.
        grid: Optional object providing ``get_cell(place)`` (raising
            ``KeyError`` when the place has no cell).

    Returns:
        A tuple ``(osm_matched, simc_matched)`` of sets holding the OSM
        places and the SIMC places that were paired in this pass.
    """
    reporting = Reporting()
    places_count = len(places_to_match)
    if grid:
        reporting.progress_start(
                u"Dopasowywanie nazw %i miejsc, przebieg %i, z siatką %s"
                        % (places_count, pass_no, grid), places_count)
    else:
        reporting.progress_start(
                u"Dopasowywanie nazw %i miejsc, przebieg %i"
                        % (places_count, pass_no), places_count)

    ambiguous_channel = "ambigous%i" % (pass_no,)
    osm_matched = set()
    simc_matched = set()

    # Iterate over a snapshot: places_to_match is modified inside the loop.
    for osm_place in list(places_to_match):
        reporting.progress()
        if osm_place.name is None:
            reporting.output_msg("errors", u"%r: brak nazwy" % (osm_place,), osm_place)
            continue

        # Find matching entry in SIMC
        try:
            matching_simc_places = SIMC_Place.by_name(osm_place.name)
        except KeyError:
            reporting.output_msg("not_found", u"%s: nie znaleziono w TERYT"
                                                    % (osm_place,), osm_place)
            # No later pass can succeed for a name SIMC does not know.
            places_to_match.remove(osm_place)
            continue

        # Keep only unassigned SIMC entries of the right type.
        simc_places = [place for place in matching_simc_places
                                if place.type == osm_place.normalized_type
                                    and place.osm_place is None]
        if not simc_places:
            types_found = [place.type for place in matching_simc_places]
            reporting.output_msg("bad_type", u"%s: nie znalezionow w TERYT"
                        u" obiektu właściwego typu (%r, znaleziono: %r)" % (
                            osm_place, osm_place.type, types_found), osm_place)
            continue

        # A place the grid has no cell for is matched without cell filtering.
        cell = None
        if grid:
            try:
                cell = grid.get_cell(osm_place)
            except KeyError:
                pass
        if cell:
            simc_places = [p for p in simc_places if p.powiat in cell.powiaty]
            if len(simc_places) > 1:
                simc_places = [p for p in simc_places if p.gmina in cell.gminy]
            if not simc_places:
                reporting.output_msg("not_found",
                        u"%s: nie znaleziono w TERYT miejsca"
                        u" pasującego do komórki %s" % (osm_place, cell),
                        osm_place)
                continue

        if len(simc_places) > 1:
            # Mention the cell only when cell filtering actually took place;
            # with a grid but no cell the generic message is the accurate one.
            if cell:
                reporting.output_msg(ambiguous_channel,
                        u"%s z OSM pasuje do wielu obiektów"
                        u" SIMC w komórce %s: %s" % (osm_place, cell,
                            u", ".join([str(p) for p in simc_places])),
                                                                osm_place)
            else:
                reporting.output_msg(ambiguous_channel,
                        u"%s z OSM pasuje do wielu obiektów w SIMC: %s" % (osm_place,
                            u", ".join([str(p) for p in simc_places])), osm_place)
            continue
        simc_place = simc_places[0]

        # Now check that the reverse assignment is not ambiguous: collect
        # other OSM places of the same name that could claim this SIMC entry.
        matching_osm_places = OSM_Place.by_name(simc_place.name)
        confl_osm_places = []
        for place in matching_osm_places:
            if place is osm_place:
                continue
            if cell:
                # Only places from the very same cell can conflict.
                try:
                    g_cell = grid.get_cell(place)
                except KeyError:
                    g_cell = None
                if g_cell is not cell:
                    continue
            # A place explicitly located in a different administrative unit
            # is not a competitor.
            if place.gmina and place.gmina != simc_place.gmina:
                continue
            if place.powiat and place.powiat != simc_place.powiat:
                continue
            if place.wojewodztwo and place.wojewodztwo != simc_place.wojewodztwo:
                continue
            confl_osm_places.append(place)

        if confl_osm_places:
            reporting.output_msg(ambiguous_channel,
                        u"%s z SIMC pasuje do wielu obiektów w OMS: %s" % (simc_place,
                            ", ".join([str(p) for p in confl_osm_places])), osm_place)
            continue

        if simc_place.osm_place:
            # Safety net (the candidate filter above already requires
            # `osm_place is None`): report the conflict and never overwrite
            # an existing assignment.
            reporting.output_msg(ambiguous_channel,
                    u"%s z SIMC ma już przypisany obiekt OSM: %s" % (
                        simc_place, simc_place.osm_place), osm_place)
            continue

        # Good match: link both objects to each other.
        osm_place.assign_simc(simc_place)
        simc_place.assign_osm(osm_place)

        reporting.output_msg("match", u"%s w OSM to %s w SIMC" % (osm_place, simc_place), osm_place)
        osm_matched.add(osm_place)
        simc_matched.add(simc_place)
        places_to_match.remove(osm_place)

    reporting.progress_stop()
    reporting.output_msg("stats",
            u"Przebieg %i: znaleziono w SIMC %i z %i miejscowości OSM" % (
                                    pass_no, len(osm_matched), places_count))
    return osm_matched, simc_matched