Ejemplo n.º 1
0
    def handle_label(self, filename, **options):
        """Import the areas described by one KML file into the new generation.

        Each feature's ``Name`` is looked up in the ExtendedData parsed from
        the same file; its ``ref`` value is the numeric area code.  Codes
        below 100 are stored with type ``NFY`` and no parent; all other codes
        are stored with type ``NKO`` and the area whose id is the first two
        digits of the zero-padded code as parent.  Oslo is stored twice: once
        as 3 (``NFY``) and once as 301 (``NKO``).

        Nothing is written to the database unless ``options['commit']`` is
        true.

        Raises:
            Exception: if there is no new generation, or if an existing area
                was last valid in a generation older than the current one.
        """
        current_generation = Generation.objects.current()
        new_generation = Generation.objects.new()
        if not new_generation:
            raise Exception("No new generation to be used for import!")

        print(filename)

        # Need to parse the KML manually to get the ExtendedData
        kml_data = KML()
        xml.sax.parse(filename, kml_data)

        code_type_osm = CodeType.objects.get(code='osm')
        code_type_n5000 = CodeType.objects.get(code='n5000')

        def update_or_create(feat, name, code, area_code, code_str, parent_area):
            """Fetch or build the Area with id ``code`` and, on commit, save it."""
            try:
                m = Area.objects.get(id=code)
            except Area.DoesNotExist:
                m = Area(
                    id=code,
                    name=name,
                    type=Type.objects.get(code=area_code),
                    country=Country.objects.get(code='O'),
                    parent_area=parent_area,
                    generation_low=new_generation,
                    generation_high=new_generation,
                )

            if m.generation_high and current_generation and m.generation_high.id < current_generation.id:
                raise Exception(
                    "Area %s found, but not in current generation %s" %
                    (m, current_generation))
            m.generation_high = new_generation

            g = feat.geom.transform(4326, clone=True)
            poly = [g]

            if not options['commit']:
                return

            m.save()
            for k, v in kml_data.data[name].items():
                if k in ('name:smi', 'name:fi'):
                    # 'name:smi' -> 'Nsmi', 'name:fi' -> 'Nfi'
                    lang = 'N' + k[5:]
                    m.names.update_or_create(
                        type=NameType.objects.get(code=lang),
                        defaults={'name': v})
            m.codes.update_or_create(type=code_type_n5000,
                                     defaults={'code': code_str})
            m.codes.update_or_create(
                type=code_type_osm,
                defaults={'code': int(kml_data.data[name]['osm'])})
            save_polygons({code: (m, poly)})

        ds = DataSource(filename)
        layer = ds[0]
        for feat in layer:
            name = feat['Name'].value
            if not isinstance(name, six.text_type):
                name = name.decode('utf-8')
            # Raw string: '\s' in a plain literal is an invalid escape sequence.
            name = re.sub(r'\s+', ' ', name)
            print("  %s" % smart_str(name))

            code = int(kml_data.data[name]['ref'])
            if code == 301:  # Oslo ref in OSM could be either 3 (fylke) or 301 (kommune). Make sure it's 3.
                code = 3
            if code < 100:  # Not particularly nice, but fine
                area_code = 'NFY'
                parent_area = None
                code_str = '%02d' % code
            else:
                area_code = 'NKO'
                code_str = '%04d' % code
                parent_area = Area.objects.get(id=int(code_str[0:2]))

            update_or_create(feat, name, code, area_code, code_str, parent_area)

            # Special case Oslo so it's in twice, once as fylke, once as kommune.
            # The parent lookup happens only after the fylke row has been handled.
            if code == 3:
                update_or_create(feat, name, 301, 'NKO', '0301',
                                 Area.objects.get(id=3))
    def handle_label(self, directory_name, **options):
        """Write a CSV comparing each new OSM boundary with the stored one.

        ``directory_name`` is scanned for sub-directories whose names are three
        characters from ``A-Z0-9``; every ``way-<id>-*.kml`` or
        ``relation-<id>-*.kml`` file inside them is parsed.  For each file one
        CSV row is written holding the area of the new geometry, the area of
        the geometry of the most recently created matching ``Code`` (if any),
        the area of their symmetric difference and the two GEOS equality
        tests.

        Raises:
            Exception: if ``directory_name`` is not a directory, a file name
                does not carry an OSM type and id, the KML does not contain
                exactly one useful name, or the KML does not contain exactly
                one layer with exactly one feature.
        """
        if not os.path.isdir(directory_name):
            raise Exception("'%s' is not a directory" % (directory_name, ))

        os.chdir(directory_name)
        skip_up_to = None
        # skip_up_to = 'relation-80370'

        skipping = bool(skip_up_to)

        # Filled for every KML file handled; nothing in this method reads it back.
        osm_elements_seen_in_new_data = set()

        # NOTE(review): the report path is hard-coded to one developer's home
        # directory - confirm whether it should become a command option.
        with open("/home/mark/difference-results.csv", 'w') as fp:
            csv_writer = csv.writer(fp)
            csv_writer.writerow([
                "ElementType", "ElementID", "ExistedPreviously",
                "PreviousEmpty", "PreviousArea", "NewEmpty", "NewArea",
                "SymmetricDifferenceArea", "GEOSEquals", "GEOSEqualsExact"
            ])

            for admin_directory in sorted(x for x in os.listdir('.')
                                          if os.path.isdir(x)):

                if not re.search('^[A-Z0-9]{3}$', admin_directory):
                    print("Skipping a directory that doesn't look like a MapIt type: %s" % admin_directory)
                    # Actually skip it; previously the message was printed
                    # and the directory was processed regardless.
                    continue

                for entry in sorted(os.listdir(admin_directory)):

                    if skipping:
                        if skip_up_to in entry:
                            skipping = False
                        else:
                            continue

                    if not entry.endswith('.kml'):
                        continue

                    name_match = re.search(r'^(way|relation)-(\d+)-', entry)
                    if not name_match:
                        raise Exception(u"Couldn't extract OSM element type and ID from: " + entry)

                    osm_type, osm_id = name_match.groups()

                    osm_elements_seen_in_new_data.add((osm_type, osm_id))

                    kml_filename = os.path.join(admin_directory, entry)

                    # Need to parse the KML manually to get the ExtendedData
                    kml_data = KML()
                    print("parsing %s" % kml_filename)
                    xml.sax.parse(kml_filename, kml_data)

                    useful_names = [
                        n for n in kml_data.data.keys()
                        if not n.startswith('Boundaries for')
                    ]
                    if len(useful_names) == 0:
                        raise Exception("No useful names found in KML data")
                    elif len(useful_names) > 1:
                        raise Exception("Multiple useful names found in KML data")
                    name = useful_names[0]
                    print("  %s" % name.encode('utf-8'))

                    if osm_type == 'relation':
                        code_type_osm = CodeType.objects.get(code='osm_rel')
                    elif osm_type == 'way':
                        code_type_osm = CodeType.objects.get(code='osm_way')
                    else:
                        # The old three-expression raise needed a traceback as
                        # its third operand and so failed with a TypeError.
                        raise Exception("Unknown OSM element type: " + osm_type)

                    ds = DataSource(kml_filename)
                    if len(ds) != 1:
                        raise Exception("We only expect one layer in a DataSource")

                    layer = ds[0]
                    if len(layer) != 1:
                        raise Exception("We only expect one feature in each layer")

                    feat = layer[0]

                    # Oldest first, so the last entry is the most recent one.
                    osm_codes = sorted(
                        Code.objects.filter(type=code_type_osm, code=osm_id),
                        key=lambda c: c.area.generation_high.created)

                    new_area = None
                    new_empty = None

                    previous_area = None
                    previous_empty = None

                    symmetric_difference_area = None

                    g = feat.geom.transform(4326, clone=True)

                    # A single polygon with fewer than four points marks the
                    # whole new geometry as empty.
                    if any(polygon.point_count < 4 for polygon in g):
                        new_empty = True
                    else:
                        new_geos_geometry = g.geos.simplify(tolerance=0)
                        new_area = new_geos_geometry.area
                        new_empty = new_geos_geometry.empty

                    geos_equals = None
                    geos_equals_exact = None

                    if osm_codes:
                        previous_geos_geometry = osm_codes[-1].area.polygons.collect()
                        previous_empty = previous_geos_geometry is None

                        if not previous_empty:
                            previous_geos_geometry = previous_geos_geometry.simplify(
                                tolerance=0)
                            previous_area = previous_geos_geometry.area

                            if not new_empty:
                                symmetric_difference_area = previous_geos_geometry.sym_difference(
                                    new_geos_geometry).area
                                geos_equals = previous_geos_geometry.equals(
                                    new_geos_geometry)
                                geos_equals_exact = previous_geos_geometry.equals_exact(
                                    new_geos_geometry)

                    csv_writer.writerow([
                        osm_type,
                        osm_id,
                        bool(osm_codes),  # ExistedPreviously
                        empty_if_none(previous_empty),
                        empty_if_none(previous_area),
                        empty_if_none(new_empty),
                        empty_if_none(new_area),
                        empty_if_none(symmetric_difference_area),
                        empty_if_none(geos_equals),
                        empty_if_none(geos_equals_exact)
                    ])
    def handle_label(self, directory_name, **options):
        """Import OSM boundary KML files, grouped by MapIt type, into the new generation.

        ``directory_name`` must contain sub-directories named with three
        characters from ``A-Z0-9`` (the MapIt type code).  Every
        ``way-<id>-*.kml`` / ``relation-<id>-*.kml`` file in them is loaded.
        If a ``Code`` with the same OSM type and id exists in the current
        generation and its stored geometry almost equals the new one, that
        area's ``generation_high`` is raised to the new generation; otherwise
        a new ``Area`` (and ``Code``) is created.  Files with no polygons,
        with any polygon of fewer than four points, or with invalid geometry
        that cannot be fixed are skipped.

        Nothing is saved unless ``options['commit']`` is true;
        ``options['verbosity']`` above 1 enables progress messages.

        Raises:
            Exception: if there is no new generation, ``directory_name`` is
                not a directory or holds no type directories, a file name
                lacks an OSM type and id, or the KML does not contain exactly
                one useful name and exactly one feature.
        """
        current_generation = Generation.objects.current()
        new_generation = Generation.objects.new()
        if not new_generation:
            raise Exception("No new generation to be used for import!")

        if not os.path.isdir(directory_name):
            raise Exception("'%s' is not a directory" % (directory_name,))

        os.chdir(directory_name)

        mapit_type_glob = smart_text("[A-Z0-9][A-Z0-9][A-Z0-9]")

        if not glob(mapit_type_glob):
            raise Exception(
                "'%s' did not contain any directories that look like MapIt types (e.g. O11, OWA, etc.)" % (
                    directory_name,))

        def verbose(s):
            """Print ``s`` only when the verbosity option is greater than 1."""
            if int(options['verbosity']) > 1:
                print(smart_str(s))

        verbose("Loading any admin boundaries from " + directory_name)

        verbose("Finding language codes...")

        # Maps both the two- and three-letter code of each row to its English name.
        language_code_to_name = {}
        code_keys = ('two_letter', 'three_letter')
        for row in get_iso639_2_table():
            english_name = getattr(row, 'english_name')
            for k in code_keys:
                code = getattr(row, k)
                if not code:
                    continue
                language_code_to_name[code] = english_name

        global_country = Country.objects.get(code='G')

        # print json.dumps(language_code_to_name, sort_keys=True, indent=4)

        skip_up_to = None
        # skip_up_to = 'relation-80370'

        skipping = bool(skip_up_to)

        for type_directory in sorted(glob(mapit_type_glob)):

            verbose("Loading type " + type_directory)

            if not os.path.exists(type_directory):
                verbose("Skipping the non-existent " + type_directory)
                continue

            verbose("Loading all KML in " + type_directory)

            files = sorted(os.listdir(type_directory))
            total_files = len(files)

            for i, e in enumerate(files):

                progress = "[%d%% complete] " % ((i * 100) / total_files,)

                if skipping:
                    if skip_up_to in e:
                        skipping = False
                    else:
                        continue

                if not e.endswith('.kml'):
                    verbose("Ignoring non-KML file: " + e)
                    continue

                filename_match = re.search(r'^(way|relation)-(\d+)-', e)
                if not filename_match:
                    raise Exception("Couldn't extract OSM element type and ID from: " + e)

                osm_type, osm_id = filename_match.groups()

                kml_filename = os.path.join(type_directory, e)

                verbose(progress + "Loading " + os.path.realpath(kml_filename))

                # Need to parse the KML manually to get the ExtendedData
                kml_data = KML()
                xml.sax.parse(smart_str(kml_filename), kml_data)

                useful_names = [n for n in kml_data.data.keys() if not n.startswith('Boundaries for')]
                if len(useful_names) == 0:
                    raise Exception("No useful names found in KML data")
                elif len(useful_names) > 1:
                    raise Exception("Multiple useful names found in KML data")
                name = useful_names[0]
                print(smart_str("  %s" % name))

                if osm_type == 'relation':
                    code_type_osm = CodeType.objects.get(code='osm_rel')
                elif osm_type == 'way':
                    code_type_osm = CodeType.objects.get(code='osm_way')
                else:
                    raise Exception("Unknown OSM element type: " + osm_type)

                ds = DataSource(kml_filename)
                layer = ds[0]
                if len(layer) != 1:
                    raise Exception("We only expect one feature in each layer")

                # The layer holds exactly one feature, so it lives at index 0
                # (index 1 is always out of range here).
                feat = layer[0]

                g = feat.geom.transform(4326, clone=True)

                if g.geom_count == 0:
                    # Just ignore any KML files that have no polygons in them:
                    verbose('    Ignoring that file - it contained no polygons')
                    continue

                # Nowadays, in generating the data we should have
                # excluded any "polygons" with less than four points
                # (the final one being the same as the first), but
                # just in case:
                polygons_too_small = 0
                for polygon in g:
                    if polygon.num_points < 4:
                        polygons_too_small += 1
                if polygons_too_small:
                    message = "%d out of %d polygon(s) were too small" % (polygons_too_small, g.geom_count)
                    verbose('    Skipping, since ' + message)
                    continue

                g_geos = g.geos

                if not g_geos.valid:
                    verbose("    Invalid KML:" + kml_filename)
                    fixed_multipolygon = fix_invalid_geos_multipolygon(g_geos)
                    if len(fixed_multipolygon) == 0:
                        verbose("    Invalid polygons couldn't be fixed")
                        continue
                    g = fixed_multipolygon.ogr

                area_type = Type.objects.get(code=type_directory)

                try:
                    osm_code = Code.objects.get(type=code_type_osm,
                                                code=osm_id,
                                                area__generation_high__lte=current_generation,
                                                area__generation_high__gte=current_generation)
                except Code.DoesNotExist:
                    verbose('    No area existed in the current generation with that OSM element type and ID')
                    osm_code = None

                was_the_same_in_current = False

                if osm_code:
                    m = osm_code.area

                    # First, we need to check if the polygons are
                    # still the same as in the previous generation:
                    previous_geos_geometry = m.polygons.aggregate(Collect('polygon'))['polygon__collect']
                    if previous_geos_geometry is None:
                        verbose('    In the current generation, that area was empty - skipping')
                    else:
                        # Simplify it to make sure the polygons are valid.
                        # bytes() is the same as str() on Python 2 and yields
                        # the raw WKB on Python 3, where str() of a buffer
                        # object would give its repr instead.
                        previous_geos_geometry = shapely.wkb.loads(
                            bytes(previous_geos_geometry.simplify(tolerance=0).ewkb))
                        new_geos_geometry = shapely.wkb.loads(bytes(g.geos.simplify(tolerance=0).ewkb))
                        if previous_geos_geometry.almost_equals(new_geos_geometry, decimal=7):
                            was_the_same_in_current = True
                        else:
                            verbose('    In the current generation, the boundary was different')

                if was_the_same_in_current:
                    # Extend the high generation to the new one:
                    verbose('    The boundary was identical in the previous generation; raising generation_high')
                    m.generation_high = new_generation

                else:
                    # Otherwise, create a completely new area:
                    m = Area(
                        name=name,
                        type=area_type,
                        country=global_country,
                        parent_area=None,
                        generation_low=new_generation,
                        generation_high=new_generation,
                    )

                poly = [g]

                if options['commit']:
                    m.save()
                    verbose('    Area ID: ' + str(m.id))

                    if name not in kml_data.data:
                        print(json.dumps(kml_data.data, sort_keys=True, indent=4))
                        raise Exception("Will fail to find '%s' in the dictionary" % (name,))

                    # Language codes already stored for this area; whatever is
                    # left after the loop below is no longer in the KML.
                    old_lang_codes = set(n.type.code for n in m.names.all())

                    for k, translated_name in kml_data.data[name].items():
                        language_name = None
                        if k == 'name':
                            lang = 'default'
                            language_name = "OSM Default"
                        else:
                            name_match = re.search(r'^name:(.+)$', k)
                            if name_match:
                                lang = name_match.group(1)
                                if lang in language_code_to_name:
                                    language_name = language_code_to_name[lang]
                        if not language_name:
                            continue
                        old_lang_codes.discard(lang)

                        # Otherwise, make sure that a NameType for this language exists:
                        NameType.objects.update_or_create(code=lang, defaults={'description': language_name})
                        name_type = NameType.objects.get(code=lang)

                        m.names.update_or_create(type=name_type, defaults={'name': translated_name})

                    if old_lang_codes:
                        verbose('Removing deleted languages codes: ' + ' '.join(old_lang_codes))
                    m.names.filter(type__code__in=old_lang_codes).delete()
                    # If the boundary was the same, the old Code
                    # object will still be pointing to the same Area,
                    # which just had its generation_high incremented.
                    # In every other case, there's a new area object,
                    # so create a new Code and save it:
                    if not was_the_same_in_current:
                        new_code = Code(area=m, type=code_type_osm, code=osm_id)
                        new_code.save()
                    save_polygons({'dummy': (m, poly)})
Ejemplo n.º 4
0
    def handle_label(self, directory_name, **options):
        """Import OSM boundary KML files, grouped by admin level, into the new generation.

        ``directory_name`` must contain directories named ``al02`` .. ``al11``;
        every ``way-<id>-*.kml`` / ``relation-<id>-*.kml`` file in them is
        loaded.  The area attached to an existing ``Code`` with the same OSM
        type and id is reused, otherwise a new ``Area`` of type ``O<level>``
        is built.  Features containing a polygon with fewer than four points
        are skipped.

        Nothing is saved unless ``options['commit']`` is true;
        ``options['verbose']`` enables progress messages.

        Raises:
            Exception: if there is no new generation, ``directory_name`` is
                not a directory or holds no admin-level directories, a file
                name lacks an OSM type and id, the KML does not contain
                exactly one useful name and one feature, or an existing area
                was last valid in a generation older than the current one.
        """
        current_generation = Generation.objects.current()
        new_generation = Generation.objects.new()
        if not new_generation:
            raise Exception("No new generation to be used for import!")

        if not os.path.isdir(directory_name):
            raise Exception("'%s' is not a directory" % (directory_name, ))

        os.chdir(directory_name)

        if not glob("al[0-1][0-9]"):
            raise Exception(
                "'%s' did not contain any admin level directories (e.g. al02, al03, etc.)" % (
                    directory_name, ))

        def verbose(s):
            """Print ``s`` (UTF-8 encoded) only when the verbose option is set."""
            if options['verbose']:
                print(s.encode('utf-8'))

        verbose("Loading any admin boundaries from " + directory_name)

        verbose("Finding language codes...")

        language_code_to_name = {}
        code_keys = ('two_letter', 'three_letter')
        for row in get_iso639_2_table():
            english_name = getattr(row, 'english_name')
            for k in code_keys:
                code = getattr(row, k)
                if not code:
                    continue
                # Some of the language codes have a bibliographic or
                # terminology code, so strip those out:
                codes = re.findall(r'(\w+) \([BT]\)', code)
                if not codes:
                    codes = [code]
                for c in codes:
                    language_code_to_name[c] = english_name

        # print json.dumps(language_code_to_name, sort_keys=True, indent=4)

        skip_up_to = None
        # skip_up_to = 'relation-80370'

        skipping = bool(skip_up_to)

        for admin_level in range(2, 12):

            verbose("Loading admin_level " + str(admin_level))

            admin_directory = "al%02d" % (admin_level)

            if not os.path.exists(admin_directory):
                verbose("Skipping the non-existent " + admin_directory)
                continue

            verbose("Loading all KML in " + admin_directory)

            files = sorted(os.listdir(admin_directory))
            total_files = len(files)

            for i, e in enumerate(files):

                progress = "[%d%% complete] " % ((i * 100) / total_files, )

                if skipping:
                    if skip_up_to in e:
                        skipping = False
                    else:
                        continue

                if not e.endswith('.kml'):
                    verbose("Ignoring non-KML file: " + e)
                    continue

                filename_match = re.search(r'^(way|relation)-(\d+)-', e)
                if not filename_match:
                    raise Exception(u"Couldn't extract OSM element type and ID from: " + e)

                osm_type, osm_id = filename_match.groups()

                kml_filename = os.path.join(admin_directory, e)

                verbose(progress + "Loading " +
                        unicode(os.path.realpath(kml_filename), 'utf-8'))

                # Need to parse the KML manually to get the ExtendedData
                kml_data = KML()
                xml.sax.parse(kml_filename, kml_data)

                useful_names = [
                    n for n in kml_data.data.keys()
                    if not n.startswith('Boundaries for')
                ]
                if len(useful_names) == 0:
                    raise Exception("No useful names found in KML data")
                elif len(useful_names) > 1:
                    raise Exception("Multiple useful names found in KML data")
                name = useful_names[0]
                print("  %s" % name.encode('utf-8'))

                if osm_type == 'relation':
                    code_type_osm = CodeType.objects.get(code='osm_rel')
                elif osm_type == 'way':
                    code_type_osm = CodeType.objects.get(code='osm_way')
                else:
                    # The old three-expression raise needed a traceback as
                    # its third operand and so failed with a TypeError.
                    raise Exception("Unknown OSM element type: " + osm_type)

                ds = DataSource(kml_filename)
                layer = ds[0]
                if len(layer) != 1:
                    raise Exception("We only expect one feature in each layer")
                for feat in layer:

                    area_code = 'O%02d' % (admin_level)

                    # FIXME: perhaps we could try to find parent areas
                    # via inclusion in higher admin levels
                    parent_area = None

                    try:
                        osm_code = Code.objects.get(type=code_type_osm,
                                                    code=osm_id)
                    except Code.DoesNotExist:
                        osm_code = None

                    if osm_code:
                        area = osm_code.area
                    else:
                        area = Area(
                            name=name,
                            type=Type.objects.get(code=area_code),
                            country=Country.objects.get(code='G'),
                            parent_area=parent_area,
                            generation_low=new_generation,
                            generation_high=new_generation,
                        )

                    if (area.generation_high and current_generation and
                            area.generation_high.id < current_generation.id):
                        raise Exception(
                            "Area %s found, but not in current generation %s" % (
                                area, current_generation))
                    area.generation_high = new_generation

                    g = feat.geom.transform(4326, clone=True)

                    # In generating the data we should have
                    # excluded any "polygons" with less than four
                    # points (the final one being the same as the
                    # first), but just in case.  The test is made on each
                    # polygon, not on the point count of the whole geometry.
                    if any(polygon.num_points < 4 for polygon in g):
                        continue

                    poly = [g]

                    if not options['commit']:
                        continue

                    area.save()

                    if name not in kml_data.data:
                        print(json.dumps(kml_data.data,
                                         sort_keys=True,
                                         indent=4))
                        raise Exception(
                            u"Will fail to find '%s' in the dictionary" % (
                                name, ))

                    for k, v in kml_data.data[name].items():
                        language_name = None
                        if k == 'name':
                            lang = 'default'
                            language_name = "OSM Default"
                        else:
                            name_match = re.search(r'^name:(.+)$', k)
                            if name_match:
                                lang = name_match.group(1)
                                if lang in language_code_to_name:
                                    language_name = language_code_to_name[
                                        lang]
                        if not language_name:
                            continue
                        # Otherwise, make sure that a NameType for this language exists:
                        NameType.objects.update_or_create(
                            {'code': lang}, {
                                'code': lang,
                                'description': language_name
                            })
                        name_type = NameType.objects.get(code=lang)
                        area.names.update_or_create({'type': name_type},
                                                    {'name': v})
                    area.codes.update_or_create({'type': code_type_osm},
                                                {'code': osm_id})
                    # NOTE(review): `code` is whatever the language-table loop
                    # above left behind, not anything related to this area -
                    # confirm that save_polygons ignores the dictionary key.
                    save_polygons({code: (area, poly)})