Ejemplo n.º 1
0
def start(args):
    """Entry point: publish-only mode, or export every dataset found at args.path."""
    if args.justPublish:
        # publish-only: no PostGIS connection is needed
        Hamelin(vars(args), None)
        return

    connection = ConnectionPool()

    if not connection.isAvailable():
        print("error conectando a PostGIS", Logs.ERROR)
        return

    path = args.path
    if not os.path.exists(path):
        # silently do nothing for a missing path (matches prior behavior)
        return

    if not os.path.isdir(path):
        # single file: export it directly
        Hamelin(vars(args), connection).export()
        connection.closeAll()
        return

    # Directory: export each subdirectory found by os.walk. The top-level
    # directory itself is skipped when it has subdirectories (its contents
    # are reached through the children).
    topLevelPending = True
    for root, dirnames, _filenames in os.walk(path):
        if topLevelPending and dirnames:
            topLevelPending = False
            continue

        perDirArguments = dict(vars(args), path=root)
        Hamelin(perDirArguments, connection).export()
        connection.closeAll()
Ejemplo n.º 2
0
def extract( path, ckanID ):
    """Unzip *path* into a per-resource directory and list the files to process.

    Returns the list of candidate files on success, or None when the archive
    could not be extracted.
    """
    targetDir = "unzipped_" + ckanID

    # KMZ archives do not use the "conjunto_de_datos" layout, so no
    # directory filter is applied for them
    directoryFilter = None if "kmz" in path else "conjunto_de_datos"

    print("Extrayendo en " + targetDir, Logs.INFO )

    if not utils.extractZipFile(path, targetDir):
        return None

    return utils.listAllFiles(targetDir, acceptedFiles, directoryFilter)
Ejemplo n.º 3
0
def process(file,
            datasetName,
            outputDir,
            rawData=None,
            removeInvalidProperties=False):
    """Convert a KML resource into a SQL script via geojsonHandler.writeSQLScript.

    file: path to the KML resource.
    datasetName: table/script name used for the generated SQL.
    outputDir: directory where the script is written.
    rawData: optional pre-read KML text; read from *file* when None.
    removeInvalidProperties: when True, strip attributes from <Document ...>
        tags before parsing (retry mode used after a parse failure).
    """
    # read the file only when the caller did not supply the raw text
    rawData = rawData if rawData is not None else utils.getEncodedFileContent(
        file)[0]

    kml2geojson.main.GEOTYPES = ["Polygon", "LineString", "Point"]

    try:
        data = rawData

        # remove all Document properties (attributes on <Document ...> tags)
        # before parsing, only in retry mode
        if removeInvalidProperties:
            data = re.compile("<Documen[^>]*>").sub("<Document>", data)

        geojson = kml2geojson.main.build_feature_collection(
            md.parseString(data))

        for element in geojson["features"]:
            # extract data from nested tables: some KML sources embed the real
            # attributes as an HTML <table> inside a single property value
            properties = {}
            for prop in element["properties"]:
                value = str(element["properties"][prop])

                if "<table" in value:
                    page = htmlParser.document_fromstring(value)

                    # each two-cell <tr> is treated as a (name, value) pair
                    for row in page.xpath("body/table")[0].findall("tr"):
                        childs = row.findall("td")
                        if len(childs) == 2:
                            variableName = utils.normalizeText(childs[0].text)
                            properties[utils.normalizeText(
                                variableName)] = utils.getValidTextValue(
                                    childs[1].text)
                else:
                    # styleUrl is KML presentation metadata, not data
                    if (prop != "styleUrl"):
                        properties[utils.normalizeText(
                            prop)] = utils.getValidTextValue(value)
            element["properties"] = properties

        sql = geojsonHandler.writeSQLScript(file,
                                            datasetName,
                                            outputDir,
                                            geojsonObject=geojson)
    except xml.parsers.expat.ExpatError as err:
        # parse failed: retry once with <Document> attributes stripped;
        # if the retry also failed, log and give up
        if removeInvalidProperties:
            print("no se pudo parsear el archivo", Logs.ERROR,
                  {"error": str(err)})
        else:
            process(file,
                    datasetName,
                    outputDir,
                    rawData=rawData,
                    removeInvalidProperties=True)
Ejemplo n.º 4
0
    def getConnection(self):
        """Borrow a connection from the pool, retrying once after a pool reset.

        On PoolError every active connection is closed and a second attempt is
        made. The returned connection has autocommit enabled.
        """
        try:
            connection = self.pool.getconn()
        except PoolError:
            # pool exhausted or broken: reset it and retry a single time
            print("error obteniendo una conexión a PostGIS, reintentando...",
                  Logs.ERROR)
            self.pool.closeall()
            connection = self.pool.getconn()

        connection.autocommit = True
        return connection
Ejemplo n.º 5
0
    def printSQLScript(self, outputDir):
        """Generate the SQL script for this file via its type-specific processor.

        Looks up the processor in processRouter by file type and runs it.
        Returns True when a script was produced, False otherwise (unknown
        type, no file created, or a logged processing error).
        """
        table = self.file["tableName"]
        fileType = self.file["type"]

        if fileType not in processRouter:
            return False

        try:
            processRouter[fileType](self.file["path"], table, outputDir)
            return True
        except TypeError:
            # the processor created no file; treat it as a soft failure
            pass
        except Exception as e:
            print("Error procesando archivo", Logs.ERROR, {"error": str(e)})

        return False
Ejemplo n.º 6
0
def getEncodedFileContent(filename):
    """Read *filename* as text, trying each encoding in ENCODERS_LIST in order.

    Returns a (content, encoding) tuple for the first encoding that reads the
    whole file successfully, or None when every attempt fails (each failure
    is logged).
    """
    for encoder in ENCODERS_LIST:
        try:
            print("Leyendo archivo. encoding=" + str(encoder), Logs.INFO)

            # `with` guarantees the handle is closed even when decoding
            # raises mid-read (the original leaked the handle on failure)
            with open(filename, encoding=encoder) as contentFile:
                return contentFile.read(), encoder
        except Exception as e:
            print("Error leyendo archivo", Logs.ERROR, {"error": str(e)})

    # no encoding worked: fall through to None, as before
    return None
Ejemplo n.º 7
0
def extractZipFile(path, unzippedDir):
    """Extract the zip archive at *path* into *unzippedDir*.

    Returns True on success; False when the archive is invalid or extraction
    fails for any other reason (both cases are logged).
    """
    try:
        # context manager closes the archive even if extractall raises
        # (the original leaked the handle on extraction failure)
        with zipfile.ZipFile(path, "r") as zip_ref:
            zip_ref.extractall(unzippedDir)

        return True
    except BadZipFile as badZipException:
        print("archivo invalido", Logs.ERROR, {"error": str(badZipException)})
    except Exception as err:
        print("no se pudo extraer el contenido", Logs.ERROR, {
            "type": type(err),
            "message": str(err)
        })

    return False
Ejemplo n.º 8
0
def listAllFiles(dir, acceptedFiles=None, acceptedDirectory=None):
    """Recursively collect file paths under *dir* matching the optional filters.

    acceptedFiles: iterable of file types to keep; None keeps every type.
    acceptedDirectory: substring the full path must contain; None keeps all.
    Files whose type cannot be determined are logged and skipped.
    """
    matches = []

    for currentDir, _dirs, names in os.walk(dir):
        for name in names:
            try:
                _, fileType = getFileNameAndType(name)
            except Exception:
                print("No se pudo identificar el tipo de archivo", Logs.ERROR,
                      {"path": name})
                continue

            if acceptedFiles is not None and fileType not in acceptedFiles:
                continue

            fullPath = os.path.join(currentDir, name)
            if acceptedDirectory is None or acceptedDirectory in fullPath:
                matches.append(fullPath)

    return matches
Ejemplo n.º 9
0
def process(file, datasetName, outputDir, jsonObject=None):
    """Turn a (geo)JSON resource into a SQL script via geojson.writeSQLScript.

    A plain JSON array of rows is converted into a FeatureCollection of Point
    features, taking coordinates from the first columns whose names match
    LATITUDE_KEY / LONGITUDE_KEY. Data that already has "features" is passed
    through unchanged. Returns early (None) when no geographic columns exist.
    """
    if jsonObject is None:
        data = json.loads(utils.getEncodedFileContent(file)[0])
    else:
        data = jsonObject

    if "features" in data:
        # already a geojson FeatureCollection
        collection = data
    else:
        collection = {"type": "FeatureCollection", "features": []}

        latColumn = None
        lonColumn = None

        # locate the geographic columns by name in the first row
        for column in list(data[0].keys()):
            if re.match(LATITUDE_KEY, column, re.IGNORECASE) is not None:
                latColumn = column
            elif re.match(LONGITUDE_KEY, column, re.IGNORECASE) is not None:
                lonColumn = column

        if latColumn is None or lonColumn is None:
            print("No se encontro información geografica", Logs.ERROR)
            return

        # one Point feature per row; coordinates are popped out so the
        # remaining columns become the feature properties
        for row in data:
            coordinates = [row.pop(lonColumn, "0"), row.pop(latColumn, "0")]
            collection["features"].append({
                "type": "Feature",
                "geometry": {
                    "type": "Point",
                    "coordinates": coordinates
                },
                "properties": row
            })

    geojson.writeSQLScript(file,
                           datasetName,
                           outputDir,
                           geojsonObject=collection)
Ejemplo n.º 10
0
    def readFeatureData(self):
        """Populate this object's metadata, locally or from the DGM/CKAN APIs.

        With self.featureName set, the metadata comes from the FEATURE_LABEL
        env var and no remote calls are made. Otherwise the DGM resources API
        is queried with self.ckanID, then the CKAN package API with the
        resulting package id.

        Returns self.isValid() after a successful remote lookup, False when
        DGM has no results, and None in featureName mode or when the lookup
        raised (the error is logged).
        """
        if self.featureName:
            self.name = os.getenv("FEATURE_LABEL")
            self.ckanID = ""
            self.package_id = ""
            self.description = ""
            self.category = ""
        else:
            # get DGM information
            try:
                with urlopen( DGM_RESOURCES_API + "?id={}".format( self.ckanID ) ) as dgmResponse:
                    dgmResponse = json.loads(dgmResponse.read().decode("utf-8"))
                    if dgmResponse["pagination"]["total"] == 0:
                        return False

                    # only the first matching resource is used
                    dgmResponse = dgmResponse["results"][0]

                    self.package_id = dgmResponse["package-id"]

                    # get CKAN information
                    with urlopen( CKAN_PACKAGE_API + "?id={}".format( self.package_id ) ) as ckanResponse:
                        ckanResponse = json.loads(ckanResponse.read().decode("utf-8"))
                        ckanResponse = ckanResponse["result"]

                        # name/description come from DGM; organization, theme
                        # and tags come from CKAN
                        self.name = dgmResponse["name"] if "name" in dgmResponse else ""
                        self.description = dgmResponse["description"] if "description" in dgmResponse else ""

                        if "organization" in ckanResponse:
                            self.organization = ckanResponse["organization"]

                        # the category is stored as the "theme" extra in CKAN
                        for property in ckanResponse["extras"]:
                            if property["key"] == "theme":
                                self.category=property["value"]

                        if "tags" in ckanResponse:
                            # keep only active tags for display; the raw tag
                            # list is preserved separately
                            for tag in ckanResponse["tags"]:
                                if tag["state"] == "active":
                                    self.tags.append(tag["display_name"])

                            self.tagsRaw = ckanResponse["tags"]

                        return self.isValid()
            except Exception as e:
                print("no se pudieron obtener los metadatos", Logs.ERROR, {"error": str(e)})
Ejemplo n.º 11
0
    def __init__(self):
        """Create the PostGIS connection pool.

        Pool size is read from POSTGRES_MAX_CONNECTIONS (default 10). On any
        failure the error is logged with the connection target and the pool
        attribute is left unset.
        """
        try:
            min_connections = 1
            # getenv's default argument replaces the original None-check;
            # a non-numeric value still raises and is caught below
            max_connections = int(os.getenv("POSTGRES_MAX_CONNECTIONS", "10"))

            self.pool = SimpleConnectionPool(min_connections,
                                             max_connections,
                                             dbname=POSTGRES_DBNAME,
                                             user=POSTGRES_USER,
                                             host=POSTGRES_HOST,
                                             password=POSTGRES_PASSWORD,
                                             port=POSTGRES_PORT)
        except Exception as e:
            print(
                str(e), Logs.ERROR, {
                    "postgresql":
                    "{hostname}:{port}/{dbname}".format(hostname=POSTGRES_HOST,
                                                        port=POSTGRES_PORT,
                                                        dbname=POSTGRES_DBNAME)
                })
Ejemplo n.º 12
0
def process(file, datasetName, outputDir):
    """Convert an ESRI shapefile into a SQL script via geojson.writeSQLScript.

    file: path to the .shp file (a sibling .prj must exist to get the SRID).
    datasetName: table/script name for the generated SQL.
    outputDir: directory where the script is written.

    Encodings from ENCODERS_LIST are tried in order until the shapefile can
    be read; decode failures are logged and the next encoding is tried.
    """
    projFileName = file[:-4] + ".prj"

    if not os.path.exists(projFileName):
        # NOTE(review): shapefiles without a .prj are skipped entirely —
        # confirm whether they should be processed with a default SRID instead
        print("no se encontro el archivo .prj")
        return

    # shp projection comes from the .prj sibling file
    srid = getShpProjection(projFileName)

    for encoder in ENCODERS_LIST:
        try:
            print("Leyendo archivo", {"encoding": encoder})
            reader = shapefile.Reader(file, encoding=encoder)
            fields = reader.fields[1:]
            field_names = [field[0] for field in fields]

            # one geojson Feature per shape record
            features = []
            for shapeRecord in reader.shapeRecords():
                attributes = dict(zip(field_names, shapeRecord.record))
                features.append(
                    dict(type="Feature",
                         geometry=shapeRecord.shape.__geo_interface__,
                         properties=attributes))

            geojson.writeSQLScript(file,
                                   datasetName,
                                   outputDir,
                                   geojsonObject={
                                       "type": "FeatureCollection",
                                       "features": features
                                   },
                                   srid=srid)

            return
        except UnicodeDecodeError as unicodeError:
            print("no se pudo leer el archivo", Logs.ERROR,
                  {"error": str(unicodeError)})
Ejemplo n.º 13
0
    def runScript(self, scriptDir):
        """Execute every .sql script found under *scriptDir* against PostGIS.

        Each script's queries are separated by utils.QUERY_DELIMITER and run
        one by one; a failing query is logged and skipped rather than aborting
        the script.

        Returns (insertCounter, geometryType): the number of INSERT statements
        executed and the geometry type parsed from the first
        "SELECT AddGeometryColumn" query (None when no such query ran).
        """
        insertCounter = 0
        geometryType = None

        conn = self.getConnection()
        for root, dirnames, filenames in os.walk(scriptDir):
            for filename in filenames:
                if filename[-4:] == '.sql':
                    scriptPath = root + "/" + filename

                    print("Ejecutando " + scriptPath, Logs.INFO)

                    with open(scriptPath, "r") as script:
                        # create db cursor
                        cursor = conn.cursor()

                        for query in script.read().split(
                                utils.QUERY_DELIMITER):
                            if query == "":
                                continue
                            try:
                                cursor.execute(query)

                                if query.startswith("INSERT INTO"):
                                    insertCounter += 1
                                elif geometryType is None and query.startswith(
                                        "SELECT AddGeometryColumn"):
                                    # geometry type = second-to-last argument
                                    # of AddGeometryColumn, with its quotes
                                    # sliced off
                                    geometryType = query.split(",")[-2][1:-1]

                            except Exception as e:
                                # log the failing query and keep going
                                print(str(e), Logs.ERROR, {"query": query})

                        cursor.close()

                        print("ok", Logs.INFO,
                              {"inserted_features": str(insertCounter)})

        self.closeConnection(conn)

        return insertCounter, geometryType
Ejemplo n.º 14
0
def publishLayer(file):
    """Publish the PostGIS table described by *file* as a geoserver featureType.

    file: config dict with "tableName" and "metadata" (exposes .name).

    Builds a minimal featureType XML payload and POSTs it to the geoserver
    REST API. Returns True on success, False on HTTP or connection errors
    (both are logged with the request details).
    """
    metadata = file["metadata"]

    # payload: name = table name; title = cleaned metadata name, falling back
    # to the table name when it cleans to an empty string
    layerXml = "<featureType>"
    layerXml += "<name>{}</name>".format(file["tableName"])
    layerName = cleanString(metadata.name)
    layerXml += "<title>{}</title>".format(
        layerName if layerName != "" else file["tableName"])
    # layerXml += "<abstract>{}</abstract>".format( cleanString(metadata.description) )
    # tags
    # layerXml += "<keywords>"
    # for tag in metadata.tags:
    #     layerXml += "<string>{}</string>".format(tag)
    # layerXml += "</keywords>"
    layerXml += "</featureType>"

    url = "{geoserver_rest}/workspaces/{workspace}/datastores/{datastore}/featuretypes".format(
        workspace=GEOSERVER_WORKSPACE,
        datastore=GEOSERVER_DATASTORE,
        geoserver_rest=GEOSERVER_REST)

    headers = {'Content-Type': 'text/xml'}
    auth = (GEOSERVER_USER, GEOSERVER_PASSWORD)

    try:
        response = requests.post(url,
                                 headers=headers,
                                 auth=auth,
                                 data=layerXml)

        # raise exception with HTTP error code
        response.raise_for_status()

        print("tabla publicada correctamente en geoserver", Logs.INFO,
              {"geoserver_response": response.text})
        return True
    except HTTPError as httpError:
        # NOTE(review): assumes HTTPError is only raised by raise_for_status
        # (so `response` is bound here) — confirm requests.post cannot raise it
        print(
            "Error publicando en geoserver", Logs.ERROR, {
                "error":
                str(httpError),
                "type":
                type(httpError),
                "url":
                url,
                "documento":
                layerXml,
                "respuesta":
                "{code} | {response}".format(code=str(response.status_code),
                                             response=response.text)
            })
    except Exception as err:
        print(
            "Error conectando a geoserver", Logs.ERROR, {
                "error": str(err),
                "type": type(err),
                "url": url,
                "documento": layerXml
            })

    return False
Ejemplo n.º 15
0
        "Admite recursos que no cuenten con un valor de categoria en los metadatos"
    )
    parser.add_argument(
        "--allowDescriptionNone",
        action="store_true",
        help=
        "Admite recursos que no cuenten con un valor de descripción en los metadatos"
    )
    parser.add_argument(
        "--cleanCollection",
        action="store_true",
        help="Vacia la colección de mongo antes de iniciar el procesamiento")
    parser.add_argument(
        "--featureName",
        help=
        "Especifica un nombre para actualizar el API y geoserver sin consultar metadatos en DGM",
        metavar="name")
    parser.add_argument(
        "--justPublish",
        action="store_true",
        help=
        "Publica en el API la ruta especificada como id de geoserver, para especificar un label utiliza la opción --featureName"
    )

    args = parser.parse_args()

    start_time = time.time()
    start(args)
    print("--- Tiempo de ejecución: %s minutos ---" %
          ((time.time() - start_time) / 60))
Ejemplo n.º 16
0
def writeSQLScript(file, datasetName, outputDir, geojsonObject=None, srid=GEOMETRY_COLUMN_SRID):
    """Write a SQL script (CREATE TABLE, geometry columns, INSERTs) for a geojson resource.

    file: source file path (read and parsed when geojsonObject is None).
    datasetName: table name and script file name.
    outputDir: directory where "<datasetName>.sql" is written.
    geojsonObject: optional pre-parsed FeatureCollection.
    srid: projection id (string) used for geometry columns and ST_SetSRID.

    Fix vs. the original: the script file was rewritten from scratch inside
    the per-feature loop (once per feature); it is now written a single time
    after all features have been processed.
    """
    scriptFileName = "{outputDir}/{script}.sql".format(outputDir=outputDir, script=datasetName)

    data = geojsonObject if geojsonObject is not None else json.loads( utils.getEncodedFileContent(file)[0] )

    print("Escribiendo script en " + scriptFileName, Logs.INFO)

    geometrySQL = []
    insertSQL = []

    # table columns are inferred from the first feature's properties
    columns = []
    validColumns = []
    columnsType = {}
    # trailing comma is kept intentionally — presumably the sqlInsert template
    # appends the geometry column right after it; confirm against the template
    columnString = ""

    # numeric-looking catalog keys that must be stored as text
    textColumns = ["cve_ent", "cve_mun", "cve_loc", "cvegeo"]
    firstProperties = data["features"][0]["properties"]
    for column in firstProperties:
        validColumnName = utils.normalizeText(column)

        columns.append(column)
        validColumns.append(validColumnName)
        columnsType[validColumnName] = "text" if validColumnName in textColumns else utils.getObjType(firstProperties[column])
        columnString += validColumnName + ","

    sqlColumns = "gid serial PRIMARY KEY"
    for header in validColumns:
        sqlColumns += ",{column} {column_type}".format(column=header, column_type=columnsType[header])

    createTableSQL = sqlCreateTable.format(datasetName=datasetName, columns=sqlColumns)

    geometryColumns = []
    columnsCreated = {}
    for element in data["features"]:
        geometryType = element["geometry"]["type"]
        properties = element["properties"]

        # explode GeometryCollections into one pseudo-feature per geometry;
        # the collection's properties are still taken from the parent element
        featuresToProcess = []
        if geometryType == "GeometryCollection":
            for geometry in element["geometry"]["geometries"]:
                featuresToProcess.append({
                    "geometry": geometry
                })
        else:
            featuresToProcess.append(element)

        for feature in featuresToProcess:
            geometryType = feature["geometry"]["type"]

            # normalize coordinates to 2D (drop any extra components)
            if geometryType.lower() == "point":
                feature["geometry"]["coordinates"] = feature["geometry"]["coordinates"][0:2]

            elif geometryType.lower() == "polygon":
                coordinates = [[]]
                for batchCoordinates in feature["geometry"]["coordinates"]:
                    # flatten one extra nesting level when present
                    if isinstance(batchCoordinates[0][0], list):
                        batchCoordinates = batchCoordinates[0]

                    for index in range(0, len(batchCoordinates)):
                        coordinates[0].append(batchCoordinates[index][0:2])

                feature["geometry"]["coordinates"] = coordinates

            elif geometryType.lower() == "linestring":
                for index in range(0, len(feature["geometry"]["coordinates"])):
                    feature["geometry"]["coordinates"][index] = feature["geometry"]["coordinates"][index][0:2]

            # one geometry column (and its AddGeometryColumn query) per
            # geometry type encountered
            if not geometryType in columnsCreated:
                query, columnName = createGeometryColumn(datasetName, geometryType.upper(), "_{}".format(geometryType.lower()), srid=srid)
                geometryColumns.append(columnName)

                columnsCreated[geometryType] = True

                geometrySQL.append(query)

            # property values, in the same order as the table columns;
            # missing or None values become empty strings
            values = []
            for index in range(0, len(columns)):
                try:
                    value = "" if properties[ columns[index] ] is None else utils.getValidTextValue(properties[ columns[index] ])
                except:
                    value = ""

                values.append( utils.getValidSQLValue(value, columnsType[validColumns[index]]) )

            values.append("ST_SetSRID(ST_GeomFromGeoJSON('" + json.dumps(feature["geometry"]) + "')," + srid + ")")

            sql = sqlInsert.format(dataset=datasetName, columns=columnString, values=",".join(values), geometry_column=GEOMETRY_COLUMN_NAME, suffix="_"+geometryType.lower())
            insertSQL.append(sql)

    # WRITE SQL FILE — once, after all features are processed
    with open( scriptFileName , "w" ) as sqlScript:
        # create table
        sqlScript.write( createTableSQL + utils.QUERY_DELIMITER )

        # create geometries
        for query in geometrySQL:
            sqlScript.write( query + utils.QUERY_DELIMITER )

        # insert features
        for query in insertSQL:
            sqlScript.write( query + utils.QUERY_DELIMITER )
Ejemplo n.º 17
0
    def __init__(self, arguments, postgisConnection):
        """Set up the export job from parsed CLI arguments.

        arguments: dict of CLI options (vars(args)).
        postgisConnection: ConnectionPool, or None in justPublish mode.

        In justPublish mode the given path is registered directly as a
        geoserver layer in the API and no files are queued. Otherwise the
        file list to export is built from arguments["path"] (a single file,
        or a directory scanned recursively for supported/zipped files).
        """
        self.postgisConnection = postgisConnection
        self.arguments = arguments

        if arguments["justPublish"]:
            # register the path as an existing geoserver layer; the display
            # name falls back to a prettified version of the path
            newLayer = {
                "geoserver": arguments["path"],
                "name_resource": arguments["featureName"] if arguments["featureName"] else arguments["path"].replace("_"," ").capitalize()
            }

            if MongoApiCollection().addLayer( None, newLayer ):
                print("API actualizada", Logs.INFO)
            else:
                print("no se pudo actualizar el API", Logs.ERROR)

        else:
            self.fileList = []

            path = arguments["path"]

            if os.path.exists( path ):
                if os.path.isdir( path ):
                    print("procesando directorio", Logs.INFO)

                    # queue every supported plain or zipped file under path
                    for file in utils.listAllFiles( path, acceptedFiles + zippedFiles ):
                        self.fileList.append( self.buildConfigObject(file) )

                else:
                    self.fileList.append( self.buildConfigObject(path) )
            else:
                print("archivo no encontrado", Logs.ERROR)
                # NOTE(review): self.counter is not initialized in this
                # method — presumably a class attribute; confirm before
                # relying on this branch
                self.counter["error_archivo"] += 1

            # clean the mongo API collection before processing, when requested
            if arguments["cleanCollection"]:
                print("limpiando colección", Logs.INFO)
                deleted = MongoApiCollection().cleanCollection()
                print("documentos eliminados", Logs.INFO, {"documentos": deleted.raw_result})
Ejemplo n.º 18
0
    def processFile(self, file):
        """Generate SQL for one file, load it into PostGIS and publish the layer.

        file: config dict (path, tableName, type, metadata, ...). Mutates
        file["geometryType"] with the geometry type detected while running
        the script, and updates self.counter buckets with the outcome.
        The temporary work directory is always removed at the end.
        """
        # NOTE(review): with forceUpdates=True and skipUpdates=False this is
        # still True and an existing layer is skipped — an "and" between the
        # two negations may have been intended; confirm.
        if geoserver.featureExists( file["tableName"] ) and (not self.arguments["forceUpdates"] or not self.arguments["skipUpdates"] ):
            print("la capa ya existe en geoserver", Logs.ERROR)
            self.counter["procesados_anteriormente"] += 1
            return

        caster = PostgisCaster( file )
        workDirectory = file["tableName"]

        # work dir named after the table; on collision append a random suffix
        try:
            os.makedirs( workDirectory )
        except:
            workDirectory += ''.join(random.choice( string.ascii_lowercase ) for i in range(5))
            os.makedirs( workDirectory )

        if caster.printSQLScript( workDirectory ):
            insertCounter, file["geometryType"] = self.postgisConnection.runScript( workDirectory )

            if file["geometryType"] is not None:
                if self.arguments["skipUpdates"]:
                    # script params
                    print("omitiendo publicación de capas", Logs.INFO)
                else:
                    # publish in geoserver
                    geoserver.publishLayer( file )

                    if self.arguments["featureName"] or file["metadata"].isValid():
                        # update API
                        if MongoApiCollection().addLayer( file ):
                            print("API actualizada", Logs.INFO)
                        else:
                            print("no se pudo actualizar el API", Logs.ERROR)
                    else:
                        print("omitiendo publicación en API", Logs.ERROR, {"error": "metadatos no validos", "metadatos": str(file["metadata"])})

                    self.counter["procesados_correctamente"] += 1


        else:
            print("no se generaron los comandos sql", Logs.ERROR)
            self.counter["error_archivo"] += 1

        # cleanning: remove the temporary work directory
        shutil.rmtree( workDirectory )
Ejemplo n.º 19
0
    def export(self):
        """Drain self.fileList, processing every queued file.

        Zipped resources are extracted and their contents pushed back onto
        the queue (so the loop also consumes them); regular files are
        delegated to processFile(). Per-outcome counters are accumulated in
        self.counter and printed when the queue is empty.
        """
        while len(self.fileList) > 0:
            file = self.fileList.pop()

            self.counter["total"] += 1

            print("--------------------------------------------------------------------")
            print("procesando " + file["path"], Logs.INFO )

            if file["type"] not in acceptedFiles and file["type"] not in zippedFiles:
                print("Archivo no soportado", Logs.ERROR)
                self.counter["archivo_no_soportado"] += 1
                continue


            if self.arguments["forceUpdates"] or self.arguments["skipUpdates"] or not geoserver.featureExists( file["tableName"] ):
                # the layer is not in geoserver (or updates are forced/skipped)
                metadata = file["metadata"]

                # metadata lookup is skipped when updates are forced/skipped
                # or an explicit featureName was given
                if self.arguments["forceUpdates"] or self.arguments["skipUpdates"] or self.arguments["featureName"] or metadata.readFeatureData():
                    if file["type"] in zippedFiles:
                        # is a zipped directory: extract and queue its contents
                        extractedFiles = zip.extract( file["path"], file["ckanID"] )

                        if extractedFiles is not None:
                            # prefix extracted tables with the archive's table
                            # name when the zip contains more than one file
                            namePreffix = "" if len(extractedFiles) == 1 else "{}__".format( file["tableName"] )

                            for extractedFile in extractedFiles:
                                configObject = self.buildConfigObject( extractedFile, metadata=metadata )

                                name = utils.normalizeText( configObject["name"] )
                                # NOTE(review): this slices from the front even
                                # when "conjunto_de_datos_" is not a prefix —
                                # confirm the substring is always leading
                                name = name[ len("conjunto_de_datos_"):] if "conjunto_de_datos_" in name else name
                                configObject["tableName"] = namePreffix + name
                                configObject["extracted"] = True

                                self.fileList.append( configObject )
                    else:
                        # process file
                        self.processFile( file )
                else:
                    print("no se encontraron metadatos en CKAN/DGM", Logs.ERROR, {"metadata": str(metadata)} )
                    self.counter["metadatos_error"] += 1
            else:
                print("la capa ya existe en geoserver", Logs.ERROR)
                self.counter["procesados_anteriormente"] += 1

        # print counters
        print("--------------------------------------------------------------------")
        print("counters", Logs.INFO, self.counter)