Ejemplo n.º 1
0
def split_colval_into_cols(db_name, table, column, splitChar, new_cols,
                           new_table):
    """
    Split one text column's value into several new columns.

    Uses PostgreSQL split_part to break *column* on *splitChar*; the i-th
    token is written to new_cols[i]. All original columns are kept.

    db_name   - database name
    table     - source table
    column    - column whose value will be split
    splitChar - delimiter character
    new_cols  - list with the names of the new columns (>= 2 elements)
    new_table - name of the output table

    Raises ValueError if new_cols is not a list with at least 2 elements.
    Returns new_table.
    """

    from gasp.sql.i  import cols_name
    # BUGFIX: q_to_ntbl is called below but was never imported
    from gasp.sql.to import q_to_ntbl

    # isinstance is the idiomatic type check (accepts list subclasses too)
    if not isinstance(new_cols, list):
        raise ValueError('new_cols should be a list')

    if len(new_cols) < 2:
        raise ValueError('new_cols should have 2 or more elements')

    # Get columns from the source table
    tblCols = cols_name(db_name, table)

    # SQL construction: keep the original columns and add one split_part
    # per requested new column (split_part field indexes start at 1)
    SQL = "SELECT {}, {} FROM {}".format(
        ", ".join(tblCols), ", ".join([
            "split_part({}, '{}', {}) AS {}".format(column, splitChar, i + 1,
                                                    new_cols[i])
            for i in range(len(new_cols))
        ]), table)

    q_to_ntbl(db_name, new_table, SQL, api='psql')

    return new_table
Ejemplo n.º 2
0
def split_table_by_col_distinct(db, tbl, col):
    """
    Create a new table for each distinct value in one column.

    Each output table is named "<tbl>_<value>" and holds the rows of *tbl*
    where *col* equals that value.
    """
    
    from gasp.sql.fm import q_to_obj
    from gasp.sql.i  import cols_type
    from gasp.sql.to import q_to_ntbl
    
    fields_types = cols_type(db, tbl)
    
    # Get unique values
    VALUES = q_to_obj(db,
        "SELECT {col} FROM {t} GROUP BY {col}".format(
            col=col, t=tbl
        ), db_api='psql'
    )[col].tolist()
    
    # Quote the value in the WHERE clause only for text columns
    whr = '{}=\'{}\'' if fields_types[col] == str else '{}={}'
    
    for val in VALUES:
        # BUGFIX: VALUES holds scalar values, not row tuples; the previous
        # val[0] took the first character of a string value (or raised a
        # TypeError for numeric values)
        q_to_ntbl(
            db, '{}_{}'.format(tbl, str(val)),
            "SELECT * FROM {} WHERE {}".format(
                tbl, whr.format(col, str(val))
        ), api='psql')
Ejemplo n.º 3
0
Archivo: prox.py Proyecto: jasp382/gasp
def splite_buffer(db,
                  table,
                  dist,
                  geomField,
                  outTbl,
                  cols_select=None,
                  bufferField="geometry",
                  whrClause=None,
                  outTblIsFile=None,
                  dissolve=None):
    """
    Run ST_Buffer on a SpatiaLite/PostGIS table.

    dissolve=None    -> one buffer per feature, no generalization;
    dissolve=str/list-> buffers merged (ST_UnaryUnion/ST_Collect), grouped
                        by the referenced fields;
    dissolve='ALL'   -> every buffer merged into a single geometry.
    """

    from gasp.pyt import obj_to_lst

    if dissolve != "ALL":
        dissolve = obj_to_lst(dissolve)

    # Selected columns fragment (leading space kept for the SELECT clause)
    if cols_select:
        sel_str = " {}, ".format(", ".join(obj_to_lst(cols_select)))
    else:
        sel_str = " "

    # Buffer expression, optionally wrapped in a union for dissolving
    if dissolve:
        fn_open, fn_close = "ST_UnaryUnion(ST_Collect(ST_Buffer(", ")))"
    else:
        fn_open, fn_close = "ST_Buffer(", ")"

    whr_str = " WHERE {}".format(whrClause) if whrClause else ""

    # GROUP BY only applies when dissolving by specific fields
    if dissolve and dissolve != "ALL":
        grp_str = " GROUP BY {}".format(", ".join(dissolve))
    else:
        grp_str = ""

    sql = "SELECT{}{}{}, {}{} AS {} FROM {}{}{}".format(
        sel_str, fn_open, geomField, str(dist), fn_close, bufferField,
        table, whr_str, grp_str
    )

    if outTblIsFile:
        from gasp.gt.attr import sel_by_attr

        sel_by_attr(db, sql, outTbl, api_gis='ogr')

    else:
        from gasp.sql.to import q_to_ntbl

        q_to_ntbl(db, outTbl, sql, api='ogr2ogr')

    return outTbl
Ejemplo n.º 4
0
def split_table_entity_number(db, table, entity_field, entity_number):
    """
    Split a table into several, using a number of entities per table.
    
    If a table has 1 000 000 entities and entity_number is 250 000,
    this method creates four tables, each one with 250 000 entities.
    250 000 entities, not rows: the main table may have more than one
    row referencing the same entity.
    """
    
    from gasp.sql.fm import q_to_obj
    from gasp.sql.i  import cols_type
    from gasp.sql.to import q_to_ntbl
    
    # Distinct entities in the table
    entities = q_to_obj(db, "SELECT {c} FROM {t} GROUP BY {c}".format(
        c=entity_field, t=table
    ), db_api='psql')
    
    # Split entities into groups according to entity_number
    entityGroup = []
    
    lower = 0
    high = entity_number
    # BUGFIX: use < instead of <= — with <=, an entity count that is an
    # exact multiple of entity_number produced a final EMPTY group, which
    # then generated an invalid "SELECT ... WHERE " query below
    while lower < len(entities.index):
        if high > len(entities.index):
            high = len(entities.index)
        
        entityGroup.append(entities.iloc[lower : high])
        
        lower += entity_number
        high  += entity_number
    
    # For each dataframe, create a new table
    COLS_TYPE = cols_type(db, table)
    
    c = 0
    for df in entityGroup:
        # Build one "field=value" expression per entity; quote text values
        if COLS_TYPE[entity_field] != str:
            df[entity_field] = '{}='.format(entity_field) + df[entity_field].astype(str)
        else:
            df[entity_field] = '{}=\''.format(entity_field) + df[entity_field].astype(str) + '\''
        
        whr = ' OR '.join(df[entity_field])
        
        q_to_ntbl(db, '{}_{}'.format(table, str(c)), (
            "SELECT * FROM {} WHERE {}"
        ).format(table, whr), api='psql')
        
        c += 1
Ejemplo n.º 5
0
def txt_cols_to_col(db, inTable, columns, strSep, newCol, outTable=None):
    """
    Merge several text columns into a single column.

    Values are concatenated in the order given by *columns*, separated by
    *strSep*; NULLs are treated as empty strings (COALESCE).

    If outTable is given, a new table is written with newCol replacing the
    merged columns; otherwise newCol is added to inTable in place.

    Raises ValueError if any column in *columns* is not text/varchar.
    Returns the name of the table holding the new column.
    """

    from gasp.pyt import obj_to_lst
    from gasp.sql.i import cols_type
    # BUGFIX: q_to_ntbl is called below but was never imported
    from gasp.sql.to import q_to_ntbl

    mergeCols = obj_to_lst(columns)

    tblCols = cols_type(db, inTable, sanitizeColName=None, pyType=False)

    # Concatenation only makes sense for textual columns
    for col in mergeCols:
        if tblCols[col] != 'text' and tblCols[col] != 'varchar':
            raise ValueError('{} should be of type text'.format(col))

    # Build "COALESCE(a, '') || 'sep' || COALESCE(b, '') || ..."
    coalesce = ""
    for i in range(len(mergeCols)):
        if not i:
            coalesce += "COALESCE({}, '')".format(mergeCols[i])

        else:
            coalesce += " || '{}' || COALESCE({}, '')".format(
                strSep, mergeCols[i])

    if outTable:
        # Write new table keeping every column not being merged
        colsToSelect = [_c for _c in tblCols if _c not in mergeCols]

        if not colsToSelect:
            sel = coalesce + " AS {}".format(newCol)
        else:
            sel = "{}, {}".format(", ".join(colsToSelect),
                                  coalesce + " AS {}".format(newCol))

        q_to_ntbl(db,
                  outTable,
                  "SELECT {} FROM {}".format(sel, inTable),
                  api='psql')

        return outTable

    else:
        # Add column to inTable
        from gasp.sql.tbl import update_table
        # NOTE(review): add_field is used below but is not imported in this
        # snippet — confirm its module in the gasp package and import it
        # (it is not provided by gasp.sql.tbl above)

        add_field(db, inTable, {newCol: 'text'})

        update_table(db, inTable, {newCol: coalesce})

        return inTable
Ejemplo n.º 6
0
def trim_char_in_col(db,
                     pgtable,
                     cols,
                     trim_str,
                     outTable,
                     onlyTrailing=None,
                     onlyLeading=None):
    """
    Python implementation of the TRIM PSQL Function
    
    The PostgreSQL trim function is used to remove spaces or a set of
    characters from the leading or trailing or both sides of a string.

    Raises ValueError if any column in *cols* is not text/varchar.
    Returns outTable.
    """

    from gasp.pyt import obj_to_lst
    from gasp.sql.i import cols_type
    # BUGFIX: q_to_ntbl is called below but was never imported
    from gasp.sql.to import q_to_ntbl

    cols = obj_to_lst(cols)

    colsTypes = cols_type(db, pgtable, sanitizeColName=None, pyType=False)

    # TRIM only makes sense for textual columns
    for col in cols:
        if colsTypes[col] != 'text' and colsTypes[col] != 'varchar':
            raise ValueError('{} should be of type text'.format(col))

    # Columns copied through unchanged
    colsToSelect = [_c for _c in colsTypes if _c not in cols]

    # LEADING / TRAILING / both sides (empty string means both)
    tail_lead_str = "" if not onlyTrailing and not onlyLeading else \
        "TRAILING " if onlyTrailing and not onlyLeading else \
        "LEADING " if not onlyTrailing and onlyLeading else ""

    trimCols = [
        "TRIM({tol}{char} FROM {c}) AS {c}".format(c=col,
                                                   tol=tail_lead_str,
                                                   char=trim_str)
        for col in cols
    ]

    if not colsToSelect:
        cols_to_select = "{}".format(", ".join(trimCols))
    else:
        cols_to_select = "{}, {}".format(", ".join(colsToSelect),
                                         ", ".join(trimCols))

    # BUGFIX: the query used colsToSelect (a Python list, rendering as
    # "['a', 'b']") instead of the cols_to_select string built above
    q_to_ntbl(db,
              outTable,
              "SELECT {} FROM {}".format(cols_to_select, pgtable),
              api='psql')

    # Return the output table name, consistent with replace_char_in_col
    return outTable
Ejemplo n.º 7
0
def pnts_to_lines(db,
                  inTable,
                  outTable,
                  entityCol,
                  orderCol,
                  geomCol=None,
                  xCol=None,
                  yCol=None,
                  epsg=4326):
    """
    Build one polyline per entity from a table of points.

    Points are aggregated with ST_MakeLine, ordered by *orderCol* and
    grouped by *entityCol*. The point geometry comes either from geomCol
    or, when that is None, is built from xCol/yCol with ST_MakePoint.

    Raises ValueError when neither geomCol nor the xCol/yCol pair is given.
    """

    if not geomCol and (not xCol or not yCol):
        raise ValueError(
            'If geomCol is not specified, xCol and ycol must replace it!')

    from gasp.sql.to import q_to_ntbl

    # Point expression: existing geometry column or X/Y constructor
    if geomCol:
        pnt_expr = geomCol
    else:
        pnt_expr = "ST_MakePoint({}, {})".format(xCol, yCol)

    sql = (
        "SELECT {entCol}, ST_SetSRID(ST_MakeLine("
        "array_agg({pntCol} ORDER BY {orderF})), {srs}) "
        "FROM {tbl} GROUP BY {entCol}"
    ).format(
        entCol=entityCol, pntCol=pnt_expr, orderF=orderCol,
        srs=epsg, tbl=inTable
    )

    return q_to_ntbl(db, outTable, sql, api='psql')
Ejemplo n.º 8
0
def geom_to_points(db,
                   table,
                   geomCol,
                   outTable,
                   selCols=None,
                   newGeomCol=None):
    """
    Explode Polygon/Polyline geometries into their vertices (points).

    Equivalent to a "feature to point" tool: every vertex of every
    geometry in *table* becomes one row in *outTable*.
    """

    from gasp.pyt import obj_to_lst
    from gasp.sql.to import q_to_ntbl

    sel_cols = obj_to_lst(selCols)

    # Optional pass-through columns and output geometry name
    col_str = "{}, ".format(", ".join(sel_cols)) if sel_cols else ""
    out_geom = newGeomCol if newGeomCol else "geom"

    sql = (
        "SELECT {cols}(ST_DumpPoints({geom})).geom AS {newCol} "
        "FROM {tbl}"
    ).format(cols=col_str, geom=geomCol, newCol=out_geom, tbl=table)

    return q_to_ntbl(db, outTable, sql, api='psql')
Ejemplo n.º 9
0
def select_main_geom_type(db, table, outbl, geomCol='geom'):
    """
    Keep only the rows whose geometry type is the dominant one.

    Assuming a table mixing several geometry types, this method counts
    the rows of each geometry type and selects the rows whose type has
    the highest count.
    """
    
    from gasp.sql.to import q_to_ntbl
    from gasp.sql.i  import cols_name
    
    # Every column except the geometry one
    COLS = []
    for c in cols_name(db, table, sanitizeSpecialWords=None):
        if c != geomCol:
            COLS.append(c)
    
    # Dump geometries, count rows per geometry type, keep the type whose
    # count matches the window maximum
    Q = (
        "SELECT {cols}, {geomcol} FROM ("
            "SELECT *, MAX(jtbl.geom_cont) OVER (PARTITION BY "
            "jtbl.tst) AS max_cnt FROM ("
                "SELECT {cols}, (ST_Dump({geomcol})).geom AS {geomcol}, "
                "ST_GeometryType((ST_Dump({geomcol})).geom) AS geom_type "
                "FROM {tbl}"
            ") AS foo INNER JOIN ("
                "SELECT ST_GeometryType((ST_Dump({geomcol})).geom) AS gt, "
                "COUNT(ST_GeometryType((ST_Dump({geomcol})).geom)) AS geom_cont, "
                "1 AS tst FROM {tbl} GROUP BY ST_GeometryType((ST_Dump({geomcol})).geom)"
            ") AS jtbl ON foo.geom_type = jtbl.gt"
        ") AS foo WHERE geom_cont = max_cnt"
    ).format(
        cols=", ".join(COLS), geomcol=geomCol,
        tbl=table
    )
    
    return q_to_ntbl(db, outbl, Q, api='psql')
Ejemplo n.º 10
0
def rows_notin_q(db, tblA, tblB, joinCols, newTable,
                 cols_to_mantain=None, tblAisQuery=None,
                 tblBisQuery=None):
    """
    Get rows from tblA that are not present in tblB
    
    joinCols = {colTblA : colTblB}

    Uses a LEFT JOIN and keeps the rows where tblB's join column is NULL.
    Returns the name of the created table.
    """
    
    from gasp.pyt    import obj_to_lst
    from gasp.sql.to import q_to_ntbl
    
    cols_to_mantain = obj_to_lst(cols_to_mantain)
    
    # tblB column used to detect non-matching rows after the LEFT JOIN
    # (any join column works; take the one mapped from the first key)
    fld_b = joinCols[list(joinCols.keys())[0]]
    
    q = (
        "SELECT {cls} FROM {ta} LEFT JOIN {tb} ON "
        "{rel} WHERE {tblB}.{fldB} IS NULL"
    ).format(
        # BUGFIX: cols_to_mantain is a list; it must be joined into SQL,
        # not formatted directly (which rendered "['a', 'b']")
        cls=", ".join(cols_to_mantain) if cols_to_mantain else \
            "{}.*".format(tblA),
        ta=tblA if not tblAisQuery else tblAisQuery,
        tb=tblB if not tblBisQuery else tblBisQuery,
        rel=" AND ".join(["{ta}.{ca} = {tb}.{cb}".format(
            ta=tblA, tb=tblB, ca=k, cb=joinCols[k]
        ) for k in joinCols]),
        # BUGFIX: {tblB}/{fldB} placeholders had no format arguments,
        # so this call always raised KeyError
        tblB=tblB, fldB=fld_b
    )
    
    newTable = q_to_ntbl(db, newTable, q, api='psql')
    
    return newTable
Ejemplo n.º 11
0
def matrix_od_mean_dist_by_group(MATRIX_OD, ORIGIN_COL, GROUP_ORIGIN_ID,
                                 GROUP_ORIGIN_NAME, GROUP_DESTINA_ID,
                                 GROUP_DESTINA_NAME, TIME_COL, epsg, db,
                                 RESULT_MATRIX):
    """
    Calculate Mean GROUP distance from OD Matrix
    
    OD MATRIX EXAMPLE
    | origin_entity | origin_group | destina_entity | destina_group | distance
    |     XXXX      |     XXXX     |      XXXX      |      XXX      |   XXX
    
    OUTPUT EXAMPLE
    | origin_group | destina_group | mean_distance
    |     XXXX     |      XXXX     |      XXXX
    """

    import os
    from gasp.pyt.oss import fprop
    from gasp.gql.to import shp_to_psql
    from gasp.sql.db import create_db
    from gasp.sql.to import q_to_ntbl
    from gasp.to import db_to_tbl

    # NOTE(review): the db parameter is overwritten here — a fresh database
    # named after the MATRIX_OD filename is always created, so the passed-in
    # value is never used; confirm this is intentional
    db = create_db(fprop(MATRIX_OD, 'fn'), overwrite=True, api='psql')

    # Load the OD matrix shapefile into the new PostgreSQL database
    TABLE = shp_to_psql(db,
                        MATRIX_OD,
                        pgTable="tbl_{}".format(db),
                        api="pandas",
                        srsEpsgCode=epsg)

    # Two-level aggregation: inner query averages TIME_COL per origin
    # entity (within its origin/destination groups); the outer query
    # averages those means per (origin group, destination group) pair
    OUT_TABLE = q_to_ntbl(
        db,
        fprop(RESULT_MATRIX, 'fn'),
        ("SELECT {groupOriginCod}, {groupOriginName}, {groupDestCod}, "
         "{groupDestName}, AVG(mean_time) AS mean_time FROM ("
         "SELECT {origin}, {groupOriginCod}, {groupOriginName}, "
         "{groupDestCod}, {groupDestName}, "
         "AVG({timeCol}) AS mean_time FROM {t} "
         "GROUP BY {origin}, {groupOriginCod}, {groupOriginName}, "
         "{groupDestCod}, {groupDestName}"
         ") AS foo "
         "GROUP BY {groupOriginCod}, {groupOriginName}, "
         "{groupDestCod}, {groupDestName} "
         "ORDER BY {groupOriginCod}, {groupDestCod}").format(
             groupOriginCod=GROUP_ORIGIN_ID,
             groupOriginName=GROUP_ORIGIN_NAME,
             groupDestCod=GROUP_DESTINA_ID,
             groupDestName=GROUP_DESTINA_NAME,
             origin=ORIGIN_COL,
             timeCol=TIME_COL,
             t=TABLE),
        api='psql')

    # Export the aggregated table to the RESULT_MATRIX spreadsheet
    return db_to_tbl(db,
                     "SELECT * FROM {}".format(OUT_TABLE),
                     RESULT_MATRIX,
                     sheetsNames="matrix",
                     dbAPI='psql')
Ejemplo n.º 12
0
def st_dissolve(db, table, geomColumn, outTable, whrClause=None,
                diss_cols=None, outTblIsFile=None, api='sqlite'):
    """
    Dissolve a Polygon table.

    Merges geometries with ST_UnaryUnion(ST_Collect(...)); when diss_cols
    is given, the merge is grouped by those columns. Output goes to a new
    table, or to a file when outTblIsFile is set.

    api must be 'sqlite' or 'psql'. Returns outTable.
    """
    
    from gasp.pyt import obj_to_lst
    
    diss_cols = obj_to_lst(diss_cols) if diss_cols else None
    # SpatiaLite convention names the geometry "geometry"; PostGIS "geom"
    geomcol = "geometry" if api == 'sqlite' else 'geom'
    
    sql = (
        "SELECT{selCols} ST_UnaryUnion(ST_Collect({geom})) AS {gout} "
        "FROM {tbl}{whr}{grpBy}"
    ).format(
        selCols="" if not diss_cols else " {},".format(", ".join(diss_cols)),
        geom=geomColumn, tbl=table,
        whr="" if not whrClause else " WHERE {}".format(whrClause),
        grpBy="" if not diss_cols else " GROUP BY {}".format(
            ", ".join(diss_cols)
        ), gout=geomcol
    )
    
    if outTblIsFile:
        if api == 'sqlite':
            from gasp.gt.attr import sel_by_attr
            
            sel_by_attr(db, sql, outTable, api_gis='ogr')
        
        elif api == 'psql':
            from gasp.gt.toshp.db import dbtbl_to_shp
            
            # BUGFIX: with tableIsQuery=True the dissolve query must be
            # passed, not the source table name, and the geometry column is
            # the query's output alias (geomcol) — cf. st_buffer's usage
            dbtbl_to_shp(
                db, sql, geomcol, outTable, api='pgsql2shp',
                tableIsQuery=True
            )
    
    else:
        from gasp.sql.to import q_to_ntbl
        
        q_to_ntbl(
            db, outTable, sql, api='ogr2ogr' if api == 'sqlite' else 'psql'
        )
    
    return outTable
Ejemplo n.º 13
0
Archivo: prox.py Proyecto: jasp382/gasp
def st_buffer(db,
              inTbl,
              bfDist,
              geomCol,
              outTbl,
              bufferField="geometry",
              whrClause=None,
              dissolve=None,
              cols_select=None,
              outTblIsFile=None):
    """
    Using Buffer on PostGIS Data.

    dissolve=None    -> one buffer per feature;
    dissolve=str/list-> buffers merged, grouped by the referenced fields;
    dissolve='ALL'   -> every buffer merged into a single geometry.
    """

    from gasp.pyt import obj_to_lst

    if dissolve != "ALL":
        dissolve = obj_to_lst(dissolve)

    sel_cols = ", ".join(obj_to_lst(cols_select)) if cols_select else ""
    diss_cols = "" if not dissolve or dissolve == "ALL" else \
        ", ".join(dissolve)

    # GROUP BY columns: selected + dissolve columns, skipping empty parts
    if not dissolve:
        grp_by = ""
    elif sel_cols and diss_cols:
        grp_by = "{}, {}".format(sel_cols, diss_cols)
    elif sel_cols:
        grp_by = sel_cols
    else:
        grp_by = diss_cols

    # Buffer expression, wrapped in a union when dissolving
    if dissolve:
        fn_open, fn_close = "ST_UnaryUnion(ST_Collect(ST_Buffer(", ")))"
    else:
        fn_open, fn_close = "ST_Buffer(", ")"

    Q = "SELECT{}{}{}, {}{} AS {} FROM {}{}{}".format(
        " " if not cols_select else " {}, ".format(sel_cols),
        fn_open, geomCol, bfDist, fn_close, bufferField, inTbl,
        " WHERE {}".format(whrClause) if whrClause else "",
        " GROUP BY {}".format(grp_by) if grp_by else ""
    )

    if not outTblIsFile:
        from gasp.sql.to import q_to_ntbl

        outTbl = q_to_ntbl(db, outTbl, Q, api='psql')

    else:
        from gasp.gt.toshp.db import dbtbl_to_shp

        dbtbl_to_shp(db,
                     Q,
                     bufferField,
                     outTbl,
                     api='pgsql2shp',
                     tableIsQuery=True)

    return outTbl
Ejemplo n.º 14
0
def replace_char_in_col(db, pgtable, cols, match_str, replace_str, outTable):
    """
    Replace match_str in all columns in cols with replace_str.
    
    Python implementation of the REPLACE PSQL Function.

    Raises ValueError if any column in *cols* is not text/varchar.
    Returns outTable.
    """

    from gasp.pyt import obj_to_lst
    from gasp.sql.i import cols_type
    # BUGFIX: q_to_ntbl is called below but was never imported
    from gasp.sql.to import q_to_ntbl

    cols = obj_to_lst(cols)

    colsTypes = cols_type(db, pgtable, sanitizeColName=None, pyType=False)

    # REPLACE only makes sense for textual columns
    for col in cols:
        if colsTypes[col] != 'text' and colsTypes[col] != 'varchar':
            raise ValueError('{} should be of type text'.format(col))

    # Columns copied through unchanged
    colsToSelect = [_c for _c in colsTypes if _c not in cols]

    colsReplace = [
        "REPLACE({c}, '{char}', '{nchar}') AS {c}".format(c=col,
                                                          char=match_str,
                                                          nchar=replace_str)
        for col in cols
    ]

    if not colsToSelect:
        cols_to_select = "{}".format(", ".join(colsReplace))
    else:
        cols_to_select = "{}, {}".format(", ".join(colsToSelect),
                                         ", ".join(colsReplace))

    q_to_ntbl(db,
              outTable,
              "SELECT {cols} FROM {tbl}".format(cols=cols_to_select,
                                                tbl=pgtable),
              api='psql')

    return outTable
Ejemplo n.º 15
0
Archivo: tbl.py Proyecto: jasp382/gasp
def tbls_to_tbl(db, lst_tables, outTable):
    """
    Append all tables in lst_tables into the outTable.

    Builds a UNION ALL over every table and materializes the result.
    Returns the name of the created table.
    """

    from gasp.sql.to import q_to_ntbl

    # One SELECT per source table, stitched together with UNION ALL
    queries = ["SELECT * FROM {}".format(t) for t in lst_tables]

    return q_to_ntbl(db, outTable, " UNION ALL ".join(queries), api='psql')
Ejemplo n.º 16
0
def split_table_by_range(db, table, row_number):
    """
    Split a table into several tables, each with at most row_number rows.

    Output tables are named "<table>_<i>" and filled with consecutive
    OFFSET/LIMIT windows over the source, ordered by all columns so the
    windows are deterministic.
    """
    
    from gasp.sql.i  import cols_name, row_num
    from gasp.sql.to import q_to_ntbl
    
    rowsN = row_num(db, table, api='psql')
    
    # BUGFIX: ceiling division; the previous int(rowsN/float(n)) + 1
    # produced one extra, empty table whenever rowsN was an exact
    # multiple of row_number
    nrTables = -(-rowsN // row_number)
    
    COLS = cols_name(db, table)
    
    offset = 0
    for i in range(nrTables):
        q_to_ntbl(
            db, '{}_{}'.format(table, str(i)),
            "SELECT * FROM {} ORDER BY {} OFFSET {} LIMIT {} ;".format(
                table, ', '.join(COLS), str(offset), str(row_number)
            ), api='psql'
        )
        
        offset += row_number
Ejemplo n.º 17
0
def col_to_timestamp(db,
                     inTbl,
                     dayCol,
                     hourCol,
                     minCol,
                     secCol,
                     newTimeCol,
                     outTbl,
                     selColumns=None,
                     whr=None):
    """
    Combine day/hour/minute/second columns into one timestamp column.

    The values are cast to text, concatenated as
    "<day> <hour>:<min>:<sec>" and parsed with TO_TIMESTAMP using the
    'YYYY-MM-DD HH24:MI:SS' mask. Writes the result to outTbl.

    Returns outTbl.
    """

    from gasp.pyt import obj_to_lst
    # BUGFIX: q_to_ntbl is called below but was never imported
    from gasp.sql.to import q_to_ntbl

    selCols = obj_to_lst(selColumns)

    sql = ("SELECT {C}, TO_TIMESTAMP("
           "COALESCE(CAST({day} AS text), '') || ' ' || "
           "COALESCE(CAST({hor} AS text), '') || ':' || "
           "COALESCE(CAST({min} AS text), '') || ':' || "
           "COALESCE(CAST({sec} AS text), ''), 'YYYY-MM-DD HH24:MI:SS'"
           ") AS {TC} FROM {T}{W}").format(
               C="*" if not selCols else ", ".join(selCols),
               day=dayCol,
               hor=hourCol,
               min=minCol,
               sec=secCol,
               TC=newTimeCol,
               T=inTbl,
               W="" if not whr else " WHERE {}".format(whr))

    q_to_ntbl(db, outTbl, sql, api='psql')

    return outTbl
Ejemplo n.º 18
0
Archivo: cnv.py Proyecto: jasp382/gasp
def lnh_to_polg(db, intbl, outtbl):
    """
    Lines to Polygons.

    Dumps multi-lines to single lines, nodes intersections with ST_Node,
    polygonizes the result and assigns a sequential gid to each polygon.
    """

    from gasp.sql.to import q_to_ntbl

    # Noded line-work: explode to single parts, then node all crossings
    inner = ("SELECT ST_Node(ST_Collect(geom)) AS geom FROM ("
             "SELECT (ST_Dump(geom)).geom FROM {}"
             ") AS foo").format(intbl)

    sql = ("SELECT ROW_NUMBER() OVER (ORDER BY (SELECT NULL)) AS gid, "
           "(ST_Dump(ST_Polygonize(geom))).geom AS geom FROM ("
           "{}"
           ") AS foo").format(inner)

    return q_to_ntbl(db, outtbl, sql)
Ejemplo n.º 19
0
def sel_where_groupByIs(db,
                        table,
                        groupByCols,
                        grpByOp,
                        grpByVal,
                        outTable,
                        filterWhere=None):
    """
    Select the rows of *table* whose GROUP BY count over groupByCols
    satisfies the condition "<count> <grpByOp> <grpByVal>".
    
    For table=tst_table, groupByCols=[day, hour], grpByOp=>, grpByVal=1
    this method creates a new table with a query such as:

    SELECT tst_table.* FROM tst_table INNER JOIN (
        SELECT day, hour, COUNT(day) AS cnt_day FROM tst_table
        GROUP BY day, hour
    ) AS foo ON tst_table.day = foo.day AND tst_table.hour = foo.hour
    WHERE foo.cnt_day > 1
    """

    from gasp.pyt import obj_to_lst
    from gasp.sql.to import q_to_ntbl

    grp_cols = obj_to_lst(groupByCols)

    # Count is taken over the first grouping column
    first_col = grp_cols[0]
    cols_str = ", ".join(grp_cols)
    join_on = " AND ".join(
        "{t}.{c} = foo.{c}".format(t=table, c=c) for c in grp_cols
    )
    extra_whr = " AND ({})".format(filterWhere) if filterWhere else ""

    q = ("SELECT {t}.* FROM {t} INNER JOIN ("
         "SELECT {cls}, COUNT({col}) AS cnt_{col} "
         "FROM {t} GROUP BY {cls}"
         ") AS foo ON {jOn} "
         "WHERE foo.cnt_{col} {op} {val}{fwhr}").format(
             t=table, cls=cols_str, col=first_col, jOn=join_on,
             op=grpByOp, val=grpByVal, fwhr=extra_whr)

    return q_to_ntbl(db, outTable, q, api='psql')
Ejemplo n.º 20
0
def sql_proj(dbname,
             tbl,
             otbl,
             oepsg,
             cols=None,
             geomCol=None,
             newGeom=None,
             whr=None,
             new_pk=None):
    """
    Reproject a geometric table to another spatial reference system (srs).

    Applies ST_Transform to geomCol (default 'geom'), writes the result
    as newGeom (default 'geom') and optionally creates a primary key on
    the output table.
    """

    from gasp.pyt import obj_to_lst
    from gasp.sql.to import q_to_ntbl

    geomCol = geomCol if geomCol else 'geom'
    newGeom = newGeom if newGeom else 'geom'

    if cols:
        cols = obj_to_lst(cols)

        # When the reprojected geometry keeps the original name, keep the
        # untransformed geometry under an "old_" alias instead
        if geomCol in cols and geomCol == newGeom:
            cols.remove(geomCol)
            cols.append('{c} AS old_{c}'.format(c=geomCol))

    else:
        from gasp.sql.i import cols_name

        cols = cols_name(dbname, tbl)
        cols.remove(geomCol)

    whr_sql = " WHERE {}".format(whr) if whr else ""

    sql = "SELECT {}, ST_Transform({}, {}) AS {} FROM {}{}".format(
        ", ".join(cols), geomCol, str(oepsg), newGeom, tbl, whr_sql
    )

    otbl = q_to_ntbl(dbname, otbl, sql, api='psql')

    if new_pk:
        from gasp.sql.k import create_pk

        create_pk(dbname, otbl, new_pk)

    return otbl
Ejemplo n.º 21
0
def add_endpnt_to_tbl(db,
                      inTable,
                      outTable,
                      idCol='gid',
                      geomCol='geom',
                      startCol="start_vertex",
                      endCol="end_vertex"):
    """
    Add start/end points columns to table.

    For every line in inTable (identified by idCol) the innermost query
    dumps all vertices with their index; the middle queries keep only the
    first and last vertex of each line and pair them up with a lead()
    window, so each output row carries the line's columns plus its start
    point (startCol) and end point (endCol). Result is written to outTable.
    """

    from gasp.sql.to import q_to_ntbl
    from gasp.sql.i import cols_name

    # Nested query, inside-out:
    #   foo  - every vertex of every line with its index (pnt_idx)
    #   foo2 - flags first/last vertices (pnt_cat = 1) per line
    #   foo3 - lead() pairs each kept vertex with the next one, producing
    #          (start, end); rows whose lead() is NULL are discarded
    return q_to_ntbl(db,
                     outTable,
                     ("SELECT {cols}, {stPnt}, {endPnt} FROM ("
                      "SELECT *, lead({stPnt}) OVER ("
                      "PARTITION BY {colId} ORDER BY pnt_idx) AS {endPnt} "
                      "FROM ("
                      "SELECT {cols}, pnt_idx, {stPnt}, "
                      "CASE "
                      "WHEN pnt_idx = 1 OR pnt_idx = MAX(pnt_idx) "
                      "OVER (PARTITION BY {colId}) "
                      "THEN 1 ELSE 0 END AS pnt_cat "
                      "FROM ("
                      "SELECT {cols}, "
                      "(ST_DumpPoints({geomF})).path[1] AS pnt_idx, "
                      "(ST_DumpPoints({geomF})).geom AS {stPnt} "
                      "FROM {table}"
                      ") AS foo"
                      ") AS foo2 "
                      "WHERE pnt_cat = 1"
                      ") AS foo3 "
                      "WHERE {endPnt} IS NOT NULL "
                      "ORDER BY {colId}, pnt_idx").format(cols=", ".join(
                          cols_name(db, inTable)),
                                                          stPnt=startCol,
                                                          endPnt=endCol,
                                                          colId=idCol,
                                                          geomF=geomCol,
                                                          table=inTable),
                     api='psql')
Ejemplo n.º 22
0
def xycols_to_geom(db,
                   intbl,
                   x_col,
                   y_col,
                   outtable,
                   geom_field='geom',
                   epsg=4326):
    """
    Build a PostGIS point geometry column from X and Y columns.

    Keeps every column of intbl and adds geom_field as
    ST_SetSRID(ST_MakePoint(x, y), epsg).
    """

    from gasp.sql.to import q_to_ntbl

    sql = (
        "SELECT *, ST_SetSRID(ST_MakePoint({}, {}), {}) AS {} "
        "FROM {}"
    ).format(x_col, y_col, str(epsg), geom_field, intbl)

    return q_to_ntbl(db, outtable, sql, api='psql')
Ejemplo n.º 23
0
def split_lines_on_pnt(db, inTbl, pntTbl, outTbl, idlnhPnt, lnhid):
    """
    Split lines on point locations.

    Points in pntTbl are matched to lines via idlnhPnt/lnhid; matched
    lines are split with ST_Split, unmatched lines pass through intact.
    Returns the created table.
    """

    from gasp.sql.i import cols_name
    from gasp.sql.to import q_to_ntbl

    # Columns of the line table, excluding the geometry and the join id
    cols = ", ".join([
        c for c in cols_name(db, inTbl, sanitizeSpecialWords=True, api='psql')
        if c != 'geom' and c != idlnhPnt
    ])

    # Force MultiLineString to LineString
    # BUGFIX: the subquery must be parenthesized — it is spliced into
    # "FROM {lnh_tbl} LEFT JOIN", and without the opening "(" the final
    # statement "FROM SELECT ... ) AS mtbl" is a SQL syntax error
    sanQ = ("(SELECT {lid}, {cln}, (ST_Dump(geom)).geom AS geom "
            "FROM {t}) AS mtbl").format(lid=lnhid, cln=cols, t=inTbl)

    # Split Query: lines with matching points are split on the collected
    # point geometry; lines without a match keep their original geometry
    Q = ("SELECT {lid}, {cln}, (ST_Dump(geom)).geom AS geom FROM ("
         "SELECT mtbl.{lid}, {cln}, "
         "CASE "
         "WHEN jtbl.{pid} IS NULL THEN mtbl.geom "
         "ELSE ST_Split(mtbl.geom, jtbl.geom) "
         "END AS geom "
         "FROM {lnh_tbl} LEFT JOIN ("
         "SELECT {pid}, ST_Collect(geom) AS geom "
         "FROM {pnt_tbl} "
         "GROUP BY {pid}"
         ") AS jtbl on mtbl.{lid} = jtbl.{pid}"
         ") AS foo").format(lid=lnhid,
                            cln=cols,
                            pid=idlnhPnt,
                            lnh_tbl=sanQ,
                            pnt_tbl=pntTbl)

    # Produce new table and return it
    return q_to_ntbl(db, outTbl, Q)
Ejemplo n.º 24
0
def dsn_data_collection_by_multibuffer(inBuffers, workspace, db, datasource,
                                       keywords=None):
    """
    Extract Digital Social Network Data for each sub-buffer in buffer.
    A sub-buffer is a buffer with a radius equals to the main buffer radius /2
    and with a central point at North, South, East, West, Northeast, Northwest,
    Southwest and Southeast of the main buffer central point.
    
    inBuffers = {
        "lisbon"    : {
            'x'      : -89004.994779, # in meters
            'y'      : -102815.866054, # in meters
            'radius' : 10000,
            'epsg'   : 3763
        },
        "london"    : {
            'x'      : -14210.551441, # in meters
            'y'      : 6711542.47559, # in meters
            'radius' : 10000,
            'epsg'   : 3857
        }
    }
    or
    inBuffers = {
        "lisbon" : {
            "path" : /path/to/file.shp,
            "epsg" : 3763
        }
    }
    
    keywords = ['flood', 'accident', 'fire apartment', 'graffiti', 'homeless']
    
    datasource = 'facebook' or datasource = 'flickr'
    TODO: Only works for Flickr and Facebook

    Side effects: overwrites database <db>; writes shapefiles into
    <workspace>. Returns the inBuffers dict augmented, per city, with the
    names of the PostgreSQL tables created ("table", "pg_buffer", ...).
    """
    
    import os; from osgeo import ogr
    from gasp.pyt         import obj_to_lst
    from gasp.sql.db      import create_db
    from gasp.sql.to      import q_to_ntbl
    from gasp.sql.to      import df_to_db
    from gasp.gql.to      import shp_to_psql
    from gasp.gt.toshp    import df_to_shp
    from gasp.gt.toshp.db import dbtbl_to_shp
    from gasp.gt.prox.bf  import get_sub_buffers, dic_buffer_array_to_shp
    
    # Pick the data-source-specific search function
    if datasource == 'flickr':
        from gasp.sde.dsn.flickr import photos_location
    
    elif datasource == 'facebook':
        from gasp.sde.dsn.fb.places import places_by_query
    
    # "None" placeholder keeps the keyword loop running once when no
    # keywords were given
    keywords = obj_to_lst(keywords)
    keywords = ["None"] if not keywords else keywords
    
    # Create Database to Store Data
    create_db(db, overwrite=True, api='psql')
    
    for city in inBuffers:
        # Get Smaller Buffers
        if "path" in inBuffers[city]:
            # Buffer given as a file: derive X, Y and Radius from it
            from gasp.gt.prop.feat.bf import bf_prop
            
            __bfprop = bf_prop(
                inBuffers[city]["path"], inBuffers[city]["epsg"], isFile=True
            )
            
            inBuffers[city]["x"]      = __bfprop["X"]
            inBuffers[city]["y"]      = __bfprop["Y"]
            inBuffers[city]["radius"] = __bfprop["R"]
        
        # Main buffer ("major") plus the eight cardinal sub-buffers
        inBuffers[city]["list_buffer"] = [{
            'X' : inBuffers[city]["x"], 'Y' : inBuffers[city]["y"],
            'RADIUS' : inBuffers[city]['radius'], 'cardeal' : 'major'
        }] + get_sub_buffers(
            inBuffers[city]["x"], inBuffers[city]["y"],
            inBuffers[city]["radius"]
        )
        
        # Smaller Buffers to File
        multiBuffer = os.path.join(workspace, 'buffers_{}.shp'.format(city))
        dic_buffer_array_to_shp(
            inBuffers[city]["list_buffer"], multiBuffer,
            inBuffers[city]['epsg'], fields={'cardeal' : ogr.OFTString}
        )
        
        # Retrive data for each keyword and buffer
        # Record these elements in one dataframe
        # c flags whether tblData has already been initialised
        c       = None
        tblData = None
        for bf in inBuffers[city]["list_buffer"]:
            for k in keywords:
                if datasource == 'flickr':
                    tmpData = photos_location(
                        bf, inBuffers[city]["epsg"],
                        keyword=k if k != 'None' else None,
                        epsg_out=inBuffers[city]["epsg"],
                        onlySearchAreaContained=False
                    )
                
                elif datasource == 'facebook':
                    tmpData = places_by_query(
                        bf, inBuffers[city]["epsg"],
                        keyword=k if k != 'None' else None,
                        epsgOut=inBuffers[city]["epsg"],
                        onlySearchAreaContained=False
                    )
                
                # An int result signals "no data" for this buffer/keyword
                # (presumably an error/status code - see the search helpers)
                if type(tmpData) == int:
                    print("NoData finded for buffer '{}' and keyword '{}'".format(
                        bf['cardeal'], k
                    ))
                    
                    continue
                
                # Tag each record with its keyword and originating buffer
                tmpData["keyword"]   = k
                tmpData["buffer_or"] = bf["cardeal"]
                
                # NOTE(review): DataFrame.append was removed in pandas 2.0;
                # consider pandas.concat if the pandas dependency is updated
                if not c:
                    tblData = tmpData
                    c = 1
                else:
                    tblData = tblData.append(tmpData, ignore_index=True)
        
        inBuffers[city]["data"] = tblData
        
        # Get data columns names (everything except geometry/tag columns)
        cols = inBuffers[city]["data"].columns.values
        dataColumns = [
            c for c in cols if c != 'geom' and c != 'keyword' \
            and c != 'buffer_or' and c != 'geometry'
        ]
        
        # Send data to PostgreSQL
        # (geometry column name depends on the data source output)
        if 'geometry' in cols:
            cgeom = 'geometry'
        
        else:
            cgeom = 'geom'
        
        inBuffers[city]["table"] = 'tbldata_{}'.format(city)
        
        df_to_db(
            db, inBuffers[city]["data"],
            inBuffers[city]["table"], api='psql',
            epsg=inBuffers[city]["epsg"], geomType='POINT', colGeom=cgeom
        )
        
        # Send Buffers data to PostgreSQL
        inBuffers[city]["pg_buffer"] = shp_to_psql(
            db, multiBuffer, pgTable='buffers_{}'.format(city),
            api="shp2pgsql", srsEpsgCode=inBuffers[city]["epsg"]
        )
        
        # Deduplicate points and record, per point, which buffers it was
        # extracted with and which buffers it spatially intersects
        inBuffers[city]["filter_table"] = q_to_ntbl(
            db, "filter_{}".format(inBuffers[city]["table"]), (
                "SELECT srcdata.*, "
                "array_agg(buffersg.cardeal ORDER BY buffersg.cardeal) "
                "AS intersect_buffer FROM ("
                    "SELECT {cols}, keyword, geom, "
                    "array_agg(buffer_or ORDER BY buffer_or) AS extracted_buffer "
                    "FROM {pgtable} "
                    "GROUP BY {cols}, keyword, geom"
                ") AS srcdata, ("
                    "SELECT cardeal, geom AS bfg FROM {bftable}"
                ") AS buffersg "
                "WHERE ST_Intersects(srcdata.geom, buffersg.bfg) IS TRUE "
                "GROUP BY {cols}, keyword, geom, extracted_buffer"
            ).format(
                cols    = ", ".join(dataColumns),
                pgtable = inBuffers[city]["table"],
                bftable = inBuffers[city]["pg_buffer"]
            ), api='psql'
        )
        
        # Points that intersect NONE of the nine buffers
        # (9 = main buffer + 8 cardinal sub-buffers)
        inBuffers[city]["outside_table"] = q_to_ntbl(
            db, "outside_{}".format(inBuffers[city]["table"]), (
                "SELECT * FROM ("
                "SELECT srcdata.*, "
                "array_agg(buffersg.cardeal ORDER BY buffersg.cardeal) "
                "AS not_intersect_buffer FROM ("
                    "SELECT {cols}, keyword, geom, "
                    "array_agg(buffer_or ORDER BY buffer_or) AS extracted_buffer "
                    "FROM {pgtable} "
                    "GROUP BY {cols}, keyword, geom"
                ") AS srcdata, ("
                    "SELECT cardeal, geom AS bfg FROM {bftable}"
                ") AS buffersg "
                "WHERE ST_Intersects(srcdata.geom, buffersg.bfg) IS NOT TRUE "
                "GROUP BY {cols}, keyword, geom, extracted_buffer"
                ") AS foo WHERE array_length(not_intersect_buffer, 1) = 9"
            ).format(
                cols    = ", ".join(dataColumns),
                pgtable = inBuffers[city]["table"],
                bftable = inBuffers[city]["pg_buffer"]
            ), api='psql'
        )
        
        # Union these two tables
        inBuffers[city]["table"] = q_to_ntbl(db, "data_{}".format(city), (
            "SELECT * FROM {intbl} UNION ALL "
            "SELECT {cols}, keyword, geom, extracted_buffer, "
            "CASE WHEN array_length(not_intersect_buffer, 1) = 9 "
            "THEN '{array_symbol}' ELSE not_intersect_buffer END AS "
            "intersect_buffer FROM {outbl}"
        ).format(
            intbl        = inBuffers[city]["filter_table"],
            outbl        = inBuffers[city]["outside_table"],
            cols         = ", ".join(dataColumns),
            # '{}' literal = empty PostgreSQL array; split to avoid
            # clashing with str.format placeholders
            array_symbol = '{' + '}'
        ), api='psql')
        
        """
        Get Buffers table with info related:
        -> pnt_obtidos = nr pontos obtidos usando esse buffer
        -> pnt_obtidos_fora = nt pontos obtidos fora desse buffer, mas 
        obtidos com ele
        -> pnt_intersect = nt pontos que se intersectam com o buffer
        -> pnt_intersect_non_obtain = nr pontos que se intersectam mas nao 
        foram obtidos como buffer
        """
        # (Translation) Per-buffer stats: points obtained with the buffer;
        # points obtained with it but lying outside it; points intersecting
        # it; points intersecting it that were NOT obtained with it.
        inBuffers[city]["pg_buffer"] = q_to_ntbl(
            db, "dt_{}".format(inBuffers[city]["pg_buffer"]), (
                "SELECT main.*, get_obtidos.pnt_obtidos, "
                "obtidos_fora.pnt_obtidos_fora, intersecting.pnt_intersect, "
                "int_not_obtained.pnt_intersect_non_obtain "
                "FROM {bf_table} AS main "
                "LEFT JOIN ("
                    "SELECT gid, cardeal, COUNT(gid) AS pnt_obtidos "
                    "FROM {bf_table} AS bf "
                    "INNER JOIN {dt_table} AS dt "
                    "ON bf.cardeal = ANY(dt.extracted_buffer) "
                    "GROUP BY gid, cardeal"
                ") AS get_obtidos ON main.gid = get_obtidos.gid "
                "LEFT JOIN ("
                    "SELECT gid, cardeal, COUNT(gid) AS pnt_obtidos_fora "
                    "FROM {bf_table} AS bf "
                    "INNER JOIN {dt_table} AS dt "
                    "ON bf.cardeal = ANY(dt.extracted_buffer) "
                    "WHERE ST_Intersects(bf.geom, dt.geom) IS NOT TRUE "
                    "GROUP BY gid, cardeal"
                ") AS obtidos_fora ON main.gid = obtidos_fora.gid "
                "LEFT JOIN ("
                    "SELECT gid, cardeal, COUNT(gid) AS pnt_intersect "
                    "FROM {bf_table} AS bf "
                    "INNER JOIN {dt_table} AS dt "
                    "ON bf.cardeal = ANY(dt.intersect_buffer) "
                    "GROUP BY gid, cardeal"
                ") AS intersecting ON main.gid = intersecting.gid "
                "LEFT JOIN ("
                    "SELECT gid, cardeal, COUNT(gid) AS pnt_intersect_non_obtain "
                    "FROM {bf_table} AS bf "
                    "INNER JOIN {dt_table} AS dt "
                    "ON bf.cardeal = ANY(dt.intersect_buffer) "
                    "WHERE NOT (bf.cardeal = ANY(dt.extracted_buffer)) "
                    "GROUP BY gid, cardeal"
                ") AS int_not_obtained "
                "ON main.gid = int_not_obtained.gid "
                "ORDER BY main.gid"
            ).format(
                bf_table = inBuffers[city]["pg_buffer"],
                dt_table = inBuffers[city]["table"]
            ), api='psql'
        )
        
        """
        Get Points table with info related:
        -> nobtido = n vezes um ponto foi obtido
        -> obtido_e_intersect = n vezes um ponto foi obtido usando um buffer 
        com o qual se intersecta
        -> obtido_sem_intersect = n vezes um ponto foi obtido usando um buffer
        com o qual nao se intersecta
        -> nintersect = n vezes que um ponto se intersecta com um buffer
        -> intersect_sem_obtido = n vezes que um ponto nao foi obtido apesar
        de se intersectar com o buffer
        """
        # (Translation) Per-point stats: times obtained; times obtained with
        # an intersecting buffer; times obtained without intersection; times
        # intersecting a buffer; times intersecting but not obtained.
        inBuffers[city]["table"] = q_to_ntbl(
            db, "info_{}".format(city), (
                "SELECT {cols}, dt.keyword, dt.geom, "
                "CAST(dt.extracted_buffer AS text) AS extracted_buffer, "
                "CAST(dt.intersect_buffer AS text) AS intersect_buffer, "
                "array_length(extracted_buffer, 1) AS nobtido, "
                "SUM(CASE WHEN ST_Intersects(bf.geom, dt.geom) IS TRUE "
                    "THEN 1 ELSE 0 END) AS obtido_e_intersect, "
                "(array_length(extracted_buffer, 1) - SUM("
                    "CASE WHEN ST_Intersects(bf.geom, dt.geom) IS TRUE "
                    "THEN 1 ELSE 0 END)) AS obtido_sem_intersect, "
                "array_length(intersect_buffer, 1) AS nintersect, "
                "(array_length(intersect_buffer, 1) - SUM("
                    "CASE WHEN ST_Intersects(bf.geom, dt.geom) IS TRUE "
                    "THEN 1 ELSE 0 END)) AS intersect_sem_obtido "
                "FROM {dt_table} AS dt "
                "INNER JOIN {bf_table} AS bf "
                "ON bf.cardeal = ANY(dt.extracted_buffer) "
                "GROUP BY {cols}, dt.keyword, dt.geom, "
                "dt.extracted_buffer, dt.intersect_buffer"
            ).format(
                dt_table = inBuffers[city]["table"],
                bf_table = inBuffers[city]["pg_buffer"],
                cols     = ", ".join(["dt.{}".format(x) for x in dataColumns])
            ), api='psql'
        )
        
        # Export Results (points and buffers) to shapefile
        dbtbl_to_shp(
            db, inBuffers[city]["table"], 'geom',
            os.path.join(workspace, "{}.shp".format(inBuffers[city]["table"])),
            api='psql', epsg=inBuffers[city]["epsg"]
        )
        
        dbtbl_to_shp(
            db, inBuffers[city]["pg_buffer"], 'geom',
            os.path.join(workspace, "{}.shp".format(inBuffers[city]["pg_buffer"])),
            api='psql', epsg=inBuffers[city]["epsg"]
        )
    
    return inBuffers
Ejemplo n.º 25
0
def dsnsearch_by_cell(GRID_PNT, EPSG, RADIUS, DATA_SOURCE, db, OUTPUT_TABLE):
    """
    Search for data in DSN and other platforms by cell.

    GRID_PNT     - path to a point shapefile with the search-grid cells
    EPSG         - SRS code of GRID_PNT (search itself is done in 4326)
    RADIUS       - search radius around each grid point
    DATA_SOURCE  - only 'facebook' is supported (ValueError otherwise)
    db           - name of the PostgreSQL database to (re)create
    OUTPUT_TABLE - path of the output shapefile

    Side effects: overwrites database <db>; sleeps 5s between API calls.
    Returns OUTPUT_TABLE.
    """
    
    import time;
    from gasp.gt.fmshp          import shp_to_obj
    from gasp.sql.db            import create_db
    from gasp.sde.dsn.fb.places import places_by_query
    from gasp.g.prj             import df_prj
    from gasp.pyt.df.to         import merge_df
    from gasp.gt.toshp.db       import dbtbl_to_shp
    from gasp.sql.to            import q_to_ntbl
    from gasp.sql.to            import df_to_db
    
    # Open GRID SHP; the DSN API expects WGS84 coordinates
    GRID_DF = shp_to_obj(GRID_PNT)
    GRID_DF = df_prj(GRID_DF, 4326) if EPSG != 4326 else GRID_DF
    
    GRID_DF["lng"]     = GRID_DF.geometry.x.astype(float)
    GRID_DF["lat"]     = GRID_DF.geometry.y.astype(float)
    GRID_DF["grid_id"] = GRID_DF.index
    
    # GET DATA
    # get_data is applied row-wise and accumulates results in RESULTS
    RESULTS = []
    def get_data(row, datasrc):
        if datasrc == 'facebook':
            d = places_by_query(
                {'x' : row.lng, 'y' : row.lat, 'r' : RADIUS}, 4326,
                keyword=None, epsgOut=EPSG, _limit='100',
                onlySearchAreaContained=None
            )
        
        else:
            raise ValueError('{} as datasource is not a valid value'.format(datasrc))
        
        # int result signals no data for this cell - skip it
        if type(d) == int:
            return
        
        d['grid_id'] = row.grid_id
        
        RESULTS.append(d)
        
        # Throttle requests to avoid API rate limits
        time.sleep(5)
    
    GRID_DF.apply(lambda x: get_data(x, DATA_SOURCE), axis=1)
    
    RT = merge_df(RESULTS)
    
    # Create DB
    create_db(db, overwrite=True, api='psql')
    
    # Send Data to PostgreSQL
    # NOTE(review): EPSG and "POINT" are passed positionally here, while
    # other callers in this module use epsg=/geomType= keywords - confirm
    # they map to the same df_to_db parameters
    df_to_db(
        db, RT, "{}_data".format(DATA_SOURCE),
        EPSG, "POINT",
        colGeom='geometry' if 'geometry' in RT.columns.values else 'geom'
    )
    
    # Non-geometry, non-grid columns used as the GROUP BY key
    COLS = [
        x for x in RT.columns.values if x != "geometry" and \
        x != 'geom' and x != "grid_id"
    ] + ["geom"]
    
    # Deduplicate records found from several cells, keeping the list of
    # grid cells that produced each record
    GRP_BY_TBL = q_to_ntbl(db, "{}_grpby".format(DATA_SOURCE), (
        "SELECT {cols}, CAST(array_agg(grid_id) AS text) AS grid_id "
        "FROM {dtsrc}_data GROUP BY {cols}"
    ).format(cols=", ".join(COLS), dtsrc=DATA_SOURCE), api='psql')
    
    dbtbl_to_shp(
        db, GRP_BY_TBL, "geom", OUTPUT_TABLE,
        api="psql", epsg=EPSG
    )
    
    return OUTPUT_TABLE
Ejemplo n.º 26
0
Archivo: k.py Proyecto: jasp382/gasp
def multiCols_FK_to_singleCol(db,
                              tbl_wPk,
                              pkCol,
                              tbl_multiFk,
                              fkCols,
                              newTable,
                              colsSel=None,
                              whrCls=None):
    """
    For two tables as:
    
    Main table:
    PK | col_1 | col_2 | col_n
    1  |   0   |   0   |   0
    2  |   1   |   1   |   1
    3  |   0   |   2   |   2
    4  |   1   |   2   |   3
    
    Table with a foreign key with several columns:
    col_1 | col_2 | col_n
      0   |   0   |   0
      0   |   0   |   0
      0   |   2   |   2
      1   |   1   |   1
      1   |   2   |   3
      1   |   1   |   1
    
    Create a new table with a foreign key in a single column:
    col_1 | col_2 | col_n | FK
      0   |   0   |   0   | 1
      0   |   0   |   0   | 1
      0   |   2   |   2   | 3
      1   |   1   |   1   | 2
      1   |   2   |   3   | 4
      1   |   1   |   1   | 2
    
    In this example:
    pk_field = PK
    cols_foreign = {col_1 : col_1, col_2: col_2, col_n : col_n}
    (Keys are cols of tbl_wPk and values are cols of the tbl_multiFk)
    
    colsSel - optional list of tbl_multiFk columns to keep in the output
              (all columns when None)
    whrCls  - optional WHERE clause appended to the query
    
    Returns the name of the created table.
    """

    if type(fkCols) != dict:
        raise ValueError("fkCols parameter should be a dict")

    from gasp.pyt import obj_to_lst
    from gasp.sql.to import q_to_ntbl

    colsSel = obj_to_lst(colsSel)

    q = (
        "SELECT {tpk}.{pk}, {cls} FROM {tfk} "
        "INNER JOIN {tpk} ON {tblRel}{whr}"
    ).format(
        tpk=tbl_wPk, pk=pkCol, tfk=tbl_multiFk,
        # BUGFIX: select the requested columns of tbl_multiFk; the loop
        # variable was previously unused and "{tbl_wPk}.{pkCol}" was
        # repeated once per element of colsSel
        cls="{}.*".format(tbl_multiFk) if not colsSel else \
            ", ".join(["{}.{}".format(tbl_multiFk, c) for c in colsSel]),
        # One equality per FK column pair (key = tbl_wPk col,
        # value = tbl_multiFk col)
        tblRel=" AND ".join([
            "{}.{} = {}.{}".format(
                tbl_multiFk, fkCols[k], tbl_wPk, k
            ) for k in fkCols
        ]),
        whr="" if not whrCls else " WHERE {}".format(whrCls)
    )

    outbl = q_to_ntbl(db, newTable, q, api='psql')

    return outbl
Ejemplo n.º 27
0
Archivo: ref.py Proyecto: jasp382/gasp
def select_using_excel_refs(db_name,
                            excel_file,
                            sheet_name,
                            pgtable,
                            ref_fields,
                            tableInRef,
                            tableOutRef=None):
    """
    Split PGTABLE using references in excel table
    
    Create two tables:
    * One with similar rows - columns combination are in excel table;
    * One with rows not in excel table.
    
    db_name     - database name
    excel_file  - excel file with the reference rows
    sheet_name  - sheet to read from excel_file
    pgtable     - table to split
    ref_fields  - columns used to match excel rows against pgtable
    tableInRef  - output table with rows matching the excel references
    tableOutRef - optional output table with the remaining rows
    
    TODO: Check if it's works. 
    """

    from gasp.fm import tbl_to_obj
    from gasp.sql.i import cols_type
    from gasp.sql.to import q_to_ntbl

    def to_and(row, cols, ctype):
        # Build "(col1='v1' AND col2=v2 ...)" for one excel row;
        # string-typed columns are quoted, numeric ones are not
        def get_equal(_type):
            return '{}=\'{}\'' if _type == str else '{}={}'

        row['AND_E'] = ' AND '.join(
            get_equal(ctype[col]).format(col, row[col]) for col in cols)

        row['AND_E'] = '(' + row['AND_E'] + ')'

        return row

    # Get excel data
    table = tbl_to_obj(excel_file, sheet=sheet_name)

    # Get reference fields type
    TYPE_COLS = cols_type(db_name, pgtable)

    # BUGFIX: apply row-wise (axis=1); the default axis=0 hands each
    # COLUMN to to_and, which indexes by column name and would fail
    table = table.apply(lambda x: to_and(x, ref_fields, TYPE_COLS), axis=1)

    whr_equal = ' OR '.join(table['AND_E'])

    q_to_ntbl(db_name,
              tableInRef,
              "SELECT * FROM {} WHERE {}".format(pgtable, whr_equal),
              api='psql')

    if tableOutRef:
        COLS_RELATION = " AND ".join([
            "{ft}.{f} = {st}.{f}".format(ft=pgtable, f=col, st=tableInRef)
            for col in TYPE_COLS
        ])

        # Anti-join: rows of pgtable with no match in tableInRef
        q_to_ntbl(db_name,
                  tableOutRef,
                  ("SELECT {ft}.* FROM {ft} LEFT JOIN {st} ON "
                   "{rel} WHERE {st}.{c} IS NULL").format(
                       ft=pgtable,
                       st=tableInRef,
                       rel=COLS_RELATION,
                       # BUGFIX: dict views are not indexable in Python 3
                       c=list(TYPE_COLS.keys())[0]),
                  api='psql')
Ejemplo n.º 28
0
Archivo: time.py Proyecto: jasp382/gasp
def del_rows_by_temporal_proximity(db,
                                   table,
                                   entity_fields,
                                   day_field,
                                   hour_field,
                                   hour_decimal,
                                   minute_field,
                                   second_field,
                                   time_tolerance,
                                   outresult,
                                   exclusionRows=None):
    """
    Exclude rows from one pgtable within some temporal interval from the
    previous row.
    
    Table structure should be
    entity |     day    | hour | minute | seconds | hour_decimal
      0    | 2018-01-02 |  5   |   X    |    X    |     5,10
      0    | 2018-01-03 |  4   |   X    |    X    |     4,15
      0    | 2018-01-02 |  5   |   X    |    X    |     5,12
      0    | 2018-01-02 |  5   |   X    |    X    |     5,8
      1    | 2018-01-02 |  4   |   X    |    X    |     4,10
      1    | 2018-01-02 |  5   |   X    |    X    |     5,12
      1    | 2018-01-02 |  4   |   X    |    X    |     4,20
      1    | 2018-01-02 |  4   |   X    |    X    |     4,12
      1    | 2018-01-02 |  4   |   X    |    X    |     4,6
    
    time_tolerance is given in minutes (it is divided by 60 to compare
    against the decimal-hour delta).
    
    outresult receives the rows outside the tolerance; when exclusionRows
    is given, that table receives the rows that fell WITHIN the tolerance
    (i.e. the ones being discarded).
    
    Returns outresult.
    """

    from gasp.pyt import obj_to_lst
    from gasp.sql.to import q_to_ntbl

    entity_fields = obj_to_lst(entity_fields)

    if not entity_fields:
        raise ValueError("entity_fields value is not valid!")

    def _deltatime_q(whr):
        # Build the query that, for every row, computes deltatime = the
        # decimal-hour distance to the previous row of the same
        # entity/day (via the LAG window function), filtered by whr.
        # Both output tables share this query and differ only in WHERE.
        return (
            "SELECT *, ({hourDec} - previous_hour) AS deltatime FROM ("
            "SELECT *, {lag_entity}, "
            "LAG({hourDec}) OVER(PARTITION BY {entityCols}, {dayF} ORDER BY "
            "{entityCols}, {dayF}, {hourF}, {minutesF}, {secondsF}"
            ") AS previous_hour "
            "FROM {mtable} ORDER BY {entityCols}, {dayF}, "
            "{hourF}, {minutesF}, {secondsF}"
            ") AS w_previous_tbl "
            "WHERE {whr}"
        ).format(
            hourDec=hour_decimal,
            # Also carry the previous row's entity values (prev_<col>)
            lag_entity=", ".join([
                "LAG({cl}) OVER(PARTITION BY {ent}, {d} ORDER BY {ent}, {d}, {h}, {m}, {s}) AS prev_{cl}"
                .format(cl=c,
                        ent=", ".join(entity_fields),
                        d=day_field,
                        h=hour_field,
                        m=minute_field,
                        s=second_field) for c in entity_fields
            ]),
            entityCols=", ".join(entity_fields),
            dayF=day_field,
            hourF=hour_field,
            minutesF=minute_field,
            secondsF=second_field,
            mtable=table,
            whr=whr)

    if exclusionRows:
        # Rows deleted from table (within the temporal tolerance)
        q_to_ntbl(db, exclusionRows, _deltatime_q((
            "previous_hour IS NOT NULL AND "
            "({hourDec} - previous_hour) < {tol} / 60.0"
        ).format(hourDec=hour_decimal, tol=str(time_tolerance))), api='psql')

    # Get rows outside the given time tolerance (first row of each
    # partition has NULL previous_hour and is always kept)
    q_to_ntbl(db, outresult, _deltatime_q((
        "({hourDec} - previous_hour) IS NULL OR "
        "({hourDec} - previous_hour) > {tol} / 60.0"
    ).format(hourDec=hour_decimal, tol=str(time_tolerance))), api='psql')

    return outresult
Ejemplo n.º 29
0
def osm_to_relationaldb(osmData,
                        inSchema,
                        osmGeoTbl,
                        osmCatTbl,
                        osmRelTbl,
                        outSQL=None,
                        db_name=None):
    """
    PostgreSQL - OSM Data to Relational Model
    
    TODO: Just work for one geom table at once
    
    E.g.
    osmData = '/home/jasp/flainar/osm_centro.xml'
    
    inSchema = {
        "TBL" : ['points', 'lines', 'multipolygons'],
        'FID' : 'CAST(osm_id AS bigint)',
        "COLS" : [
            'name',
            "ST_X(wkb_geometry) AS longitude",
            "ST_Y(wkb_geometry) AS latitude",
            "wkb_geometry AS geom",
            "NULL AS featurecategoryid",
            "NULL AS flainarcategoryid",
            "NULL AS createdby",
            "NOW() AS createdon",
            "NULL AS updatedon",
            "NULL AS deletedon"
        ],
        "NOT_KEYS" : [
            'ogc_fid', 'osm_id', 'name', "wkb_geometry",
            'healthcare2', 'other_tags'
        ]
    }
    
    osmGeoTbl = {"TBL" : 'position', "FID" : 'positionid'}
    
    osmCatTbl = {
        "TBL" : 'osmcategory', "FID" : "osmcategoryid",
        "KEY_COL" : "keycategory", "VAL_COL" : "value",
        "COLS" : [
            "NULL AS createdby", "NOW() AS createdon",
            "NULL AS updatedon", "NULL AS deletedon"
        ]
    }
    
    osmRelTbl = {
        "TBL" : "position_osmcat", "FID" : 'pososmcatid'
    }
    
    Returns (osmgeotbl, osmcatbl, osmreltbl) - lists/names of the created
    tables - or, when outSQL is given, the path of the SQL dump produced
    by dump_tbls.
    """

    from gasp.pyt import obj_to_lst
    from gasp.pyt.oss import fprop
    from gasp.sql.i import cols_name
    from gasp.sql.to import q_to_ntbl
    from gasp.sql.db import create_db

    inSchema["TBL"] = obj_to_lst(inSchema["TBL"])

    # Create DB (named after the OSM file when db_name is not given)
    db = create_db(fprop(osmData, 'fn') if not db_name else db_name,
                   api='psql')

    # Send OSM data to Database
    # NOTE(review): osm_to_psql is not imported in this function - it must
    # be available at module level; confirm the file-top imports
    osm_to_psql(osmData, db)

    # Get KEYS COLUMNS
    # (per input table, every column that is not explicitly excluded is
    # treated as an OSM tag key)
    transcols = {}
    for tbl in inSchema["TBL"]:
        transcols[tbl] = [
            c for c in cols_name(db, tbl, sanitizeSpecialWords=None)
            if c not in inSchema["NOT_KEYS"]
        ]

    # Create osmGeoTbl
    # NOTE(review): osmGeoTbl is indexed per input table here
    # (osmGeoTbl[tbl]['TBL']), while the docstring example shows a flat
    # dict - confirm the expected structure with callers
    osmgeotbl = [
        q_to_ntbl(db,
                  osmGeoTbl[tbl]['TBL'],
                  ("SELECT {} AS {}, {} FROM {}").format(
                      inSchema["FID"], osmGeoTbl[tbl]["FID"],
                      ", ".join(inSchema["COLS"]), tbl),
                  api='psql') for tbl in inSchema["TBL"]
    ]

    # Create OSM categories table
    # One sub-query per (table, key column): distinct key/value pairs
    qs = []
    for tbl in inSchema["TBL"]:
        qs.extend([
            ("SELECT '{keyV}' AS {keyC}, CAST({t}.{keyV} AS text) AS {valC} "
             "FROM {t} WHERE {t}.{keyV} IS NOT NULL "
             "GROUP BY {t}.{keyV}").format(keyV=c,
                                           t=tbl,
                                           keyC=osmCatTbl["KEY_COL"],
                                           valC=osmCatTbl["VAL_COL"])
            for c in transcols[tbl]
        ])

    # Union all pairs, deduplicate across tables and assign a sequential
    # id (row_number) as the category primary key
    osmcatbl = q_to_ntbl(
        db,
        osmCatTbl["TBL"],
        ("SELECT row_number() OVER(ORDER BY {keyC}) "
         "AS {osmcatid}, {keyC}, {valC}{ocols} "
         "FROM ({q}) AS foo").format(
             q="SELECT {k}, {v} FROM ({t}) AS kvtbl GROUP BY {k}, {v}".format(
                 k=osmCatTbl["KEY_COL"],
                 v=osmCatTbl["VAL_COL"],
                 t=" UNION ALL ".join(qs),
             ) if len(inSchema["TBL"]) > 1 else " UNION ALL ".join(qs),
             keyC=osmCatTbl["KEY_COL"],
             osmcatid=osmCatTbl["FID"],
             valC=osmCatTbl["VAL_COL"],
             ocols="" if "COLS" not in osmCatTbl else ", {}".format(", ".join(
                 osmCatTbl["COLS"]))),
        api='psql')

    # Create relation table (feature <-> category, matched on key/value)
    osmreltbl = []
    for tbl in inSchema["TBL"]:
        qs = [(
            "SELECT {fid}, '{keyV}' AS key, CAST({t}.{keyV} AS text) AS osmval "
            "FROM {t} WHERE {t}.{keyV} IS NOT NULL").format(
                fid=inSchema["FID"], keyV=c, t=tbl) for c in transcols[tbl]]

        osmreltbl.append(
            q_to_ntbl(
                db,
                osmRelTbl[tbl]["TBL"],
                ("SELECT foo.{fid} AS {nfid}, catbl.{osmcatfid} "
                 "FROM ({mtbl}) AS foo INNER JOIN {catTbl} AS catbl "
                 "ON foo.key = catbl.{catkey} AND foo.osmval = catbl.{catval}"
                 ).format(mtbl=" UNION ALL ".join(qs),
                          fid=inSchema["FID"],
                          nfid=osmRelTbl[tbl]["FID"],
                          catTbl=osmCatTbl["TBL"],
                          osmcatfid=osmCatTbl["FID"],
                          catkey=osmCatTbl["KEY_COL"],
                          catval=osmCatTbl["VAL_COL"]),
                api='psql'))

    if not outSQL:
        return osmgeotbl, osmcatbl, osmreltbl
    else:
        from gasp.sql.fm import dump_tbls

        return dump_tbls(db, osmgeotbl + [osmcatbl] + osmreltbl, outSQL)
Ejemplo n.º 30
0
Archivo: prox.py Proyecto: jasp382/gasp
def st_near(db,
            inTbl,
            inGeom,
            nearTbl,
            nearGeom,
            output,
            near_col='near',
            api='psql',
            whrNear=None,
            outIsFile=None,
            until_dist=None,
            cols_in_tbl=None,
            intbl_pk=None,
            cols_near_tbl=None):
    """
    Near tool for PostGIS and Spatialite.

    Computes, for each row of inTbl, the distance to the features of
    nearTbl and stores the result in a new table (or file).

    Parameters:
    db        - database name / connection reference (passed through to
                q_to_ntbl / sel_by_attr).
    inTbl     - input table name.
    inGeom    - geometry column of inTbl.
    nearTbl   - table with the features to measure distance to.
    nearGeom  - geometry column of nearTbl.
    output    - name of the output table (or file path when outIsFile).
    near_col  - name of the distance column in the output (default 'near').
    api       - 'psql' (PostGIS) or 'splite'/'spatialite'.
    whrNear   - optional WHERE clause (without the WHERE keyword) to
                filter nearTbl.
    outIsFile - spatialite only: if truthy, write output to a file via
                sel_by_attr instead of creating a table.
    until_dist- psql + intbl_pk only: search radius for ST_DWithin
                (defaults to 100000 when not given).
    cols_in_tbl   - psql + intbl_pk only: columns of inTbl to keep
                    (default: all, as s.*).
    intbl_pk  - primary-key column of inTbl; when given (psql), a
                per-row nearest-neighbour join is used instead of a
                distance to the union of all near features.
    cols_near_tbl - psql + intbl_pk only: columns of nearTbl to keep.

    Returns output.

    Raises ValueError for an unknown api.
    """

    if api == 'psql' and not intbl_pk:
        from gasp.sql.to import q_to_ntbl

        # Distance from every inTbl row to the union of all nearTbl
        # geometries (optionally filtered by whrNear).
        q_to_ntbl(
            db,
            output,
            ("SELECT m.*, ST_Distance(m.{ingeom}, j.geom) AS {distCol} "
             "FROM {t} AS m, ("
             "SELECT ST_UnaryUnion(ST_Collect({neargeom})) AS geom "
             "FROM {tblNear}{nearwhr}"
             ") AS j").format(
                 ingeom=inGeom,
                 distCol=near_col,
                 t=inTbl,
                 neargeom=nearGeom,
                 tblNear=nearTbl,
                 # BUG FIX: the {nearwhr} placeholder was in the template
                 # but never supplied to format(), raising KeyError on
                 # every call of this branch.
                 nearwhr="" if not whrNear else " WHERE {}".format(whrNear)),
            api='psql')

        return output

    elif api == 'psql' and intbl_pk:
        from gasp.pyt import obj_to_lst
        from gasp.sql.to import q_to_ntbl

        # Per-row nearest neighbour: DISTINCT ON the input PK, ordered by
        # distance, restricted to candidates within until_dist.
        q_to_ntbl(
            db,
            output,
            ("SELECT DISTINCT ON (s.{col_pk}) "
             "{inTblCols}, {nearTblCols}"
             "ST_Distance("
             "s.{ingeomCol}, h.{negeomCol}"
             ") AS {nearCol} FROM {in_tbl} AS s "
             "LEFT JOIN {near_tbl} AS h "
             "ON ST_DWithin(s.{ingeomCol}, h.{negeomCol}, {dist_v}) "
             "ORDER BY s.{col_pk}, ST_Distance(s.{ingeomCol}, h.{negeomCol})"
             ).format(
                 col_pk=intbl_pk,
                 inTblCols="s.*" if not cols_in_tbl else ", ".join(
                     ["s.{}".format(x) for x in obj_to_lst(cols_in_tbl)]),
                 nearTblCols="" if not cols_near_tbl else ", ".join(
                     ["h.{}".format(x)
                      for x in obj_to_lst(cols_near_tbl)]) + ", ",
                 ingeomCol=inGeom,
                 negeomCol=nearGeom,
                 nearCol=near_col,
                 in_tbl=inTbl,
                 near_tbl=nearTbl,
                 dist_v="100000" if not until_dist else until_dist),
            api='psql')

        return output

    elif api == 'splite' or api == 'spatialite':
        Q = ("SELECT m.*, ST_Distance(m.{ingeom}, j.geom) AS {distCol} "
             "FROM {t} AS m, ("
             "SELECT ST_UnaryUnion(ST_Collect({neargeom})) AS geom "
             "FROM {tblNear}{nearwhr}"
             ") AS j").format(
                 ingeom=inGeom,
                 distCol=near_col,
                 t=inTbl,
                 neargeom=nearGeom,
                 tblNear=nearTbl,
                 nearwhr="" if not whrNear else " WHERE {}".format(whrNear))

        if outIsFile:
            from gasp.gt.attr import sel_by_attr

            sel_by_attr(db, Q, output, api_gis='ogr')

        else:
            from gasp.sql.to import q_to_ntbl

            q_to_ntbl(db, output, Q, api='ogr2ogr')

        return output

    else:
        raise ValueError("api {} does not exist!".format(api))