Ejemplo n.º 1
0
def change_field_type(db, table, fields, outable, cols=None):
    """
    Create a copy of a table with some columns cast to another type.

    Typical use: a table has numeric data saved as text and those
    columns must be converted to a numeric field.

    Parameters:
    * db - database handed to sqlcon to open the connection;
    * table - name of the input table;
    * fields - dict {'field_name' : 'field_type'} with the columns to
    cast and the SQL type each one is cast to;
    * outable - name of the table created with CREATE TABLE ... AS;
    * cols - columns to carry over to the new table; when empty, the
    columns returned by cols_name for the input table are used.

    Columns listed in fields are always written as CAST expressions, so
    they are left out of the plain column list.
    """

    from gasp.sql.i import cols_name

    if not cols:
        cols = cols_name(db, table)

    else:
        from gasp.pyt import obj_to_lst

        cols = obj_to_lst(cols)

    # Plain columns first, casted columns after. A single list avoids the
    # dangling comma ("SELECT , CAST(...)") produced when every column is
    # listed in fields (or when fields is empty).
    select_exprs = [f for f in cols if f not in fields]
    select_exprs.extend(
        'CAST({f_} AS {t}) AS {f_}'.format(f_=f, t=fields[f])
        for f in fields
    )

    con = sqlcon(db)

    try:
        cursor = con.cursor()

        try:
            # Create new table with the new field with converted values
            cursor.execute('CREATE TABLE {} AS SELECT {} FROM {}'.format(
                outable, ', '.join(select_exprs), table))

            con.commit()

        finally:
            cursor.close()

    finally:
        # Release the connection even when the statement fails
        con.close()
Ejemplo n.º 2
0
def del_cols(lyr, cols, api='grass', lyrn=1):
    """
    Drop columns from the attribute table of a vector map

    api options:
    * grass - v.db.dropcolumn is executed as a command line;
    * pygrass - v.db.dropcolumn is executed with the pygrass Module class.

    lyrn is the layer number given to v.db.dropcolumn.

    Returns lyr; any other api value raises ValueError.
    """

    from gasp.pyt import obj_to_lst

    col_lst = obj_to_lst(cols)

    if api not in ('grass', 'pygrass'):
        raise ValueError("API {} is not available".format(api))

    if api == 'pygrass':
        from grass.pygrass.modules import Module

        Module(
            "v.db.dropcolumn", map=lyr, layer=lyrn,
            columns=col_lst, quiet=True, run_=True
        )

        return lyr

    from gasp import exec_cmd

    command = "v.db.dropcolumn map={} layer={} columns={} --quiet".format(
        lyr, str(lyrn), ','.join(col_lst))

    exec_cmd(command)

    return lyr
Ejemplo n.º 3
0
def geom_to_points(db,
                   table,
                   geomCol,
                   outTable,
                   selCols=None,
                   newGeomCol=None):
    """
    Explode the geometries of a table into their points

    Builds a SELECT based on ST_DumpPoints and writes the result to
    outTable with q_to_ntbl (psql API). selCols are the extra columns
    to keep; the point geometry column is named newGeomCol, or "geom"
    when no name is given.

    Returns whatever q_to_ntbl returns.
    """

    from gasp.pyt import obj_to_lst
    from gasp.sql.to import q_to_ntbl

    keep_cols = obj_to_lst(selCols)

    # Optional leading column list, already followed by its comma
    if keep_cols:
        cols_sql = "{}, ".format(", ".join(keep_cols))
    else:
        cols_sql = ""

    out_geom = newGeomCol if newGeomCol else "geom"

    query = (
        "SELECT {cols}(ST_DumpPoints({geom})).geom AS {newCol} FROM {tbl}"
    ).format(cols=cols_sql, geom=geomCol, newCol=out_geom, tbl=table)

    return q_to_ntbl(db, outTable, query, api='psql')
Ejemplo n.º 4
0
Archivo: tw.py Proyecto: jasp382/gasp
def tweets_to_df(keyword=None,
                 inGeom=None,
                 epsg=None,
                 LANG='pt',
                 NTWEETS=1000,
                 tweetType='mixed',
                 apiKey=None,
                 dropFields=None):
    """
    Search for Tweets and return them as a table

    The search can be made by keyword, by location (inGeom) or both;
    at least one of them is required, and inGeom requires epsg.

    Parameters:
    * keyword - search expression;
    * inGeom - geometry from which getBufferParam derives the center
    and radius of the search;
    * epsg - spatial reference of inGeom;
    * LANG, NTWEETS, tweetType, apiKey - forwarded to search_tweets;
    * dropFields - column or columns to remove from the result.

    Returns 0 when the search result is empty; otherwise returns the
    search result with a "keyword" column added ('nan' when no keyword
    was given) and dropFields removed.
    """

    from gasp.pyt import obj_to_lst

    if not inGeom and not keyword:
        raise ValueError('inGeom or keyword, one of them are required')

    if inGeom and not epsg:
        raise ValueError('inGeom implies epsg')

    if inGeom:
        from gasp.gt.prop.feat.bf import getBufferParam

        x, y, dist = getBufferParam(inGeom, epsg, outSRS=4326)

        # NOTE(review): presumably meters to kilometers - confirm units
        dist = float(dist) / 1000

    else:
        x, y, dist = None, None, None

    data = search_tweets(lat=y,
                         lng=x,
                         radius=dist,
                         keyword=keyword,
                         NR_ITEMS=NTWEETS,
                         only_geo=None,
                         __lang=LANG,
                         resultType=tweetType,
                         key=apiKey)

    # Emptiness test for list-like results. The result is used below
    # like a pandas DataFrame, whose truth value is ambiguous and raises
    # ValueError; only that error is ignored, so unrelated failures
    # (including KeyboardInterrupt) are no longer swallowed.
    try:
        if not data:
            return 0
    except ValueError:
        pass

    data["keyword"] = keyword if keyword else 'nan'

    dropFields = obj_to_lst(dropFields)

    if dropFields:
        data.drop(dropFields, axis=1, inplace=True)

    return data
Ejemplo n.º 5
0
def by_query(search_type,
                    keyword=None, x_center=None, y_center=None, dist=None,
                    limit='100', face_fields=None):
    """
    Search data on facebook based on:
    - Keyword;
    - search type (user, page, event, place, placetopic);
    - location (center and distance from center);
    - limit (maximum number of users/pages/etc. to be returned)*.

    * Our default is 100, but the Facebook default is 60.

    The center is only sent when both x_center and y_center are given.
    face_fields is the field (or list of fields) requested for each
    object.

    Returns a pandas DataFrame with the data returned by the search
    plus an "url" column, or None if the search returned nothing. When
    "location" is one of the face_fields, that column is expanded into
    one column per location attribute.
    """

    import pandas
    from gasp.pyt    import obj_to_lst
    from gasp.fm.web import http_to_json

    # Deal with spaces in the keyword expression
    # (other special characters are not escaped here)
    if keyword:
        keyword = keyword.replace(' ', '%20')

    face_fields = obj_to_lst(face_fields)

    # Both coordinates are needed to define a center; testing against
    # None keeps 0 as a valid coordinate and avoids sending a literal
    # "None" when only one of the coordinates was given.
    has_center = x_center is not None and y_center is not None

    URL = (
        '{graph}search?access_token={_id}|{scrt}'
        '{_q}{typ}{cnt}{dst}{lmt}{flds}'
    ).format(
        graph=FACEBOOK_GRAPH_URL,
        _id  = FACEBOOK_TOKEN['APP_ID'],
        scrt = FACEBOOK_TOKEN['APP_SECRET'],
        _q   = '' if not keyword else '&q={}'.format(keyword),
        typ  = '&type={}'.format(search_type),
        cnt  = '' if not has_center else '&center={},{}'.format(
            y_center, x_center
        ),
        dst  = '' if not dist else '&distance={}'.format(dist),
        lmt  = '' if not limit else '&limit={}'.format(str(limit)),
        flds = '' if not face_fields else '&fields={}'.format(
            ','.join(face_fields)
        )
    )

    face_table = pandas.DataFrame(http_to_json(URL)['data'])

    if not face_table.shape[0]:
        return None

    face_table["url"] = "https://facebook.com//" + face_table["id"]

    if face_fields and "location" in face_fields:
        # Expand the location objects into one column per attribute
        face_table = pandas.concat([
            face_table.drop(["location"], axis=1),
            face_table["location"].apply(pandas.Series)
        ], axis=1)

    return face_table
Ejemplo n.º 6
0
Archivo: fm.py Proyecto: jasp382/gasp
def q_to_obj(dbname,
             query,
             db_api='psql',
             geomCol=None,
             epsg=None,
             of='df',
             cols=None):
    """
    Query database and convert data to Pandas Dataframe/GeoDataFrame

    API's Available:
    * psql;
    * sqlite;
    * mysql;

    output format options ("of" parameter):
    * df (Pandas Dataframe);
    * dict (Python Dict);

    Parameters:
    * query - SELECT statement, or a table name. Anything that does not
    start with SELECT is treated as a table name and converted to a
    SELECT over cols (all columns of the table when cols is empty);
    * geomCol - name of the geometry column. When given, the data is
    read with GeoDataFrame.from_postgis over a psql connection (db_api
    is not used in this case);
    * epsg - used to set the crs of the GeoDataFrame;
    * cols - only used when query is a table name.

    With of='dict' the return is a list with one dict for each row.
    """

    # Case and leading whitespace are ignored, so "select ..." is not
    # mistaken for a table name
    if not query.lstrip().upper().startswith('SELECT '):
        # Assuming query is a table name
        from gasp.pyt import obj_to_lst
        from gasp.sql.i import cols_name

        cols = cols_name(dbname, query) if not cols else \
            obj_to_lst(cols)

        query = "SELECT {} FROM {}".format(
            ", ".join(["{t}.{c} AS {c}".format(t=query, c=i) for i in cols]),
            query)

    if not geomCol:
        import pandas
        from gasp.sql.c import alchemy_engine

        pgengine = alchemy_engine(dbname, api=db_api)

        df = pandas.read_sql(query, pgengine, columns=None)

    else:
        from geopandas import GeoDataFrame
        from gasp.sql.c import sqlcon

        con = sqlcon(dbname, sqlAPI='psql')

        try:
            df = GeoDataFrame.from_postgis(
                query,
                con,
                geom_col=geomCol,
                crs="epsg:{}".format(str(epsg)) if epsg else None)

        finally:
            # The connection was never closed before
            con.close()

    if of == 'dict':
        df = df.to_dict(orient="records")

    return df
Ejemplo n.º 7
0
Archivo: oss.py Proyecto: jasp382/gasp
def del_file(_file):
    """
    Remove one file or a list of files

    Paths that do not point to an existing regular file are ignored.
    """

    from gasp.pyt import obj_to_lst

    for path in obj_to_lst(_file):
        is_existing_file = os.path.isfile(path) and os.path.exists(path)

        if not is_existing_file:
            continue

        os.remove(path)
Ejemplo n.º 8
0
def shape_to_rst_wShapeCheck(inShp, maxCellNumber, desiredCellsizes, outRst,
                             inEPSG):
    """
    Convert one Feature Class to Raster using the cellsizes included
    in desiredCellsizes. For each cellsize, a test raster is created in
    the folder of outRst and its number of cells is compared with
    maxCellNumber.

    The first cellsize (following the order of desiredCellsizes) whose
    raster has no more than maxCellNumber cells is the selected one:
    its raster is renamed to outRst.

    Returns outRst, or None if no cellsize respects maxCellNumber. The
    test rasters are deleted in both cases.

    Raises ValueError if desiredCellsizes has no valid value.
    """

    import os
    from gasp.pyt import obj_to_lst
    from gasp.gt.prop.rst import rst_shape

    desiredCellsizes = obj_to_lst(desiredCellsizes)
    if not desiredCellsizes:
        raise ValueError('desiredCellsizes does not have a valid value')

    workspace = os.path.dirname(outRst)

    # One test raster for each cellsize. The cellsize given to shp_to_rst
    # is the loop variable (the previous code referenced an undefined
    # name and failed with NameError).
    RASTERS = [
        shp_to_rst(inShp, cellsize, -1,
                   os.path.join(workspace, 'tst_cell_{}.tif'.format(cellsize)),
                   inEPSG) for cellsize in desiredCellsizes
    ]

    tstShape = rst_shape(RASTERS, gisApi='gdal')

    # First raster that respects the cell number limit
    NICE_RASTER = None
    for rst in RASTERS:
        if tstShape[rst][0] * tstShape[rst][1] <= maxCellNumber:
            NICE_RASTER = rst
            break

    if NICE_RASTER:
        os.rename(NICE_RASTER, outRst)

    # Delete the test rasters that were not selected (the selected one
    # no longer exists under its temporary name)
    for rst in RASTERS:
        if os.path.isfile(rst):
            os.remove(rst)

    return outRst if NICE_RASTER else None
Ejemplo n.º 9
0
def txt_cols_to_col(db, inTable, columns, strSep, newCol, outTable=None):
    """
    Concatenate several text columns into a single column

    The values of columns are joined with strSep (NULL values count as
    empty strings) and stored in newCol.

    If outTable is given, a new table is created with the columns that
    were not merged plus newCol, and outTable is returned. Otherwise,
    newCol is added to inTable and inTable is returned.

    Raises ValueError if one of the columns is not text or varchar.
    """

    from gasp.pyt import obj_to_lst
    from gasp.sql.i import cols_type

    merge_cols = obj_to_lst(columns)

    col_types = cols_type(db, inTable, sanitizeColName=None, pyType=False)

    for name in merge_cols:
        if col_types[name] not in ('text', 'varchar'):
            raise ValueError('{} should be of type text'.format(name))

    # col_a || 'sep' || col_b || 'sep' || ...
    glue = " || '{}' || ".format(strSep)
    concat_expr = glue.join(
        "COALESCE({}, '')".format(name) for name in merge_cols
    )

    if not outTable:
        # Add column to inTable
        from gasp.sql.tbl import update_table

        add_field(db, inTable, {newCol: 'text'})

        update_table(db, inTable, {newCol: concat_expr})

        return inTable

    # Write new table
    other_cols = [c for c in col_types if c not in merge_cols]
    new_col_sql = concat_expr + " AS {}".format(newCol)

    select_sql = ", ".join(other_cols + [new_col_sql])

    q_to_ntbl(db,
              outTable,
              "SELECT {} FROM {}".format(select_sql, inTable),
              api='psql')

    return outTable
Ejemplo n.º 10
0
Archivo: oss.py Proyecto: jasp382/gasp
def fprop(__file, prop, forceLower=None, fs_unit=None):
    """
    Return some property of file

    prop options (one string or a list of strings):
    * filename or fn - return filename without extension
    * fileformat or ff - return file extension (includes the dot)
    * filesize or fs - return file size

    fs_unit options (only used for the file size):
    * MB (default)
    * KB
    * any other value - the size is returned in bytes

    forceLower is not used by this implementation.

    If only one property is requested, its value is returned; with
    several properties, the return is a dict with the keys 'filename',
    'fileformat' and/or 'filesize'.
    """

    from gasp.pyt import obj_to_lst

    prop = obj_to_lst(prop)

    result = {}

    if 'filename' in prop or 'fn' in prop:
        fn, ff = os.path.splitext(os.path.basename(__file))

        result['filename'] = fn

        # The 'ff' alias was not checked here, so ['filename', 'ff']
        # returned no file format. The 'fn' test is kept because
        # existing callers always got 'fileformat' along with 'fn'.
        if 'fileformat' in prop or 'ff' in prop or 'fn' in prop:
            result['fileformat'] = ff

    elif 'fileformat' in prop or 'ff' in prop:
        result['fileformat'] = os.path.splitext(__file)[1]

    if 'filesize' in prop or 'fs' in prop:
        fs_unit = 'MB' if not fs_unit else fs_unit

        # Size in bytes, converted below if requested
        fs = os.path.getsize(__file)

        if fs_unit == 'MB':
            fs = (fs / 1024.0) / 1024

        elif fs_unit == 'KB':
            fs = fs / 1024.0

        result['filesize'] = fs

    if len(prop) != 1:
        return result

    # Single property: return the value, translating the short aliases
    aliases = {'fn': 'filename', 'ff': 'fileformat', 'fs': 'filesize'}

    return result[aliases.get(prop[0], prop[0])]
Ejemplo n.º 11
0
def distinct_val(db, pgtable, column):
    """
    List the distinct values (or combinations of values) of one or
    more columns of pgtable

    Returns a list with one dict for each distinct record.
    """

    from gasp.pyt    import obj_to_lst
    from gasp.sql.fm import q_to_obj

    col_expr = ", ".join(obj_to_lst(column))

    # GROUP BY is used to get the distinct combinations
    query = "SELECT {col} FROM {t} GROUP BY {col};".format(
        col=col_expr, t=pgtable
    )

    distinct_df = q_to_obj(db, query, db_api='psql')

    return distinct_df.to_dict(orient="records")
Ejemplo n.º 12
0
def trim_char_in_col(db,
                     pgtable,
                     cols,
                     trim_str,
                     outTable,
                     onlyTrailing=None,
                     onlyLeading=None):
    """
    Python implementation of the TRIM PSQL Function

    The PostgreSQL trim function is used to remove spaces or set of
    characters from the leading or trailing or both side from a string.

    A new table (outTable) is created with all columns of pgtable; the
    columns in cols are replaced by their trimmed version.

    Parameters:
    * cols - text/varchar column or columns to trim;
    * trim_str - expression placed before FROM in TRIM(... FROM col).
    It is written to the query as received;
    NOTE(review): presumably it must already be a quoted SQL literal -
    confirm with callers.
    * onlyTrailing - trim only the end of the string;
    * onlyLeading - trim only the start of the string.
    If both or none are set, both sides are trimmed.

    Returns outTable. Raises ValueError if a column is not text or
    varchar.
    """

    from gasp.pyt import obj_to_lst
    from gasp.sql.i import cols_type

    cols = obj_to_lst(cols)

    colsTypes = cols_type(db, pgtable, sanitizeColName=None, pyType=False)

    for col in cols:
        if colsTypes[col] not in ('text', 'varchar'):
            raise ValueError('{} should be of type text'.format(col))

    colsToSelect = [_c for _c in colsTypes if _c not in cols]

    if onlyTrailing and not onlyLeading:
        tail_lead_str = "TRAILING "
    elif onlyLeading and not onlyTrailing:
        tail_lead_str = "LEADING "
    else:
        tail_lead_str = ""

    trimCols = [
        "TRIM({tol}{char} FROM {c}) AS {c}".format(c=col,
                                                   tol=tail_lead_str,
                                                   char=trim_str)
        for col in cols
    ]

    # Untouched columns first, trimmed columns after
    cols_to_select = ", ".join(colsToSelect + trimCols)

    # The query must use the SQL string and not the colsToSelect list,
    # which was previously written to the statement as "['a', 'b']"
    q_to_ntbl(db,
              outTable,
              "SELECT {} FROM {}".format(cols_to_select, pgtable),
              api='psql')

    return outTable
Ejemplo n.º 13
0
Archivo: to.py Proyecto: jasp382/gasp
def tbl_fromdb_todb(from_db, to_db, tables, qForTbl=None, api='pandas'):
    """
    Send PGSQL Tables from one database to other

    api options:
    * pandas - each table is read to a dataframe and written in to_db;
    * psql - the tables are dumped to a SQL file and restored in to_db.
    Any other value is replaced by pandas.

    qForTbl is an optional dict {table_name : query}. With the pandas
    api, tables listed in qForTbl are read using the given query
    instead of SELECT * FROM table. It is not used by the psql api.
    """

    from gasp.pyt import obj_to_lst

    if api not in ('pandas', 'psql'):
        api = 'pandas'

    tables = obj_to_lst(tables)

    if api == 'pandas':
        from gasp.sql.fm import q_to_obj

        for table in tables:
            if qForTbl and table in qForTbl:
                query = qForTbl[table]
            else:
                query = "SELECT * FROM {}".format(table)

            tblDf = q_to_obj(from_db, query, db_api='psql')

            df_to_db(to_db, tblDf, table, api='psql')

    else:
        import os
        from gasp.pyt.oss import mkdir, del_folder
        from gasp.sql.fm  import dump_tbls
        from gasp.sql.to  import restore_tbls

        # Temporary folder created next to this module
        tmpFolder = mkdir(
            os.path.dirname(os.path.abspath(__file__)), randName=True
        )

        try:
            # Dump
            sqlScript = dump_tbls(from_db, tables, os.path.join(
                tmpFolder, "tables_data.sql"
            ))

            # Restore
            restore_tbls(to_db, sqlScript, tables)

        finally:
            # Do not leave the temporary folder behind if the dump or
            # the restore fails
            del_folder(tmpFolder)
Ejemplo n.º 14
0
def show_duplicates_in_xls(db_name, table, pkCols, outFile, tableIsQuery=None):
    """
    Export to a file the rows that are duplicated in table

    Two rows are duplicates when they have the same values in pkCols.
    table can be a table name or, with tableIsQuery, a query.

    Returns outFile. Raises ValueError if pkCols is not valid.
    """

    import pandas
    from gasp.pyt import obj_to_lst
    from gasp.sql.fm import q_to_obj
    from gasp.to import obj_to_tbl

    key_cols = obj_to_lst(pkCols)

    if not key_cols:
        raise ValueError("pkCols value is not valid")

    # The two variants only differ in the sources and in the aliases
    if tableIsQuery:
        outer_src = "({}) AS foo".format(table)
        inner_src = "({}) AS foo2".format(table)
        left, right = "foo", "jt"

    else:
        outer_src = inner_src = left = table
        right = "foo"

    join_on = " AND ".join(
        "{l}.{c} = {r}.{c}".format(l=left, r=right, c=col)
        for col in key_cols
    )

    q = (
        "SELECT {l}.* FROM {src} INNER JOIN ("
        "SELECT {cls}, COUNT({cnt}) AS conta FROM {isrc} "
        "GROUP BY {cls}"
        ") AS {r} ON {rel} "
        "WHERE conta > 1"
    ).format(
        l=left, src=outer_src, isrc=inner_src, r=right,
        cls=", ".join(key_cols), cnt=key_cols[0], rel=join_on
    )

    duplicates = q_to_obj(db_name, q, db_api='psql')

    obj_to_tbl(duplicates, outFile)

    return outFile
Ejemplo n.º 15
0
def st_dissolve(db, table, geomColumn, outTable, whrClause=None,
                diss_cols=None, outTblIsFile=None, api='sqlite'):
    """
    Dissolve a Polygon table

    The geometries are merged with ST_UnaryUnion(ST_Collect()); if
    diss_cols are given, one geometry is produced for each combination
    of values in those columns.

    Parameters:
    * whrClause - optional WHERE expression applied to table;
    * outTblIsFile - if true, outTable is a file and not a table
    in db;
    * api - sqlite or psql. The output geometry column is "geometry"
    for sqlite and "geom" otherwise.

    Returns outTable.
    """

    from gasp.pyt import obj_to_lst

    diss_cols = obj_to_lst(diss_cols) if diss_cols else None
    geomcol = "geometry" if api == 'sqlite' else 'geom'

    sql = (
        "SELECT{selCols} ST_UnaryUnion(ST_Collect({geom})) AS {gout} "
        "FROM {tbl}{whr}{grpBy}"
    ).format(
        selCols="" if not diss_cols else " {},".format(", ".join(diss_cols)),
        geom=geomColumn, tbl=table,
        whr="" if not whrClause else " WHERE {}".format(whrClause),
        grpBy="" if not diss_cols else " GROUP BY {}".format(
            ", ".join(diss_cols)
        ), gout=geomcol
    )

    if outTblIsFile:
        if api == 'sqlite':
            from gasp.gt.attr import sel_by_attr

            sel_by_attr(db, sql, outTable, api_gis='ogr')

        elif api == 'psql':
            from gasp.gt.toshp.db import dbtbl_to_shp

            # Export the dissolve query, not the input table: with
            # tableIsQuery=True the name of the table was being passed
            # as if it was a query. The geometry column of the query
            # result is geomcol.
            dbtbl_to_shp(
                db, sql, geomcol, outTable, api='pgsql2shp',
                tableIsQuery=True
            )

    else:
        from gasp.sql.to import q_to_ntbl

        q_to_ntbl(
            db, outTable, sql, api='ogr2ogr' if api == 'sqlite' else 'psql'
        )

    return outTable
Ejemplo n.º 16
0
def sql_proj(dbname,
             tbl,
             otbl,
             oepsg,
             cols=None,
             geomCol=None,
             newGeom=None,
             whr=None,
             new_pk=None):
    """
    Reproject geometric layer to another spatial reference system (srs)

    A new table (otbl) is created with the selected columns and the
    geometry transformed to oepsg with ST_Transform.

    Parameters:
    * cols - columns to keep; all columns of tbl except the geometry
    if not given;
    * geomCol - input geometry column ('geom' by default);
    * newGeom - name of the transformed geometry column ('geom' by
    default). If cols includes geomCol and both names are the same,
    the original geometry is kept as old_<geomCol>;
    * whr - optional WHERE expression;
    * new_pk - if given, it is passed to create_pk to add a primary key
    to the new table.

    Returns the result of q_to_ntbl.
    """

    from gasp.pyt import obj_to_lst
    from gasp.sql.to import q_to_ntbl

    geomCol = 'geom' if not geomCol else geomCol
    newGeom = 'geom' if not newGeom else newGeom

    if not cols:
        from gasp.sql.i import cols_name

        cols = cols_name(dbname, tbl)

        cols.remove(geomCol)

    else:
        # Work on a copy: the list is changed below and it may be the
        # same object that was given by the caller
        cols = list(obj_to_lst(cols))

        if geomCol in cols and geomCol == newGeom:
            cols.remove(geomCol)
            cols.append('{c} AS old_{c}'.format(c=geomCol))

    # The transformed geometry is always the last item; joining a single
    # list keeps the statement valid when there are no other columns
    select_items = cols + ["ST_Transform({}, {}) AS {}".format(
        geomCol, str(oepsg), newGeom)]

    Q = "SELECT {} FROM {}{}".format(
        ", ".join(select_items), tbl,
        "" if not whr else " WHERE {}".format(whr))

    otbl = q_to_ntbl(dbname, otbl, Q, api='psql')

    if new_pk:
        from gasp.sql.k import create_pk

        create_pk(dbname, otbl, new_pk)

    return otbl
Ejemplo n.º 17
0
def sel_where_groupByIs(db,
                        table,
                        groupByCols,
                        grpByOp,
                        grpByVal,
                        outTable,
                        filterWhere=None):
    """
    Select the rows of table whose group (defined by groupByCols) has a
    number of rows that agrees with the statement formed by grpByOp and
    grpByVal. The selected rows are written to outTable.

    For the following parameters:
    table=tst_table, groupByCols=[day, hour], grpByOp=>, grpByVal=1

    This method will create a new table using a query such
    SELECT tst_table.* FROM tst_table INNER JOIN (
        SELECT day, hour, COUNT(day) AS cnt_day FROM tst_table
        GROUP BY day, hour
    ) AS foo ON tst_table.day = foo.day AND tst_table.hour = foo.hour
    WHERE foo.cnt_day > 1

    filterWhere is an optional expression added to the WHERE clause
    with AND.
    """

    from gasp.pyt import obj_to_lst
    from gasp.sql.to import q_to_ntbl

    group_cols = obj_to_lst(groupByCols)

    # The first group column is the one being counted
    count_col = group_cols[0]
    group_sql = ", ".join(group_cols)

    counts_sql = (
        "SELECT {cls}, COUNT({col}) AS cnt_{col} FROM {t} GROUP BY {cls}"
    ).format(cls=group_sql, col=count_col, t=table)

    join_sql = " AND ".join(
        "{t}.{c} = foo.{c}".format(t=table, c=name) for name in group_cols
    )

    if filterWhere:
        extra_where = " AND ({})".format(filterWhere)
    else:
        extra_where = ""

    q = (
        "SELECT {t}.* FROM {t} INNER JOIN ({sub}) AS foo ON {jOn} "
        "WHERE foo.cnt_{col} {op} {val}{fwhr}"
    ).format(
        t=table, sub=counts_sql, jOn=join_sql, col=count_col,
        op=grpByOp, val=grpByVal, fwhr=extra_where
    )

    return q_to_ntbl(db, outTable, q, api='psql')
Ejemplo n.º 18
0
def get_sheet_position(xlsObj, sheetNames):
    """
    Get the position of sheets given their names

    Returns a dict {sheet_name : position}, where position is the
    0-based index of the sheet in xlsObj.sheets(). Names that are not
    in the file are not included.
    """

    from gasp.pyt import obj_to_lst

    wanted = obj_to_lst(sheetNames)

    return {
        sheet.name: position
        for position, sheet in enumerate(xlsObj.sheets())
        if sheet.name in wanted
    }
Ejemplo n.º 19
0
def geomext_to_rst_wShapeCheck(inGeom, maxCellNumber, desiredCellsizes, outRst,
                               inEPSG):
    """
    Convert the extent of one Geometry to Raster using one of the
    cellsizes in desiredCellsizes.

    The cellsizes are tested in the given order; the first one that
    produces a raster with no more than maxCellNumber cells is used.

    Returns outRst, or None if all cellsizes exceed maxCellNumber.
    Raises ValueError if desiredCellsizes has no valid value.
    """

    import os
    from gasp.pyt import obj_to_lst

    cellsizes = obj_to_lst(desiredCellsizes)
    if not cellsizes:
        raise ValueError('desiredCellsizes does not have a valid value')

    # Get geom extent
    x_min, x_max, y_min, y_max = inGeom.GetEnvelope()

    def _fits(size):
        # Number of cells of a raster with this cellsize
        n_rows = int(round((y_max - y_min) / size, 0))
        n_cols = int(round((x_max - x_min) / size, 0))

        return n_rows * n_cols <= maxCellNumber

    selected = next((size for size in cellsizes if _fits(size)), None)

    if not selected:
        return None

    shpext_to_rst(inGeom,
                  outRst,
                  selected,
                  epsg=inEPSG,
                  invalidResultAsNone=True)

    return outRst
Ejemplo n.º 20
0
def drop_col(db, pg_table, columns):
    """
    Delete column from pg_table

    columns can be one column name or a list of names; all columns
    are dropped with a single ALTER TABLE statement.
    """

    from gasp.pyt import obj_to_lst

    columns = obj_to_lst(columns)

    drop_sql = 'ALTER TABLE {} {};'.format(
        pg_table, ', '.join(['DROP COLUMN {}'.format(x) for x in columns]))

    con = sqlcon(db)

    try:
        cursor = con.cursor()

        try:
            cursor.execute(drop_sql)

            con.commit()

        finally:
            cursor.close()

    finally:
        # Release the connection even when the statement fails
        con.close()
Ejemplo n.º 21
0
Archivo: dns.py Proyecto: jasp382/gasp
def kernel_density_for_field(points, fields, radius, folderoutput, template):
    """
    Run kernel_density once for each field in fields

    Each result is written in folderoutput; the name of the file is the
    name of the points file followed by _<field>.tif

    Raises ValueError if fields is not valid.
    """

    import os
    from gasp.pyt import obj_to_lst

    field_lst = obj_to_lst(fields)

    if not field_lst:
        raise ValueError('fields value is not valid')

    # Name of the points file, without extension
    base_name = os.path.splitext(os.path.basename(points))[0]

    for name in field_lst:
        out_rst = os.path.join(
            folderoutput, base_name + '_{}.tif'.format(name)
        )

        kernel_density(points, name, radius, template, out_rst)
Ejemplo n.º 22
0
def exec_write_q(db_name, queries, api='psql'):
    """
    Execute Queries and save result in the database

    queries can be one statement or a list of statements. They are
    executed in order using the same connection and committed together
    at the end.

    api options:
    * psql;
    * sqlite (db_name is given to sqlite3.connect).

    Raises ValueError if queries is not valid or if the api is not
    available.
    """

    from gasp.pyt import obj_to_lst

    qs = obj_to_lst(queries)

    if not qs:
        raise ValueError("queries value is not valid")

    if api == 'psql':
        from gasp.sql.c import sqlcon

        con = sqlcon(db_name)

    elif api == 'sqlite':
        import sqlite3

        con = sqlite3.connect(db_name)

    else:
        raise ValueError('API {} is not available'.format(api))

    # Both APIs are used in the same way from here
    try:
        cs = con.cursor()

        try:
            for q in qs:
                cs.execute(q)

            con.commit()

        finally:
            cs.close()

    finally:
        # Release the connection even when a statement fails
        con.close()
Ejemplo n.º 23
0
Archivo: i.py Proyecto: jasp382/gasp
def lst_views(db, schema='public', basename=None):
    """
    List the views of a schema of the database

    basename is an optional string or list of strings; when given, only
    the views whose name includes at least one of them are listed.

    Returns a list with the names of the views.
    """

    from gasp.pyt import obj_to_lst
    from gasp.sql.fm import q_to_obj

    name_parts = obj_to_lst(basename)

    if name_parts:
        likes = " OR ".join(
            "table_name LIKE '%%{}%%'".format(part) for part in name_parts
        )

        name_filter = " AND ({})".format(likes)

    else:
        name_filter = ""

    query = (
        "SELECT table_name FROM information_schema.views "
        "WHERE table_schema='{}'{}"
    ).format(schema, name_filter)

    views = q_to_obj(db, query, db_api='psql')

    return views.table_name.tolist()
Ejemplo n.º 24
0
def replace_char_in_col(db, pgtable, cols, match_str, replace_str, outTable):
    """
    Python implementation of the REPLACE PSQL Function

    Creates a new table (outTable) with all columns of pgtable; in the
    columns listed in cols, match_str is replaced by replace_str.

    Returns outTable. Raises ValueError if a column is not text or
    varchar.
    """

    from gasp.pyt import obj_to_lst
    from gasp.sql.i import cols_type

    target_cols = obj_to_lst(cols)

    col_types = cols_type(db, pgtable, sanitizeColName=None, pyType=False)

    for name in target_cols:
        if col_types[name] not in ('text', 'varchar'):
            raise ValueError('{} should be of type text'.format(name))

    # Untouched columns first, replaced columns after
    select_items = [c for c in col_types if c not in target_cols]

    select_items += [
        "REPLACE({c}, '{char}', '{nchar}') AS {c}".format(
            c=name, char=match_str, nchar=replace_str
        ) for name in target_cols
    ]

    query = "SELECT {cols} FROM {tbl}".format(
        cols=", ".join(select_items), tbl=pgtable
    )

    q_to_ntbl(db, outTable, query, api='psql')

    return outTable
Ejemplo n.º 25
0
def rst_val_to_points2(pntShp, listRasters):
    """
    Pick raster value for each point in pntShp

    For each point, the value of the first band of each raster in
    listRasters is read at the cell where the point is.

    Returns a dict {feature_id : [value_raster_1, value_raster_2, ...]};
    the values follow the order of listRasters. Each value is the first
    row of the 1x1 array that was read.
    """

    from osgeo import gdal, ogr
    from gasp.pyt import obj_to_lst
    from gasp.gt.prop.ff import drv_name

    listRasters = obj_to_lst(listRasters)

    # The file to open is pntShp (an undefined name was used before)
    shp = ogr.GetDriverByName(drv_name(pntShp)).Open(pntShp, 0)

    lyr = shp.GetLayer()

    # Open each raster only once instead of once for each point. The
    # dataset is stored with its band to keep it alive while in use.
    rasters = []
    for rst in listRasters:
        img = gdal.Open(rst)

        rasters.append((img, img.GetGeoTransform(), img.GetRasterBand(1)))

    pntDict = {}
    for feat in lyr:
        geom = feat.GetGeometryRef()

        x, y = geom.GetX(), geom.GetY()

        values = []
        for _img, geo_transform, band in rasters:
            # Coordinates to column/row using the raster origin and
            # the cell dimensions
            px = int((x - geo_transform[0]) / geo_transform[1])
            py = int((y - geo_transform[3]) / geo_transform[5])

            value = band.ReadAsArray(px, py, 1, 1)

            values.append(list(value)[0])

        pntDict[feat.GetFID()] = values

    del rasters

    shp.Destroy()

    return pntDict
Ejemplo n.º 26
0
Archivo: agg.py Proyecto: jasp382/gasp
def df_groupBy(df, grpCols, STAT=None, STAT_FIELD=None):
    """
    Group a Pandas Dataframe by one or more columns

    STAT OPTIONS:
    * MIN
    * MAX
    * SUM

    Without STAT, the groupby object is returned. With STAT, the
    statistic is computed for STAT_FIELD in each group.

    Raises ValueError if grpCols is not valid, if STAT is given without
    STAT_FIELD or if STAT is not one of the options.
    """

    from gasp.pyt import obj_to_lst

    group_cols = obj_to_lst(grpCols)

    if not group_cols:
        raise ValueError("grpCols value is not valid")

    if not STAT:
        return df.groupby(group_cols, axis=0, as_index=False)

    if not STAT_FIELD:
        raise ValueError("To use STAT, you must specify STAT_FIELD")

    # STAT option -> name of the groupby method
    reducers = {'MIN': 'min', 'MAX': 'max', 'SUM': 'sum'}

    if STAT not in reducers:
        raise ValueError("{} is not a valid option".format(STAT))

    grouped_field = df.groupby(
        group_cols, axis=0, as_index=False
    )[STAT_FIELD]

    return getattr(grouped_field, reducers[STAT])()
Ejemplo n.º 27
0
Archivo: fm.py Proyecto: jasp382/gasp
def dump_tbls(db, tables, outsql, startWith=None):
    """
    Dump one or more tables of a database into a file using pg_dump

    If startWith is true, the values in tables are seen as prefixes:
    all tables of db with a name starting with one of them are dumped.

    Returns outsql.
    """

    from gasp import exec_cmd
    from gasp.pyt import obj_to_lst
    from gasp.cons.psql import con_psql

    tbl_names = obj_to_lst(tables)

    if startWith:
        from gasp.sql.i import lst_tbl

        prefixes = tbl_names

        # A table is listed once for each prefix it matches
        tbl_names = [
            existing for existing in lst_tbl(db, api='psql')
            for prefix in prefixes if existing.startswith(prefix)
        ]

    con_params = con_psql()

    tbl_flags = " ".join("-t {}".format(name) for name in tbl_names)

    command = (
        "pg_dump -Fc -U {user} -h {host} -p {port} "
        "-w {tbl} {db} > {out}"
    ).format(
        user=con_params["USER"], host=con_params["HOST"],
        port=con_params["PORT"], tbl=tbl_flags, db=db, out=outsql
    )

    exec_cmd(command)

    return outsql
Ejemplo n.º 28
0
def get_text_in_CssClass(url, classTag, cssCls, texTags=None):
    """
    Get text from tags inside a specific object with one tag (classTag) and
    CSS Class (cssCls)

    Not recursive: textTags must be direct child of the classTag/cssCls

    Parameters:
    * url - address of the page to read;
    * classTag - tag of the container objects;
    * cssCls - CSS class of the container objects;
    * texTags - tag or list of tags with the text; ['p'] if not given.

    Returns a dict {tag : [text, ...]} with the text found for each
    tag in all containers. Tags only appear in the dict if at least
    one container was found.
    """

    import re
    from contextlib import closing
    from bs4 import BeautifulSoup
    from gasp.pyt import obj_to_lst

    # urllib2 only exists in Python 2
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib2 import urlopen

    # None is the default to avoid a mutable default argument
    texTags = ['p'] if texTags is None else obj_to_lst(texTags)

    # closing() releases the HTTP response after it is read
    with closing(urlopen(url)) as resp:
        html_doc = resp.read()

    soup = BeautifulSoup(html_doc, 'html.parser')

    data = soup.find_all(classTag, class_=cssCls)

    rslt = {}
    for node in data:
        for t in texTags:
            chld = node.findChildren(t, recursive=False)

            # Remove inner markup and newlines at the start and end
            rslt.setdefault(t, []).extend(
                re.sub('<[^>]+>', '', str(x)).strip('\n') for x in chld
            )

    return rslt
Ejemplo n.º 29
0
def col_to_timestamp(db,
                     inTbl,
                     dayCol,
                     hourCol,
                     minCol,
                     secCol,
                     newTimeCol,
                     outTbl,
                     selColumns=None,
                     whr=None):
    """
    Create a timestamp column from day, hour, minute and second columns

    The four columns are cast to text, joined as 'day hour:min:sec' and
    parsed with TO_TIMESTAMP using the format 'YYYY-MM-DD HH24:MI:SS'.
    The result is written to outTbl with selColumns (all columns if
    not given) plus newTimeCol. whr is an optional WHERE expression.

    Returns outTbl.
    """

    from gasp.pyt import obj_to_lst

    keep_cols = obj_to_lst(selColumns)

    def _as_text(col):
        # NULL values are seen as empty strings
        return "COALESCE(CAST({} AS text), '')".format(col)

    stamp_expr = (
        _as_text(dayCol) + " || ' ' || " +
        _as_text(hourCol) + " || ':' || " +
        _as_text(minCol) + " || ':' || " +
        _as_text(secCol)
    )

    sql = (
        "SELECT {C}, TO_TIMESTAMP({E}, 'YYYY-MM-DD HH24:MI:SS') "
        "AS {TC} FROM {T}{W}"
    ).format(
        C=", ".join(keep_cols) if keep_cols else "*",
        E=stamp_expr,
        TC=newTimeCol,
        T=inTbl,
        W=" WHERE {}".format(whr) if whr else ""
    )

    q_to_ntbl(db, outTbl, sql, api='psql')

    return outTbl
Ejemplo n.º 30
0
def osm_to_relationaldb(osmData,
                        inSchema,
                        osmGeoTbl,
                        osmCatTbl,
                        osmRelTbl,
                        outSQL=None,
                        db_name=None):
    """
    PostgreSQL - OSM Data to Relational Model
    
    Steps:
    1. create a database (named db_name or, if not given, after the
    osmData file) and load the OSM data into it;
    2. for each input table, create a geometry table with the feature ID
    and the columns listed in inSchema["COLS"];
    3. create one categories table with the distinct (key, value) pairs
    found in every column of the input tables that is not in
    inSchema["NOT_KEYS"];
    4. for each input table, create a relation table linking each feature
    ID to the IDs of its categories.
    
    TODO: Just work for one geom table at once
    
    osmGeoTbl and osmRelTbl are indexed by input table name: they must
    have one entry for each table in inSchema["TBL"].
    
    Returns (geometry tables, categories table, relation tables) if outSQL
    is not given; otherwise returns the result of dumping these tables
    to outSQL.
    
    E.g.
    osmData = '/home/jasp/flainar/osm_centro.xml'
    
    inSchema = {
        "TBL" : ['points'],
        'FID' : 'CAST(osm_id AS bigint)',
        "COLS" : [
            'name',
            "ST_X(wkb_geometry) AS longitude",
            "ST_Y(wkb_geometry) AS latitude",
            "wkb_geometry AS geom",
            "NULL AS featurecategoryid",
            "NULL AS flainarcategoryid",
            "NULL AS createdby",
            "NOW() AS createdon",
            "NULL AS updatedon",
            "NULL AS deletedon"
        ],
        "NOT_KEYS" : [
            'ogc_fid', 'osm_id', 'name', "wkb_geometry",
            'healthcare2', 'other_tags'
        ]
    }
    
    osmGeoTbl = {
        "points" : {"TBL" : 'position', "FID" : 'positionid'}
    }
    
    osmCatTbl = {
        "TBL" : 'osmcategory', "FID" : "osmcategoryid",
        "KEY_COL" : "keycategory", "VAL_COL" : "value",
        "COLS" : [
            "NULL AS createdby", "NOW() AS createdon",
            "NULL AS updatedon", "NULL AS deletedon"
        ]
    }
    
    osmRelTbl = {
        "points" : {"TBL" : "position_osmcat", "FID" : 'pososmcatid'}
    }
    """

    from gasp.pyt import obj_to_lst
    from gasp.pyt.oss import fprop
    from gasp.sql.i import cols_name
    from gasp.sql.to import q_to_ntbl
    from gasp.sql.db import create_db

    # Keep the list of input tables in a local name: the dict received
    # from the caller is not modified
    tables = obj_to_lst(inSchema["TBL"])

    # Create DB
    db = create_db(fprop(osmData, 'fn') if not db_name else db_name,
                   api='psql')

    # Send OSM data to Database
    # NOTE: osm_to_psql is not imported in this function; it must be
    # available in the module namespace
    osm_to_psql(osmData, db)

    # Get KEYS COLUMNS
    # Every column not listed in NOT_KEYS is handled as an OSM key
    transcols = {}
    for tbl in tables:
        transcols[tbl] = [
            c for c in cols_name(db, tbl, sanitizeSpecialWords=None)
            if c not in inSchema["NOT_KEYS"]
        ]

    # Create osmGeoTbl
    osmgeotbl = [
        q_to_ntbl(db,
                  osmGeoTbl[tbl]['TBL'],
                  ("SELECT {} AS {}, {} FROM {}").format(
                      inSchema["FID"], osmGeoTbl[tbl]["FID"],
                      ", ".join(inSchema["COLS"]), tbl),
                  api='psql') for tbl in tables
    ]

    # Create OSM categories table
    # One query for each key column: distinct values of that column
    cat_qs = []
    for tbl in tables:
        cat_qs.extend([
            ("SELECT '{keyV}' AS {keyC}, CAST({t}.{keyV} AS text) AS {valC} "
             "FROM {t} WHERE {t}.{keyV} IS NOT NULL "
             "GROUP BY {t}.{keyV}").format(keyV=c,
                                           t=tbl,
                                           keyC=osmCatTbl["KEY_COL"],
                                           valC=osmCatTbl["VAL_COL"])
            for c in transcols[tbl]
        ])

    all_cats = " UNION ALL ".join(cat_qs)

    # With more than one input table, the same (key, value) pair may come
    # from different tables: group again to remove the duplicates
    if len(tables) > 1:
        all_cats = (
            "SELECT {k}, {v} FROM ({t}) AS kvtbl GROUP BY {k}, {v}"
        ).format(k=osmCatTbl["KEY_COL"],
                 v=osmCatTbl["VAL_COL"],
                 t=all_cats)

    osmcatbl = q_to_ntbl(
        db,
        osmCatTbl["TBL"],
        ("SELECT row_number() OVER(ORDER BY {keyC}) "
         "AS {osmcatid}, {keyC}, {valC}{ocols} "
         "FROM ({q}) AS foo").format(
             q=all_cats,
             keyC=osmCatTbl["KEY_COL"],
             osmcatid=osmCatTbl["FID"],
             valC=osmCatTbl["VAL_COL"],
             ocols="" if "COLS" not in osmCatTbl else ", {}".format(", ".join(
                 osmCatTbl["COLS"]))),
        api='psql')

    # Create relation table
    osmreltbl = []
    for tbl in tables:
        # inSchema["FID"] may be an expression (e.g. a CAST): give it an
        # alias in the subquery, because an expression can not be
        # referenced as foo.<expression> in the outer query
        rel_qs = [(
            "SELECT {fid} AS osmfid, '{keyV}' AS key, "
            "CAST({t}.{keyV} AS text) AS osmval "
            "FROM {t} WHERE {t}.{keyV} IS NOT NULL").format(
                fid=inSchema["FID"], keyV=c, t=tbl) for c in transcols[tbl]]

        osmreltbl.append(
            q_to_ntbl(
                db,
                osmRelTbl[tbl]["TBL"],
                ("SELECT foo.osmfid AS {nfid}, catbl.{osmcatfid} "
                 "FROM ({mtbl}) AS foo INNER JOIN {catTbl} AS catbl "
                 "ON foo.key = catbl.{catkey} AND foo.osmval = catbl.{catval}"
                 ).format(mtbl=" UNION ALL ".join(rel_qs),
                          nfid=osmRelTbl[tbl]["FID"],
                          catTbl=osmCatTbl["TBL"],
                          osmcatfid=osmCatTbl["FID"],
                          catkey=osmCatTbl["KEY_COL"],
                          catval=osmCatTbl["VAL_COL"]),
                api='psql'))

    if not outSQL:
        return osmgeotbl, osmcatbl, osmreltbl
    else:
        from gasp.sql.fm import dump_tbls

        return dump_tbls(db, osmgeotbl + [osmcatbl] + osmreltbl, outSQL)