def df_cols_to_rows_smp(df, idcol, colstoval, colstovalname, valcol):
    """
    Unpivot the columns in colstoval into rows.

    Dataframe Like:
        | date     | station | RTD    | EC        | vstation | id
      0 | 00:00:14 | None    | 16.259 | 31029.021 | 34       | 1
      1 | 00:00:39 | None    | 21.555 | 32139.144 | 34       | 2
      2 | 00:00:56 | None    | 23.574 | 17893.594 | 34       | 3

    To:
        | id | param | value
      0 | 1  | RTD   | 16.259
      1 | 2  | RTD   | 21.555
      2 | 3  | RTD   | 23.574
      3 | 1  | EC    | 31029.021
      4 | 2  | EC    | 32139.144
      5 | 3  | EC    | 17893.594

    idcol         - column identifying each original row (kept on every row)
    colstoval     - column name or list of column names to melt into rows
    colstovalname - name of the new column receiving the old column names
    valcol        - name of the new column receiving the values

    Returns a new DataFrame with columns [idcol, colstovalname, valcol].
    NOTE: this matches the documented example; the previous implementation
    returned the columns as [valcol, idcol, colstovalname], contradicting
    its own docstring.
    """

    # Accept a single column name as well as a list (same behaviour the
    # glass obj_to_lst helper provided)
    colstoval = [colstoval] if isinstance(colstoval, str) else colstoval

    # pandas.DataFrame.melt performs exactly this unpivot: one output row
    # per (input row, melted column), column-major order — identical row
    # order to the old per-column copy + concat loop, without the deep
    # copies or the project merge_df helper
    return df.melt(
        id_vars=[idcol], value_vars=colstoval,
        var_name=colstovalname, value_name=valcol
    )
def dfcolstorows(inDf, colField, valField, colFid=None):
    """
    Unpivot a DataFrame: every column (optionally except colFid) becomes
    (column-name, value) rows.

    Dataframe Like:
        | pop_res | ind2    | ind3 | id_unit
      0 | 571     | 35.0975 | 123  | 3768
      1 | 938     | 18.2114 | 265  | 3618

    To (without colFid):
        | colField | valField
      0 | pop_res  | 571
      1 | pop_res  | 938
      2 | ind2     | 35.0975
      ...

    When colFid names an existing column, that column is kept unchanged
    on every output row instead of being melted.
    """

    from glass.ng.pd import merge_df

    melt_cols = list(inDf.columns.values)

    # colFid is an identifier column - never melt it
    if colFid and colFid in melt_cols:
        melt_cols.remove(colFid)

    pieces = []
    for c in melt_cols:
        # One slice per melted column: keep only c (plus colFid, if any),
        # record the source column name, and rename c to the value field
        piece = inDf.copy()
        piece.drop([o for o in melt_cols if o != c], axis=1, inplace=True)
        piece[colField] = c
        piece.rename(columns={c: valField}, inplace=True)
        pieces.append(piece)

    return merge_df(pieces)
def merge_tbls(folder, out_tbl, tbl_format='.dbf'):
    """
    Concatenate every table found in a folder into one single output table.

    folder     - directory holding the input tables
    out_tbl    - path of the merged output table
    tbl_format - extension of the tables to pick up (default '.dbf')

    Returns out_tbl.
    """

    from glass.pys.oss import lst_ff
    from glass.ng.rd import tbl_to_obj
    from glass.ng.wt import obj_to_tbl
    from glass.ng.pd import merge_df

    # Read every matching table in the folder into a DataFrame
    dfs = [tbl_to_obj(t) for t in lst_ff(folder, file_format=tbl_format)]

    # Concatenate and write the result
    obj_to_tbl(merge_df(dfs), out_tbl)

    return out_tbl
def same_attr_to_shp(inShps, interestCol, outFolder, basename="data_",
                     resultDict=None):
    """
    For several SHPS with the same field, this program will list
    all values in such field and will create a new shp for all
    values with the respective geometry regardeless the origin shp.

    Returns a list with the new shapefile paths, or - when resultDict is
    truthy - a dict mapping each value (as string, decimal part stripped)
    to its shapefile path.
    """

    import os
    from glass.g.rd.shp import shp_to_obj
    from glass.ng.pd import merge_df
    from glass.g.wt.shp import df_to_shp

    # Outputs use the same extension as the first input
    ext = os.path.splitext(inShps[0])[1]

    # Stack all inputs into one DataFrame
    alldata = merge_df([shp_to_obj(s) for s in inShps], ignIndex=True)

    result = {} if resultDict else []

    for val in alldata[interestCol].unique():
        subset = alldata[alldata[interestCol] == val]

        # Use the value as file key; drop any decimal part
        key = str(val).split('.')[0] if '.' in str(val) else str(val)

        newshp = df_to_shp(subset, os.path.join(
            outFolder, '{}{}{}'.format(basename, key, ext)
        ))

        if resultDict:
            result[key] = newshp
        else:
            result.append(newshp)

    return result
def lst_prod_by_cell_and_year(shp, id_col, year, outshp,
                              platform="Sentinel-2", processingl='Level-2A',
                              epsg=32629):
    """
    Get a list of images:
    * one for each grid in shp;
    * one for each month in one year - the choosen image will be the one
    with lesser area occupied by clouds;

    total_images = grid_number * number_months_year

    shp         - polygon grid; one cell per satellite tile
    id_col      - column in shp with the tile/cell identifier
    year        - year to search (int)
    outshp      - path of the output shapefile with the selected products
    platform    - Copernicus platform name (default "Sentinel-2")
    processingl - requested processing level
    epsg        - metric SRS code used to measure footprint areas

    Returns outshp.
    """

    from glass.g.rd.shp import shp_to_obj
    from glass.ng.pd import merge_df
    from glass.g.wt.shp import df_to_shp
    from glass.g.it.pd import df_to_geodf

    # Last day of each month (NOTE(review): leap years are not handled,
    # February is always 28 - confirm this is acceptable)
    months = {
        '01': '31', '02': '28', '03': '31', '04': '30', '05': '31',
        '06': '30', '07': '31', '08': '31', '09': '30', '10': '31',
        '11': '30', '12': '31'
    }

    # Open SHP grid; the catalogue search expects WGS84 geometries
    grid = shp_to_obj(shp, srs_to=4326)

    def get_grid_id(row):
        # Extract the tile id from the product title: the 6th '_' token,
        # minus its leading 'T' (e.g. ..._T29TNE_... -> 29TNE)
        row['cellid'] = row.title.split('_')[5][1:]
        return row

    # Search for images: one query per (grid cell, month)
    dfs = []
    for idx, cell in grid.iterrows():
        for k in months:
            # Sensing period covering the whole month k
            start = "{}{}01".format(str(year), k)
            end = "{}{}{}".format(str(year), k, months[k])

            # Jan/Feb 2018 Level-2A products were published under the
            # pilot 'Level-2Ap' processing level
            if year == 2018 and processingl == 'Level-2A':
                if k == '01' or k == '02':
                    plevel = 'Level-2Ap'
                else:
                    plevel = processingl
            else:
                plevel = processingl

            # NOTE(review): lst_prod is not imported in this function; it
            # must be defined/imported elsewhere in this module - confirm
            prod = lst_prod(cell.geometry.wkt, start, end,
                            platname=platform, procLevel=plevel)

            if not prod.shape[0]:
                continue

            # Get area of each footprint in square kilometers
            prod = prod.to_crs('EPSG:{}'.format(str(epsg)))
            prod['areav'] = prod.geometry.area / 1000000

            # We want only images with more than 70% of data
            # NOTE(review): the 7000 km2 threshold may not correspond to
            # exactly 70% of a tile footprint - confirm the intent
            prod = prod[prod.areav >= 7000]

            # ID Cell ID (adds a 'cellid' column from the product title)
            prod = prod.apply(lambda x: get_grid_id(x), axis=1)

            # Keep only products whose tile matches this grid cell
            prod = prod[prod.cellid == cell[id_col]]

            # Sort by cloud cover and date (clearest and earliest first)
            prod = prod.sort_values(
                ['cloudcoverpercentage', 'ingestiondate'],
                ascending=[True, True]
            )

            # Get only the image with less cloud cover
            prod = prod.head(1)

            dfs.append(prod)

    # Concatenate the per-cell/month picks, rebuild geometry and export
    fdf = merge_df(dfs)
    fdf = df_to_geodf(fdf, 'geometry', epsg)

    df_to_shp(fdf, outshp)

    return outshp
def dsnsearch_by_cell(GRID_PNT, EPSG, RADIUS, DATA_SOURCE, db, OUTPUT_TABLE):
    """
    Search for data in DSN and other platforms by cell

    For every point of GRID_PNT, query DATA_SOURCE (currently only
    'facebook') within RADIUS, store all rows in a fresh PostgreSQL
    database db, group identical places, and export the grouped table
    to OUTPUT_TABLE. Returns OUTPUT_TABLE.
    """

    import time
    from glass.g.rd.shp import shp_to_obj
    from glass.ng.sql.db import create_db
    from glass.g.acq.dsn.fb.places import places_by_query
    from glass.g.prj.obj import df_prj
    from glass.ng.pd import merge_df
    from glass.g.it.shp import dbtbl_to_shp
    from glass.ng.sql.q import q_to_ntbl
    from glass.g.wt.sql import df_to_db

    # Open GRID SHP and make sure the points are in WGS84
    pnt = shp_to_obj(GRID_PNT)
    if EPSG != 4326:
        pnt = df_prj(pnt, 4326)

    pnt["lng"] = pnt.geometry.x.astype(float)
    pnt["lat"] = pnt.geometry.y.astype(float)
    pnt["grid_id"] = pnt.index

    # Collected per-cell responses
    found = []

    def fetch_cell(row, datasrc):
        # One request per grid point
        if datasrc == 'facebook':
            d = places_by_query(
                {'x': row.lng, 'y': row.lat, 'r': RADIUS}, 4326,
                keyword=None, epsgOut=EPSG, _limit='100',
                onlySearchAreaContained=None
            )
        else:
            raise ValueError(
                '{} as datasource is not a valid value'.format(datasrc))

        # An int response means the request failed - skip this cell
        if type(d) == int:
            return

        # Tag the rows with their origin cell and throttle the next call
        d['grid_id'] = row.grid_id
        found.append(d)

        time.sleep(5)

    pnt.apply(lambda x: fetch_cell(x, DATA_SOURCE), axis=1)

    alldata = merge_df(found)

    # Fresh database for the results
    create_db(db, overwrite=True, api='psql')

    # Send Data to PostgreSQL
    df_to_db(
        db, alldata, "{}_data".format(DATA_SOURCE), EPSG, "POINT",
        colGeom='geometry' if 'geometry' in alldata.columns.values else 'geom'
    )

    # Group identical places, aggregating the ids of the cells that
    # returned them
    cols = [
        c for c in alldata.columns.values
        if c != "geometry" and c != 'geom' and c != "grid_id"
    ] + ["geom"]

    grpby_tbl = q_to_ntbl(
        db, "{}_grpby".format(DATA_SOURCE),
        ("SELECT {cols}, CAST(array_agg(grid_id) AS text) AS grid_id "
         "FROM {dtsrc}_data GROUP BY {cols}").format(
             cols=", ".join(cols), dtsrc=DATA_SOURCE),
        api='psql'
    )

    dbtbl_to_shp(db, grpby_tbl, "geom", OUTPUT_TABLE, api="psql", epsg=EPSG)

    return OUTPUT_TABLE
def otp_cf_based_on_rel(incidents, group_incidents_col, facilities,
                        facilities_id, rel_inc_fac, sheet, group_fk,
                        facilities_fk, hour, day, output):
    """
    Calculate time travel considering specific facilities
    for each group of incidents

    Relations between incidents and facilities
    are in a auxiliar table (rel_inc_fac).
    Auxiliar table must be a xlsx file

    incidents           - point shapefile with the incidents
    group_incidents_col - column of incidents defining the group
    facilities          - point shapefile with the facilities
    facilities_id       - id column of facilities
    rel_inc_fac         - xlsx file relating groups to facilities
    sheet               - sheet of rel_inc_fac to read
    group_fk            - column of rel_inc_fac with the group id
    facilities_fk       - column of rel_inc_fac referencing facilities_id
    hour, day           - departure time/date passed to the OTP helper
    output              - path of the result shapefile

    Returns output; per-incident errors are written to '<output>_log.txt'.
    """

    import os
    import pandas as pd
    from glass.ng.rd import tbl_to_obj
    from glass.g.rd.shp import shp_to_obj
    from glass.g.wt.shp import obj_to_shp
    from glass.g.mob.otp.log import clsfacility
    from glass.g.prop.prj import get_shp_epsg
    from glass.ng.pd import merge_df
    from glass.pys.oss import fprop
    from glass.g.prj.obj import df_prj

    # Avoid problems when facilities_id == facilities_fk:
    # the merge below would otherwise collide the two column names
    facilities_fk = facilities_fk + '_fk' if facilities_id == facilities_fk else \
        facilities_fk

    # Open data; OTP needs WGS84 coordinates
    idf = df_prj(shp_to_obj(incidents), 4326)
    fdf = df_prj(shp_to_obj(facilities), 4326)

    rel_df = tbl_to_obj(rel_inc_fac, sheet=sheet)

    # Output keeps the incidents' original SRS
    oepsg = get_shp_epsg(incidents)

    # Relate facilities with incidents groups (inner join: facilities
    # without a relation row are discarded)
    fdf = fdf.merge(
        rel_df, how='inner', left_on=facilities_id, right_on=facilities_fk
    )

    # List Groups (distinct values of group_incidents_col; the count
    # column 'cnttemp' is only a by-product of the groupby)
    grp_df = pd.DataFrame({
        'cnttemp': idf.groupby([group_incidents_col])[group_incidents_col].agg('count')
    }).reset_index()

    # Do calculations: closest facility per group, restricted to the
    # facilities related to that group
    res = []
    logs = []
    for idx, row in grp_df.iterrows():
        # Get incidents for that group
        new_i = idf[idf[group_incidents_col] == row[group_incidents_col]]

        # Get facilities for that group
        new_f = fdf[fdf[group_fk] == row[group_incidents_col]]

        # calculate closest facility
        cfres, l = clsfacility(new_i, new_f, hour, day, out_epsg=oepsg)

        res.append(cfres)
        logs.extend(l)

    # Merge results
    out_df = merge_df(res)

    # Recovery facility id: keep only facilities_id in fdf, then join the
    # closest-facility index ('ffid') back against fdf's row index
    # NOTE(review): this assumes clsfacility's 'ffid' refers to fdf's row
    # labels after the relation merge - confirm
    fdf.drop(
        [c for c in fdf.columns.values if c != facilities_id],
        axis=1, inplace=True
    )

    out_df = out_df.merge(fdf, how='left', left_on='ffid', right_index=True)

    # Export result
    obj_to_shp(out_df, "geom", oepsg, output)

    # Write logs (one entry per incident/facility pair that failed)
    if len(logs) > 0:
        with open(os.path.join(
            os.path.dirname(output), fprop(output, 'fn') + '_log.txt'
        ), 'w') as txt:
            for i in logs:
                txt.write((
                    "Incident_id: {}\n"
                    "Facility_id: {}\n"
                    "ERROR message:\n"
                    "{}\n"
                    "\n\n\n\n\n\n"
                ).format(str(i[0]), str(i[1]), str(i[2])))

    return output
def otp_servarea(facilities, hourday, date, breaks, output, vel=None):
    """
    OTP Service Area

    Request one isochrone set per facility from an OpenTripPlanner
    instance (ISO_URL) and write every polygon to a single shapefile.

    facilities - point shapefile with the origin locations
    hourday    - departure time sent to OTP
    date       - departure date sent to OTP
    breaks     - cutoff value(s) in seconds (single value or list)
    output     - path of the result shapefile
    vel        - walk speed (default 3)

    Returns output; request failures are logged to '<output>.log.txt'.
    Raises ValueError if no facility produced a valid response.
    """

    import requests
    import os

    from glass.cons.otp import ISO_URL
    from glass.g.rd.shp import shp_to_obj
    from glass.g.prj.obj import df_prj
    from glass.g.prop.prj import get_shp_epsg
    from glass.g.wt.shp import obj_to_shp
    from glass.pys.oss import fprop
    from glass.g.it.pd import json_obj_to_geodf
    from glass.ng.pd import merge_df
    from glass.pys import obj_to_lst

    breaks = obj_to_lst(breaks)

    # Open Data; OTP expects WGS84 "lat,lng" origins
    facilities_df = df_prj(shp_to_obj(facilities), 4326)

    # Place request parameters
    get_params = [
        ('mode', 'WALK,TRANSIT'),
        ('date', date),
        ('time', hourday),
        ('maxWalkDistance', 50000),
        ('walkSpeed', 3 if not vel else vel)
    ]

    breaks.sort()
    for b in breaks:
        get_params.append(('cutoffSec', b))

    # Placeholder for the per-facility origin - always the LAST parameter.
    # FIX: the previous code appended 'fromPlace' only when the first row's
    # index label was falsy ("if not i"); with a non-default index the
    # first iteration overwrote the last 'cutoffSec' entry instead.
    get_params.append(('fromPlace', None))

    # Do the math
    error_logs = []
    results = []

    for i, r in facilities_df.iterrows():
        get_params[-1] = (
            'fromPlace', str(r.geometry.y) + ',' + str(r.geometry.x)
        )

        resp = requests.get(
            ISO_URL, get_params, headers={'accept': 'application/json'}
        )

        try:
            data = resp.json()
        except ValueError:
            # Narrowed from a bare except: Response.json raises ValueError
            # (or a subclass) when the body is not valid JSON
            error_logs.append([i, 'Cannot retrieve JSON Response'])
            continue

        gdf = json_obj_to_geodf(data, 4326)
        gdf['ffid'] = i

        results.append(gdf)

    # Fail fast with a clear message instead of letting merge_df choke on
    # an empty list
    if not results:
        raise ValueError('No valid response was obtained for any facility')

    # Merge all Isochrones
    df_res = merge_df(results)

    # Back to the facilities' original SRS
    out_epsg = get_shp_epsg(facilities)

    if out_epsg != 4326:
        df_res = df_prj(df_res, out_epsg)

    obj_to_shp(df_res, "geometry", out_epsg, output)

    # Write logs
    if len(error_logs):
        with open(os.path.join(
            os.path.dirname(output), fprop(output, 'fn') + '.log.txt'
        ), 'w') as txt:
            for i in error_logs:
                txt.write((
                    "Facility_id: {}\n"
                    "ERROR message:\n"
                    "{}\n"
                    "\n\n\n\n\n\n"
                ).format(str(i[0]), i[1]))

    return output
def df_cols_to_rows(inDf, TO_COLS, col_old_col_name, key_old_col_name,
                    col_mantain):
    """
    Unpivot several parallel column sets into rows.

    Dataframe like:
        | pop_res | ind2    | id_unit | pop_res_int | ind2_int
      0 | 571     | 35.0975 | 3768    | 2           | 6
      1 | 938     | 18.2114 | 3618    | 3           | 1

    To:
        | id_unit | id_indicator | value   | cls
      0 | 3768    | pop_res      | 571     | 2
      1 | 3618    | pop_res      | 938     | 3
      2 | 3768    | ind2         | 35.0975 | 6
      3 | 3618    | ind2         | 18.2114 | 1

    Using as parameters:
    data_cols = ['pop_res', 'ind2']
    col_mantain = 'id_unit'
    TO_COLS = {
        # Dict values should have the same length
        'value' : data_cols,
        'cls'   : [i + '_int' for i in data_cols]
    }
    col_old_col_name = 'id_indicator'
    key_old_col_name = 'value'

    key_old_col_name selects which TO_COLS list provides the names stored
    in col_old_col_name.
    """

    from glass.pys import obj_to_lst
    from glass.ng.pd import merge_df

    keep_cols = obj_to_lst(col_mantain)
    first_key = list(TO_COLS.keys())[0]

    pieces = []
    for i in range(len(TO_COLS[first_key])):
        piece = inDf.copy(deep=True)

        # Columns surviving this pass: the identifiers plus the i-th
        # member of every TO_COLS list
        wanted = keep_cols.copy() + [TO_COLS[k][i] for k in TO_COLS]

        piece.drop(
            [c for c in piece.columns.values if c not in wanted],
            axis=1, inplace=True
        )

        # Old column names become the new generic column names ...
        piece.rename(
            columns={TO_COLS[k][i]: k for k in TO_COLS}, inplace=True
        )

        # ... and the original name (taken from the key_old_col_name list)
        # is recorded in col_old_col_name
        piece[col_old_col_name] = TO_COLS[key_old_col_name][i]

        pieces.append(piece)

    return merge_df(pieces)
def closest_facility(incidents, incidents_id, facilities, output,
                     impedance='TravelTime'):
    """
    Closest-facility analysis through the ArcGIS REST API: for every
    incident, find the nearest facility and export the routes (with the
    incidents' original attributes) to output.

    impedance options:
    * TravelTime;
    * WalkTime;
    * metric (kilometers; any other value falls back to TravelTime);
    """

    import requests
    import pandas as pd
    import numpy as np
    from glass.cons.esri import rest_token, CF_URL
    from glass.g.it.esri import json_to_gjson
    from glass.g.rd.shp import shp_to_obj
    from glass.g.wt.shp import df_to_shp
    from glass.ng.pd.split import df_split
    from glass.ng.pd import merge_df
    from glass.g.prop.prj import get_shp_epsg
    from glass.g.prj.obj import df_prj
    from glass.g.it.pd import df_to_geodf
    from glass.g.it.pd import json_obj_to_geodf
    from glass.cons.esri import get_tv_by_impedancetype

    # Get API token
    token = rest_token()

    # Data to Pandas DataFrames
    fdf = shp_to_obj(facilities)
    idf = shp_to_obj(incidents)

    # Re-project to WGS84 (coordinates travel as "lng,lat" text)
    fdf = df_prj(fdf, 4326)
    idf = df_prj(idf, 4326)

    # Geomtries to Str - inputs for requests
    fdf['coords'] = fdf.geometry.x.astype(str) + ',' + fdf.geometry.y.astype(
        str)
    idf['coords'] = idf.geometry.x.astype(str) + ',' + idf.geometry.y.astype(
        str)

    # Delete geometry from incidents DF (the returned routes carry the
    # geometry used from here on)
    idf.drop(['geometry'], axis=1, inplace=True)

    # Split data
    # ArcGIS API only accepts 100 facilities
    # and 100 incidents in each request
    fdfs = df_split(fdf, 100, nrows=True) if fdf.shape[0] > 100 else [fdf]
    idfs = df_split(idf, 100, nrows=True) if idf.shape[0] > 100 else [idf]

    # Re-number every chunk from zero: route i of a response is joined
    # back positionally to incident row i of its chunk
    for i in range(len(idfs)):
        idfs[i].reset_index(inplace=True)
        idfs[i].drop(['index'], axis=1, inplace=True)

    for i in range(len(fdfs)):
        fdfs[i].reset_index(inplace=True)
        fdfs[i].drop(['index'], axis=1, inplace=True)

    # Get travel mode
    tv = get_tv_by_impedancetype(impedance)

    # Ask for results
    results = []

    # Response columns that are never kept
    drop_cols = [
        'ObjectID', 'FacilityID', 'FacilityRank', 'Name',
        'IncidentCurbApproach', 'FacilityCurbApproach', 'IncidentID',
        'StartTime', 'EndTime', 'StartTimeUTC', 'EndTimeUTC',
        'Total_Minutes', 'Total_TruckMinutes', 'Total_TruckTravelTime',
        'Total_Miles'
    ]

    # Pick the output impedance column and the per-impedance rename/drop
    # lists. NOTE(review): 'Total_Minutes' appears both in drop_cols and
    # in every ndrop list - pandas drops it once, but confirm the intent.
    if impedance == 'WalkTime':
        tv_col = 'walktime'
        rn_cols = {'Total_WalkTime': tv_col}

        ndrop = ['Total_Kilometers', 'Total_TravelTime', 'Total_Minutes']
    elif impedance == 'metric':
        tv_col = 'kilomts'
        rn_cols = {'Total_Kilometers': tv_col}

        ndrop = ['Total_WalkTime', 'Total_TravelTime', 'Total_Minutes']
    else:
        tv_col = 'traveltime'
        rn_cols = {'Total_TravelTime': tv_col}

        ndrop = ['Total_Kilometers', 'Total_WalkTime', 'Total_Minutes']

    drop_cols.extend(ndrop)

    # One request per (incident chunk, facility chunk) pair
    for i_df in idfs:
        incidents_str = i_df.coords.str.cat(sep=';')

        for f_df in fdfs:
            facilities_str = f_df.coords.str.cat(sep=';')

            # Make request
            r = requests.get(CF_URL, params={
                'facilities': facilities_str,
                'incidents': incidents_str,
                'token': token,
                'f': 'json',
                'travelModel': tv,
                'defaultTargetFacilityCount': '1',
                'returnCFRoutes': True,
                'travelDirection': 'esriNATravelDirectionToFacility',
                'impedanceAttributeName': impedance
            })

            if r.status_code != 200:
                raise ValueError('Error when requesting from: {}'.format(
                    str(r.url)))

            # Convert ESRI json to GeoJson
            esri_geom = r.json()
            geom = json_to_gjson(esri_geom.get('routes'))

            # GeoJSON to GeoDataFrame
            gdf = json_obj_to_geodf(geom, 4326)

            # Delete unwanted columns
            gdf.drop(drop_cols, axis=1, inplace=True)

            # Rename some interest columns
            gdf.rename(columns=rn_cols, inplace=True)

            # Add to results original attributes of incidents
            # (positional join: route row i <-> incident row i)
            r_df = gdf.merge(i_df, how='left', left_index=True,
                             right_index=True)

            results.append(r_df)

    # Compute final result
    # Put every DataFrame in a single DataFrame
    fgdf = merge_df(results)

    # Since facilities were divided
    # The same incident has several "nearest" facilities
    # We just want one nearest facility
    # Lets group by using min operator
    gpdf = pd.DataFrame(fgdf.groupby([incidents_id]).agg({
        tv_col: 'min'
    })).reset_index()

    gpdf.rename(columns={incidents_id: 'iid', tv_col: 'impcol'},
                inplace=True)

    # Recovery geometry: keep only the rows matching each incident's
    # minimum impedance
    fgdf = fgdf.merge(gpdf, how='left', left_on=incidents_id,
                      right_on='iid')
    fgdf = fgdf[fgdf[tv_col] == fgdf.impcol]
    fgdf = df_to_geodf(fgdf, 'geometry', 4326)

    # Remove repeated units: on ties of the minimum impedance, keep only
    # the first-ranked row per incident
    g = fgdf.groupby('iid')
    fgdf['rn'] = g[tv_col].rank(method='first')
    fgdf = fgdf[fgdf.rn == 1]

    fgdf.drop(['iid', 'rn'], axis=1, inplace=True)

    # Re-project to original SRS
    epsg = get_shp_epsg(facilities)
    fgdf = df_prj(fgdf, epsg)

    # Export result
    df_to_shp(fgdf, output)

    return output
def service_areas(facilities, breaks, output, impedance='TravelTime'):
    """
    Produce Service Areas Polygons

    Ask the ArcGIS REST API for service-area polygons around every point
    in facilities, one request per chunk of 100 points, and write all
    polygons (joined with the original point attributes) to output.

    breaks - sequence of break values; NOTE(review): joined with ','. so
    its items are presumably strings - confirm against callers.
    """

    import requests
    from glass.cons.esri import rest_token, SA_URL
    from glass.g.rd.shp import shp_to_obj
    from glass.g.prj.obj import df_prj
    from glass.g.it.esri import json_to_gjson
    from glass.g.it.pd import json_obj_to_geodf
    from glass.g.wt.shp import df_to_shp
    from glass.cons.esri import get_tv_by_impedancetype
    from glass.ng.pd.split import df_split
    from glass.ng.pd import merge_df
    from glass.g.prop.prj import get_shp_epsg

    # Get Token
    token = rest_token()

    # Facilities as "lng,lat" strings in WGS84
    pnt = shp_to_obj(facilities)
    pnt = df_prj(pnt, 4326)

    pnt['coords'] = pnt.geometry.x.astype(str) + ',' + \
        pnt.geometry.y.astype(str)
    pnt.drop(['geometry'], axis=1, inplace=True)

    # The API accepts at most 100 facilities per request
    chunks = df_split(pnt, 100, nrows=True)

    # Make requests
    polygons = []
    for cdf in chunks:
        facilities_str = cdf.coords.str.cat(sep=';')

        tv = get_tv_by_impedancetype(impedance)

        r = requests.get(SA_URL, params={
            'facilities': facilities_str,
            'token': token,
            'f': 'json',
            'travelModel': tv,
            'defaultBreaks': ','.join(breaks),
            # Direction is TO the facility; switch to
            # 'esriNATravelDirectionFromFacility' for the opposite
            'travelDirection': 'esriNATravelDirectionToFacility',
            'outputPolygons': 'esriNAOutputPolygonDetailed',
            'impedanceAttributeName': impedance
        })

        if r.status_code != 200:
            raise ValueError(
                'Error when requesting from: {}'.format(str(r.url)))

        payload = r.json()
        gjson = json_to_gjson(payload.get('saPolygons'))

        gdf = json_obj_to_geodf(gjson, 4326)

        # Recover the original point attributes (positional join)
        gdf = gdf.merge(cdf, how='left', left_index=True, right_index=True)

        polygons.append(gdf)

    # Compute final result in the facilities' original SRS
    final = merge_df(polygons)

    srs = get_shp_epsg(facilities)
    final = df_prj(final, srs)

    df_to_shp(final, output)

    return output
def search_by_keyword(db, out_tbl, qarea, wgrp=None):
    """
    Get data using keywords

    Spawns one worker process per available API key, each collecting data
    for a slice of the configured search words inside qarea, appends the
    merged result to table out_tbl of database db, and writes collection
    logs next to this module. Returns the log file path.
    """

    import os
    import pandas as pd
    from multiprocessing import Process, Manager

    from glass.cons.dsn import search_words, tw_key
    from glass.ng.pd import merge_df
    from glass.ng.pd.split import df_split
    from glass.g.wt.sql import df_to_db

    # Get API Keys (one worker per key)
    keys = tw_key()

    # Get search words
    words = search_words(group=wgrp)

    # Split search words between the available keys
    # NOTE(review): this rebinding shadows the imported search_words
    # function for the rest of this scope
    search_words = [words] if len(keys) == 1 else df_split(words, len(keys))

    # Search for data
    with Manager() as manager:
        # Shared proxies filled in by the workers; only valid while the
        # Manager is alive, hence all uses stay inside this block
        DFS = manager.list()
        LOG_LST = manager.list()
        DROP_COLS = ["retweeted"]

        # Create worker processes (original comment said "Threads")
        # NOTE(review): get_tweets is not defined in this function - it
        # must exist elsewhere in this module; confirm
        thrds = [Process(
            name='tk{}'.format(str(i)), target=get_tweets,
            args=(DFS, LOG_LST, search_words[i], qarea, keys[i],
                  DROP_COLS, i)
        ) for i in range(len(search_words))]

        for t in thrds:
            t.start()

        for t in thrds:
            t.join()

        if not len(DFS):
            raise ValueError('NoData was collected!')

        # Merge all dataframes
        if len(DFS) == 1:
            all_df = DFS[0]
        else:
            all_df = merge_df(DFS, ignIndex=True, ignoredfstype=True)

        all_df.rename(columns={"user" : "username"}, inplace=True)

        # Sanitize time reference: string timestamp without the trailing
        # UTC-offset suffix (last 6 characters)
        all_df['daytime'] = pd.to_datetime(all_df.tweet_time)
        all_df.daytime = all_df.daytime.astype(str)
        all_df.daytime = all_df.daytime.str.slice(start=0, stop=-6)
        all_df.drop('tweet_time', axis=1, inplace=True)

        # Rename cols
        all_df.rename(columns={
            'text' : 'txt', 'tweet_lang' : 'tlang', 'user_id' : 'userid',
            'user_location' : 'userloc', 'place_country' : 'placecountry',
            'place_countryc' : 'placecountryc', 'place_name' : 'placename',
            'place_box' : 'placebox', 'place_id' : 'placeid',
            'followers_count' : 'followersn'
        }, inplace=True)

        # Data to new table (appended, not replaced)
        df_to_db(db, all_df, out_tbl, append=True, api='psql')

        # Write log file next to this module
        log_txt = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            '{}-log.txt'.format(out_tbl)
        )

        with open(log_txt, 'w') as f:
            f.write("\n".join(LOG_LST))

    return log_txt