예제 #1
0
def sanitizeData(df, FACE_PAGE=None):
    """
    Normalise a DataFrame of posts.

    Steps, in this order:

    * when FACE_PAGE is truthy, store it in a new 'page_ref' column;
    * when there is a 'created_time' column, replace it by 'datahora':
    the same text with every 'T' turned into a space and the last five
    characters removed;
    * rename the 'id' column to 'post_id';
    * when there is a 'place' column, expand it with listval_to_newcols,
    rename the resulting 'id' / 'name' columns to 'place_id' /
    'place_name', expand the resulting 'location' column (if any) and
    discard the columns labelled 0 left behind by both expansions.

    The first three steps change the received DataFrame in place.

    Returns the sanitized DataFrame.
    """

    if FACE_PAGE:
        df['page_ref'] = FACE_PAGE

    # Sanitize created_time
    if 'created_time' in df.columns:
        df['datahora'] = df['created_time'].str.replace('T', ' ').str[:-5]

        df.drop(['created_time'], axis=1, inplace=True)

    # Sanitize ID
    df.rename(columns={'id': 'post_id'}, inplace=True)

    # Sanitize Places
    if 'place' not in df.columns:
        return df

    # Imported here because only this branch needs the project helper
    from gasp.mng.fld.df import listval_to_newcols

    df = listval_to_newcols(df, 'place')
    df.rename(columns={
        'id': 'place_id',
        'name': 'place_name',
        0: 'unk1'
    }, inplace=True)

    # NOTE(review): 'location' presumably only exists when at least one
    # place carries it - confirm against listval_to_newcols
    if 'location' in df.columns:
        df = listval_to_newcols(df, 'location')

        df.rename(columns={0: 'unk2'}, inplace=True)

    # The placeholder columns are not guaranteed to exist, so a missing
    # one must not abort the whole sanitization
    df.drop(['unk1', 'unk2'], axis=1, inplace=True, errors='ignore')

    return df
예제 #2
0
파일: places.py 프로젝트: zonakre/gasp
def find_places(inShp, epsg, radius, output, keyword=None, type=None):
    """
    Extract places from Google Maps

    Every point of inShp is sent, as latitude/longitude, to
    get_places_by_radius; the 'results' of all answers are gathered in a
    single table that is written to output as point features.

    Parameters:
    * inShp - table with point geometries, read with tbl_to_obj;
    * epsg - EPSG code of inShp. When it is not 4326 the points are
    projected to 4326 before the search and the places are projected
    to epsg before being saved;
    * radius, keyword, type - forwarded unchanged to get_places_by_radius;
    * output - path of the file written by df_to_shp.

    Returns output.

    Raises ValueError when not a single place is returned.

    NOTE: the 'type' parameter hides the builtin of the same name, so
    type(...) must never be called inside this function.
    """

    import pandas
    from gasp.fm import tbl_to_obj
    from gasp.to.geom import pnt_dfwxy_to_geodf
    from gasp.mng.prj import project
    from gasp.mng.fld.df import listval_to_newcols
    from gasp.to.shp import df_to_shp

    pntDf = tbl_to_obj(inShp)
    if epsg != 4326:
        pntDf = project(pntDf, None, 4326, gisApi='pandas')

    pntDf['latitude'] = pntDf.geometry.y.astype(str)
    pntDf['longitude'] = pntDf.geometry.x.astype(str)

    # One request per input point; the answers are collected in a list
    # and concatenated once, instead of rebinding an outer variable from
    # inside a nested function
    frames = [
        pandas.DataFrame(
            get_places_by_radius(lat, lng, radius, keyword, type)['results']
        )
        for lat, lng in zip(pntDf['latitude'], pntDf['longitude'])
    ]
    frames = [frame for frame in frames if not frame.empty]

    if not frames:
        raise ValueError('No places were found for the given points')

    DATA = pandas.concat(frames, ignore_index=True)

    DATA = listval_to_newcols(DATA, 'geometry')
    fldsToDelete = ['viewport', 'opening_hours', 'icon', 'plus_code', 'photos']
    realDeletion = [x for x in fldsToDelete if x in DATA.columns.values]
    DATA.drop(realDeletion, axis=1, inplace=True)

    DATA = listval_to_newcols(DATA, 'location')

    DATA = pnt_dfwxy_to_geodf(DATA, 'lng', 'lat', 4326)

    if epsg != 4326:
        DATA = project(DATA, None, epsg, gisApi='pandas')

    # Stored as text before export
    DATA["types"] = DATA.types.astype(str)

    df_to_shp(DATA, output)

    return output
예제 #3
0
파일: flickr.py 프로젝트: zonakre/gasp
def search_photos(lat=None, lng=None, radius=None, keyword=None, apiKey=None):
    """
    Method to connect with Flickr in order to query photos and other kinds
    of data using keywords, coordinates and a radius

    Parameters:
    * lat, lng, radius - sent to flickr.photos.search as lat, lon and
    radius. They are only used when the three are given; otherwise the
    search is made by keyword only;
    * keyword - text to search for; an empty text is sent when omitted;
    * apiKey - pair (public key, secret). When omitted, FLICKR_PUBLIC and
    FLICKR_SECRET are used.

    Returns a Pandas Dataframe with one row per photo and the
    'description' column expanded by listval_to_newcols, or None when no
    photo is found.
    """

    import pandas
    from flickrapi import FlickrAPI
    from gasp import unicode_to_str
    from gasp.mng.fld.df import listval_to_newcols

    if apiKey:
        FLIC_PUB, FLIC_SEC = apiKey
    else:
        FLIC_PUB, FLIC_SEC = FLICKR_PUBLIC, FLICKR_SECRET

    flickr_engine = FlickrAPI(FLIC_PUB,
                              FLIC_SEC,
                              format='parsed-json',
                              store_token=False)

    if not keyword:
        keyword = ''

    elif isinstance(keyword, unicode):
        keyword = unicode_to_str(keyword)

    # per_page is the documented name of the page size argument of
    # flickr.photos.search
    search_args = {
        'text': keyword,
        'per_page': 500,
        'extras': 'url_l,geo,date_taken,date_upload,description'
    }

    def _given(value):
        # 0 is a valid coordinate (equator / Greenwich meridian), so only
        # None and the empty string mean "not given"
        return value is not None and value != ''

    if _given(lat) and _given(lng) and radius:
        search_args['lat'] = lat
        search_args['lon'] = lng
        search_args['radius'] = radius

    data = flickr_engine.photos.search(**search_args)

    photos_array = pandas.DataFrame(data['photos']['photo'])

    if not photos_array.shape[0]:
        return None

    return listval_to_newcols(photos_array, "description")
예제 #4
0
파일: mapbx.py 프로젝트: zonakre/gasp
def matrix_od(originsShp, destinationShp, originsEpsg, destinationEpsg,
              resultShp, modeTrans="driving"):
    """
    Use Pandas to Retrieve data from MapBox Matrix OD Service

    Parameters:
    * originsShp, destinationShp - tables with point geometries, read
    with tbl_to_obj;
    * originsEpsg, destinationEpsg - EPSG codes of those tables; any
    table not in 4326 is projected to 4326 before the requests;
    * resultShp - path handed to df_to_shp;
    * modeTrans - sent to the service as modeTransportation.

    How it works:
    * destinations are split with split_df(destinos, 24) and origins with
    split_df_inN, using the number of keys returned by get_keys();
    * one thread is started per group of origins, each with its own key;
    * for every origin, one request is made per group of destinations,
    followed by a pause of 5 seconds; the first row of 'durations' of
    every answer is accumulated in the 'od_matrix' list of that origin;
    * 'od_matrix' is expanded into columns, renamed to dest_<position>.

    Returns the value returned by df_to_shp.

    Raises ValueError when the origins or the destinations are not points.

    NOTE(review): an exception raised inside a thread is not propagated
    by Thread.join, so the origins of that thread would simply be absent
    from the result.
    """

    import numbers
    import time
    from threading       import Thread
    from gasp.web.mapbx  import get_keys, matrix
    from gasp.fm         import tbl_to_obj
    from gasp.mng.split  import split_df, split_df_inN
    from gasp.mng.fld.df import listval_to_newcols
    from gasp.fm.geom    import pointxy_to_cols
    from gasp.mng.prj    import project
    from gasp.mng.gen    import merge_df
    from gasp.prop.feat  import get_geom_type
    from gasp.to.shp     import df_to_shp

    # Data to GeoDataFrame
    origens  = tbl_to_obj(    originsShp)
    destinos = tbl_to_obj(destinationShp)

    # Check if SHPs are points
    for layer in (origens, destinos):
        inGeomType = get_geom_type(
            layer, geomCol="geometry", gisApi='pandas'
        )

        if inGeomType not in ('Point', 'MultiPoint'):
            raise ValueError('The input geometry must be of type point')

    # Re-Project data to WGS
    if originsEpsg != 4326:
        origens = project(origens, None, 4326, gisApi='pandas')

    if destinationEpsg != 4326:
        destinos = project(destinos, None, 4326, gisApi='pandas')

    origens = pointxy_to_cols(
        origens, geomCol="geometry",
        colX="longitude", colY="latitude"
    )
    destinos = pointxy_to_cols(
        destinos, geomCol="geometry",
        colX="longitude", colY="latitude"
    )

    # Prepare coordinates Str
    origens["location"]  = origens.longitude.astype(str) \
        + "," + origens.latitude.astype(str)

    destinos["location"] = destinos.longitude.astype(str) \
        + "," + destinos.latitude.astype(str)

    # Split destinations DataFrame into DataFrames with
    # 24 rows
    lst_destinos = split_df(destinos, 24)

    # Get Keys to use
    KEYS = get_keys()
    # Split origins by key
    origensByKey = split_df_inN(origens, KEYS.shape[0])

    lst_keys = KEYS["key"].tolist()

    # Produce matrix
    results = []
    def get_matrix(origins, key):
        def def_apply(row):
            rowResults = []
            for destGroup in lst_destinos:
                strDest = destGroup.location.str.cat(sep=";")

                # Position 0 is the origin; the destinations follow it
                strLocations = row["location"] + ";" + strDest

                dados = matrix(
                    strLocations, idxSources="0",
                    idxDestinations=";".join(
                        str(i) for i in range(1, destGroup.shape[0] + 1)
                    ),
                    useKey=key, modeTransportation=modeTrans
                )
                time.sleep(5)

                rowResults += dados["durations"][0]

            row["od_matrix"] = rowResults

            return row

        results.append(origins.apply(def_apply, axis=1))

    # Create threads - the N-th group of origins uses the N-th key
    thrds = [
        Thread(
            name="tk{}".format(i), target=get_matrix,
            args=(group, lst_keys[i - 1])
        )
        for i, group in enumerate(origensByKey, start=1)
    ]

    # Start all threads
    for thr in thrds:
        thr.start()

    # Wait for all threads to finish
    for thr in thrds:
        thr.join()

    # Join all dataframes
    RESULT = merge_df(results, ignIndex=False)

    RESULT = listval_to_newcols(RESULT, "od_matrix")

    # Columns created by the expansion have integer labels
    RESULT.rename(
        columns={
            c: "dest_{}".format(c)
            for c in RESULT.columns.values
            if isinstance(c, numbers.Integral) and not isinstance(c, bool)
        }, inplace=True
    )

    if originsEpsg != 4326:
        RESULT = project(RESULT, None, originsEpsg, gisApi='pandas')

    return df_to_shp(RESULT, resultShp)
예제 #5
0
파일: dmx.py 프로젝트: zonakre/gasp
def dist_matrix_by_shp(oShp, dShp, oEpsg, dEpsg, result, transMode=None,
                       apiKey=None):
    """
    Create distance matrix using shapes and Google Maps API

    Parameters:
    * oShp, dShp - tables with the origins and the destinations (point
    geometries), read with tbl_to_obj;
    * oEpsg, dEpsg - EPSG codes of those tables; any table not in 4326 is
    projected to 4326 before the requests;
    * result - path of the table written by obj_to_tbl; its file name is
    also used as sheet name;
    * transMode - sent to dist_matrix as transport_mode;
    * apiKey - Google Maps API key. When omitted, the key embedded in this
    function is used, as before.

    One request is made per destination and per group of origins
    (origins are split with split_df(originsDf, 95)), with a pause of 5
    seconds after each request.

    Returns result.

    Raises ValueError when the origins or the destinations are not points.
    """

    import time
    import pandas
    from gasp.fm import tbl_to_obj
    from gasp.mng.split import split_df
    from gasp.mng.prj import project
    from gasp.mng.fld.df import listval_to_newcols
    from gasp.prop.feat import get_geom_type
    from gasp.mng.gen import merge_df
    from gasp.web.glg.distmx import dist_matrix
    from gasp.to import obj_to_tbl
    from gasp.to.obj import df_to_list
    from gasp.oss import get_filename

    # NOTE(review): a credential embedded in source code ends up in every
    # copy of the repository - it should come from configuration instead.
    # It is kept only so existing callers keep working.
    if not apiKey:
        apiKey = 'AIzaSyAmyPmqtxD20urqtpCpn4ER74a6J4N403k'

    # Origins and Destinations to GeoDataframe
    originsDf = tbl_to_obj(oShp)
    destnatDf = tbl_to_obj(dShp)

    # Check Geometries type - shapes should be of type point
    pointTypes = ('Point', 'MultiPoint')
    originsGeom = get_geom_type(originsDf, gisApi='pandas')
    destGeom = get_geom_type(destnatDf, gisApi='pandas')
    if originsGeom not in pointTypes or destGeom not in pointTypes:
        raise ValueError('All input geometries must be of type point')

    # Re-project GeoDataframes if needed
    if oEpsg != 4326:
        originsDf = project(originsDf, None, 4326, gisApi='pandas')

    if dEpsg != 4326:
        destnatDf = project(destnatDf, None, 4326, gisApi='pandas')

    # Geom to Field as str ("y,x")
    originsDf["geom"] = originsDf["geometry"].y.astype(str) + "," + \
        originsDf["geometry"].x.astype(str)

    destnatDf["geom"] = destnatDf["geometry"].y.astype(str) + "," + \
        destnatDf["geometry"].x.astype(str)

    # Keep the original index: it identifies each feature in the output
    originsDf["old_fid"] = originsDf.index
    destnatDf["old_fid"] = destnatDf.index

    # Split Origins
    lstOrigins = split_df(originsDf, 95)
    for odf in lstOrigins:
        # Every group is merged by index with the answer of the service
        odf.reset_index(inplace=True)

    lstDestinations = df_to_list(destnatDf)
    RESULTS = []
    for destino in lstDestinations:
        for oDf in lstOrigins:
            matrix = dist_matrix(
                str(oDf.geom.str.cat(sep="|")),
                str(destino["geom"]),
                oDf.shape[0],
                1,
                transport_mode=transMode,
                useKey=apiKey)

            matrix = pandas.DataFrame(matrix)
            matrix = listval_to_newcols(matrix, "elements")

            matrix = matrix.merge(oDf,
                                  how='inner',
                                  left_index=True,
                                  right_index=True)

            matrix.rename(columns={
                'old_fid': "fid_origin",
                0: "cost"
            },
                          inplace=True)

            matrix["fid_destin"] = destino['old_fid']

            RESULTS.append(matrix)

            time.sleep(5)

    # Join all dataframes
    RESULT = merge_df(RESULTS, ignIndex=False)
    RESULT = sanitizeDataCols(RESULT, "cost")

    # Of the origins, only the geometry is kept
    RESULT.drop([
        x
        for x in originsDf.columns.values if x != "geometry" and x != "old_fid"
    ],
                axis=1,
                inplace=True)
    RESULT.rename(columns={"geometry": "origin_geom"}, inplace=True)

    # Bring the geometry of each destination
    RESULT = RESULT.merge(destnatDf,
                          how='inner',
                          left_on=["fid_destin"],
                          right_on=["old_fid"])
    RESULT.drop([x for x in destnatDf.columns.values if x != "geometry"],
                axis=1,
                inplace=True)
    RESULT.rename(columns={"geometry": "destin_geom"}, inplace=True)

    # Geometries are exported as text
    RESULT["origin_geom"] = RESULT.origin_geom.astype(str)
    RESULT["destin_geom"] = RESULT.destin_geom.astype(str)

    obj_to_tbl(RESULT, result, sheetsName=get_filename(result))

    return result
예제 #6
0
def search_tweets(lat=None,
                  lng=None,
                  radius=None,
                  keyword=None,
                  NR_ITEMS=500,
                  only_geo=None,
                  __lang=None,
                  key=None,
                  resultType='mixed'):
    """
    Basic tool to extract data from Twitter using a keyword and/or a buffer

    Parameters:
    * lat, lng, radius - centre and radius of the buffer; they are only
    used when the three are given. radius should be in Km;
    * keyword - text to search for; an empty text is sent when omitted;
    * NR_ITEMS - maximum number of tweets to retrieve;
    * only_geo - when truthy, only tweets with a value in 'geo' are
    returned;
    * __lang - sent to the search as lang;
    * key - sequence (TOKEN, SECRET, CONSUMER_KEY, CONSUMER_SECRET). When
    omitted, the values of TWITTER_TOKEN are used;
    * resultType - options: mixed, recent, popular. 'mixed' is sent to the
    search as None.

    Returns a Pandas DataFrame with one row per tweet or None when there
    is nothing to return. Tweets without 'geo' keep that empty value in
    the 'latitude' and 'longitude' columns.
    """

    import tweepy
    import pandas
    from gasp.mng.fld.df import listval_to_newcols
    from gasp import unicode_to_str

    if not key:
        TOKEN, SECRET, CONSUMER_KEY, CONSUMER_SECRET = TWITTER_TOKEN['TOKEN'],\
            TWITTER_TOKEN['SECRET'], TWITTER_TOKEN['CONSUMER_KEY'],\
            TWITTER_TOKEN['CONSUMER_SECRET']
    else:
        TOKEN, SECRET, CONSUMER_KEY, CONSUMER_SECRET = key

    resultType = None if resultType == 'mixed' else resultType

    # Give our credentials to the Twitter API
    auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)

    auth.set_access_token(TOKEN, SECRET)

    api = tweepy.API(auth)

    # Request data from twitter
    if not keyword:
        keyword = ''

    elif isinstance(keyword, unicode):
        keyword = unicode_to_str(keyword)

    searchArgs = {
        'q': keyword,
        'lang': __lang,
        'count': 50,
        'result_type': resultType
    }

    def _given(value):
        # 0 is a valid coordinate (equator / Greenwich meridian), so only
        # None and the empty string mean "not given"
        return value is not None and value != ''

    if _given(lat) and _given(lng) and radius:
        searchArgs['geocode'] = '{_lat},{_lng},{r}km'.format(
            _lat=str(lat), _lng=str(lng), r=str(radius))

    data = pandas.DataFrame([
        i._json
        for i in tweepy.Cursor(api.search, **searchArgs).items(NR_ITEMS)
    ])

    if not data.shape[0]:
        return None

    data.rename(columns={
        "id": "fid",
        "created_at": "tweet_time",
        "lang": "tweet_lang"
    },
                inplace=True)

    if "place" in data.columns.values:
        from shapely.geometry import shape

        def get_wkt(x):
            # Anything that is not a geometry mapping becomes 'None'
            return str(shape(x).wkt) if isinstance(x, dict) else 'None'

        # Split in several columns
        data = listval_to_newcols(data, "place")

        # Labels missing from the table are ignored by rename
        data.rename(columns={
            "name": "place_name",
            "country": "place_country",
            "country_code": "place_countryc",
            "id": "place_id"
        },
                    inplace=True)

        if 'bounding_box' in data.columns.values:
            data["place_box"] = data.bounding_box.apply(get_wkt)

        else:
            data["place_box"] = 'None'

    INTEREST_COLS = [
        'user', 'text', 'fid', 'geo', 'tweet_time', 'retweeted', 'tweet_lang',
        'place_name', 'place_country', 'place_countryc', 'place_id',
        'place_box'
    ]

    data.drop([c for c in data.columns.values if c not in INTEREST_COLS],
              axis=1,
              inplace=True)

    hasGeo = data["geo"].astype(str) != 'None'
    dfGeom = data[hasGeo]

    if not dfGeom.shape[0]:
        if only_geo:
            return None

        result = data

        result["latitude"] = result["geo"]
        result["longitude"] = result["geo"]
        result.drop("geo", axis=1, inplace=True)

    else:
        # 'geo' is expanded into its keys and then 'coordinates' into its
        # two positions
        dfGeom = pandas.concat(
            [dfGeom.drop(["geo"], axis=1), dfGeom["geo"].apply(pandas.Series)],
            axis=1)

        dfGeom = pandas.concat([
            dfGeom.drop(["coordinates"], axis=1), dfGeom["coordinates"].apply(
                pandas.Series)
        ],
                               axis=1)

        dfGeom.rename(columns={0: 'latitude', 1: 'longitude'}, inplace=True)

        dfGeom.drop("type", axis=1, inplace=True)

        if only_geo:
            result = dfGeom

        else:
            # Explicit copy: new columns are assigned to this selection
            dfNoGeom = data[~hasGeo].copy()
            dfNoGeom["latitude"] = dfNoGeom["geo"]
            dfNoGeom["longitude"] = dfNoGeom["geo"]

            dfNoGeom.drop("geo", axis=1, inplace=True)

            # DataFrame.append does not exist in recent pandas versions
            result = pandas.concat([dfGeom, dfNoGeom], ignore_index=True)

    # Expand the user data into columns
    result = pandas.concat(
        [result.drop(["user"], axis=1), result["user"].apply(pandas.Series)],
        axis=1)

    result.rename(columns={
        'screen_name': 'user',
        'id': 'user_id',
        'location': 'user_location',
        'name': 'username'
    },
                  inplace=True)

    keepCols = INTEREST_COLS + [
        'user', 'followers_count', 'user_id', 'user_location', 'username',
        'latitude', 'longitude'
    ]

    result.drop([c for c in result.columns.values if c not in keepCols],
                axis=1,
                inplace=True)

    result["url"] = 'https://twitter.com/' + \
        result["user"].astype(str) + '/status/' + \
        result["fid"].astype(str)

    return result