Example 1
def tweets_to_df(keyword=None,
                 inGeom=None,
                 epsg=None,
                 LANG='pt',
                 NTWEETS=1000,
                 tweetType='mixed',
                 apiKey=None,
                 dropFields=None):
    """
    Search for Tweets and return them in a Pandas DataFrame
    """

    from gasp import goToList

    if not inGeom and not keyword:
        raise ValueError('inGeom or keyword, one of them is required')

    if inGeom and not epsg:
        raise ValueError('inGeom implies epsg')

    if inGeom:
        from gasp.anls.prox.bf import getBufferParam

        x, y, dist = getBufferParam(inGeom, epsg, outSRS=4326)

        dist = float(dist) / 1000

    else:
        x, y, dist = None, None, None

    # search_tweets is assumed to be imported at the module level
    data = search_tweets(lat=y,
                         lng=x,
                         radius=dist,
                         keyword=keyword,
                         NR_ITEMS=NTWEETS,
                         only_geo=None,
                         __lang=LANG,
                         resultType=tweetType,
                         key=apiKey)

    try:
        # search_tweets returns 0/None when nothing is found; a non-empty
        # DataFrame raises an exception on truth testing, hence the try/except
        if not data:
            return 0
    except:
        pass

    if keyword:
        data["keyword"] = keyword

    else:
        data["keyword"] = 'nan'

    dropFields = goToList(dropFields)

    if dropFields:
        data.drop(dropFields, axis=1, inplace=True)

    return data
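A minimal usage sketch for tweets_to_df (not part of the source): the search-area dict follows the x/y/r buffer format described in the other examples, and the coordinates, radius and MY_TWITTER_KEY are placeholders the caller must replace.

# Hypothetical call: recent tweets about a keyword inside a 2 km search area;
# all values below are placeholders
search_area = {'x': -8.4265, 'y': 40.2056, 'r': 2000}  # lon, lat, radius (m)

tweets = tweets_to_df(
    keyword='flood', inGeom=search_area, epsg=4326,
    LANG='pt', NTWEETS=500, tweetType='recent',
    apiKey=MY_TWITTER_KEY, dropFields=None
)

# tweets_to_df returns 0 when the search yields no data
if isinstance(tweets, int):
    print('No tweets found for this search')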
Example 2
def places_by_query(bfShp, epsgIn, keyword=None, epsgOut=4326,
                    _limit='100', onlySearchAreaContained=True):
    """
    Get absolute location of facebook data using the Facebook API and
    Pandas to validate data.
    
    Works only for the 'places' search type
    
    buffer_shp cloud be a shapefile with a single buffer feature
    or a dict like:
    buffer_shp = {
        x: x_value,
        y: y_value,
        r: dist
    }
    
    or a list or a tuple:
    buffer_shp = [x, y, r]
    """
    
    import pandas
    from shapely.geometry       import Polygon, Point
    from geopandas              import GeoDataFrame
    from gasp.anls.prox.bf      import getBufferParam
    from gasp.web.dsn.fb.search import by_query
    
    search_type = 'place'
    
    x_center, y_center, dist = getBufferParam(bfShp, epsgIn, outSRS=4326)
    
    data = by_query(
        search_type, keyword=keyword,
        x_center=x_center, y_center=y_center, dist=dist,
        limit=_limit, face_fields=[
            "location", "name", "category_list", "about",
            "checkins", "description", "fan_count"
        ]
    )
    
    try:
        if not data:
            # Return NoData
            return 0
    except:
        pass
    
    # Sanitize category_list field
    data = pandas.concat([
        data.drop(["category_list"], axis=1),
        data["category_list"].apply(pandas.Series)
    ], axis=1)
    
    # NOTE: this snippet targets Python 2 (it relies on the long built-in)
    _int_cols = [
        c for c in data.columns.values if type(c) == long
    ]
    __int_cols = {
        x : "col_{}".format(str(x)) for x in _int_cols
    }
    data.rename(columns=__int_cols, inplace=True)
    data.rename(columns={"id" : "id_1", "name" : "name_1"}, inplace=True)
    
    for k in __int_cols:
        data = pandas.concat([
            data.drop([__int_cols[k]], axis=1),
            data[__int_cols[k]].apply(pandas.Series)
        ], axis=1)
        
        data.rename(columns={
            'id' : 'id_' + str(k+2),
            'name' : 'name_' + str(k+2)
        }, inplace=True)
        
        if long(0) in list(data.columns.values):
            data.drop([0], axis=1, inplace=True)
    
    # Pandas dataframe to Geopandas Dataframe
    geoms = [Point(xy) for xy in zip(data.longitude, data.latitude)]
    data.drop(["latitude", "longitude"], axis=1, inplace=True)
    gdata = GeoDataFrame(data, crs={'init' : 'epsg:4326'}, geometry=geoms)
    
    if onlySearchAreaContained:
        from shapely.wkt       import loads
        from gasp.mng.prj      import project_geom
        from gasp.anls.prox.bf import coord_to_buffer
        
        # Check if all retrieved points are within the search area
        _x_center, _y_center, _dist = getBufferParam(
            bfShp, epsgIn, outSRS=3857
        )
        
        search_area = coord_to_buffer(
            float(_x_center), float(_y_center), float(_dist)
        )
        search_area = project_geom(search_area, 3857, 4326, api='ogr')
        search_area = loads(search_area.ExportToWkt())
        
        gdata["tst_geom"] = gdata["geometry"].intersects(search_area)
        gdata = gdata[gdata["tst_geom"] == True]
        
        gdata.reset_index(drop=True, inplace=True)
    
    # Sanitize id
    gdata["fid"]     = gdata["id_1"]
    gdata["fb_type"] = search_type
    
    __DROP_COLS = ["id_1", "city", "country", "street", "zip", "located_in"]
    DROP_COLS = [c for c in __DROP_COLS if c in gdata.columns.values]
    if onlySearchAreaContained:
        DROP_COLS.append("tst_geom")
    
    gdata.drop(DROP_COLS, axis=1, inplace=True)
    
    if epsgOut != 4326:
        gdata = gdata.to_crs({'init' : 'epsg:{}'.format(str(epsgOut))})
    
    return gdata
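A hedged usage sketch for places_by_query (not from the source): the buffer list, keyword and output EPSG are illustrative placeholders.

# Hypothetical call: Facebook places within a 1500 m buffer around a point,
# reprojected to EPSG:3763; coordinates and keyword are placeholders
places = places_by_query(
    [-8.4265, 40.2056, 1500], 4326,
    keyword='restaurant', epsgOut=3763,
    _limit='100', onlySearchAreaContained=True
)

# 0 is returned when the query yields no data
if not isinstance(places, int):
    print(places.head())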
Example 3
def photos_location(buffer_shp,
                    epsg_in,
                    keyword=None,
                    epsg_out=4326,
                    onlySearchAreaContained=True,
                    keyToUse=None):
    """
    Search for data in Flickr and return a array with the same data
    
    buffer_shp cloud be a shapefile with a single buffer feature or a dict
    like:
    buffer_shp = {
        x: x_value,
        y: y_value,
        r: dist (in meters)
    }
    or a list or a tuple:
    buffer_shp = [x, y, radius]
    """

    import pandas
    from shapely.geometry import Polygon, Point
    from shapely.wkt import loads
    from geopandas import GeoDataFrame
    from gasp.anls.prox.bf import coord_to_buffer
    from gasp.anls.prox.bf import getBufferParam
    from gasp.mng.prj import project_geom

    x_center, y_center, dist = getBufferParam(buffer_shp, epsg_in, outSRS=4326)

    # Retrieve data from Flickr (search_photos is assumed to be imported at
    # the module level)
    photos = search_photos(lat=y_center,
                           lng=x_center,
                           radius=float(dist) / 1000,
                           keyword=keyword,
                           apiKey=keyToUse)

    try:
        if not photos:
            # Return noData
            return 0
    except:
        pass

    photos['longitude'] = photos['longitude'].astype(float)
    photos['latitude'] = photos['latitude'].astype(float)

    geoms = [Point(xy) for xy in zip(photos.longitude, photos.latitude)]
    gdata = GeoDataFrame(photos, crs={'init': 'epsg:4326'}, geometry=geoms)

    if onlySearchAreaContained:
        _x_center, _y_center, _dist = getBufferParam(buffer_shp,
                                                     epsg_in,
                                                     outSRS=3857)
        # Check if all retrieved points are within the search area
        search_area = coord_to_buffer(float(_x_center), float(_y_center),
                                      float(_dist))
        search_area = project_geom(search_area, 3857, 4326, api='ogr')
        search_area = loads(search_area.ExportToWkt())

        gdata["tst_geom"] = gdata["geometry"].intersects(search_area)
        gdata = gdata[gdata["tst_geom"] == True]

        gdata.reset_index(drop=True, inplace=True)

    gdata["fid"] = gdata["id"]

    if "url_l" in gdata.columns.values:
        gdata["url"] = gdata["url_l"]
    else:
        gdata["url"] = 'None'

    gdata["description"] = gdata["_content"]

    # Drop irrelevant fields, keeping only the columns of interest
    KEEP_COLS = [
        'geometry', 'description', 'fid', 'url',
        'datetaken', 'dateupload', 'title'
    ]
    delCols = [c for c in gdata.columns.values if c not in KEEP_COLS]

    gdata.drop(delCols, axis=1, inplace=True)

    if epsg_out != 4326:
        gdata = gdata.to_crs({'init': 'epsg:{}'.format(str(epsg_out))})

    return gdata
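A usage sketch for photos_location, assuming a buffer shapefile and a Flickr API key; the path, EPSG code and FLICKR_KEY are placeholders.

# Hypothetical call: Flickr photos inside a buffer stored in a shapefile;
# the path, input EPSG and FLICKR_KEY are placeholders
photos = photos_location(
    '/tmp/search_buffer.shp', 3763,
    keyword='beach', epsg_out=4326,
    onlySearchAreaContained=True, keyToUse=FLICKR_KEY
)

if not isinstance(photos, int):
    print(photos[['fid', 'title', 'url']].head())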
Example 4
def geotweets_location(inGeom,
                       epsg_in,
                       keyword=None,
                       epsg_out=4326,
                       onlySearchAreaContained=True,
                       keyToUse=None):
    """
    Search data in Twitter and array with that data
    
    inGeom cloud be a shapefile with a single buffer feature or a dict like:
    inGeom = {
        x: x_value,
        y: y_value,
        r: dist (in meters)
    }
    or a list or a tuple:
    inGeom = [x, y, radius]
    """

    from shapely.geometry import Polygon, Point
    from geopandas import GeoDataFrame
    from gasp.anls.prox.bf import getBufferParam

    x_center, y_center, dist = getBufferParam(inGeom, epsg_in, outSRS=4326)

    # Extract data from Twitter (search_tweets is assumed to be imported at
    # the module level)
    data = search_tweets(lat=y_center,
                         lng=x_center,
                         radius=float(dist) / 1000,
                         keyword=keyword,
                         NR_ITEMS=500,
                         only_geo=True,
                         key=keyToUse)

    try:
        if not data:
            return 0
    except:
        pass

    # Pandas to GeoPandas
    geoms = [Point(xy) for xy in zip(data.longitude, data.latitude)]
    data.drop(["latitude", "longitude"], axis=1, inplace=True)
    gdata = GeoDataFrame(data, crs={'init': 'epsg:4326'}, geometry=geoms)

    if onlySearchAreaContained:
        from shapely.wkt import loads
        from gasp.mng.prj import project_geom
        from gasp.anls.prox.bf import coord_to_buffer

        # Check if all retrieved points are within the search area
        _x_center, _y_center, _dist = getBufferParam(inGeom,
                                                     epsg_in,
                                                     outSRS=3857)

        search_area = coord_to_buffer(float(_x_center), float(_y_center),
                                      float(_dist))
        search_area = project_geom(search_area, 3857, 4326, api='ogr')
        search_area = loads(search_area.ExportToWkt())

        gdata["tst_geom"] = gdata["geometry"].intersects(search_area)
        gdata = gdata[gdata["tst_geom"] == True]

        gdata.reset_index(drop=True, inplace=True)

        # tst_geom only exists when the search area filter was applied
        gdata.drop("tst_geom", axis=1, inplace=True)

    if epsg_out != 4326:
        gdata = gdata.to_crs({'init': 'epsg:{}'.format(str(epsg_out))})

    return gdata
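A usage sketch for geotweets_location (not from the source): the (x, y, radius) tuple and TWITTER_KEY are placeholders supplied by the caller.

# Hypothetical call: geotagged tweets within 1 km of a point, kept in WGS84;
# coordinates and TWITTER_KEY are placeholders
geotweets = geotweets_location(
    (-8.4265, 40.2056, 1000), 4326,
    keyword=None, epsg_out=4326,
    onlySearchAreaContained=True, keyToUse=TWITTER_KEY
)

if not isinstance(geotweets, int):
    print(geotweets.shape)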
Example 5
def geovideos_to_array(search_words, epsg_out=4326,
                  spatial_filter=None, epsg_filter=4326):
    """
    Locate videos on youtube and save these locations to a vectorial file
    """
    
    import os
    
    from gasp.oss.ops        import del_files_by_name
    from gasp.to.geom        import create_point
    from gasp.mng.prj        import project_geom
    from gasp.anls.tplgy     import point_in_polygon
    from gasp.anls.prox.bf   import draw_buffer
    
    # get_video_details_by_keyword is assumed to be imported at the module level
    videos = get_video_details_by_keyword(search_words)
    
    videos_with_location = []
    
    for video in videos:
        if video['y'] and video['x']:
            videos_with_location.append(video)
    
    if not len(videos_with_location):
        # Return nodata
        return 0
    
    if spatial_filter:
        from gasp.anls.prox.bf import getBufferParam
        
        x_center, y_center, dist = getBufferParam(
            spatial_filter, epsg_filter)
        
        bufferCenter = project_geom(
            create_point(x_center, y_center, api='ogr'),
            4326, 3857, api='ogr'
        )
        
        bufferGeom = draw_buffer(bufferCenter, dist)
    
    else:
        bufferGeom = None
    
    filterData = []
    
    for instance in videos_with_location:
        # Create point
        WGS_POINT = create_point(
            float(instance['x']),
            float(instance['y']), api='ogr'
        )
        
        point = project_geom(WGS_POINT, 4326, 3857, api='ogr')
        
        # When no spatial filter is given, keep every geolocated video
        isPointInPolygon = point_in_polygon(point, bufferGeom) \
            if bufferGeom is not None else True
        
        if isPointInPolygon:
            if epsg_out != 4326:
                trans_point = project_geom(WGS_POINT, 4326, epsg_out, api='ogr')
                
                instance['x'] = trans_point.GetX()
                instance['y'] = trans_point.GetY()
            
            filterData.append(instance)
    
    return filterData
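A usage sketch for geovideos_to_array, assuming the x/y/r buffer-dict format accepted by getBufferParam in the other examples; all values are placeholders.

# Hypothetical call: YouTube videos about 'surf' inside a 5 km buffer;
# coordinates and radius are placeholders
videos = geovideos_to_array(
    'surf', epsg_out=4326,
    spatial_filter={'x': -8.4265, 'y': 40.2056, 'r': 5000},
    epsg_filter=4326
)

# 0 is returned when no geolocated videos are found
if videos:
    for v in videos[:5]:
        print(v['x'], v['y'])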