def by_query(search_type, keyword=None, x_center=None, y_center=None,
             dist=None, limit='100', face_fields=None):
    """
    Search data on Facebook based on:
    - keyword;
    - search type (user, page, event, place, placetopic);
    - location (center and distance from center);
    - limit (maximum number of users/pages/etc. to be returned)*.
    
    * Our default is 100, but the Facebook default is 60.
    
    Returns a pandas DataFrame with the id (and requested fields) of the
    matching Facebook data
    """
    
    import pandas
    from gasp.pyt    import obj_to_lst
    from gasp.fm.web import http_to_json
    
    # Deal with spaces in the keyword expression and with special characters
    keyword = keyword.replace(' ', '%20') if keyword and ' ' in keyword \
        else keyword
    
    face_fields = obj_to_lst(face_fields)
    
    # Build the Graph API search request
    URL = (
        '{graph}search?access_token={_id}|{scrt}'
        '{_q}{typ}{cnt}{dst}{lmt}{flds}'
    ).format(
        graph = FACEBOOK_GRAPH_URL,
        _id   = FACEBOOK_TOKEN['APP_ID'],
        scrt  = FACEBOOK_TOKEN['APP_SECRET'],
        _q    = '' if not keyword else '&q={}'.format(keyword),
        typ   = '&type={}'.format(search_type),
        cnt   = '' if not x_center and not y_center else '&center={},{}'.format(
            y_center, x_center
        ),
        dst   = '' if not dist else '&distance={}'.format(dist),
        lmt   = '' if not limit else '&limit={}'.format(str(limit)),
        flds  = '' if not face_fields else '&fields={}'.format(
            ','.join(face_fields))
    )
    
    face_table = pandas.DataFrame(http_to_json(URL)['data'])
    
    if not face_table.shape[0]:
        # The search returned no results
        return None
    
    face_table["url"] = "https://facebook.com/" + face_table["id"]
    
    if face_fields:
        if "location" in face_fields:
            # Expand the nested location dict into one column per key
            face_table = pandas.concat([
                face_table.drop(["location"], axis=1),
                face_table["location"].apply(pandas.Series)
            ], axis=1)
    
    return face_table
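# Illustrative usage sketch (not part of the original module): query the Graph
# API search endpoint for pages around a point. The keyword, coordinates and
# field names below are made up; a configured FACEBOOK_GRAPH_URL and a valid
# FACEBOOK_TOKEN (APP_ID / APP_SECRET) are assumed.
def _example_by_query():
    pages = by_query(
        'page', keyword='urban parks',
        x_center=-9.14, y_center=38.72, dist=5000,
        limit='50', face_fields=['name', 'location']
    )
    
    if pages is not None:
        # 'id' and 'url' are always present in the returned DataFrame
        print(pages[['id', 'url']].head())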
def email_exists(email):
    """
    Verify if an email address exists using the MailBoxLayer API
    """
    
    from gasp.fm.web import http_to_json
    
    API_KEY = "b7bee0fa2b3ceb3408bd8245244b1479"
    
    URL = (
        "http://apilayer.net/api/check?access_key={}&email={}&"
        "smtp=1&format=1"
    ).format(API_KEY, str(email))
    
    jsonArray = http_to_json(URL)
    
    return jsonArray["smtp_check"]
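# Illustrative usage sketch (hypothetical address; not part of the original
# module): email_exists returns the "smtp_check" flag reported by the
# MailBoxLayer API, so it can be used directly as a boolean test.
def _example_email_exists():
    if email_exists('someone@example.com'):
        print('Mailbox answered the SMTP check')
    else:
        print('Address could not be verified')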
def get_location(facebook_id):
    """
    Return the absolute location (x, y) of some Facebook id
    """
    
    from gasp.fm.web import http_to_json
    
    url = '{grph}{__id}?fields=location&access_token={t_id}|{scret}'.format(
        grph=FACEBOOK_GRAPH_URL, __id=str(facebook_id),
        t_id=FACEBOOK_TOKEN['APP_ID'], scret=FACEBOOK_TOKEN['APP_SECRET']
    )
    
    data = http_to_json(url)['location']
    
    return data['longitude'], data['latitude']
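# Illustrative usage sketch (the node id below is made up): get_location
# returns a (longitude, latitude) tuple, i.e. (x, y), for any Facebook node
# that exposes a location field.
def _example_get_location():
    x, y = get_location('123456789')
    print('Longitude: {}; Latitude: {}'.format(x, y))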
def get_all_fields_by_id(facebook_id, data_type):
    """
    Return all data available for a post, photo, video, etc.
    """
    
    from gasp.fm.web import http_to_json
    
    url = '{base}{_id_}/?fields={fld}&access_token={t_id}|{scret}'.format(
        base=FACEBOOK_GRAPH_URL, _id_=str(facebook_id),
        fld=','.join(FACEBOOK_NODE_FIELDS[data_type]),
        t_id=FACEBOOK_TOKEN['APP_ID'], scret=FACEBOOK_TOKEN['APP_SECRET']
    )
    
    data = http_to_json(url)
    
    return data
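# Illustrative usage sketch (made-up id; assumes 'post' is a valid key of
# FACEBOOK_NODE_FIELDS): fetch every configured field for a single node and
# list which ones came back in the response.
def _example_get_all_fields():
    post = get_all_fields_by_id('1234567890_987654321', 'post')
    print(sorted(post.keys()))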
def extract_by_page(face_page, data_type='posts', nposts=100, returnNext=None,
                    apiKeyToUse=None):
    """
    Extract data from one or several Facebook pages using the Facebook
    GRAPH API
    
    The data_type could be:
    * Posts
    * Photos
    * Videos
    * Locations
    
    Reference Doc:
    https://developers.facebook.com/docs/graph-api/reference/v3.1/post
    """
    
    import pandas
    from gasp.fm.web import http_to_json
    
    # Use the module credentials unless another app key was given
    if not apiKeyToUse:
        KEY_ID, KEY_SECRET = FACEBOOK_TOKEN['APP_ID'], FACEBOOK_TOKEN['APP_SECRET']
    else:
        KEY_ID, KEY_SECRET = apiKeyToUse
    
    FIELDS = [
        'message', 'story', 'created_time', 'description',
        'full_picture', 'link', 'place', 'type'
    ] if data_type == 'posts' else None
    
    URL = (
        '{graph}{page}/{dt}/?key=value&access_token={_id}|{secret}'
        '&limit=100{flds}'
    ).format(
        graph=FACEBOOK_GRAPH_URL, page=face_page, _id=KEY_ID,
        secret=KEY_SECRET, dt=data_type,
        flds='' if not FIELDS else '&fields={}'.format(",".join(FIELDS))
    )
    
    try:
        raw_data = http_to_json(URL)
    except:
        print(URL)
        return None, None
    
    data = pandas.DataFrame(raw_data["data"])
    
    if nposts <= 100:
        # A single request is enough
        if not returnNext:
            return sanitizeData(data, FACE_PAGE=face_page)
        
        if 'paging' in raw_data and 'next' in raw_data['paging']:
            return sanitizeData(
                data, FACE_PAGE=face_page), raw_data["paging"]["next"]
        else:
            return sanitizeData(data, FACE_PAGE=face_page), None
    
    else:
        # Page through the feed in batches of 100 until nposts is reached
        # or there is no next page
        N = int(round(nposts / 100.0, 0))
        
        new_URL = raw_data["paging"]["next"]
        
        for n in range(N - 1):
            try:
                moreRawData = http_to_json(new_URL)
            except:
                return None, None
            
            data = data.append(
                pandas.DataFrame(moreRawData['data']), ignore_index=True
            )
            
            if 'paging' in moreRawData and 'next' in moreRawData['paging']:
                new_URL = moreRawData["paging"]["next"]
            else:
                break
        
        if not returnNext:
            return sanitizeData(data, FACE_PAGE=face_page)
        else:
            return sanitizeData(data, FACE_PAGE=face_page), new_URL
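# Illustrative usage sketch (page name is made up; sanitizeData is assumed to
# return a pandas DataFrame): request around 250 posts from a page, paging
# through the feed in batches of 100, and keep the "next" paging URL so the
# crawl can be resumed later with extract_from_url_and_next.
def _example_extract_by_page():
    posts, next_url = extract_by_page(
        'SomePublicPage', data_type='posts', nposts=250, returnNext=True
    )
    
    if posts is not None:
        print(posts.shape)
    print(next_url)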
def extract_from_url_and_next(url, Nnext=None, returnNext=None):
    """
    Extract data from a Facebook URL and from the next URL's until
    Nnext pages have been followed
    """
    
    import pandas
    from gasp.fm.web import http_to_json
    
    raw_data = http_to_json(url)
    
    data = pandas.DataFrame(raw_data["data"])
    
    if not Nnext:
        if not returnNext:
            return sanitizeData(data)
        
        if 'paging' in raw_data and 'next' in raw_data['paging']:
            return sanitizeData(data), raw_data["paging"]["next"]
        else:
            return sanitizeData(data), None
    
    # Nnext was given but there is no next page to follow
    if 'paging' not in raw_data or 'next' not in raw_data['paging']:
        if not returnNext:
            return sanitizeData(data)
        else:
            return sanitizeData(data), None
    
    new_URL = raw_data["paging"]["next"]
    
    for i in range(Nnext):
        moreRawData = http_to_json(new_URL)
        
        data = data.append(
            pandas.DataFrame(moreRawData['data']), ignore_index=True
        )
        
        if 'paging' in moreRawData and 'next' in moreRawData['paging']:
            new_URL = moreRawData['paging']['next']
        else:
            break
    
    if not returnNext:
        return sanitizeData(data)
    else:
        return sanitizeData(data), new_URL
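# Illustrative usage sketch: resume a crawl from a paging URL kept by a
# previous call, following up to three more "next" links (the saved_next_url
# argument is hypothetical and would typically come from extract_by_page).
def _example_extract_from_url(saved_next_url):
    more_posts, next_url = extract_from_url_and_next(
        saved_next_url, Nnext=3, returnNext=True
    )
    
    if more_posts is not None:
        print(more_posts.shape)
    print(next_url)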