def test_search():
    graph = GraphAPI('<access token>')

    mock_request.return_value.content = json.dumps({
        'data': [
            {'message': 'I don\'t like your chair.'},
            {'message': 'Don\'t let your mouth get your ass in trouble.'}
        ]
    })

    graph.search(
        term='shaft quotes',
        type='post'
    )

    mock_request.assert_called_with(
        'GET',
        'https://graph.facebook.com/search',
        allow_redirects=True,
        params={
            'q': 'shaft quotes',
            'type': 'post',
            'access_token': '<access token>'
        }
    )
def test_search():
    graph = GraphAPI('<access token>')

    mock_request.return_value.content = json.dumps({
        'data': [
            {'message': 'I don\'t like your chair.'},
            {'message': 'Don\'t let your mouth get your ass in trouble.'}
        ]
    })

    graph.search(
        term='shaft quotes',
        type='post'
    )

    mock_request.assert_called_with(
        'GET',
        'https://graph.facebook.com/search',
        allow_redirects=True,
        verify=True,
        timeout=None,
        params={
            'q': 'shaft quotes',
            'type': 'post',
            'access_token': '<access token>'
        }
    )
def test_search():
    graph = GraphAPI(TEST_USER_ACCESS_TOKEN)

    mock_request.return_value.content = json.dumps({
        'data': [
            {'message': 'I don\'t like your chair.'},
            {'message': 'Don\'t let your mouth get your ass in trouble.'}
        ]
    })

    # Test a simple search
    graph.search(
        term='shaft quotes',
        type='post'
    )

    mock_request.assert_called_with(
        'GET',
        'https://graph.facebook.com/search',
        allow_redirects=True,
        params={
            'q': 'shaft quotes',
            'type': 'post',
            'access_token': TEST_USER_ACCESS_TOKEN
        }
    )
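# The tests above reference a module-level mock_request that none of these
# snippets defines. A minimal fixture sketch, assuming facepy routes all HTTP
# traffic through requests.session() (the names here are illustrative, not
# facepy's actual test harness); the tests additionally need json and
# GraphAPI imported at module level:
from unittest.mock import patch

session_patch = patch('requests.session')

def setup_module(module):
    global mock_request
    # patch.start() replaces requests.session with a mock; calling that mock
    # yields the fake session whose .request method the tests assert against.
    mock_request = session_patch.start()().request

def teardown_module(module):
    session_patch.stop()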
def searchFacebook(user):
    """
    TODO: obtain an authentication token; this may require a
    login implementation.
    """
    graph = GraphAPI(access_token)
    graph.search(user.name, 'user', page=False, retry=3)
def test_search():
    graph = GraphAPI()

    results = graph.search(
        term='the meaning of life',
        type='post'
    )

    assert isinstance(results, list)
def test_search():
    graph = GraphAPI()

    results = graph.search(
        term='the meaning of life',
        type='post'
    )

    assert isinstance(results['data'], list)
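# The two asserts above differ because facepy's search returns the decoded
# JSON body of the Graph API response. A sketch of that shape, with
# illustrative values only, showing why the second test inspects
# results['data']:
example_result = {
    'data': [
        {'message': 'first matching post'},
        {'message': 'second matching post'},
    ],
    'paging': {
        'previous': 'https://graph.facebook.com/search?...',
        'next': 'https://graph.facebook.com/search?...',
    },
}

assert isinstance(example_result['data'], list)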
from facepy import GraphAPI
from progress.bar import FillingSquaresBar


class FacebookMiner(object):
    page_fields = (
        '?fields=is_community_page,category,category_list,fan_count,hours,'
        'link,location,name,name_with_location_descriptor,overall_star_rating,'
        'parking,phone,rating_count,single_line_address,'
        'store_location_descriptor,website,were_here_count'
    )

    def __init__(self, mine_points, API_KEY, search_rayon=1000,
                 categories=['FOOD_BEVERAGE'], _type='place'):
        self.points = mine_points
        self.graph = GraphAPI(API_KEY, version='2.9')
        self.categories = categories
        self.r = search_rayon  # search radius in metres
        self.dim = len(self.points)
        self._type = _type

    def _mine(self, progress=True):
        if progress:
            self.bar = FillingSquaresBar('Mining:', max=self.dim)
            for p in self.points:
                for pla in self.get_places(p):
                    yield pla
                self.bar.next()
            self.bar.finish()
        else:
            for p in self.points:
                for pla in self.get_places(p):
                    yield pla

    def get_places(self, p):
        c = str(p[0]) + ',' + str(p[1])
        nearby_ids = [
            l['id'] for l in self.graph.search(
                term='',
                categories=str(self.categories),
                type=self._type,
                center=c,
                distance=self.r
            )['data']
        ]
        for _id in nearby_ids:
            entity = self.graph.get(str(_id) + self.page_fields)
            entity['fb_id'] = entity.pop('id')
            try:
                entity['location']['latitude'] = float(
                    entity['location'].pop('latitude'))
                entity['location']['longitude'] = float(
                    entity['location'].pop('longitude'))
            except Exception:
                pass
            try:
                entity['overall_star_rating'] = float(
                    entity.pop('overall_star_rating'))
            except Exception:
                pass
            yield entity
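# A minimal usage sketch for the miner above; the coordinates, radius, and
# token are placeholders:
points = [(48.8566, 2.3522), (48.8606, 2.3376)]
miner = FacebookMiner(points, '<access token>', search_rayon=500)
for place in miner._mine(progress=False):
    print(place['fb_id'], place.get('name'), place.get('overall_star_rating'))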
def format_feed(self):
    graph = GraphAPI()
    response = graph.get(path='oauth/access_token',
                         client_id=self.app_id,
                         client_secret=self.app_secret,
                         grant_type="client_credentials")
    token = parse.urlparse(response).path
    token = token.split("=")[1]
    graph = GraphAPI(token)
    things = graph.search("the", "post")
    print(things)

    listings = []
    for i in range(0, self.num_found):
        self.title = photos_lst[i]['title']
        photo_id = photos_lst[i]['id']
        secret = photos_lst[i]['secret']

        # Photo information
        photo_info = flickr.photos.getInfo(photo_id=photo_id, secret=secret,
                                           extras="original_format")
        server_id = str(photo_info["photo"]["server"])
        farm_id = str(photo_info["photo"]["farm"])
        date = str(photo_info["photo"]["dates"]["taken"])
        nsid = str(photo_info["photo"]["owner"]["nsid"])
        ext = photo_info["photo"].get("originalformat", "jpg")
        self.owner = photo_info["photo"]["owner"]["username"]
        self.owner_url = flickr.urls.getUserProfile(user_id=nsid)["user"]["url"]
        self.image_url = photo_info["photo"]["urls"]["url"][0]["_content"]

        # Location information
        lat = float(photo_info["photo"]["location"]["latitude"])
        long = float(photo_info["photo"]["location"]["longitude"])
        neighbourhood = "Somewhere"
        if "neighbourhood" in photo_info["photo"]["location"]:
            neighbourhood = photo_info["photo"]["location"]["neighbourhood"]["_content"]
        region = photo_info["photo"]["location"]["region"]["_content"]
        self.location = neighbourhood + ", " + region

        # URLs
        self.url = "http://farm" + farm_id + ".staticflickr.com/" + server_id + "/" + photo_id + "_" + secret + "." + ext
        self.thumb_url = "http://farm" + farm_id + ".staticflickr.com/" + server_id + "/" + photo_id + "_" + secret + "_n." + ext

        listing = {"occurred_on": date,
                   "latitude": lat,
                   "longitude": long,
                   "description": self.getDescription(),
                   "category_name": "Flickr Photos (NZ)",
                   "source_id": self.url,
                   "attachment_url": self.url}
        # create a list of dictionaries
        listings.insert(0, listing)
    return listings
def test_search(): graph = GraphAPI("<access token>") mock_request.return_value.content = json.dumps( { "data": [ {"message": "I don't like your chair."}, {"message": "Don't let your mouth get your ass in trouble."}, ] } ) mock_request.return_value.status_code = 200 graph.search(term="shaft quotes", type="post") mock_request.assert_called_with( "GET", "https://graph.facebook.com/search", allow_redirects=True, verify=True, timeout=None, params={"q": "shaft quotes", "type": "post", "access_token": "<access token>"}, )
from selenium import webdriver
from facepy import GraphAPI
from json import dump
from time import sleep

startup_fan_pages = {}
access_token = "access_token"  # get it here: https://developers.facebook.com/tools/explorer/
graph = GraphAPI(access_token)

browser = webdriver.Firefox()
browser.get("http://startupsinnepal.com")  # data source containing listings of startups in Nepal
sleep(40)  # wait for the browser to completely load the page

startups = [
    startup.text.lower()
    for startup in browser.find_elements_by_class_name("panel-title")
]  # a list containing startups in Nepal

for startup in startups:
    r = graph.search(startup, "page", page=False, retry=3)  # page=False returns a dict instead of a generator
    if len(r["data"]) > 0:
        startup_fan_pages[r["data"][0]["name"]] = str(r["data"][0]["id"])

# print(startup_fan_pages)
with open("startupsinnepalfanpages.json", "w") as fp:
    dump(startup_fan_pages, fp)
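# Continuing from the script above: with page=False facepy returns a single
# decoded response dict, while page=True returns a generator that yields one
# such dict per page of results. A sketch of consuming the paged form (the
# search term is illustrative):
for page in graph.search("some startup", "page", page=True):
    for result in page["data"]:
        print(result["name"], result["id"])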
import json

import requests
from django.shortcuts import render
from facepy import GraphAPI
from geopy.geocoders import Nominatim


def index(request):
    if request.user:
        social = request.user.social_auth.get(provider='facebook')
        access_token = social.extra_data['access_token']
        graph = GraphAPI(access_token)

        ids = []
        events = []
        filtered = []
        check = []

        city = graph.get('me?fields=location')['location']['name']
        geolocator = Nominatim()
        location = geolocator.geocode(city)
        coordinates = '%f,%f' % (location.latitude, location.longitude)

        # queries for places
        places = graph.search('*', 'place', page=False, retry=3,
                              center=coordinates, distance=10000,
                              limit=1000, fields='name')
        # query = '*'
        # url = 'https://graph.facebook.com/search?q=%s&type=place&center=40.110539,-88.228411&distance=5000&limit=1000&fields=name' % query
        # parameters = {'access_token': access_token}
        # r = requests.get(url, params=parameters)
        # places = json.loads(r.text)

        for place in places['data']:
            ids.append(place['id'])

        startIndex = 0
        id_list = []
        for a in range(len(ids) // 50):
            for b in range(50):
                id_list.append(ids[startIndex + b])
            url = ('https://graph.facebook.com/v2.5/?ids=%s&fields=id,name,'
                   'cover.fields(id,source),picture.type(large),location,'
                   'events.fields(id,name,cover.fields(id,source),'
                   'picture.type(large),description,start_time,attending_count,'
                   'declined_count,maybe_count,noreply_count)') % ','.join(id_list)
            parameters = {'access_token': access_token}
            r = requests.get(url, params=parameters)
            data = json.loads(r.text)
            for place_id in data:
                if 'events' in data[place_id]:
                    for c in range(len(data[place_id]['events']['data'])):
                        events.append(data[place_id]['events']['data'][c])
            startIndex += 50
            id_list = []

        for i in range(len(ids) % 50):
            id_list.append(ids[startIndex + i])
        check.append(','.join(id_list))
        url = ('https://graph.facebook.com/v2.5/?ids=%s&fields=id,name,'
               'cover.fields(id,source),picture.type(large),location,'
               'events.fields(id,name,cover.fields(id,source),'
               'picture.type(large),description,start_time,attending_count,'
               'declined_count,maybe_count,noreply_count)') % ','.join(id_list)
        parameters = {'access_token': access_token}
        r = requests.get(url, params=parameters)
        data = json.loads(r.text)
        for place_id in data:
            if 'events' in data[place_id]:
                for d in range(len(data[place_id]['events']['data'])):
                    events.append(data[place_id]['events']['data'][d])

        count = 0
        for event in events:
            if parse_event(event):  # parse_event: project-specific filter helper, defined elsewhere
                count += 1
                filtered.append(event)

        return render(request, 'index.html',
                      {'user': request.user, 'events': filtered})
    else:
        return render(request, 'index.html', {'user': request.user})
import logging

from facepy import GraphAPI

# Assumed to be defined elsewhere in this module:
#   KEYS_FB   - list of "<App_ID>|<App_Secret>" access tokens
#   key_index - module-level index into KEYS_FB


class processGraph:
    def __init__(self, key=None):
        global key_index
        """
        >>> Graph = processGraph()

        You may initialise the Facebook GraphAPI with your own AppID and AppSecret:

        >>> Graph = processGraph("<<App_ID>>|<<App_Secret>>")
        """
        if not key:
            while True:
                self.graph = GraphAPI(KEYS_FB[key_index])
                try:
                    self.graph.search("test", "place")
                    break
                except:
                    key_index = (key_index + 1) % len(KEYS_FB)
        else:
            self.graph = GraphAPI(key)

    def searchPlace(self, row):
        name, city, pin = row['Name'], row['City'].lower(), row['Pincode']
        city = city.lower()
        search_result = self.graph.get(
            "search?q=%s&fields=location&type=place&limit=10" % (name))
        probable = None
        for place in search_result['data']:
            if 'location' not in place:
                continue
            if unicode(pin) == unicode(place['location']['zip']):
                return self.graph.get(
                    place['id'] +
                    "?fields=location,is_verified,description,phone,link,cover,website"
                )
            if city == place['location']['city'].lower():
                probable = place['id']
        if probable:
            return self.graph.get(
                probable +
                "?fields=location,description,is_verified,phone,link,cover,website"
            )
        return dict()

    def _repairDetails(self, row, node):
        if 'description' in node and not row['Details']:
            row['Details'] = node['description']
            #print "Added description "+node['description'][:40]+" to "+row["Name"]+" from facebook"
            return 1
        return 0

    def _repairWebsite(self, row, node):
        if not row['Website']:
            if 'website' in node:
                row['Website'] = node['website']
                #print "Added website "+node['website']+" to "+row["Name"]+" from facebook"
                return 1
        return 0

    def _repairPin(self, row, node):
        if 'location' in node:
            if not row['Pincode'] and 'zip' in node['location']:
                row['Pincode'] = node['location']['zip']
                #print "Added pin "+node['location']['zip']+" to "+row["Name"]+" from facebook"
                return 1
        return 0

    def _repairStreet(self, row, node):
        if 'location' in node:
            if not row['Street Address'] and 'street' in node['location']:
                row['Street Address'] = node['location']['street']
                #print "Added address "+node['location']['street']+" to "+row["Name"]+" from facebook"
                return 1
        return 0

    def _addPage(self, row, node):
        if 'link' in node:
            row['fb_page'] = node['link']
            #print "Added page "+node['link']+" to "+row["Name"]+" from facebook"
            return 1
        return 0

    def _isVerified(self, row, node):
        if 'is_verified' in node:
            if node['is_verified']:
                row['fb_verified'] = 'True'
                return 1
        row['fb_verified'] = 'False'
        return 0

    def _addCover(self, row, node):
        if 'cover' in node:
            row['Images URL'] = node['cover']['source'] + "," + row['Images URL']
            #print "Added cover "+node['cover']['source']+" to "+row["Name"]+" from facebook"
            return 1
        return 0

    def _addPicture(self, row, node):
        if 'id' not in node:
            return 0
        profile_pic = self.graph.get(
            node['id'] + "/picture?height=500&width=500&redirect")
        if 'data' in profile_pic:
            if 'url' in profile_pic['data'] and 'is_silhouette' in profile_pic['data']:
                if not profile_pic['data']['is_silhouette']:
                    row['Images URL'] += profile_pic['data']['url'] + ","
                    return 1
        return 0

    def processSelective(self, rows, selection):
        """
        Available selections are:
            _repairDetails
            _repairWebsite
            _repairPin
            _repairStreet
            _addPage
            _addCover
            _addPicture

        e.g.

        >>> Graph = processGraph()
        >>> Graph.processSelective(CSV_Dictionary, '_repairDetails')
        """
        stat = 0
        if selection in dir(self):
            method = getattr(self, selection)
            for row in rows:
                try:
                    node = self.searchPlace(row)
                    stat += method(row, node)
                except:
                    logging.exception("Error loading %s from facebook for %s"
                                      % (selection, row['Name']))
        print "New Info Added from Facebook\n%s: %d" % (selection, stat)

    def processAll(self, rows):
        # stats
        details, link, cover, website, pincode, street, dp, verified = 0, 0, 0, 0, 0, 0, 0, 0
        for row in rows:
            try:
                node = self.searchPlace(row)
                details += self._repairDetails(row, node)
                website += self._repairWebsite(row, node)
                pincode += self._repairPin(row, node)
                street += self._repairStreet(row, node)
                link += self._addPage(row, node)
                cover += self._addCover(row, node)
                dp += self._addPicture(row, node)
                verified += self._isVerified(row, node)
            except:
                logging.exception(
                    "Error loading information from facebook for " + row['Name'])
        print "New Info Added from Facebook\nDetails:%d Facebook Link:%d Cover:%d \nWebsite:%d Pincode:%d Address:%d Images:%d Verified %d/%d" % (
            details, link, cover, website, pincode, street, dp, verified, link)
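# A hypothetical driver for the class above, assuming the CSV carries the
# column names the repair methods expect (Name, City, Pincode, Details,
# Website, Street Address, Images URL, ...); the file name is a placeholder:
import csv

with open('places.csv', 'r') as f:
    CSV_Dictionary = list(csv.DictReader(f))

Graph = processGraph()
Graph.processAll(CSV_Dictionary)
Graph.processSelective(CSV_Dictionary, '_addPicture')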
import csv
import glob
import logging
from random import randint

from facepy import GraphAPI

# Assumed to be defined elsewhere in this project:
#   KEYS_FB          - list of "<App_ID>|<App_Secret>" access tokens
#   safe_dec_enc     - encoding-normalisation helper
#   UTF8             - string-to-UTF-8 helper
#   parseFBWorkHours - parser for the Graph API "hours" field


class processGraph:
    def __init__(self, key=None):
        self.key_index = 0
        self.viewFactor = 0
        """
        >>> Graph = processGraph()

        You may initialise the Facebook GraphAPI with your own AppID and AppSecret:

        >>> Graph = processGraph("<<App_ID>>|<<App_Secret>>")
        """
        if not key:
            while True:
                self.graph = GraphAPI(KEYS_FB[self.key_index])
                try:
                    self.graph.search("test", "place")
                    break
                except:
                    self.key_index = (self.key_index + 1) % len(KEYS_FB)
        else:
            self.graph = GraphAPI(key)

        # FOR STATE DATA TO BE USED BY Graph API
        self.state_data_rows = []
        file_name = glob.glob('./state_data/city_state.csv')
        state_file = open(file_name[0], 'r')
        state_reader = csv.DictReader(state_file, dialect=csv.excel)
        self.state_data_rows.extend(state_reader)
        state_file.close()

    def graceful_request(self, url):
        # Retry forever, rotating to the next token in KEYS_FB on any failure.
        while True:
            try:
                result = self.graph.get(url)
                return result
            except:
                print 'ERROR. CHANGING KEY from index', self.key_index,
                self.key_index = (self.key_index + 1) % len(KEYS_FB)
                print 'to', self.key_index
                self.graph = GraphAPI(KEYS_FB[self.key_index])

    def number_parser(self, x):
        flag_add = False
        numerals = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
        allowed_start_symbols = numerals + ['+']

        # INITIAL CLEANUP
        x = x.strip()
        idx = 0
        for _ in x:
            if _ in allowed_start_symbols:
                break
            idx += 1
        x = x[idx:]

        if x.find('+91') == 0 or x.find('91 ') == 0:
            flag_add = True
        word = ''
        phone_number = []
        if flag_add:
            word = list(x[3:])
        else:
            word = list(x)
        non_zero_encountered = False
        for letter in word:
            # REMOVES 0 FROM START OF NUMBERS
            if not non_zero_encountered:
                if letter in numerals[1:]:
                    non_zero_encountered = True
            if non_zero_encountered:
                if letter in numerals:
                    phone_number.append(letter)
        return ''.join(phone_number)

    def website_parser(self, x):
        if not x:
            return ''
        # INITIAL CLEANUP
        x = x.strip()
        x = x.replace('//www.', '//')

        filler_flag = False
        fillers = ['/', '#']
        for _ in fillers:
            if _ in x[-1]:
                filler_flag = True
        if filler_flag:
            x = x[:-1]
        return x

    def match_website(self, website, resp):
        if website == self.website_parser(resp):
            return True
        return False

    def match_phone_nos(self, phones, resp):
        # DECREASING SEPARATOR PRIORITY ORDER
        separators = [',', '/', ';', '&']
        resp_nos = []
        sep_found = False
        for separator in separators:
            if resp.find(separator) != -1:
                resp_nos.extend(resp.split(separator))
                sep_found = True
                break
        if not sep_found:
            resp_nos.append(resp)
        for i in range(len(resp_nos)):
            resp_nos[i] = safe_dec_enc(resp_nos[i])
        for x in resp_nos:
            if self.number_parser(x) in phones:
                return True
        return False

    def analyze_prediction(self, row, query, allow_website_match):
        pin = row['Pincode']
        phones = []
        websites = []
        emails = []
        for i in range(1, 6):
            if row['Phone' + str(i)]:
                phones.append(self.number_parser(row['Phone' + str(i)]))
        if row['Website']:
            websites.append(row['Website'].strip())
        if row['Website2']:
            websites.append(row['Website2'].strip())
        if row['Mail']:
            emails.append(row['Mail'].strip())
        if row['Mail2']:
            emails.append(row['Mail2'].strip())
        search_result = self.graceful_request(
            "search?q=%s&fields=location,phone,emails,website&type=place&limit=50"
            % (query))
        for place in search_result['data']:
            if 'location' in place:
                if 'zip' in place['location']:
                    if unicode(pin) == unicode(place['location']['zip']) and unicode(pin):
                        node = self.graceful_request(
                            place['id'] +
                            "?fields=name,location,is_verified,description,phone,link,cover,website,emails"
                        )
                        return node
            if 'phone' in place and phones:
                if self.match_phone_nos(phones, safe_dec_enc(place['phone'])):
                    node = self.graceful_request(
                        place['id'] +
                        "?fields=name,location,is_verified,description,phone,link,cover,website,emails"
                    )
                    return node
            for email in emails:
                if 'emails' in place and email:
                    for x in place['emails']:
                        if email == safe_dec_enc(x):
                            node = self.graceful_request(
                                place['id'] +
                                "?fields=name,location,is_verified,description,phone,link,cover,website,emails"
                            )
                            return node
        # WEBSITE MATCH IS NOT SAFE. HENCE IT SHOULD BE DONE ONLY IF THE
        # MEASURES ABOVE FAIL.
        if allow_website_match:
            for website in websites:
                website = self.website_parser(website)
                match = False
                multiple_match = False
                correct_place_id = ''
                for place in search_result['data']:
                    if 'website' in place and website:
                        if self.match_website(website, safe_dec_enc(place['website'])):
                            if not match:
                                correct_place_id = place['id']
                                match = True
                            else:
                                multiple_match = True
                                break
                if match and not multiple_match:
                    node = self.graceful_request(
                        correct_place_id +
                        "?fields=name,location,is_verified,description,phone,link,cover,website,emails"
                    )
                    return node
        return dict()

    def searchPlace(self, row, state):
        row['Name'] = safe_dec_enc(row['Name'], True)
        row['Locality'] = safe_dec_enc(row['Locality'], True)
        row['City'] = safe_dec_enc(row['City'], True)

        self.viewFactor = 0
        node = None
        if row['Locality']:
            query = row['Name'] + ', ' + row['Locality']
            node = self.analyze_prediction(row, query, True)
        if not node and row['City']:
            query = row['Name'] + ', ' + row['City']
            node = self.analyze_prediction(row, query, True)
        if not node:
            query = row['Name'] + ', ' + state
            node = self.analyze_prediction(row, query, True)
        if not node:
            query = row['Name']
            node = self.analyze_prediction(row, query, False)
        return node

    def _repairDetails(self, row, node):
        if 'description' in node and not row['Details']:
            row['Details'] = node['description']
            #print "Added description "+node['description'][:40]+" to "+row["Name"]+" from facebook"
            self.viewFactor += 1
            return 1
        return 0

    def _repairWebsite(self, row, node):
        if not row['Website']:
            if 'website' in node:
                row['Website'] = node['website']
                #print "Added website "+node['website']+" to "+row["Name"]+" from facebook"
                return 1
        return 0

    def _repairPin(self, row, node):
        if 'location' in node:
            if not row['Pincode'] and 'zip' in node['location']:
                row['Pincode'] = node['location']['zip']
                #print "Added pin "+node['location']['zip']+" to "+row["Name"]+" from facebook"
                return 1
        return 0

    def _repairStreet(self, row, node):
        if 'location' in node:
            if not row['Street Address'] and 'street' in node['location']:
                row['Street Address'] = node['location']['street']
                #print "Added address "+node['location']['street']+" to "+row["Name"]+" from facebook"
                return 1
        return 0

    def _addPage(self, row, node):
        if 'link' in node:
            row['fb_page'] = node['link']
            #print "Added page "+node['link']+" to "+row["Name"]+" from facebook"
            self.viewFactor += 1
            return 1
        return 0

    def _isVerified(self, row, node):
        if 'is_verified' in node:
            if node['is_verified']:
                row['fb_verified'] = 'True'
                self.viewFactor += 2
                return 1
        row['fb_verified'] = 'False'
        return 0

    def _addCover(self, row, node):
        if 'cover' in node:
            row['Images URL'] = node['cover']['source'] + "," + row['Images URL']
            #print "Added cover "+node['cover']['source']+" to "+row["Name"]+" from facebook"
            self.viewFactor += 1
            return 1
        return 0

    def _addEmails(self, row, node):
        check = 0
        if 'emails' in node:
            for i in node['emails']:
                if row['Mail'] and i.encode('utf-8', 'ignore').strip() not in row['Mail'].strip():
                    row['Mail2'] = i
                    return check + 1
                row['Mail'] = i
                check = 1
        return check

    def _addPhone(self, row, node):
        if 'phone' in node:
            ph = map(UTF8, node['phone'].split(','))
            for i in range(1, 6):
                if not row['Phone' + str(i)]:
                    break
            for j, p in zip(range(i + 1, 6), ph):
                row['Phone' + str(j)] = p.strip()
                #print "Added phone "+p.strip()+" in "+'Phone'+str(j)+" from facebook"+str(node['location'])
            return 1
        return 0

    def _addPicture(self, row, node):
        if 'id' not in node:
            return 0
        profile_pic = self.graceful_request(
            node['id'] + "/picture?height=500&width=500&redirect")
        if 'data' in profile_pic:
            if 'url' in profile_pic['data'] and 'is_silhouette' in profile_pic['data']:
                if not profile_pic['data']['is_silhouette']:
                    if row['Images URL'] != None:
                        row['Images URL'] = profile_pic['data']['url'] + "," + row['Images URL']
                    self.viewFactor += 2
                    return 1
        return 0

    def processSelective(self, rows, selection, state=''):
        """
        Available selections are:
            _repairDetails
            _repairWebsite
            _repairPin
            _repairStreet
            _addPage
            _addCover
            _addPicture
            _addPhone
            _addEmails

        e.g.

        >>> Graph = processGraph()
        >>> Graph.processSelective(CSV_Dictionary, '_repairDetails')
        """
        stat = 0
        if selection in dir(self):
            method = getattr(self, selection)
            for row in rows:
                try:
                    # searchPlace requires a state for its query fallbacks.
                    node = self.searchPlace(row, state)
                    stat += method(row, node)
                except:
                    logging.exception("Error loading %s from facebook for %s"
                                      % (selection, row['Name']))
        print("New Info Added from Facebook\n%s: %d" % (selection, stat))

    def _addViews(self, row):
        if row['Total Views']:
            row['Total Views'] += self.viewFactor * randint(100, 200)
        else:
            row['Total Views'] = self.viewFactor * randint(100, 200)

    def _nodePhotos(self, row, node):
        if 'id' not in node:
            return
        photos = []
        after = ''
        while True:
            resp = self.graceful_request(
                node['id'] +
                '/photos?type=uploaded&fields=source&limit=10&after=%s' % after)
            if 'data' in resp:
                for i in resp['data']:
                    photos.append(i['source'])
            if 'paging' in resp:
                after = resp['paging']['cursors']['after']
                if 'next' not in resp['paging']:
                    break
            else:
                break
            # TO GUARANTEE QUICK TERMINATION
            if len(photos) >= 10:
                break
        row_data = ''
        for photo in photos:
            if row_data:
                row_data += ',' + photo
            else:
                row_data = photo
        row['fb_photos'] = row_data

    def _nodeVideos(self, row, node):
        if 'id' not in node:
            return
        videos = []
        after = ''
        while True:
            resp = self.graceful_request(
                node['id'] +
                '/videos?type=uploaded&fields=source,title,description&limit=10&after=%s'
                % after)
            if 'data' in resp:
                for i in resp['data']:
                    videos.append(i)
            if 'paging' in resp:
                after = resp['paging']['cursors']['after']
                if 'next' not in resp['paging']:
                    break
            else:
                break
            # TO GUARANTEE QUICK TERMINATION
            if len(videos) >= 10:
                break
        row_data = ''
        for video in videos:
            x = ''
            if 'title' in video:
                x = '{"title":"%s","link":"%s"}' % (
                    video['title'].encode('ascii', 'ignore').replace('"', ''),
                    video['source'])
            elif 'description' in video:
                x = '{"title":"%s","link":"%s"}' % (
                    video['description'].encode('ascii', 'ignore').replace('"', ''),
                    video['source'])
            else:
                x = '{"title":"%s","link":"%s"}' % ('', video['source'])
            if row_data:
                row_data += ',' + x
            else:
                row_data = x
        row['fb_videos'] = row_data

    def _nodeWorkingHours(self, row, node):
        if 'id' not in node:
            return
        resp = self.graceful_request(node['id'] + '?fields=hours')
        row_data = ''
        if 'hours' in resp:
            try:
                row_data = parseFBWorkHours.parse(resp['hours'])
            except:
                logging.exception("Error parsing Working Hours for " + row['Name'])
        row['fb_workingHours'] = row_data

    def _nodePosts(self, row, node):
        if 'id' not in node:
            return
        posts = []
        after = ''
        while True:
            resp = self.graceful_request(
                node['id'] +
                '/posts?fields=message,type,created_time&limit=90&next=%s' % after)
            if 'data' in resp:
                for i in resp['data']:
                    if i['type'] == 'status' and 'message' in i:
                        posts.append(i)
            if 'paging' in resp:
                after = resp['paging']['next']
            else:
                break
            # TO GUARANTEE QUICK TERMINATION
            if len(posts) >= 10:
                break
        row_data = ''
        for post in posts:
            x = '{"created_time":"%s","message":"%s"}' % (
                post['created_time'],
                post['message'].encode('ascii', 'ignore').replace('"', ''))
            if row_data:
                row_data += ',' + x
            else:
                row_data = x
        row['fb_posts'] = row_data

    def _mergeData(self, row):
        # Merge Photos
        if row['fb_photos']:
            if row['Images URL']:
                row['Images URL'] = row['fb_photos'] + ',' + row['Images URL']
            else:
                row['Images URL'] = row['fb_photos']
        if not row['Working Hours'] and row['fb_workingHours']:
            row['Working Hours'] = row['fb_workingHours']

    def processAll(self, rows, state):
        # stats
        details, link, cover, website, pincode, street, dp, verified, phone, email = \
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        total = len(rows)
        print("\nFetching info from FB Graph")
        print 'STATE : ', state
        for progress, row in enumerate(rows):
            try:
                node = self.searchPlace(row, state)
                details += self._repairDetails(row, node)
                website += self._repairWebsite(row, node)
                pincode += self._repairPin(row, node)
                street += self._repairStreet(row, node)
                link += self._addPage(row, node)
                phone += self._addPhone(row, node)
                email += self._addEmails(row, node)
                verified += self._isVerified(row, node)
                self._addViews(row)
                #self._nodePosts(row, node)
                self._nodeVideos(row, node)
                self._nodePhotos(row, node)
                self._nodeWorkingHours(row, node)
                self._mergeData(row)
                # ENSURES COVER/DP AS THE FIRST PICTURE
                cover += self._addCover(row, node)
                dp += self._addPicture(row, node)
                ## pro = int((float(progress) / total) * 100)  # Commented out to avoid bad characters in logs
                ## sys.stdout.write("\r%d%%" % pro)
                ## sys.stdout.flush()
            except:
                logging.exception(
                    "Error loading information from facebook for " + row['Name'])
        ## sys.stdout.write("\r100%")
        ## sys.stdout.flush()
        print(
            "\nNew Info Added from Facebook\nDetails:%d Facebook Link:%d Cover:%d \nWebsite:%d Pincode:%d Address:%d Images:%d Verified %d/%d Phone:%d Emails:%d"
            % (details, link, cover, website, pincode, street, dp, verified,
               link, phone, email))
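# Hypothetical usage of the extended class above, reusing the CSV_Dictionary
# loading sketch shown earlier; processAll now also needs the state used by
# searchPlace's query fallbacks ('Delhi' is a placeholder):
Graph = processGraph()
Graph.processAll(CSV_Dictionary, 'Delhi')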
def scrapeFacebookPageFeedStatus(page_id1, access_token, since_date, until_date):
    total = 0
    count = 0
    graph = GraphAPI('<access token>')
    events = graph.search(page_id1, 'page', page=True)
    for col in events:
        for rec in col['data']:
            page_id = rec['id']
            # print(page_id)

            with open('{}_facebook_statuses.csv'.format(page_id1), 'w') as file:
                has_next_page = True
                num_processed = 0
                scrape_starttime = datetime.datetime.now()
                after = ''
                base = "https://graph.facebook.com/v2.9"
                node = "/{}/posts".format(page_id)
                node2 = "/{}".format(page_id)
                parameters = "/?limit={}&access_token={}".format(100, access_token)
                since = "&since={}".format(since_date) if since_date != '' else ''
                until = "&until={}".format(until_date) if until_date != '' else ''
                # print("Scraping {} Facebook Page: {}\n".format(page_id, scrape_starttime))

                base_url1 = base + node2 + parameters + "&fields=name,fan_count"
                total_like = json.loads(request_until_succeed(base_url1))
                # print(int(total_like["fan_count"]))
                total = int(total_like["fan_count"])

                while has_next_page:
                    after = '' if after == '' else "&after={}".format(after)
                    base_url = base + node + parameters + after + since + until
                    url = getFacebookPageFeedUrl(base_url)
                    statuses = json.loads(request_until_succeed(url))
                    reactions = getReactionsForStatuses(base_url)

                    for status in statuses['data']:
                        # Ensure it is a status with the expected metadata
                        if 'reactions' in status:
                            status_data = processFacebookPageFeedStatus(status)
                            reactions_data = reactions[status_data[0]]
                            # print(str(status_data[6]))
                            count = count + int(status_data[6])
                            # calculate thankful/pride through algebra
                            num_special = status_data[6] - sum(reactions_data)
                            # w.writerow(status_data + reactions_data + (num_special,))
                        num_processed += 1

                    # if there is no next page, we're done.
                    if 'paging' in statuses:
                        after = statuses['paging']['cursors']['after']
                    else:
                        has_next_page = False

                print("\nDone!\n{} Statuses Processed in {}".format(
                    num_processed, datetime.datetime.now() - scrape_starttime))
                return count, total
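# The scraper above depends on request_until_succeed, which these snippets
# never define. A common minimal version (an assumption modeled on the usual
# page-scraper pattern, not the author's code):
import time
import datetime
import urllib.request

def request_until_succeed(url):
    # Keep retrying until the Graph API returns HTTP 200, backing off 5s
    # between attempts.
    while True:
        try:
            with urllib.request.urlopen(url) as response:
                if response.getcode() == 200:
                    return response.read().decode('utf-8')
        except Exception as e:
            print(e)
            print('Error for URL {}: {}'.format(url, datetime.datetime.now()))
            time.sleep(5)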
def test_search():
    graph = GraphAPI()

    results = graph.search(term='the meaning of life', type='post')

    assert isinstance(results, list)
from facepy import GraphAPI
import requests

# r = requests.get('https://graph.facebook.com/oauth/access_token?'
#                  'client_id=<client id>&client_secret=<client secret>'
#                  '&grant_type=client_credentials')
# access_token = r.text.split('=')[1]
# print access_token

# access key generated in the Graph API explorer:
# https://developers.facebook.com/tools/explorer/
graph = GraphAPI('<access token>')

# EXAMPLES
#print graph.get(path = 'me')
#print graph.get(path = 'me/friends')
#print graph.get(path = 'me?fields=gender,languages,timezone')
#print graph.get(path = 'elpais?fields=about')
#print graph.get(path = 'veronica.tortorella')  # returns an ERROR because you cannot look users up by username

# Trying the requests library instead fails the same way:
#print requests.get('https://graph.facebook.com/v2.2/veronica.tortorella?&access_token=<access token>').json()

print graph.search(term='veronica tortorella', type='user')

# The same kind of search through the requests library:
print requests.get(
    'https://graph.facebook.com/v2.2/search?q=coffee&type=place'
    '&center=37.76,-122.427&distance=1000&access_token=<access token>'
).json()
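# The same place search reads more cleanly with requests' params argument,
# which also handles the URL encoding (token is a placeholder):
print requests.get(
    'https://graph.facebook.com/v2.2/search',
    params={
        'q': 'coffee',
        'type': 'place',
        'center': '37.76,-122.427',
        'distance': 1000,
        'access_token': '<access token>',
    },
).json()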
print("{} Statuses Processed: {}".format( num_processed, datetime.datetime.now())) # if there is no next page, we're done. if 'paging' in statuses: after = statuses['paging']['cursors']['after'] else: has_next_page = False print("\nDone!\n{} Statuses Processed in {}".format( num_processed, datetime.datetime.now() - scrape_starttime)) if __name__ == '__main__': graph = GraphAPI( 'EAAWZBb15sYkIBAOMaLu7xSSdYRSU7nSKsozM05O9ZAIJZB58ZApDfxhbLmKgMsk2Un3X3yEJPtG7zP39jWZBP9rUaGQ3m8meHdQHEqzdGylpDkZB7HGCOOhB9Xq8jIepYJg3GmdEQpE88Ua2SACvdnPcz7PPoewnMZCtziQNSrp9ZBD4IppTnJKlaFkASlA6jQ4ZD' ) events = graph.search('Coleman Powersports- Falls Church', 'page', page=True) for col in events: for rec in col['data']: id1 = rec['id'] print(id1) scrapeFacebookPageFeedStatus(id1, access_token, since_date, until_date)
from selenium import webdriver
from facepy import GraphAPI
import json
import time

startup_fan_pages = {}
access_token = "<access token>"  # get it here: https://developers.facebook.com/tools/explorer/
graph = GraphAPI(access_token)

browser = webdriver.Firefox()
browser.get("http://startupsinnepal.com")
time.sleep(40)  # wait for the browser to completely load the page

startups = browser.find_elements_by_class_name(
    "panel-title")  # returns a list of elements having class="panel-title"
print("startups found")

for startup in startups:
    # print(startup.text)
    r = graph.search(startup.text.lower(), "page", page=False, retry=3)  # page=False returns a dict instead of a generator
    if len(r['data']) > 0:
        startup_fan_pages[r['data'][0]['name']] = str(r['data'][0]['id'])
    # print(startup_fan_pages)

print(startup_fan_pages)
with open('startupsinnepalfanpages.json', 'w') as fp:
    json.dump(startup_fan_pages, fp)
import pandas as pd
from facepy import GraphAPI

graph = GraphAPI('<access token>')
graph.get("me?fields=id,name,age_range,birthday,education,email")  # get your profile information
graph.get('me/friends')  # the friend connections you have
graph.get('me/posts')  # the posts created by you
graph.search(term='diwali', type='event', page=False)  # search events for the term "diwali"

data = "C:/Users/Riya/Desktop/PORTFOLIO.csv"
my_portfolio = pd.read_csv(data)  # reads the csv file
symbol_list = list(my_portfolio["Stock Symbol"])
symbol_list