Example #1
0
def test_search():
    graph = GraphAPI('<access token>')

    mock_request.return_value.content = json.dumps({
        'data': [
            {
                'message': 'I don\'t like your chair.'
            },
            {
                'message': 'Don\'t let your mouth get your ass in trouble.'
            }
        ]
    })

    graph.search(
        term='shaft quotes',
        type='post'
    )

    mock_request.assert_called_with(
        'GET',
        'https://graph.facebook.com/search',
        allow_redirects=True,
        params={
            'q': 'shaft quotes',
            'type': 'post',
            'access_token': '<access token>'
        }
    )
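The test above refers to a module-level `mock_request` that this excerpt never defines. A minimal sketch of how such a fixture could be wired up with unittest.mock; the patch target 'requests.request' is an assumption, since facepy's own test suite may patch a different path:

from unittest import mock

# Hypothetical fixture: stub the HTTP layer so the test never touches the network.
# Patching 'requests.request' is an assumption about how facepy sends requests.
patcher = mock.patch('requests.request')
mock_request = patcher.start()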
Example #2
0
def test_search():
    graph = GraphAPI('<access token>')

    mock_request.return_value.content = json.dumps({
        'data': [
            {
                'message': 'I don\'t like your chair.'
            },
            {
                'message': 'Don\'t let your mouth get your ass in trouble.'
            }
        ]
    })

    graph.search(
        term='shaft quotes',
        type='post'
    )

    mock_request.assert_called_with(
        'GET',
        'https://graph.facebook.com/search',
        allow_redirects=True,
        verify=True,
        timeout=None,
        params={
            'q': 'shaft quotes',
            'type': 'post',
            'access_token': '<access token>'
        }
    )
Example #3
0
def test_search():
    graph = GraphAPI(TEST_USER_ACCESS_TOKEN)

    response.content = json.dumps({
        'data': [
            {
                'message': 'I don\'t like your chair.'
            },
            {
                'message': 'Don\'t let your mouth get your ass in trouble.'
            }
        ]
    })

    # Test a simple search
    graph.search(
        term='shaft quotes',
        type='post'
    )

    mock_request.assert_called_with(
        'GET',
        'https://graph.facebook.com/search',
        allow_redirects=True,
        params={
            'q': 'shaft quotes',
            'type': 'post',
            'access_token': TEST_USER_ACCESS_TOKEN
        }
    )
Example #4
0
def searchFacebook(user):
    """TODO: obtain an authentication token;
    this may require a login implementation.
    """
    graph = GraphAPI(access_token)
    return graph.search(user.name, 'user', page=False, retry=3)
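A hedged usage sketch for the function above; the User stand-in and the token value are placeholders, since the snippet leaves authentication as a TODO:

from collections import namedtuple

# Placeholder stand-ins for objects the snippet assumes exist.
User = namedtuple('User', ['name'])
access_token = '<access token>'

print(searchFacebook(User(name='Jane Doe')))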
Example #5
0
def test_search():
    graph = GraphAPI()
    
    results = graph.search(
        term='the meaning of life',
        type='post'
    )

    assert isinstance(results, list)
Example #6
0
def test_search():
    graph = GraphAPI()

    results = graph.search(
        term='the meaning of life',
        type='post'
    )

    assert isinstance(results['data'], list)
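Examples #5 and #6 assert different shapes for the same call: a bare list versus a dict with a 'data' key. Which shape GraphAPI.search returns depends on the facepy version. A small helper that tolerates both (an assumption for illustration, not facepy API):

def search_data(graph, term, type='post'):
    """Return search results as a list regardless of facepy version."""
    results = graph.search(term=term, type=type)
    if isinstance(results, dict):
        return results.get('data', [])
    return results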
Example #7
0
class FacebookMiner(object):
    page_fields = '?fields=is_community_page,category,category_list,fan_count,hours,link,location,name,name_with_location_descriptor,overall_star_rating,parking,phone,rating_count,single_line_address,store_location_descriptor,website,were_here_count'

    def __init__(self,
                 mine_points,
                 API_KEY,
                 search_rayon=1000,
                 categories=None,
                 _type='place'):
        self.points = mine_points
        self.graph = GraphAPI(API_KEY, version='2.9')
        # Avoid a mutable default argument for `categories`.
        self.categories = categories if categories is not None else ['FOOD_BEVERAGE']
        self.r = search_rayon
        self.dim = len(self.points)
        self._type = _type

    def _mine(self, progress=True):
        if progress:
            self.bar = FillingSquaresBar('Mining:', max=self.dim)
            for p in self.points:
                for pla in self.get_places(p):
                    yield pla
                self.bar.next()
            self.bar.finish()
        else:
            for p in self.points:
                for pla in self.get_places(p):
                    yield pla

    def get_places(self, p):
        c = str(p[0]) + ',' + str(p[1])
        # json.dumps yields the JSON array the Graph API expects for
        # `categories` (assumes `import json`); str() on a Python list
        # would produce single-quoted, invalid JSON.
        nearby_ids = [
            l['id'] for l in self.graph.search(term='',
                                               categories=json.dumps(self.categories),
                                               type=self._type,
                                               center=c,
                                               distance=self.r)['data']
        ]
        for _id in nearby_ids:
            entity = self.graph.get(str(_id) + self.page_fields)
            entity['fb_id'] = entity.pop('id')
            try:
                entity['location']['latitude'] = float(
                    entity['location'].pop('latitude'))
                entity['location']['longitude'] = float(
                    entity['location'].pop('longitude'))
            except Exception:
                pass
            try:
                entity['overall_star_rating'] = float(
                    entity.pop('overall_star_rating'))
            except Exception:
                pass
            yield entity
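A hedged usage sketch for FacebookMiner; the coordinates and API key are placeholders, and FillingSquaresBar is assumed to come from the `progress` package:

# Hypothetical driver code for the class above.
points = [(48.8566, 2.3522), (45.7640, 4.8357)]  # (lat, lon) pairs to mine around
miner = FacebookMiner(points, '<API_KEY>', search_rayon=500)
for place in miner._mine(progress=False):
    print(place.get('name'), place.get('fb_id'))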
Example #8
0
    def format_feed(self):
        graph = GraphAPI()
        response = graph.get(path='oauth/access_token',
                             client_id=self.app_id,
                             client_secret=self.app_secret,
                             grant_type="client_credentials")
        token = parse.urlparse(response).path
        token = token.split("=")[1]
        graph = GraphAPI(token)
        things = graph.search("the", "post")
        print(things)
        listings = []

        for i in range(0, self.num_found):
            self.title = photos_lst[i]['title']
            photo_id = photos_lst[i]['id']
            secret = photos_lst[i]['secret']

            # Photo information
            photo_info = flickr.photos.getInfo(photo_id=photo_id, secret=secret, extras="original_format")
            server_id = str(photo_info["photo"]["server"])
            farm_id = str(photo_info["photo"]["farm"])
            date = str(photo_info["photo"]["dates"]["taken"])
            nsid = str(photo_info["photo"]["owner"]["nsid"])
            ext = photo_info["photo"].get("originalformat", "jpg")
            self.owner = photo_info["photo"]["owner"]["username"]
            self.owner_url = flickr.urls.getUserProfile(user_id=nsid)["user"]["url"]
            self.image_url = photo_info["photo"]["urls"]["url"][0]["_content"]

            # Location information
            # `lon` avoids shadowing the Python 2 builtin `long`
            lat = float(photo_info["photo"]["location"]["latitude"])
            lon = float(photo_info["photo"]["location"]["longitude"])
            neighbourhood = "Somewhere"
            if "neighbourhood" in photo_info["photo"]["location"]:
                neighbourhood = photo_info["photo"]["location"]["neighbourhood"]["_content"]
            region = photo_info["photo"]["location"]["region"]["_content"]
            self.location = neighbourhood + ", " + region

            # URLs
            self.url = "http://farm" + farm_id + ".staticflickr.com/" + server_id + "/" + photo_id + "_" + secret + "." + ext
            self.thumb_url = "http://farm" + farm_id + ".staticflickr.com/" + server_id + "/" + photo_id + "_" + secret + "_n." + ext

            listing = {"occurred_on": date,
                       "latitude": lat,
                       "longitude": lon,
                       "description": self.getDescription(),
                       "category_name": "Flickr Photos (NZ)",
                       "source_id": self.url,
                       "attachment_url": self.url}

            #create a list of dictionaries
            listings.insert(0, listing)
        return listings
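Pulling the token out of the raw 'access_token=...' body with urlparse().path and split('=') is fragile. Assuming the endpoint really returns a query-string-formatted body as the snippet implies, urllib.parse.parse_qs extracts it more robustly:

from urllib import parse

# Assuming `response` holds the raw 'access_token=...' string fetched above:
token = parse.parse_qs(response)['access_token'][0]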
Example #9
0
def test_search():
    graph = GraphAPI("<access token>")

    mock_request.return_value.content = json.dumps(
        {
            "data": [
                {"message": "I don't like your chair."},
                {"message": "Don't let your mouth get your ass in trouble."},
            ]
        }
    )
    mock_request.return_value.status_code = 200

    graph.search(term="shaft quotes", type="post")

    mock_request.assert_called_with(
        "GET",
        "https://graph.facebook.com/search",
        allow_redirects=True,
        verify=True,
        timeout=None,
        params={"q": "shaft quotes", "type": "post", "access_token": "<access token>"},
    )
Example #10
0
from selenium import webdriver
from facepy import GraphAPI
from json import dump
from time import sleep

startup_fan_pages = {}
access_token = "access_token"  # get it here https://developers.facebook.com/tools/explorer/
graph = GraphAPI(access_token)

browser = webdriver.Firefox()
browser.get("http://startupsinnepal.com")  # data source containing listings of startups in Nepal
sleep(40)  # wait for the browser to completely load the page

startups = [
    startup.text.lower() for startup in browser.find_elements_by_class_name("panel-title")
]  # a list containing startups in nepal

for startup in startups:
    r = graph.search(startup, "page", page=False, retry=3)  # page=False returns plain results instead of a generator
    if len(r["data"]) > 0:
        startup_fan_pages[r["data"][0]["name"]] = str(r["data"][0]["id"])
# print(startup_fan_pages)
with open("startupsinnepalfanpages.json", "w") as fp:
    dump(startup_fan_pages, fp)
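find_elements_by_class_name was removed in Selenium 4; under a current Selenium the same lookup would use the By locator:

from selenium.webdriver.common.by import By

# Selenium 4 equivalent of browser.find_elements_by_class_name("panel-title"):
startups = [
    el.text.lower() for el in browser.find_elements(By.CLASS_NAME, "panel-title")
]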
Example #11
0
def index(request):
    if request.user:
        social = request.user.social_auth.get(provider='facebook')
        access_token = social.extra_data['access_token']
        graph = GraphAPI(access_token)

        ids = []
        events = []
        filtered = []
        check = []

        city = graph.get('me?fields=location')['location']['name']
        geolocator = Nominatim(user_agent='facepy-events-example')  # recent geopy versions require a user_agent
        location = geolocator.geocode(city)
        coordinates = '%f,%f' % (location.latitude, location.longitude)

        # queries for places
        places = graph.search('*', 'place', page=False, retry=3, center=coordinates, distance=10000, limit=1000, fields='name')

        # query = '*'
        # url = 'https://graph.facebook.com/search?q=%s&type=place&center=40.110539,-88.228411&distance=5000&limit=1000&fields=name' % query
        # parameters = {'access_token': access_token}
        # r = requests.get(url, params = parameters)
        # places = json.loads(r.text)

        for place in places['data']:
            ids.append(place['id'])

        startIndex = 0
        id_list = []

        for a in range(len(ids) // 50):  # integer division; `/` would yield a float on Python 3
            for b in range(50):
                id_list.append(ids[startIndex + b])
            url = 'https://graph.facebook.com/v2.5/?ids=%s&fields=id,name,cover.fields(id,source),picture.type(large),location,events.fields(id,name,cover.fields(id,source),picture.type(large),description,start_time,attending_count,declined_count,maybe_count,noreply_count)' % ','.join(id_list)
            parameters = {'access_token': access_token}
            r = requests.get(url, params=parameters)
            data = json.loads(r.text)

            for place_id in data:
                if 'events' in data[place_id]:
                    for c in range(len(data[place_id]['events']['data'])):
                        events.append(data[place_id]['events']['data'][c])

            startIndex += 50
            id_list = []

        for i in range(len(ids) % 50):
            id_list.append(ids[startIndex + i])
        check.append(','.join(id_list))
        url = 'https://graph.facebook.com/v2.5/?ids=%s&fields=id,name,cover.fields(id,source),picture.type(large),location,events.fields(id,name,cover.fields(id,source),picture.type(large),description,start_time,attending_count,declined_count,maybe_count,noreply_count)' % ','.join(id_list)
        parameters = {'access_token': access_token}
        r = requests.get(url, params=parameters)
        data = json.loads(r.text)

        for place_id in data:
            if 'events' in data[place_id]:
                for d in range(len(data[place_id]['events']['data'])):
                    events.append(data[place_id]['events']['data'][d])

        count = 0

        for event in events:
            if parse_event(event):
                count += 1
                filtered.append(event)

        return render(request, 'index.html', {'user': request.user, 'events': filtered})

    else:
        return render(request, 'index.html', {'user': request.user})
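The view above walks `ids` in blocks of 50 with two near-identical loops, one for the full blocks and one for the remainder. A small chunking helper (a hypothetical refactor, not part of the original view) would collapse both into a single pass:

def chunks(seq, size=50):
    """Yield successive slices of `seq` no longer than `size`."""
    for start in range(0, len(seq), size):
        yield seq[start:start + size]

# for id_list in chunks(ids):
#     ...build and send one ?ids=<comma-separated batch> request per slice...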
Example #12
0
class processGraph:
    def __init__(self, key=None):
        """
        >>> Graph = processGraph()
        You may initialise Facebook GraphAPI with your own AppID and AppSecret:
        >>> Graph = processGraph("<<App_ID>>|<<App_Secret>>")
        """
        global key_index
        if not key:
            while True:
                self.graph = GraphAPI(KEYS_FB[key_index])
                try:
                    self.graph.search("test", "place")
                    break
                except Exception:
                    key_index = (key_index + 1) % len(KEYS_FB)
        else:
            self.graph = GraphAPI(key)

    def searchPlace(self, row):
        name, city, pin = row['Name'], row['City'].lower(), row['Pincode']
        city = city.lower()
        search_result = self.graph.get(
            "search?q=%s&fields=location&type=place&limit=10" % (name))
        probable = None
        for place in search_result['data']:
            if not 'location' in place:
                continue
            if unicode(pin) == unicode(place['location']['zip']):
                return self.graph.get(
                    place['id'] +
                    "?fields=location,is_verified,description,phone,link,cover,website"
                )
            if city == place['location']['city'].lower():
                probable = place['id']
        if probable:
            return self.graph.get(
                probable +
                "?fields=location,description,is_verified,phone,link,cover,website"
            )
        return dict()

    def _repairDetails(self, row, node):
        if 'description' in node and not row['Details']:
            row['Details'] = node['description']
            #print "Added description "+node['description'][:40]+" to "+row["Name"]+" from facebook"
            return 1
        return 0

    def _repairWebsite(self, row, node):
        if not row['Website']:
            if 'website' in node:
                row['Website'] = node['website']
                #print "Added website "+node['website']+" to "+row["Name"]+" from facebook"
                return 1
        return 0

    def _repairPin(self, row, node):
        if 'location' in node:
            if not row['Pincode'] and 'zip' in node['location']:
                row['Pincode'] = node['location']['zip']
                #print "Added pin "+node['location']['zip']+" to "+row["Name"]+" from facebook"
                return 1
        return 0

    def _repairStreet(self, row, node):
        if 'location' in node:
            if not row['Street Address'] and 'street' in node['location']:
                row['Street Address'] = node['location']['street']
                #print "Added address "+node['location']['street']+" to "+row["Name"]+" from facebook"
                return 1
        return 0

    def _addPage(self, row, node):
        if 'link' in node:
            row['fb_page'] = node['link']
            #print "Added page "+node['link']+" to "+row["Name"]+" from facebook"
            return 1
        return 0

    def _isVerified(self, row, node):
        if 'is_verified' in node:
            if node['is_verified']:
                row['fb_verified'] = 'True'
                return 1
            row['fb_verified'] = 'False'
        return 0

    def _addCover(self, row, node):
        if 'cover' in node:
            row['Images URL'] = node['cover']['source'] + "," + row[
                'Images URL']
            #print "Added cover "+node['cover']['source']+" to "+row["Name"]+" from facebook"
            return 1
        return 0

    def _addPicture(self, row, node):
        if not 'id' in node:
            return 0
        profile_pic = self.graph.get(node['id'] +
                                     "/picture?height=500&width=500&redirect")
        if 'data' in profile_pic:
            if 'url' in profile_pic['data'] and 'is_silhouette' in profile_pic[
                    'data']:
                if not profile_pic['data']['is_silhouette']:
                    row['Images URL'] += profile_pic['data']['url'] + ","
                    return 1
        return 0

    def processSelective(self, rows, selection):
        """
    Available Selections are:
                _repairDetails
                _repairWebsite
                _repairPin
                _repairStreet
                _addPage
                _addCover
                _addPicture
        e.g.
        >>> Graph = processGraph()
        >>> Graph.processSelective(CSV_Dictionary,'_repairDetails')
        """
        stat = 0
        if selection not in dir(self):
            raise ValueError('Unknown selection: %s' % selection)
        method = getattr(self, selection)
        for row in rows:
            try:
                node = self.searchPlace(row)
                stat += method(row, node)
            except Exception:
                logging.exception("Error loading %s from facebook for %s" %
                                  (selection, row['Name']))
        print "New Info Added from Facebook\n%s:%d" % (selection, stat)

    def processAll(self, rows):
        details, link, cover, website, pincode, street, dp, verified = 0, 0, 0, 0, 0, 0, 0, 0  #stats
        for row in rows:
            try:
                node = self.searchPlace(row)
                details += self._repairDetails(row, node)
                website += self._repairWebsite(row, node)
                pincode += self._repairPin(row, node)
                street += self._repairStreet(row, node)
                link += self._addPage(row, node)
                cover += self._addCover(row, node)
                dp += self._addPicture(row, node)
                verified += self._isVerified(row, node)
            except Exception:
                logging.exception(
                    "Error loading information from facebook for " +
                    row['Name'])
        print "New Info Added from Facebook\nDetails:%d Facebook Link:%d Cover:%d \nWebsite:%d Pincode:%d Address:%d Images:%d Verified %d/%d" % (
            details, link, cover, website, pincode, street, dp, verified, link)
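The `while True` key rotation in __init__ above never terminates if every key in KEYS_FB is invalid. A bounded sketch of the same idea, assuming facepy's FacepyError base exception is the failure mode:

from facepy import GraphAPI
from facepy.exceptions import FacepyError

def first_working_graph(keys):
    """Return a GraphAPI client for the first key that can serve a search."""
    for key in keys:
        graph = GraphAPI(key)
        try:
            graph.search('test', 'place')
            return graph
        except FacepyError:
            continue
    raise RuntimeError('no working Facebook API key in KEYS_FB')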
Example #13
0
class processGraph:
    def __init__(self, key=None):
        """
        >>> Graph = processGraph()
        You may initialise Facebook GraphAPI with your own AppID and AppSecret:
        >>> Graph = processGraph("<<App_ID>>|<<App_Secret>>")
        """
        self.key_index = 0
        self.viewFactor = 0
        if not key:
            while True:
                self.graph = GraphAPI(KEYS_FB[self.key_index])
                try:
                    self.graph.search("test", "place")
                    break
                except Exception:
                    self.key_index = (self.key_index + 1) % len(KEYS_FB)
        else:
            self.graph = GraphAPI(key)

        # FOR STATE DATA TO BE USED BY Graph API
        self.state_data_rows = []
        file_name = glob.glob('./state_data/city_state.csv')
        state_file = open(file_name[0], 'r')
        state_reader = csv.DictReader(state_file, dialect=csv.excel)
        self.state_data_rows.extend(state_reader)
        state_file.close()

    def graceful_request(self, url):
        while True:
            try:
                result = self.graph.get(url)
                return result
            except Exception:
                print 'ERROR. CHANGING KEY from index', self.key_index,
                self.key_index = (self.key_index + 1) % len(KEYS_FB)
                print 'to', self.key_index
                self.graph = GraphAPI(KEYS_FB[self.key_index])

    def number_parser(self, x):
        flag_add = False
        numerals = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
        allowed_start_symbols = numerals + ['+']

        ############
        #INITIAL CLEANUP
        x = x.strip()
        idx = 0
        for _ in x:
            if _ in allowed_start_symbols:
                break
            idx += 1
        x = x[idx:]
        #############

        if x.find('+91') == 0 or x.find('91 ') == 0:
            flag_add = True

        word = ''
        phone_number = []
        if flag_add:
            word = list(x[3:])
        else:
            word = list(x)

        non_zero_encountered = False
        for letter in word:
            # REMOVES 0 FROM START OF NUMBERS
            if not non_zero_encountered:
                if letter in numerals[1:]:
                    non_zero_encountered = True

            if non_zero_encountered:
                if letter in numerals:
                    phone_number.append(letter)
        return ''.join(phone_number)

    def website_parser(self, x):
        if not x:
            return ''
        ############
        #INITIAL CLEANUP
        x = x.strip()
        x = x.replace('//www.', '//')
        #############

        filler_flag = False
        fillers = ['/', '#']
        for _ in fillers:
            if _ in x[-1]:
                filler_flag = True
        if filler_flag:
            x = x[:-1]
        return x

    def match_website(self, website, resp):
        if website == self.website_parser(resp):
            return True
        return False

    def match_phone_nos(self, phones, resp):
        # DECREASING SEPARATOR PRIORITY ORDER
        separators = [',', '/', ';', '&']

        resp_nos = []
        sep_found = False
        for separator in separators:
            if resp.find(separator) != -1:
                resp_nos.extend(resp.split(separator))
                sep_found = True
                break
        if not sep_found:
            resp_nos.append(resp)

        for i in range(len(resp_nos)):
            resp_nos[i] = safe_dec_enc(resp_nos[i])

        for x in resp_nos:
            if self.number_parser(x) in phones:
                return True
        return False

    def analyze_prediction(self, row, query, allow_website_match):

        pin = row['Pincode']
        phones = []
        websites = []
        emails = []

        for i in range(1, 6):
            if row['Phone' + str(i)]:
                phones.append(self.number_parser(row['Phone' + str(i)]))
        if row['Website']:
            websites.append(row['Website'].strip())
        if row['Website2']:
            websites.append(row['Website2'].strip())
        if row['Mail']:
            emails.append(row['Mail'].strip())
        if row['Mail2']:
            emails.append(row['Mail2'].strip())

        search_result = self.graceful_request(
            "search?q=%s&fields=location,phone,emails,website&type=place&limit=50"
            % (query))
        for place in search_result['data']:
            if 'location' in place:
                if 'zip' in place['location']:
                    if unicode(pin) == unicode(
                            place['location']['zip']) and unicode(pin):
                        node = self.graceful_request(
                            place['id'] +
                            "?fields=name,location,is_verified,description,phone,link,cover,website,emails"
                        )
                        return node

            if 'phone' in place and phones:
                if self.match_phone_nos(phones, safe_dec_enc(place['phone'])):
                    node = self.graceful_request(
                        place['id'] +
                        "?fields=name,location,is_verified,description,phone,link,cover,website,emails"
                    )
                    return node

            for email in emails:
                if 'emails' in place and email:
                    for x in place['emails']:
                        if email == safe_dec_enc(x):
                            node = self.graceful_request(
                                place['id'] +
                                "?fields=name,location,is_verified,description,phone,link,cover,website,emails"
                            )
                            return node

        # WEBSITE MATCH IS NOT SAFE. HENCE SHOULD BE DONE ONLY IF ABOVE MEASURES FAILS.
        if allow_website_match:
            for website in websites:
                website = self.website_parser(website)
                match = False
                multiple_match = False
                correct_place_id = ''

                for place in search_result['data']:
                    if 'website' in place and website:
                        if self.match_website(website,
                                              safe_dec_enc(place['website'])):
                            if not match:
                                correct_place_id = place['id']
                                match = True
                            else:
                                multiple_match = True
                                break

                if match and not multiple_match:
                    node = self.graceful_request(
                        correct_place_id +
                        "?fields=name,location,is_verified,description,phone,link,cover,website,emails"
                    )
                    return node

        return dict()

    def searchPlace(self, row, state):
        ################
        row['Name'] = safe_dec_enc(row['Name'], True)
        row['Locality'] = safe_dec_enc(row['Locality'], True)
        row['City'] = safe_dec_enc(row['City'], True)
        ################
        self.viewFactor = 0
        node = None

        if row['Locality']:
            query = row['Name'] + ', ' + row['Locality']
            node = self.analyze_prediction(row, query, True)
        if not node and row['City']:
            query = row['Name'] + ', ' + row['City']
            node = self.analyze_prediction(row, query, True)
        if not node:
            query = row['Name'] + ', ' + state
            node = self.analyze_prediction(row, query, True)
        if not node:
            query = row['Name']
            node = self.analyze_prediction(row, query, False)
        return node

    def _repairDetails(self, row, node):
        if 'description' in node and not row['Details']:
            row['Details'] = node['description']
            #print "Added description "+node['description'][:40]+" to "+row["Name"]+" from facebook"
            self.viewFactor += 1
            return 1
        return 0

    def _repairWebsite(self, row, node):
        if not row['Website']:
            if 'website' in node:
                row['Website'] = node['website']
                #print "Added website "+node['website']+" to "+row["Name"]+" from facebook"
                return 1
        return 0

    def _repairPin(self, row, node):
        if 'location' in node:
            if not row['Pincode'] and 'zip' in node['location']:
                row['Pincode'] = node['location']['zip']
                #print "Added pin "+node['location']['zip']+" to "+row["Name"]+" from facebook"
                return 1
        return 0

    def _repairStreet(self, row, node):
        if 'location' in node:
            if not row['Street Address'] and 'street' in node['location']:
                row['Street Address'] = node['location']['street']
                #print "Added address "+node['location']['street']+" to "+row["Name"]+" from facebook"
                return 1
        return 0

    def _addPage(self, row, node):
        if 'link' in node:
            row['fb_page'] = node['link']
            #print "Added page "+node['link']+" to "+row["Name"]+" from facebook"
            self.viewFactor += 1
            return 1
        return 0

    def _isVerified(self, row, node):
        if 'is_verified' in node:
            if node['is_verified']:
                row['fb_verified'] = 'True'
                self.viewFactor += 2
                return 1
            row['fb_verified'] = 'False'
        return 0

    def _addCover(self, row, node):
        if 'cover' in node:
            row['Images URL'] = node['cover']['source'] + "," + row[
                'Images URL']
            #print "Added cover "+node['cover']['source']+" to "+row["Name"]+" from facebook"
            self.viewFactor += 1
            return 1
        return 0

    def _addEmails(self, row, node):
        check = 0
        if 'emails' in node:
            for i in node['emails']:
                if row['Mail'] and i.encode(
                        'utf-8', 'ignore').strip() not in row['Mail'].strip():
                    row['Mail2'] = i
                    return check + 1
                row['Mail'] = i
                check = 1
        return check

    def _addPhone(self, row, node):
        if 'phone' in node:
            ph = map(UTF8, node['phone'].split(','))
            for i in range(1, 6):
                if not row['Phone' + str(i)]:
                    break
            for j, p in zip(range(i + 1, 6), ph):
                row['Phone' + str(j)] = p.strip()
                #print "Added phone "+p.strip()+" in "+'Phone'+str(j)+" from facebook"+str(node['location'])
            return 1
        return 0

    def _addPicture(self, row, node):
        if not 'id' in node:
            return 0
        profile_pic = self.graceful_request(
            node['id'] + "/picture?height=500&width=500&redirect")
        if 'data' in profile_pic:
            if 'url' in profile_pic['data'] and 'is_silhouette' in profile_pic[
                    'data']:
                if not profile_pic['data']['is_silhouette']:
                    if row['Images URL'] != None:
                        row['Images URL'] = profile_pic['data'][
                            'url'] + "," + row['Images URL']
                        self.viewFactor += 2
                        return 1
        return 0

    def processSelective(self, rows, selection, state):
        """
    Available Selections are:
                _repairDetails
                _repairWebsite
                _repairPin
                _repairStreet
                _addPage
                _addCover
                _addPicture
                _addPhone
                _addEmails
        e.g.
        >>> Graph = processGraph()
        >>> Graph.processSelective(CSV_Dictionary, '_repairDetails', state)
        """
        stat = 0
        if selection not in dir(self):
            raise ValueError('Unknown selection: %s' % selection)
        method = getattr(self, selection)
        for row in rows:
            try:
                # searchPlace in this class requires the state argument.
                node = self.searchPlace(row, state)
                stat += method(row, node)
            except Exception:
                logging.exception("Error loading %s from facebook for %s" %
                                  (selection, row['Name']))
        print("New Info Added from Facebook\n%s:%d" % (selection, stat))

    def _addViews(self, row):
        if row['Total Views']:
            row['Total Views'] += self.viewFactor * randint(100, 200)
        else:
            row['Total Views'] = self.viewFactor * randint(100, 200)

    def _nodePhotos(self, row, node):
        if 'id' not in node:
            return
        photos = []
        after = ''
        while True:
            resp = self.graceful_request(
                node['id'] +
                '/photos?type=uploaded&fields=source&limit=10&after=%s' %
                after)
            if 'data' in resp:
                for i in resp['data']:
                    photos.append(i['source'])
            if 'paging' in resp:
                after = resp['paging']['cursors']['after']
                if 'next' not in resp['paging']:
                    break
            else:
                break
            # TO GUARANTEE QUICK TERMINATION
            if len(photos) >= 10:
                break

        row_data = ''
        for photo in photos:
            if row_data:
                row_data += ',' + photo
            else:
                row_data = photo
        row['fb_photos'] = row_data

    def _nodeVideos(self, row, node):
        if 'id' not in node:
            return
        videos = []
        after = ''
        while True:
            resp = self.graceful_request(
                node['id'] +
                '/videos?type=uploaded&fields=source,title,description&limit=10&after=%s'
                % after)
            if 'data' in resp:
                for i in resp['data']:
                    videos.append(i)
            if 'paging' in resp:
                after = resp['paging']['cursors']['after']
                if 'next' not in resp['paging']:
                    break
            else:
                break
            # TO GUARANTEE QUICK TERMINATION
            if len(videos) >= 10:
                break

        row_data = ''
        for video in videos:
            x = ''
            if 'title' in video:
                x = '{"title":"%s","link":"%s"}' % (video['title'].encode(
                    'ascii', 'ignore').replace('"', ''), video['source'])
            elif 'description' in video:
                x = '{"title":"%s","link":"%s"}' % (
                    video['description'].encode('ascii', 'ignore').replace(
                        '"', ''), video['source'])
            else:
                x = '{"title":"%s","link":"%s"}' % ('', video['source'])

            if row_data:
                row_data += ',' + x
            else:
                row_data = x
        row['fb_videos'] = row_data

    def _nodeWorkingHours(self, row, node):
        if 'id' not in node:
            return
        resp = self.graceful_request(node['id'] + '?fields=hours')
        row_data = ''
        if 'hours' in resp:
            try:
                row_data = parseFBWorkHours.parse(resp['hours'])
            except Exception:
                logging.exception("Error parsing Working Hours for" +
                                  row['Name'])

        row['fb_workingHours'] = row_data

    def _nodePosts(self, row, node):
        if 'id' not in node:
            return
        posts = []
        after = ''
        while True:
            # Use cursor-based paging like the photo/video methods; passing the
            # full 'next' URL as a query parameter would not page correctly.
            resp = self.graceful_request(
                node['id'] +
                '/posts?fields=message,type,created_time&limit=90&after=%s' %
                after)
            if 'data' in resp:
                for i in resp['data']:
                    if i['type'] == 'status' and 'message' in i:
                        posts.append(i)
            if 'paging' in resp:
                after = resp['paging']['cursors']['after']
                if 'next' not in resp['paging']:
                    break
            else:
                break
            # TO GUARANTEE QUICK TERMINATION
            if len(posts) >= 10:
                break

        row_data = ''
        for post in posts:
            x = '{"created_time":"%s","message":"%s"}' % (
                post['created_time'], post['message'].encode(
                    'ascii', 'ignore').replace('"', ''))
            if row_data:
                row_data += ',' + x
            else:
                row_data = x
        row['fb_posts'] = row_data

    def _mergeData(self, row):
        # Merge Photos
        if row['fb_photos']:
            if row['Images URL']:
                row['Images URL'] = row['fb_photos'] + ',' + row['Images URL']
            else:
                row['Images URL'] = row['fb_photos']

        if not row['Working Hours'] and row['fb_workingHours']:
            row['Working Hours'] = row['fb_workingHours']

    def processAll(self, rows, state):
        details, link, cover, website, pincode, street, dp, verified, phone, email = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  #stats
        total = len(rows)
        print("\nFetching info from FB Graph")
        print 'STATE : ', state
        for progress, row in enumerate(rows):
            try:
                node = self.searchPlace(row, state)
                details += self._repairDetails(row, node)
                website += self._repairWebsite(row, node)
                pincode += self._repairPin(row, node)
                street += self._repairStreet(row, node)
                link += self._addPage(row, node)
                phone += self._addPhone(row, node)
                email += self._addEmails(row, node)
                verified += self._isVerified(row, node)
                self._addViews(row)

                #self._nodePosts(row,node)
                self._nodeVideos(row, node)
                self._nodePhotos(row, node)
                self._nodeWorkingHours(row, node)

                self._mergeData(row)
                # ENSURES COVER/DP AS THE FIRST PICTURE
                cover += self._addCover(row, node)
                dp += self._addPicture(row, node)

##                pro=int((float(progress)/total)*100) # Comment out to avoid Bad characters in logs
##                sys.stdout.write("\r%d%%"%pro)
##                sys.stdout.flush()
            except Exception:
                logging.exception(
                    "Error loading information from facebook for " +
                    row['Name'])
##        sys.stdout.write("\r100%")
##        sys.stdout.flush()
        print(
            "\nNew Info Added from Facebook\nDetails:%d Facebook Link:%d Cover:%d \nWebsite:%d Pincode:%d Address:%d Images:%d Verified %d/%d Phone:%d Emails:%d"
            % (details, link, cover, website, pincode, street, dp, verified,
               link, phone, email))
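_nodePhotos, _nodeVideos, and _nodePosts above repeat the same cursor-pagination loop. A hypothetical shared helper in the same style (not part of the original class) could back all three:

    def _paginate(self, path, cap=10):
        # Collect up to `cap` items from a cursor-paginated Graph API edge.
        items, after = [], ''
        while len(items) < cap:
            resp = self.graceful_request('%s&after=%s' % (path, after))
            items.extend(resp.get('data', []))
            paging = resp.get('paging', {})
            if 'next' not in paging:
                break
            after = paging['cursors']['after']
        return items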
Example #14
0
def scrapeFacebookPageFeedStatus(page_id1, access_token, since_date,
                                 until_date):
    total = 0
    count = 0
    graph = GraphAPI('<access token>')
    events = graph.search(page_id1, 'page', page=True)

    for col in events:
        for rec in col['data']:
            page_id = rec['id']

    #print(page_id)

    with open('{}_facebook_statuses.csv'.format(page_id1), 'w') as file:

        has_next_page = True
        num_processed = 0
        scrape_starttime = datetime.datetime.now()
        after = ''
        base = "https://graph.facebook.com/v2.9"
        node = "/{}/posts".format(page_id)
        node2 = "/{}".format(page_id)
        parameters = "/?limit={}&access_token={}".format(100, access_token)
        since = "&since={}".format(since_date) if since_date != '' else ''
        until = "&until={}".format(until_date) if until_date != '' else ''

        #print("Scraping {} Facebook Page: {}\n".format(page_id, scrape_starttime))

        base_url1 = base + node2 + parameters + "&fields=name,fan_count"
        total_like = json.loads(request_until_succeed(base_url1))

        #print(int(total_like["fan_count"]))
        total = int(total_like["fan_count"])

        while has_next_page:
            after = '' if after == '' else "&after={}".format(after)
            base_url = base + node + parameters + after + since + until

            url = getFacebookPageFeedUrl(base_url)
            statuses = json.loads(request_until_succeed(url))
            reactions = getReactionsForStatuses(base_url)

            for status in statuses['data']:

                # Ensure it is a status with the expected metadata
                if 'reactions' in status:
                    status_data = processFacebookPageFeedStatus(status)
                    reactions_data = reactions[status_data[0]]
                    #print(str(status_data[6]))

                    count = count + int(status_data[6])
                    # calculate thankful/pride through algebra
                    num_special = status_data[6] - sum(reactions_data)
                    #w.writerow(status_data + reactions_data + (num_special,))

                num_processed += 1

            # if there is no next page, we're done.
            if 'paging' in statuses:
                after = statuses['paging']['cursors']['after']
            else:
                has_next_page = False

        print("\nDone!\n{} Statuses Processed in {}".format(
            num_processed,
            datetime.datetime.now() - scrape_starttime))

        return count, total
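The scraper above leans on request_until_succeed, getFacebookPageFeedUrl, and related helpers that the excerpt never defines. A hedged sketch of what request_until_succeed presumably does (retry until HTTP 200); the original implementation is not shown:

import time
from urllib.request import urlopen

def request_until_succeed(url):
    # Keep retrying until the request returns HTTP 200 (sketch; assumption).
    while True:
        try:
            response = urlopen(url)
            if response.getcode() == 200:
                return response.read().decode('utf-8')
        except Exception as exc:
            print(exc)
            time.sleep(5)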
Example #15
0
def test_search():
    graph = GraphAPI()

    results = graph.search(term='the meaning of life', type='post')

    assert isinstance(results, list)
Example #16
0
from facepy import GraphAPI
import requests

#
# r = requests.get('https://graph.facebook.com/oauth/access_token?client_id=<client id>&client_secret=<client secret>&grant_type=client_credentials')
# access_token = r.text.split('=')[1]
# print access_token

graph = GraphAPI('<access token>')
# access key generated with the Graph API Explorer: https://developers.facebook.com/tools/explorer/

# EXAMPLES
#print graph.get(path = 'me')
#print graph.get(path = 'me/friends')
#print graph.get(path = 'me?fields=gender,languages,timezone')
#print graph.get(path = 'elpais?fields=about ')

#print graph.get(path = 'veronica.tortorella') ## returns an ERROR because you cannot search by username.

# TRYING WITH THE REQUESTS LIBRARY, same thing happens
#print requests.get('https://graph.facebook.com/v2.2/veronica.tortorella?&access_token=<access token>').json()

print graph.search(term='veronica tortorella', type='user')

# TRYING WITH THE REQUESTS LIBRARY
print requests.get(
    'https://graph.facebook.com/v2.2/search?q=coffee&type=place&center=37.76,-122.427&distance=1000&access_token=<access token>'
).json()
Example #17
0
                    print("{} Statuses Processed: {}".format(
                        num_processed, datetime.datetime.now()))

            # if there is no next page, we're done.
            if 'paging' in statuses:
                after = statuses['paging']['cursors']['after']
            else:
                has_next_page = False

        print("\nDone!\n{} Statuses Processed in {}".format(
            num_processed,
            datetime.datetime.now() - scrape_starttime))


if __name__ == '__main__':

    graph = GraphAPI('<access token>')
    events = graph.search('Coleman Powersports- Falls Church',
                          'page',
                          page=True)

    for col in events:
        for rec in col['data']:
            id1 = rec['id']

    print(id1)

    scrapeFacebookPageFeedStatus(id1, access_token, since_date, until_date)
Example #18
0
from selenium import webdriver
from facepy import GraphAPI
import json
import time

startup_fan_pages = {}

access_token = "CAACEdEose0cBAMRSYnOgvGS19ZAmraBuyVQ6lkrqnSJtyKD1oozQFeCYXvIc6IejhPEYRSQ3tptX3ZAhhiHgZCZA8OuBir51mzocHXBO1kpBgcTYHg1moSMwMc1kjy8abfZA3ZCGnpYtpoOyx0mmaruhzVCRi3w6t5KwXN2XIWxCA9h729ZBXmlza2KsNjCaDDeMsssqdaBHSYyn4AwUUqY"  # get it here https://developers.facebook.com/tools/explorer/

graph = GraphAPI(access_token)

browser = webdriver.Firefox()
browser.get("http://startupsinnepal.com")

time.sleep(40)  #wait for the browser to completely load the page

startups = browser.find_elements_by_class_name(
    "panel-title")  #returns a list of objects having class="panel-title"
print("startups found")

for startup in startups:
    #print(startup.text)
    r = graph.search(startup.text.lower(), "page", page=False,
                     retry=3)  # page=False returns plain results instead of a generator
    if len(r['data']) > 0:
        startup_fan_pages[r['data'][0]['name']] = str(r['data'][0]['id'])

print(startup_fan_pages)
with open('startupsinnepalfanpages.json', 'w') as fp:
    json.dump(startup_fan_pages, fp)
Example #19
0
graph = GraphAPI('<access token>')

graph.get("me?fields=id,name,age_range,birthday,education,email")  # get your profile information

graph.get('me/friends')  # number of friend connections you have

graph.get('me/posts')  # top posts created by you

graph.search(term='diwali', type='event', page=False)  # search for the term diwali among events


import pandas as pd

data = "C:/Users/Riya/Desktop/PORTFOLIO.csv"
my_portfolio = pd.read_csv(data)  # reads the csv file

symbol_list = list(my_portfolio["Stock Symbol"])
symbol_list