Esempio n. 1
0
    def event_run(self):
        """Crawl Facebook events and store them as Events and attendance Pulses.

        Works in two phases, printing progress to stdout as it goes:

        1. Iterate ``self.crawl_events(...)`` for a fixed position, radius and
           keyword list. For every yielded ``FBEvent`` whose ``event_added``
           flag is false, create and save an ``Event`` (typename
           ``'facebook_event'``), link it to the ``FBEvent`` and set the flag.
           Already-added events only print a ``.``.
        2. For every ``FBEvent`` with ``attendance_pulses_added`` false, find
           its ``Event`` (the linked one, or a unique match by URL), fetch the
           event members through ``self.graph`` and save one ``Pulse``
           (typename ``'facebook_attendance'``) per id under ``'attending'``.
           The flag is then set whether or not ``'attending'`` was present.
           ``FBEvent`` records with no resolvable ``Event`` are skipped and
           stay unflagged.

        Returns:
            None. All results are persisted through the model ``save()`` calls.
        """
        def emit(text):
            # Write progress and flush straight away so it is visible even
            # when stdout is buffered.
            sys.stdout.write(text)
            sys.stdout.flush()

        # One template for both phases: phase 2 looks Events up by this exact
        # string, so the two phases must never format it differently.
        url_template = 'http://www.facebook.com/event/%s/'

        # Search terms forwarded to crawl_events as custom_keywords.
        # NOTE(review): looks like Warsaw name variants, venue names, generic
        # event words and short Polish words -- confirm how crawl_objects
        # uses them.
        keywords = ['warszawa', 'warsaw', 'warszawie', 'waw', 'hackwaw',
                    'stodola', 'hydrozagadka', 'palladium', 'proxima',
                    'fabryka', 'trzciny', 'sen', 'pszczoly', 'plan', 'arkadia', 'tarasy',
                    'sala', 'kongresowa', 'dom', 'kultury',
                    'festiwal', 'koncert', 'noc', 'impreza', 'party', 'mecz', 'manifestacja',
                    'prezentacja', 'wystawa', 'wystawy', 'projekcja', 'dyskusja', 'debata', 'spotkanie',
                    'w', 'na', 'o', 'we', 'i', 'a', '@']
        # NOTE(review): presumably (latitude, longitude) of central Warsaw and
        # a radius in metres -- confirm against in_boundary / crawl_objects.
        event_iter = self.crawl_events((52.228641, 21.015558), 15000, custom_keywords=keywords)

        # ---- Phase 1: create Event documents for newly seen FBEvents ----
        emit('crawling events\n')

        for evd in event_iter:
            fbev = evd['model']
            if fbev.event_added:
                emit('.')
                continue

            ev = Event()
            ev.title = fbev.name
            ev.typename = 'facebook_event'
            ev.location = fbev.location
            ev.start_time = fbev.start_time
            ev.end_time = fbev.end_time
            ev.description = fbev.description
            ev.image_url = None
            ev.url = url_template % fbev.fbid
            ev.save()

            # Save the Event first so the FBEvent links to a persisted document.
            fbev.event_added = True
            fbev.event = ev
            fbev.save()

            msg = 'added %s, id=%s' % (ev.title, fbev.fbid)
            emit('\n%s\n' % msg)
        emit('\n')

        # ---- Phase 2: create one attendance Pulse per attending user ----
        emit('crawling event attendance\n')

        for fbev in FBEvent.objects(attendance_pulses_added=False):
            event_url = url_template % fbev.fbid
            ev = fbev.event
            if ev is None:
                # Fall back to a URL lookup, accepted only when unambiguous.
                ev_q = Event.objects(url=event_url)
                if len(ev_q) == 1:
                    ev = ev_q[0]
            if not ev:
                # No Event to attach pulses to; leave the flag unset so the
                # record is retried on a later run.
                continue

            result = self.graph.get_event_members(fbev.fbid)
            if 'attending' in result:
                for fbid in result['attending']:
                    p = Pulse()
                    p.title = fbev.name
                    p.typename = 'facebook_attendance'
                    p.event = ev
                    p.data = {'uid': fbid, 'eid': fbev.fbid}
                    p.timestamp_added = datetime.now()
                    p.timestamp_created = fbev.start_time
                    p.location = fbev.location
                    p.url = event_url
                    p.save()
                    sys.stdout.write('.')
                # Flush once per event rather than once per pulse.
                sys.stdout.flush()

            fbev.attendance_pulses_added = True
            if fbev.event is None:
                fbev.event = ev
            fbev.save()
        emit('\n')
Esempio n. 2
0
    def crawl_events(self, position, distance, custom_keywords=None):
        """Yield crawled Facebook events that lie within ``distance`` of ``position``.

        Raw events come from ``self.crawl_objects('event', ...)``. Each one is
        matched against stored ``FBEvent`` records by ``fbid``:

        * Exactly one stored record: its stored location is reused, and the
          event is yielded only if that location exists and passes
          ``self.in_boundary``.
        * Otherwise: the event's details are fetched through ``self.graph``,
          its venue coordinates are read, and a new ``FBEvent`` is saved. The
          record is saved even when the venue is missing or out of range (with
          no location), so it is recognised on later crawls; it is only
          yielded when in range.

        Args:
            position: Reference position passed to ``crawl_objects`` and
                ``in_boundary``.
            distance: Maximum distance from ``position``, passed to the same
                two helpers.
            custom_keywords: Optional keywords forwarded to ``crawl_objects``.

        Yields:
            dict with keys ``'id'``, ``'start_time'``, ``'end_time'`` (``None``
            when the raw event has none), ``'name'``, ``'location'`` and
            ``'model'`` (the ``FBEvent`` document).
        """
        crawl_time = datetime.now()
        graph = ExtendedGraphAPI(self.app_access_token)
        data = self.crawl_objects('event', position, distance, by_place=False,
                                  custom_keywords=custom_keywords, graph=graph)
        for d in data:
            evd = {
                'id': d['id'],
                'start_time': d['start_time'],
                # Treated as optional: one event without an end time must not
                # abort the whole crawl with a KeyError.
                'end_time': d.get('end_time'),
                'name': d['name'],
            }

            q = FBEvent.objects(fbid=evd['id'])
            if len(q) == 1:
                ev = q[0]
                evd['location'] = tuple(ev.location) if ev.location is not None else None
                # Records stored without a location (see the end of this loop)
                # can never be in range; skip them instead of handing None to
                # in_boundary.
                if evd['location'] is not None and self.in_boundary(evd['location'], position, distance):
                    evd['model'] = ev
                    yield evd
                continue

            # Not stored yet (or ambiguous): fetch full details to get the venue.
            # NOTE(review): this uses self.graph, not the app-token ``graph``
            # created above -- confirm that is intentional.
            result = self.graph.get(path=evd['id'])
            try:
                venue = result['venue']
                event_position = (venue['latitude'], venue['longitude'])
                if not self.in_boundary(event_position, position, distance):
                    event_position = None
            except Exception:
                # Deliberately best-effort: a missing or malformed venue, or
                # coordinates in_boundary cannot handle, count as "no usable
                # position".
                event_position = None

            ev = FBEvent()
            ev.fbid = evd['id']
            ev.name = evd['name']

            ev.start_time = dateutil.parser.parse(evd['start_time'])
            if evd['end_time']:
                ev.end_time = dateutil.parser.parse(evd['end_time'])
            ev.add_time = datetime.now()
            ev.last_crawl_time = crawl_time
            ev.attendant_num = 0

            in_range = event_position is not None
            if in_range:
                evd['location'] = event_position
                ev.location = event_position

            # Also save events that did not match, so later crawls find them in
            # the database instead of querying the graph again.
            ev.save()

            if in_range:
                evd['model'] = ev
                yield evd