def event_run(self):
    """Import crawled Facebook events, then record their attendance.

    Phase 1 iterates ``self.crawl_events`` for a fixed centre point and
    distance and creates one ``Event`` for every yielded ``FBEvent`` whose
    ``event_added`` flag is not set yet, linking the two records.

    Phase 2 walks every ``FBEvent`` stored with
    ``attendance_pulses_added=False``, asks
    ``self.graph.get_event_members`` for its members and saves one
    ``Pulse`` per id listed under ``'attending'``.

    Progress is reported on ``sys.stdout`` and flushed after every write.
    Returns ``None``.
    """
    def emit(text):
        # Flush right away so progress is visible while the crawl runs.
        sys.stdout.write(text)
        sys.stdout.flush()

    # Single source for the event URL: phase 2 looks Events up by this
    # exact string, so both phases must build it identically.
    # NOTE(review): Facebook's public event pages are normally served
    # under /events/<id>/ -- confirm whether the singular form is
    # intentional before changing it; stored Event.url values are matched
    # against this format.
    url_template = 'http://www.facebook.com/event/%s/'

    # Search terms handed to crawl_events (Warsaw-related names, event
    # type words and a few very short tokens).
    keywords = [
        'warszawa', 'warsaw', 'warszawie', 'waw', 'hackwaw',
        'stodola', 'hydrozagadka', 'palladium', 'proxima',
        'fabryka', 'trzciny', 'sen', 'pszczoly', 'plan',
        'arkadia', 'tarasy', 'sala', 'kongresowa', 'dom', 'kultury',
        'festiwal', 'koncert', 'noc', 'impreza', 'party', 'mecz',
        'manifestacja', 'prezentacja', 'wystawa', 'wystawy',
        'projekcja', 'dyskusja', 'debata', 'spotkanie',
        'w', 'na', 'o', 'we', 'i', 'a', '@',
    ]
    # presumably (latitude, longitude) of central Warsaw and a radius in
    # metres -- TODO confirm against crawl_events / in_boundary.
    event_iter = self.crawl_events(
        (52.228641, 21.015558), 15000, custom_keywords=keywords)

    # ---- Phase 1: create Event records for newly crawled FBEvents ----
    emit('crawling events\n')
    for evd in event_iter:
        fbev = evd['model']
        if fbev.event_added:
            # Already imported on an earlier run; just tick progress.
            emit('.')
            continue

        ev = Event()
        ev.title = fbev.name
        ev.typename = 'facebook_event'
        ev.location = fbev.location
        ev.start_time = fbev.start_time
        ev.end_time = fbev.end_time
        ev.description = fbev.description
        ev.image_url = None
        ev.url = url_template % fbev.fbid
        ev.save()

        # Link the crawl record to the new Event so it is not re-imported.
        fbev.event_added = True
        fbev.event = ev
        fbev.save()

        msg = 'added %s, id=%s' % (ev.title, fbev.fbid)
        emit('\n%s\n' % msg)
    emit('\n')

    # ---- Phase 2: store one Pulse per attending user ----
    # NOTE(review): the nesting below was reconstructed from a source
    # whose indentation was lost; confirm that the progress dot is meant
    # to be per pulse and that the flag is set even when 'attending' is
    # absent from the response.
    emit('crawling event attendance\n')
    for fbev in FBEvent.objects(attendance_pulses_added=False):
        event_url = url_template % fbev.fbid
        ev = fbev.event
        if ev is None:
            # Fall back to finding the Event by URL; only an unambiguous
            # single match is accepted.
            ev_q = Event.objects(url=event_url)
            if len(ev_q) == 1:
                ev = ev_q[0]
        if not ev:
            # No Event to attach pulses to; leave the flag unset so the
            # record is picked up again on a later run.
            continue

        result = self.graph.get_event_members(fbev.fbid)
        if 'attending' in result:
            for fbid in result['attending']:
                p = Pulse()
                p.title = fbev.name
                p.typename = 'facebook_attendance'
                p.event = ev
                p.data = {'uid': fbid, 'eid': fbev.fbid}
                p.timestamp_added = datetime.now()
                p.timestamp_created = fbev.start_time
                p.location = fbev.location
                p.url = event_url
                p.save()
                emit('.')

        fbev.attendance_pulses_added = True
        if fbev.event is None:
            fbev.event = ev
        fbev.save()
    emit('\n')
def crawl_events(self, position, distance, custom_keywords=None):
    """Generate dicts for crawled events that pass the boundary check.

    Events are obtained from ``self.crawl_objects('event', ...)`` using a
    fresh ``ExtendedGraphAPI`` built from ``self.app_access_token``.

    For each crawled item a dict with the keys ``'id'``, ``'start_time'``,
    ``'end_time'`` and ``'name'`` is built. When exactly one ``FBEvent``
    with that ``fbid`` is already stored, its location is reused;
    otherwise the event is fetched through ``self.graph.get``, a new
    ``FBEvent`` is created and saved, and its venue coordinates (if any)
    are used.

    A dict is yielded, extended with ``'location'`` and ``'model'`` (the
    ``FBEvent``), only when
    ``self.in_boundary(location, position, distance)`` is true. Newly
    seen events that fail the check are still saved, without a location.
    """
    crawl_time = datetime.now()
    graph = ExtendedGraphAPI(self.app_access_token)
    crawled = self.crawl_objects(
        'event', position, distance,
        by_place=False, custom_keywords=custom_keywords, graph=graph)

    for item in crawled:
        evd = dict(
            (field, item[field])
            for field in ('id', 'start_time', 'end_time', 'name'))

        known = FBEvent.objects(fbid=evd['id'])
        if len(known) == 1:
            # Already stored: reuse the saved record and its location.
            stored = known[0]
            if stored.location is None:
                evd['location'] = None
            else:
                evd['location'] = tuple(stored.location)
            if self.in_boundary(evd['location'], position, distance):
                evd['model'] = stored
                yield evd
            continue

        # Not (uniquely) stored yet: look the event up to find its venue.
        details = self.graph.get(path=evd['id'])
        try:
            venue_position = (
                details['venue']['latitude'],
                details['venue']['longitude'],
            )
            if not self.in_boundary(venue_position, position, distance):
                venue_position = None
        except Exception:
            # Missing venue data or a failed boundary check both mean
            # "no usable position".
            venue_position = None

        record = FBEvent()
        record.fbid = evd['id']
        record.name = evd['name']
        record.start_time = dateutil.parser.parse(evd['start_time'])
        record.end_time = dateutil.parser.parse(evd['end_time'])
        record.add_time = datetime.now()
        record.last_crawl_time = crawl_time
        record.attendant_num = 0

        inside = venue_position is not None
        if inside:
            evd['location'] = venue_position
            record.location = venue_position
        # Events outside the boundary are persisted too, just without a
        # location, so they are recognised as known on the next crawl.
        record.save()
        if inside:
            evd['model'] = record
            yield evd