def runTest(self):
    """Both a dance keyword and a dance-class event keyword should match."""
    event_data = dict(info=dict(name="FB Event", description="more stuff here, dance class"))
    classified = event_classifier.get_classified_event(event_data)
    self.assertEqual(set(['dance']), classified.dance_matches())
    self.assertEqual(set(['dance class']), classified.event_matches())
def get(self):
    """Evaluate the soul-line classifier over its labelled test ids and render a report."""
    from dancedeets.nlp.soulline.tests import classifier_test
    test_bench = classifier_test.TestSoulLine()
    # Give the test fixture a real FBLookup so it can fetch cached events.
    test_bench.fbl = fb_api.FBLookup("dummyid", unittest.get_local_access_token_for_testing())
    good_ids = set(classifier_test.GOOD_IDS)
    event_runs = []
    for event_id in classifier_test.GOOD_IDS + classifier_test.BAD_IDS:
        fb_event = test_bench.get_event(event_id)
        classified = event_classifier.get_classified_event(fb_event)
        verdict = classifier_test.FUNC(classified)
        event_runs.append({
            'id': event_id,
            'event': fb_event,
            'desired_result': event_id in good_ids,
            'result': bool(verdict[0]),
            'result_string': verdict[0],
            'reasons': verdict[1],
        })
    false_negatives = [run for run in event_runs if run['desired_result'] and not run['result']]
    false_positives = [run for run in event_runs if run['result'] and not run['desired_result']]
    self.display['false_negatives'] = len(false_negatives)
    self.display['false_positives'] = len(false_positives)
    self.display['vertical'] = 'soul line'
    self.display['event_runs'] = event_runs
    self.render_template('test_nlp_results')
def runTest(self):
    """Battle/jam/cypher keywords count as event matches but not dance matches."""
    event_data = dict(info=dict(
        name="FB Event",
        description="sessions jam battles cyphers dj's"))
    classified = event_classifier.get_classified_event(event_data)
    self.assertEqual(set([]), classified.dance_matches())
    self.assertEqual(set(['jam', 'battles', 'cyphers']), classified.event_matches())
def crawl_event_source(fbl, event_id):
    """Update the Source stats for every source tied to one FB event.

    Fetches the event, (re)creates Source entities from it, then bumps the
    per-source counters: num_real_events for every source, plus
    num_false_negatives for sources whose event the text classifier missed.

    Args:
        fbl: an fb_api lookup object used for all FB fetches.
        event_id: the FB event id to crawl.
    """
    logging.info('Crawling sources for event %s', event_id)
    fb_event = fbl.get(fb_api.LookupEvent, event_id)
    if fb_event['empty']:
        logging.error('No FB Event found: %s', event_id)
        return
    e = eventdata.DBEvent.get_by_id(fb_event['info']['id'])
    thing_db.create_sources_from_event(fbl, e)
    potential_event = potential_events.make_potential_event_without_source(e.fb_event_id)
    if potential_event:
        # BUGFIX: classify only once we know potential_event exists — the old
        # code read potential_event.language before this None-check, which
        # would raise AttributeError in exactly the case the check guards.
        classified_event = event_classifier.get_classified_event(fb_event, potential_event.language)
        for source_id in potential_event.source_ids_only():
            s = thing_db.Source.get_by_key_name(source_id)
            if not s:
                logging.warning("Couldn't find source %s when updating event %s", source_id, e.fb_event_id)
                continue
            # TODO(lambert): doesn't handle the case of the match score increasing from <0 to >0 in the future
            if not classified_event.is_dance_event():
                # A real event the text classifier rejected: false negative.
                s.num_false_negatives = (s.num_false_negatives or 0) + 1
            s.num_real_events = (s.num_real_events or 0) + 1
            s.put()
def get(self):
    """Run the soul-line classifier over its labelled GOOD/BAD test ids and render a report page."""
    from dancedeets.nlp.soulline.tests import classifier_test
    tb = classifier_test.TestSoulLine()
    # Reuse the unittest helper's access token so the fixture can fetch events.
    tb.fbl = fb_api.FBLookup("dummyid", unittest.get_local_access_token_for_testing())
    event_runs = []
    good_ids = set(classifier_test.GOOD_IDS)
    all_ids = classifier_test.GOOD_IDS + classifier_test.BAD_IDS
    for event_id in all_ids:
        fb_event = tb.get_event(event_id)
        classified_event = event_classifier.get_classified_event(fb_event)
        # FUNC returns a (result, reasons) pair; result may be a string, so
        # 'result' is its truthiness and 'result_string' the raw value.
        data = classifier_test.FUNC(classified_event)
        event_runs.append({
            'id': event_id,
            'event': fb_event,
            'desired_result': event_id in good_ids,
            'result': bool(data[0]),
            'result_string': data[0],
            'reasons': data[1],
        })
    # False negative: a known-good id the classifier rejected.
    self.display['false_negatives'] = len([x for x in event_runs if not x['result'] and x['desired_result']])
    # False positive: a known-bad id the classifier accepted.
    self.display['false_positives'] = len([x for x in event_runs if x['result'] and not x['desired_result']])
    self.display['vertical'] = 'soul line'
    self.display['event_runs'] = event_runs
    self.render_template('test_nlp_results')
def _run_classify(self):
    """Run every registered style classifier against this event.

    The street style reuses the pre-computed self.classified_event; every
    other style shares one classified event built here from the same FB
    event and language. Side effects: populates self._reasons and
    self._verticals. Returns an (is_dance, reason_string, verticals) tuple.
    """
    basic_classified_event = event_classifier.get_classified_event(
        self.classified_event.fb_event,
        language=self.classified_event.language,
        classifier_type=event_classifier.ClassifiedEvent
    )
    street_name = street.Style.get_name()
    results = []
    for style_name, classifier in styles.CLASSIFIERS.iteritems():
        if style_name == street_name:
            classified_event = self.classified_event
        else:
            classified_event = basic_classified_event
        # debug=None means only debug on local appengine
        this_classifier = classifier(classified_event, debug=None)
        is_dance_event = this_classifier.is_dance_event()
        if is_dance_event:
            # Keep (verdict, debug info, vertical) for every matching style.
            results.append((is_dance_event, this_classifier.debug_info(), this_classifier.vertical))
    self._reasons = [x[1] for x in results]
    self._verticals = [x[2] for x in results]
    if self._verticals:
        return (True, 'found some:\n%s' % self._reasons, self._verticals)
    return (False, 'nothing', [])
def partition_ids():
    """Split locally-cached potential events into battle / non-battle buckets."""
    rows = processing.all_fb_data([], filename='local_data/PotentialFBEvents.csv')
    for index, (event_id, fb_event) in enumerate(rows):
        classified = event_classifier.get_classified_event(fb_event)
        verdict = event_auto_classifier.is_battle(classified)
        handler = success if verdict[0] else failure
        handler(classified, fb_event, verdict)
def get_classified_event(fb_event, style_name):
    """Classify fb_event, using the full classifier only for the street style.

    Non-street styles get the cheaper BasicClassifiedEvent.
    """
    is_street = style_name == street.Style.get_name()
    chosen_type = (
        event_classifier.ClassifiedEvent
        if is_street
        else event_classifier.BasicClassifiedEvent
    )
    return event_classifier.get_classified_event(fb_event, classifier_type=chosen_type)
def yield_maybe_delete_bad_event(fbl, db_event):
    """Mapreduce mapper: re-verify an auto-added event, fixing or deleting it.

    Yields op.db.Put/Delete operations plus a utf-8 report line for events
    that no longer pass any classifier. Deletion only happens when the
    mapreduce params enable allow_deletes.
    """
    ctx = context.get()
    if ctx:
        params = ctx.mapreduce_spec.mapper.params
        allow_deletes = params['allow_deletes']
    else:
        # No mapreduce context (e.g. local invocation): never delete.
        allow_deletes = False
    # Only re-check events that were added automatically.
    if db_event.creating_method not in [
        eventdata.CM_AUTO_ATTENDEE, eventdata.CM_AUTO
    ]:
        return
    if db_event.fb_event['empty']:
        return
    import datetime
    # This is when we started adding all sorts of "crap"
    if not db_event.creation_time or db_event.creation_time < datetime.datetime(
            2016, 3, 5):
        return
    logging.info('MDBE: Check on event %s: %s', db_event.id, db_event.creating_method)
    from dancedeets.event_scraper import auto_add
    from dancedeets.nlp import event_classifier
    classified_event = event_classifier.get_classified_event(db_event.fb_event)
    good_text_event = auto_add.is_good_event_by_text(db_event.fb_event, classified_event)
    if good_text_event:
        # Still passes the text classifier; upgrade the method if needed.
        if db_event.creating_method != eventdata.CM_AUTO:
            db_event.creating_method = eventdata.CM_AUTO
            yield op.db.Put(db_event)
    else:
        good_event = event_attendee_classifier.is_good_event_by_attendees(
            fbl, db_event.fb_event, classified_event=classified_event)
        if good_event:
            # Passes only the attendee classifier; record that method.
            if db_event.creating_method != eventdata.CM_AUTO_ATTENDEE:
                db_event.creating_method = eventdata.CM_AUTO_ATTENDEE
                yield op.db.Put(db_event)
        else:
            # Passes neither classifier: report, and delete when allowed.
            logging.info('Accidentally %s added event %s: %s: %s', db_event.creating_method, db_event.fb_event_id, db_event.country,
                         db_event.name)
            mr.increment('deleting-bad-event')
            result = '%s: %s: %s: %s\n' % (db_event.fb_event_id, db_event.creating_method, db_event.country, db_event.name)
            yield result.encode('utf-8')
            if allow_deletes:
                from dancedeets.search import search
                search.delete_from_fulltext_search_index(db_event.fb_event_id)
                yield op.db.Delete(db_event)
                display_event = search.DisplayEvent.get_by_id(
                    db_event.fb_event_id)
                if display_event:
                    yield op.db.Delete(display_event)
def yield_maybe_delete_bad_event(fbl, db_event):
    """Mapreduce mapper: re-verify an auto-added event, fixing or deleting it.

    Yields op.db.Put/Delete operations plus a utf-8 report line for events
    that no longer pass any classifier. Deletion only happens when the
    mapreduce params enable allow_deletes.
    """
    ctx = context.get()
    if ctx:
        params = ctx.mapreduce_spec.mapper.params
        allow_deletes = params['allow_deletes']
    else:
        # No mapreduce context (e.g. local invocation): never delete.
        allow_deletes = False
    # Only re-check events that were added automatically.
    if db_event.creating_method not in [eventdata.CM_AUTO_ATTENDEE, eventdata.CM_AUTO]:
        return
    if db_event.fb_event['empty']:
        return
    import datetime
    # This is when we started adding all sorts of "crap"
    if not db_event.creation_time or db_event.creation_time < datetime.datetime(2016, 3, 5):
        return
    logging.info('MDBE: Check on event %s: %s', db_event.id, db_event.creating_method)
    from dancedeets.event_scraper import auto_add
    from dancedeets.nlp import event_classifier
    classified_event = event_classifier.get_classified_event(db_event.fb_event)
    good_text_event = auto_add.is_good_event_by_text(db_event.fb_event, classified_event)
    if good_text_event:
        # Still passes the text classifier; upgrade the method if needed.
        if db_event.creating_method != eventdata.CM_AUTO:
            db_event.creating_method = eventdata.CM_AUTO
            yield op.db.Put(db_event)
    else:
        good_event = event_attendee_classifier.is_good_event_by_attendees(fbl, db_event.fb_event, classified_event=classified_event)
        if good_event:
            # Passes only the attendee classifier; record that method.
            if db_event.creating_method != eventdata.CM_AUTO_ATTENDEE:
                db_event.creating_method = eventdata.CM_AUTO_ATTENDEE
                yield op.db.Put(db_event)
        else:
            # Passes neither classifier: report, and delete when allowed.
            logging.info(
                'Accidentally %s added event %s: %s: %s', db_event.creating_method, db_event.fb_event_id, db_event.country, db_event.name
            )
            mr.increment('deleting-bad-event')
            result = '%s: %s: %s: %s\n' % (db_event.fb_event_id, db_event.creating_method, db_event.country, db_event.name)
            yield result.encode('utf-8')
            if allow_deletes:
                from dancedeets.search import search
                search.delete_from_fulltext_search_index(db_event.fb_event_id)
                yield op.db.Delete(db_event)
                display_event = search.DisplayEvent.get_by_id(db_event.fb_event_id)
                if display_event:
                    yield op.db.Delete(display_event)
def basic_match(fb_event):
    """Classify one event and return the auto-classifier's boolean verdict.

    Prints diagnostics for false positives (accepted events not present in
    training_data.good_ids). Reads full_run, positive_classifier and
    training_data from the enclosing scope -- assumed to be defined at
    module level in this file (not visible here; confirm).
    """
    e = event_classifier.get_classified_event(fb_event)
    if not full_run:
        print e.processed_text.get_tokenized_text()
    if positive_classifier:
        c = event_auto_classifier.is_auto_add_event(e)
        result = c.result
    else:
        result = event_auto_classifier.is_auto_notadd_event(e)
    # classified as good, but not supposed to be in the good set of ids:
    if result[0] and fb_event['info']['id'] not in training_data.good_ids:
        # false positive
        print fb_event['info']['id'], result
    if not full_run:
        print fb_event['info']['id'], result
    return result[0]
def get_matcher(fbl, fb_event, fb_event_attending_maybe=None, classified_event=None):
    """Construct, run, and return an EventAttendeeMatcher for fb_event.

    Missing inputs are fetched/computed on demand. Returns None when the
    attendee data cannot be fetched.
    """
    classified = classified_event
    if classified is None:
        classified = event_classifier.get_classified_event(fb_event)
    event_id = fb_event['info']['id']
    attending = fb_event_attending_maybe
    if attending is None:
        try:
            attending = fbl.get(fb_api.LookupEventAttendingMaybe, event_id)
        except fb_api.NoFetchedDataException:
            logging.info('Event %s could not fetch event attendees, aborting.', event_id)
            return None
    matcher = EventAttendeeMatcher(fb_event, attending, classified)
    matcher.classify()
    return matcher
def basic_match(fb_event):
    """Classify one event and return the auto-classifier's boolean verdict.

    Prints diagnostics for false positives (accepted events not present in
    good_ids). Reads full_run, positive_classifier and good_ids from the
    enclosing scope -- assumed defined at module level (not visible here;
    confirm).
    """
    e = event_classifier.get_classified_event(fb_event)
    if not full_run:
        print e.processed_text.get_tokenized_text()
    if positive_classifier:
        c = event_auto_classifier.is_auto_add_event(e)
        result = c.result
        #result = event_auto_classifier.has_good_djs_title(e)
        #result = event_auto_classifier.is_workshop(e)
    else:
        result = event_auto_classifier.is_auto_notadd_event(e)
        #result = event_auto_classifier.is_bad_classical_dance(e)
    # classified as good, but not supposed to be in the good set of ids:
    if result[0] and fb_event['info']['id'] not in good_ids:
        # false positive
        print fb_event['info']['id'], result
    if not full_run:
        print fb_event['info']['id'], result
    return result[0]
def get_matcher(fbl, fb_event, fb_event_attending_maybe=None, classified_event=None):
    """Construct, run, and return an EventAttendeeMatcher for fb_event.

    classified_event is computed on demand; attendee data is fetched when
    not supplied. Returns None when the attendee lookup fails.
    """
    if classified_event is None:
        classified_event = event_classifier.get_classified_event(fb_event)
    event_id = fb_event['info']['id']
    if fb_event_attending_maybe is None:
        try:
            fb_event_attending_maybe = fbl.get(
                fb_api.LookupEventAttendingMaybe, event_id)
        except fb_api.NoFetchedDataException:
            # Without attendees, the matcher has nothing to match against.
            logging.info('Event %s could not fetch event attendees, aborting.', event_id)
            return None
    matcher = EventAttendeeMatcher(fb_event, fb_event_attending_maybe, classified_event)
    matcher.classify()
    return matcher
def _run_event(self, event_id):
    """Fetch one event, classify it, and apply this fixture's classifier function."""
    event = self.get_event(event_id)
    classified = event_classifier.get_classified_event(event)
    return self.classifier_func(classified)
def get(self):
    """Admin page: list unseen potential events for manual review.

    Queries unreviewed PotentialEvents (scored first, then zero-score ones
    explicitly flagged for display), filters out already-added events,
    classifies each for display, and renders the review template.
    """
    # Optional tri-state filter: '1' -> only past, '0' -> only future, unset -> all.
    past_event = self.request.get('past_event', None)
    if past_event == '1':
        past_event = True
    elif past_event == '0':
        past_event = False
    if past_event is not None:
        past_event_query = 'AND past_event = %s' % past_event
    else:
        past_event_query = ''
    number_of_events = int(self.request.get('number_of_events', '20'))
    unseen_potential_events = list(
        potential_events.PotentialEvent.gql(
            "WHERE looked_at = NULL AND match_score > 0 %s ORDER BY match_score DESC LIMIT %s" % (past_event_query, number_of_events)))
    # Top up with zero-score events explicitly marked for display.
    if len(unseen_potential_events) < number_of_events:
        unseen_potential_events += list(
            potential_events.PotentialEvent.gql(
                "WHERE looked_at = NULL AND match_score = 0 AND show_even_if_no_score = True %s ORDER BY match_score DESC LIMIT %s" %
                (past_event_query, number_of_events - len(unseen_potential_events))))
    potential_event_dict = dict(
        (x.key().name(), x) for x in unseen_potential_events)
    already_added_event_ids = [
        x.string_id() for x in eventdata.DBEvent.get_by_ids(list(potential_event_dict), keys_only=True) if x
    ]
    # construct a list of not-added ids for display, but keep the list of all ids around so we can still mark them as processed down below
    potential_event_notadded_ids = list(
        set(potential_event_dict).difference(already_added_event_ids))
    potential_event_notadded_ids.sort(
        key=lambda x: -(potential_event_dict[x].match_score or 0))
    # Limit to 20 at a time so we don't overwhelm the user.
    non_zero_events = potential_events.PotentialEvent.gql(
        "WHERE looked_at = NULL AND match_score > 0 %s" % past_event_query).count(20000)
    zero_events = potential_events.PotentialEvent.gql(
        "WHERE looked_at = NULL AND match_score = 0 AND show_even_if_no_score = True %s" % past_event_query).count(20000)
    total_potential_events = non_zero_events + zero_events
    has_more_events = total_potential_events > number_of_events
    potential_event_notadded_ids = potential_event_notadded_ids[:number_of_events]
    self.fbl.request_multi(fb_api.LookupEvent, potential_event_notadded_ids)
    # self.fbl.request_multi(fb_api.LookupEventAttending, potential_event_notadded_ids)
    self.finish_preload()
    template_events = []
    for e in potential_event_notadded_ids:
        try:
            fb_event = self.fbl.fetched_data(fb_api.LookupEvent, e)
            fb_event_attending = None  # self.fbl.fetched_data(fb_api.LookupEventAttending, e)
        except KeyError:
            logging.error("Failed to load event id %s", e)
            continue
        if fb_event['empty']:
            continue
        classified_event = event_classifier.get_classified_event(
            fb_event, potential_event_dict[e])
        if classified_event.is_dance_event():
            # Surface the matched keyword sets for the reviewer.
            reason = classified_event.reason()
            dance_words_str = ', '.join(
                list(classified_event.dance_matches()))
            event_words_str = ', '.join(
                list(classified_event.event_matches()))
            wrong_words_str = ', '.join(
                list(classified_event.wrong_matches()))
        else:
            reason = None
            dance_words_str = 'NONE'
            event_words_str = 'NONE'
            wrong_words_str = 'NONE'
        location_info = None  # event_locations.LocationInfo(fb_event, debug=True)
        potential_event_dict[e] = potential_events.update_scores_for_potential_event(
            potential_event_dict[e], fb_event, fb_event_attending)
        template_events.append(
            dict(fb_event=fb_event,
                 classified_event=classified_event,
                 dance_words=dance_words_str,
                 event_words=event_words_str,
                 wrong_words=wrong_words_str,
                 keyword_reason=reason,
                 potential_event=potential_event_dict[e],
                 location_info=location_info))
    # Most-sourced events first.
    template_events = sorted(
        template_events, key=lambda x: -len(x['potential_event'].sources()))
    self.display['number_of_events'] = number_of_events
    self.display['total_potential_events'] = '%s + %s' % (non_zero_events, zero_events)
    self.display['has_more_events'] = has_more_events
    self.display['potential_events_listing'] = template_events
    self.display['potential_ids'] = ','.join(
        already_added_event_ids + potential_event_notadded_ids
    )  # use all ids, since we want to mark already-added ids as processed as well. but only the top N of the potential event ids that we're showing to the user.
    self.display['track_analytics'] = False
    self.render_template('admin_potential_events')
def really_classify_events(fbl, new_pe_list, new_fb_list, allow_posting=True):
    """Classify a batch of FB events and auto-add the ones that pass.

    Each event is checked by the text classifier first, then (if that fails
    and attendee data exists) by the attendee classifier. Accepted events
    are added via add_entities and their PotentialEvent marked as reviewed.
    Returns a list of '+<id>\\t<name>' report lines for added events.
    """
    if not new_pe_list:
        # Allow callers that have FB data but no PotentialEvent records.
        new_pe_list = [None] * len(new_fb_list)
    logging.info('Filtering out already-added events and others, have %s remaining events to run the classifier on', len(new_fb_list))
    fb_event_ids = [x['info']['id'] for x in new_fb_list]
    fb_attending_maybe_list = fbl.get_multi(fb_api.LookupEventAttendingMaybe, fb_event_ids, allow_fail=True)
    results = []
    for pe, fb_event, fb_event_attending_maybe in zip(new_pe_list, new_fb_list, fb_attending_maybe_list):
        event_id = fb_event['info']['id']
        logging.info('Is Good Event By Text: %s: Checking...', event_id)
        # And then classify it appropriately
        classified_event = event_classifier.get_classified_event(fb_event)
        auto_add_result = event_auto_classifier.is_auto_add_event(classified_event)
        logging.info('Is Good Event By Text: %s: %s', event_id, auto_add_result)
        good_event = False
        if auto_add_result.is_good_event():
            good_event = True
            method = eventdata.CM_AUTO
            verticals = auto_add_result.verticals()
        elif fb_event_attending_maybe:
            # Text classifier failed; fall back to the attendee classifier.
            logging.info('Is Good Event By Attendees: %s: Checking...', event_id)
            good_event = event_attendee_classifier.is_good_event_by_attendees(
                fbl, fb_event, fb_event_attending_maybe=fb_event_attending_maybe, classified_event=classified_event
            )
            logging.info('Is Good Event By Attendees: %s: %s', event_id, good_event)
            method = eventdata.CM_AUTO_ATTENDEE
            verticals = [street.Style.get_name()]
        if good_event:
            result = '+%s\n' % '\t'.join((event_id, fb_event['info'].get('name', '')))
            try:
                invite_ids = pe.get_invite_uids() if pe else []
                logging.info('VTFI %s: Adding event %s, due to pe-invite-ids: %s', event_id, event_id, invite_ids)
                e = add_entities.add_update_fb_event(
                    fb_event,
                    fbl,
                    visible_to_fb_uids=invite_ids,
                    creating_method=method,
                    allow_posting=allow_posting,
                    verticals=verticals,
                )
                # Mark the potential event as handled so it won't resurface.
                pe2 = potential_events.PotentialEvent.get_by_key_name(event_id)
                pe2.looked_at = True
                pe2.auto_looked_at = True
                pe2.put()
                # TODO(lambert): handle un-add-able events differently
                results.append(result)
                mr.increment('auto-added-dance-events')
                if e.start_time < datetime.datetime.now():
                    mr.increment('auto-added-dance-events-past')
                    # mr.increment('auto-added-dance-events-past-eventid-%s' % event_id)
                    for vertical in e.verticals:
                        mr.increment('auto-added-dance-event-past-vertical-%s' % vertical)
                else:
                    mr.increment('auto-added-dance-events-future')
                    for vertical in e.verticals:
                        mr.increment('auto-added-dance-event-future-vertical-%s' % vertical)
                for vertical in e.verticals:
                    mr.increment('auto-added-dance-event-vertical-%s' % vertical)
            except fb_api.NoFetchedDataException as e:
                logging.error("Error adding event %s, no fetched data: %s", event_id, e)
            except add_entities.AddEventException as e:
                logging.warning("Error adding event %s, no fetched data: %s", event_id, e)
    return results
def runTest(self):
    """Jam/battle/cypher keywords are event matches, never dance matches."""
    description = "sessions jam battles cyphers dj's"
    classified = event_classifier.get_classified_event(
        dict(info=dict(name="FB Event", description=description)))
    self.assertEqual(set(), classified.dance_matches())
    self.assertEqual({'jam', 'battles', 'cyphers'}, classified.event_matches())
def runTest(self):
    """The Italian word 'poi' must be stripped from the processed text.

    NOTE(review): presumably this avoids confusing the Italian conjunction
    with the 'poi' dance style -- confirm against the classifier rules.
    """
    info = dict(name=u'evento di danza', description=u'prima andiamo qui, poi andiamo lì')
    classified = event_classifier.get_classified_event(dict(info=info))
    self.assertNotIn('poi', classified.processed_text.text)
def yield_cleanup_verticals(fbl, db_event):
    """Mapreduce mapper: recompute an auto-added event's verticals.

    Re-runs the text auto-classifier, preserving an existing STREET vertical,
    and resaves the event when its vertical set changed. Events left with no
    verticals are deleted (when allow_deletes is set), along with any admin
    sources that then own no events.
    """
    ctx = context.get()
    if ctx:
        params = ctx.mapreduce_spec.mapper.params
        allow_deletes = params['allow_deletes']
    else:
        # No mapreduce context: never delete.
        allow_deletes = False
    # Only touch automatically-added events.
    if db_event.creating_method not in [
        eventdata.CM_AUTO_ATTENDEE, eventdata.CM_AUTO
    ]:
        return
    if db_event.fb_event['empty']:
        return
    has_street = 'STREET' in db_event.verticals
    logging.info('Is Good Event By Text: %s: Checking...', db_event.id)
    classified_event = event_classifier.get_classified_event(db_event.fb_event)
    auto_add_result = event_auto_classifier.is_auto_add_event(classified_event)
    logging.info('Is Good Event By Text: %s: %s', db_event.id, auto_add_result)
    verticals = []
    if auto_add_result.is_good_event():
        verticals = auto_add_result.verticals()
    # An existing STREET vertical is sticky (may have come from the attendee classifier).
    if has_street and 'STREET' not in verticals:
        verticals += ['STREET']
    for vertical in set(verticals).difference(db_event.verticals):
        mr.increment('adding-new-vertical-%s' % vertical)
    for vertical in set(db_event.verticals).difference(verticals):
        mr.increment('removing-old-vertical-%s' % vertical)
    old_verticals = db_event.verticals
    db_event.verticals = verticals
    mr.increment('event-resave')
    for vertical in db_event.verticals:
        mr.increment('event-vertical-total-%s' % vertical)
    if db_event.start_time < datetime.datetime.now():
        mr.increment('event-resave-past')
        for vertical in db_event.verticals:
            mr.increment('event-vertical-past-%s' % vertical)
    else:
        mr.increment('event-resave-future')
        for vertical in db_event.verticals:
            mr.increment('event-vertical-future-%s' % vertical)
    changed = set(old_verticals) != set(verticals)
    if verticals:
        # Still a dance event: persist only if the vertical set changed.
        if changed:
            db_event.put()
    else:
        # No verticals survived: this event no longer qualifies.
        admin_ids = [admin['id'] for admin in db_event.admins]
        if allow_deletes:
            db_event.key.delete()
            mr.increment('deleting-bad-event')
        # Garbage-collect sources whose admins no longer own any events.
        sources = thing_db.Source.get_by_key_name(admin_ids)
        for source in sources:
            if not source:
                continue
            num_events = eventdata.DBEvent.query(
                eventdata.DBEvent.admin_fb_uids == source.graph_id).count(1000)
            if num_events == 0:
                if allow_deletes:
                    source.delete()
                    mr.increment('deleting-bad-source')
def really_classify_events(fbl, new_pe_list, new_fb_list, allow_posting=True):
    """Classify a batch of FB events and auto-add the ones that pass.

    Text classifier first; attendee classifier as fallback when attendee
    data is available. Accepted events are added via add_entities and their
    PotentialEvent marked as reviewed. Returns '+<id>\\t<name>' report lines.
    """
    if not new_pe_list:
        # Allow callers that have FB data but no PotentialEvent records.
        new_pe_list = [None] * len(new_fb_list)
    logging.info(
        'Filtering out already-added events and others, have %s remaining events to run the classifier on', len(new_fb_list))
    fb_event_ids = [x['info']['id'] for x in new_fb_list]
    fb_attending_maybe_list = fbl.get_multi(fb_api.LookupEventAttendingMaybe, fb_event_ids, allow_fail=True)
    results = []
    for pe, fb_event, fb_event_attending_maybe in zip(new_pe_list, new_fb_list, fb_attending_maybe_list):
        event_id = fb_event['info']['id']
        logging.info('Is Good Event By Text: %s: Checking...', event_id)
        # And then classify it appropriately
        classified_event = event_classifier.get_classified_event(fb_event)
        auto_add_result = event_auto_classifier.is_auto_add_event(
            classified_event)
        logging.info('Is Good Event By Text: %s: %s', event_id, auto_add_result)
        good_event = False
        if auto_add_result.is_good_event():
            good_event = True
            method = eventdata.CM_AUTO
            verticals = auto_add_result.verticals()
        elif fb_event_attending_maybe:
            # Text classifier failed; fall back to the attendee classifier.
            logging.info('Is Good Event By Attendees: %s: Checking...', event_id)
            good_event = event_attendee_classifier.is_good_event_by_attendees(
                fbl, fb_event, fb_event_attending_maybe=fb_event_attending_maybe, classified_event=classified_event)
            logging.info('Is Good Event By Attendees: %s: %s', event_id, good_event)
            method = eventdata.CM_AUTO_ATTENDEE
            verticals = [street.Style.get_name()]
        if good_event:
            result = '+%s\n' % '\t'.join(
                (event_id, fb_event['info'].get('name', '')))
            try:
                invite_ids = pe.get_invite_uids() if pe else []
                logging.info(
                    'VTFI %s: Adding event %s, due to pe-invite-ids: %s',
                    event_id, event_id, invite_ids)
                e = add_entities.add_update_fb_event(
                    fb_event,
                    fbl,
                    visible_to_fb_uids=invite_ids,
                    creating_method=method,
                    allow_posting=allow_posting,
                    verticals=verticals,
                )
                # Mark the potential event as handled so it won't resurface.
                pe2 = potential_events.PotentialEvent.get_by_key_name(event_id)
                pe2.looked_at = True
                pe2.auto_looked_at = True
                pe2.put()
                # TODO(lambert): handle un-add-able events differently
                results.append(result)
                mr.increment('auto-added-dance-events')
                if e.start_time < datetime.datetime.now():
                    mr.increment('auto-added-dance-events-past')
                    # mr.increment('auto-added-dance-events-past-eventid-%s' % event_id)
                    for vertical in e.verticals:
                        mr.increment(
                            'auto-added-dance-event-past-vertical-%s' % vertical)
                else:
                    mr.increment('auto-added-dance-events-future')
                    for vertical in e.verticals:
                        mr.increment(
                            'auto-added-dance-event-future-vertical-%s' % vertical)
                for vertical in e.verticals:
                    mr.increment('auto-added-dance-event-vertical-%s' % vertical)
            except fb_api.NoFetchedDataException as e:
                logging.error("Error adding event %s, no fetched data: %s", event_id, e)
            except add_entities.AddEventException as e:
                logging.warning("Error adding event %s, no fetched data: %s", event_id, e)
    return results
def yield_cleanup_verticals(fbl, db_event):
    """Mapreduce mapper: recompute an auto-added event's verticals.

    Re-runs the text auto-classifier, preserving an existing STREET vertical,
    and resaves the event when its vertical set changed. Events left with no
    verticals are deleted (when allow_deletes is set), along with any admin
    sources that then own no events.
    """
    ctx = context.get()
    if ctx:
        params = ctx.mapreduce_spec.mapper.params
        allow_deletes = params['allow_deletes']
    else:
        # No mapreduce context: never delete.
        allow_deletes = False
    # Only touch automatically-added events.
    if db_event.creating_method not in [eventdata.CM_AUTO_ATTENDEE, eventdata.CM_AUTO]:
        return
    if db_event.fb_event['empty']:
        return
    has_street = 'STREET' in db_event.verticals
    logging.info('Is Good Event By Text: %s: Checking...', db_event.id)
    classified_event = event_classifier.get_classified_event(db_event.fb_event)
    auto_add_result = event_auto_classifier.is_auto_add_event(classified_event)
    logging.info('Is Good Event By Text: %s: %s', db_event.id, auto_add_result)
    verticals = []
    if auto_add_result.is_good_event():
        verticals = auto_add_result.verticals()
    # An existing STREET vertical is sticky (may have come from the attendee classifier).
    if has_street and 'STREET' not in verticals:
        verticals += ['STREET']
    for vertical in set(verticals).difference(db_event.verticals):
        mr.increment('adding-new-vertical-%s' % vertical)
    for vertical in set(db_event.verticals).difference(verticals):
        mr.increment('removing-old-vertical-%s' % vertical)
    old_verticals = db_event.verticals
    db_event.verticals = verticals
    mr.increment('event-resave')
    for vertical in db_event.verticals:
        mr.increment('event-vertical-total-%s' % vertical)
    if db_event.start_time < datetime.datetime.now():
        mr.increment('event-resave-past')
        for vertical in db_event.verticals:
            mr.increment('event-vertical-past-%s' % vertical)
    else:
        mr.increment('event-resave-future')
        for vertical in db_event.verticals:
            mr.increment('event-vertical-future-%s' % vertical)
    changed = set(old_verticals) != set(verticals)
    if verticals:
        # Still a dance event: persist only if the vertical set changed.
        if changed:
            db_event.put()
    else:
        # No verticals survived: this event no longer qualifies.
        admin_ids = [admin['id'] for admin in db_event.admins]
        if allow_deletes:
            db_event.key.delete()
            mr.increment('deleting-bad-event')
        # Garbage-collect sources whose admins no longer own any events.
        sources = thing_db.Source.get_by_key_name(admin_ids)
        for source in sources:
            if not source:
                continue
            num_events = eventdata.DBEvent.query(eventdata.DBEvent.admin_fb_uids == source.graph_id).count(1000)
            if num_events == 0:
                if allow_deletes:
                    source.delete()
                    mr.increment('deleting-bad-source')
def runTest(self):
    """The Italian word 'poi' must not survive into the processed text.

    NOTE(review): presumably this keeps the Italian conjunction from
    matching the 'poi' dance style -- confirm against the classifier rules.
    """
    fb_event = dict(
        info=dict(name=u'evento di danza', description=u'prima andiamo qui, poi andiamo lì'))
    classified_event = event_classifier.get_classified_event(fb_event)
    self.assertNotIn('poi', classified_event.processed_text.text)
def get(self):
    """Admin event-edit page: show everything we know about one FB event.

    Resolves the event id from the request, loads FB + datastore data, runs
    the text and attendee classifiers plus location lookups, and renders a
    debugging/editing template with all of the intermediate results.
    """
    event_id = None
    if self.request.get('event_url'):
        event_id = urls.get_event_id_from_url(
            self.request.get('event_url'))
    elif self.request.get('event_id'):
        event_id = self.request.get('event_id')
    self.finish_preload()
    fb_event = get_fb_event(self.fbl, event_id)
    if not fb_event:
        logging.error('No fetched data for %s, showing error page', event_id)
        return self.show_barebones_page(event_id, "No fetched data")
    e = eventdata.DBEvent.get_by_id(event_id)
    if not fb_events.is_public_ish(fb_event):
        if e:
            # Fall back to the copy we saved when the event was still visible.
            fb_event = e.fb_event
        else:
            self.add_error(
                'Cannot add secret/closed events to dancedeets!')
            self.errors_are_fatal()
    owner_location = None
    if 'owner' in fb_event['info']:
        owner_id = fb_event['info']['owner']['id']
        # Try the owner as a profile first, then as a page.
        location = self._get_location(owner_id, fb_api.LookupProfile, 'profile') or self._get_location(
            owner_id, fb_api.LookupThingPage, 'info')
        if location:
            owner_location = event_locations.city_for_fb_location(location)
    self.display['owner_location'] = owner_location
    display_event = search.DisplayEvent.get_by_id(event_id)
    # Don't insert object until we're ready to save it...
    if e and e.creating_fb_uid:
        #STR_ID_MIGRATE
        creating_user = self.fbl.get(fb_api.LookupProfile, str(e.creating_fb_uid))
        if creating_user.get('empty'):
            logging.warning(
                'Have creating-user %s...but it is not publicly visible, so treating as None: %s', e.creating_fb_uid, creating_user)
            creating_user = None
    else:
        creating_user = None
    potential_event = potential_events.make_potential_event_without_source(
        event_id)
    a = time.time()
    classified_event = event_classifier.get_classified_event(
        fb_event, potential_event.language)
    timelog.log_time_since('Running BasicText Classifier', a)
    self.display['classified_event'] = classified_event
    dance_words_str = ', '.join(list(classified_event.dance_matches()))
    if classified_event.is_dance_event():
        event_words_str = ', '.join(list(classified_event.event_matches()))
    else:
        event_words_str = 'NONE'
    self.display['classifier_dance_words'] = dance_words_str
    self.display['classifier_event_words'] = event_words_str
    self.display['creating_user'] = creating_user
    self.display['potential_event'] = potential_event
    self.display['display_event'] = display_event
    start = time.time()
    add_result = event_auto_classifier.is_auto_add_event(classified_event)
    notadd_result = event_auto_classifier.is_auto_notadd_event(
        classified_event, auto_add_result=add_result)
    timelog.log_time_since('Running Text Classifier', start)
    auto_classified = ''
    if add_result.is_good_event():
        auto_classified += 'add: %s.\n' % add_result
    if notadd_result[0]:
        auto_classified += 'notadd: %s.\n' % notadd_result[1]
    self.display['auto_classified_add'] = add_result
    self.display['auto_classified_notadd'] = notadd_result
    styles = categories.find_styles(fb_event)
    event_types = styles + categories.find_event_types(fb_event)
    self.display['auto_categorized_types'] = ', '.join(
        x.public_name for x in event_types)
    a = time.time()
    fb_event_attending_maybe = get_fb_event(
        self.fbl, event_id, lookup_type=fb_api.LookupEventAttendingMaybe)
    timelog.log_time_since('Loading FB Event Attending Data', a)
    a = time.time()
    location_info = event_locations.LocationInfo(
        fb_event, fb_event_attending_maybe=fb_event_attending_maybe, db_event=e, debug=True)
    self.display['location_info'] = location_info
    if location_info.fb_address:
        fb_geocode = gmaps_api.lookup_address(location_info.fb_address)
        self.display['fb_geocoded_address'] = formatting.format_geocode(
            fb_geocode)
    else:
        self.display['fb_geocoded_address'] = ''
    city_name = 'Unknown'
    if location_info.geocode:
        city = cities_db.get_nearby_city(
            location_info.geocode.latlng(), country=location_info.geocode.country())
        if city:
            city_name = city.display_name()
    self.display['ranking_city_name'] = city_name
    person_ids = fb_events.get_event_attendee_ids(fb_event_attending_maybe)
    if location_info.geocode:
        data = person_city.get_data_fields(person_ids, location_info.geocode.latlng())
        self.display['attendee_distance_info'] = data
    else:
        self.display['attendee_distance_info'] = 'Unknown'
    matcher = event_attendee_classifier.get_matcher(
        self.fbl, fb_event, fb_event_attending_maybe=fb_event_attending_maybe, classified_event=classified_event)
    timelog.log_time_since('Running Attendee Classifier', a)
    # print '\n'.join(matcher.results)
    # Show the attendee-overlap matches, largest overlap first.
    sorted_matches = sorted(matcher.matches, key=lambda x: -len(x.overlap_ids))
    matched_overlap_ids = sorted_matches[0].overlap_ids if matcher.matches else []
    self.display['auto_add_attendee_ids'] = sorted(matched_overlap_ids)
    self.display['overlap_results'] = [
        '%s %s: %s' % (x.top_n, x.name, x.reason) for x in sorted_matches
    ]
    self.display['overlap_attendee_ids'] = sorted(matcher.overlap_ids)
    if matcher.matches:
        attendee_ids_to_admin_hash_and_event_ids = sorted_matches[0].get_attendee_lookups()
        self.display['attendee_ids_to_admin_hash_and_event_ids'] = attendee_ids_to_admin_hash_and_event_ids
    self.display['event'] = e
    self.display['event_id'] = event_id
    self.display['fb_event'] = fb_event
    self.jinja_env.filters['highlight_keywords'] = event_classifier.highlight_keywords
    self.display['track_analytics'] = False
    self.render_template('admin_edit')