def runTest(self):
    # A description containing "dance class" is expected to produce one
    # dance keyword match and one event keyword match.
    event_info = dict(name="FB Event",
                      description="more stuff here, dance class")
    classified = event_classifier.get_classified_event(dict(info=event_info))

    expected_dance = set(['dance'])
    expected_event = set(['dance class'])
    self.assertEqual(expected_dance, classified.dance_matches())
    self.assertEqual(expected_event, classified.event_matches())
    def get(self):
        """Run the soul line classifier over its test ids and render the outcome.

        Every id in ``classifier_test.GOOD_IDS`` and ``classifier_test.BAD_IDS``
        is fetched, classified and passed to ``classifier_test.FUNC``. The
        per-event results plus false positive/negative counts are stored in
        ``self.display`` and shown with the ``test_nlp_results`` template.
        """
        from dancedeets.nlp.soulline.tests import classifier_test

        test_case = classifier_test.TestSoulLine()
        test_case.fbl = fb_api.FBLookup(
            "dummyid", unittest.get_local_access_token_for_testing())

        # Ids the classifier is expected to accept.
        expected_good = set(classifier_test.GOOD_IDS)

        event_runs = []
        for event_id in classifier_test.GOOD_IDS + classifier_test.BAD_IDS:
            fb_event = test_case.get_event(event_id)
            outcome = classifier_test.FUNC(
                event_classifier.get_classified_event(fb_event))
            run = {
                'id': event_id,
                'event': fb_event,
                'desired_result': event_id in expected_good,
                'result': bool(outcome[0]),
                'result_string': outcome[0],
                'reasons': outcome[1],
            }
            event_runs.append(run)

        # Wanted but rejected / accepted but not wanted.
        missed = sum(
            1 for run in event_runs
            if run['desired_result'] and not run['result'])
        spurious = sum(
            1 for run in event_runs
            if run['result'] and not run['desired_result'])

        self.display['false_negatives'] = missed
        self.display['false_positives'] = spurious
        self.display['vertical'] = 'soul line'
        self.display['event_runs'] = event_runs
        self.render_template('test_nlp_results')
 def runTest(self):
     # Event-type words alone (jam, battles, cyphers) should register as
     # event matches without producing any dance keyword match.
     event_info = dict(
         name="FB Event", description="sessions jam battles cyphers dj's")
     classified = event_classifier.get_classified_event(dict(info=event_info))

     no_dance_words = set([])
     expected_event_words = set(['jam', 'battles', 'cyphers'])
     self.assertEqual(no_dance_words, classified.dance_matches())
     self.assertEqual(expected_event_words, classified.event_matches())
Exemple #4
0
def crawl_event_source(fbl, event_id):
    """Create sources for an event and update per-source statistics.

    Fetches the Facebook event, creates sources from the stored DBEvent and
    then, for each source id attached to the event's potential-event record,
    increments ``num_real_events`` (and ``num_false_negatives`` when the
    classifier does not consider the event a dance event).

    Args:
        fbl: lookup object used to fetch the event; also passed to
            ``thing_db.create_sources_from_event``.
        event_id: id of the Facebook event to process.

    Returns:
        None. Returns early when the fetched event is marked ``empty`` or
        when no potential event is available.
    """
    logging.info('Crawling sources for event %s', event_id)
    fb_event = fbl.get(fb_api.LookupEvent, event_id)
    if fb_event['empty']:
        logging.error('No FB Event found: %s', event_id)
        return
    # NOTE(review): get_by_id presumably returns None for an unknown id, in
    # which case e.fb_event_id below would fail -- confirm against callers.
    e = eventdata.DBEvent.get_by_id(fb_event['info']['id'])
    thing_db.create_sources_from_event(fbl, e)

    potential_event = potential_events.make_potential_event_without_source(
        e.fb_event_id)
    if not potential_event:
        # Check before touching potential_event.language: the classification
        # is only needed for the per-source bookkeeping below, and reading
        # the attribute off a missing potential event would raise.
        return

    classified_event = event_classifier.get_classified_event(
        fb_event, potential_event.language)
    for source_id in potential_event.source_ids_only():
        s = thing_db.Source.get_by_key_name(source_id)
        if not s:
            logging.warning(
                "Couldn't find source %s when updating event %s",
                source_id, e.fb_event_id)
            continue
        # TODO(lambert): doesn't handle the case of the match score increasing from <0 to >0 in the future
        if not classified_event.is_dance_event():
            s.num_false_negatives = (s.num_false_negatives or 0) + 1
        s.num_real_events = (s.num_real_events or 0) + 1
        s.put()
    def get(self):
        """Evaluate the soul line classifier on its test ids and show a report.

        Classifies each id from ``classifier_test.GOOD_IDS`` followed by
        ``classifier_test.BAD_IDS``, records whether the classifier result
        agrees with the expected one, and renders ``test_nlp_results``.
        """
        from dancedeets.nlp.soulline.tests import classifier_test

        harness = classifier_test.TestSoulLine()
        harness.fbl = fb_api.FBLookup(
            "dummyid", unittest.get_local_access_token_for_testing())

        wanted_ids = set(classifier_test.GOOD_IDS)
        ids_to_check = classifier_test.GOOD_IDS + classifier_test.BAD_IDS

        event_runs = []
        for event_id in ids_to_check:
            fb_event = harness.get_event(event_id)
            classified = event_classifier.get_classified_event(fb_event)
            verdict = classifier_test.FUNC(classified)
            event_runs.append({
                'id': event_id,
                'event': fb_event,
                'desired_result': event_id in wanted_ids,
                'result': bool(verdict[0]),
                'result_string': verdict[0],
                'reasons': verdict[1],
            })

        # Expected to match but did not.
        false_negatives = [
            run for run in event_runs
            if run['desired_result'] and not run['result']
        ]
        # Matched but was not expected to.
        false_positives = [
            run for run in event_runs
            if run['result'] and not run['desired_result']
        ]

        self.display['false_negatives'] = len(false_negatives)
        self.display['false_positives'] = len(false_positives)
        self.display['vertical'] = 'soul line'
        self.display['event_runs'] = event_runs
        self.render_template('test_nlp_results')
    def _run_classify(self):
        """Run every style classifier and collect the ones that match.

        The street style classifier is given ``self.classified_event``; all
        other styles share one event freshly classified from the same
        fb_event and language. Sets ``self._reasons`` and ``self._verticals``
        for the matching classifiers.

        Returns:
            ``(True, message, verticals)`` when at least one classifier
            matched, otherwise ``(False, 'nothing', [])``.
        """
        shared_event = event_classifier.get_classified_event(
            self.classified_event.fb_event,
            language=self.classified_event.language,
            classifier_type=event_classifier.ClassifiedEvent,
        )
        street_name = street.Style.get_name()

        reasons = []
        verticals = []
        for style_name, classifier in styles.CLASSIFIERS.iteritems():
            if style_name == street_name:
                event_for_style = self.classified_event
            else:
                event_for_style = shared_event
            # debug=None means only debug on local appengine
            this_classifier = classifier(event_for_style, debug=None)
            if not this_classifier.is_dance_event():
                continue
            reasons.append(this_classifier.debug_info())
            verticals.append(this_classifier.vertical)

        # Published only after the whole loop has completed.
        self._reasons = reasons
        self._verticals = verticals

        if not verticals:
            return (False, 'nothing', [])
        return (True, 'found some:\n%s' % reasons, verticals)
def partition_ids():
    """Run the battle classifier over the local potential-events CSV.

    Each event read from ``local_data/PotentialFBEvents.csv`` is classified
    and handed to ``success`` or ``failure`` depending on the first element
    of the ``is_battle`` result.
    """
    rows = processing.all_fb_data(
        [], filename='local_data/PotentialFBEvents.csv')
    for _event_id, fb_event in rows:
        classified = event_classifier.get_classified_event(fb_event)
        battle_result = event_auto_classifier.is_battle(classified)
        report = success if battle_result[0] else failure
        report(classified, fb_event, battle_result)
Exemple #8
0
def get_classified_event(fb_event, style_name):
    """Classify ``fb_event`` with the classifier type suited to ``style_name``.

    The street style uses ``event_classifier.ClassifiedEvent``; every other
    style uses ``event_classifier.BasicClassifiedEvent``.

    Returns:
        The object returned by ``event_classifier.get_classified_event``.
    """
    is_street = style_name == street.Style.get_name()
    classifier_type = (
        event_classifier.ClassifiedEvent
        if is_street else event_classifier.BasicClassifiedEvent)
    return event_classifier.get_classified_event(
        fb_event, classifier_type=classifier_type)
def yield_maybe_delete_bad_event(fbl, db_event):
    """Re-check an auto-added event; yield ops to relabel or delete it.

    Only events whose ``creating_method`` is CM_AUTO or CM_AUTO_ATTENDEE,
    that have non-empty fb_event data and were created on or after
    2016-03-05, are examined. The event is re-checked by text and, failing
    that, by attendees.

    Yields:
        ``op.db.Put`` when the creating method had to be corrected; for an
        event that fails both checks, an utf-8 encoded summary line followed
        (only when the mapper param ``allow_deletes`` is set) by
        ``op.db.Delete`` operations for the event and its display event.
    """
    ctx = context.get()
    # Without a mapreduce context, never delete.
    allow_deletes = (
        ctx.mapreduce_spec.mapper.params['allow_deletes'] if ctx else False)

    auto_methods = (eventdata.CM_AUTO_ATTENDEE, eventdata.CM_AUTO)
    if db_event.creating_method not in auto_methods:
        return

    if db_event.fb_event['empty']:
        return

    import datetime
    # This is when we started adding all sorts of "crap"
    cutoff = datetime.datetime(2016, 3, 5)
    if not db_event.creation_time or db_event.creation_time < cutoff:
        return

    logging.info('MDBE: Check on event %s: %s', db_event.id,
                 db_event.creating_method)
    from dancedeets.event_scraper import auto_add
    from dancedeets.nlp import event_classifier
    classified_event = event_classifier.get_classified_event(db_event.fb_event)

    if auto_add.is_good_event_by_text(db_event.fb_event, classified_event):
        # Good by text: make sure it is labelled as such.
        if db_event.creating_method != eventdata.CM_AUTO:
            db_event.creating_method = eventdata.CM_AUTO
            yield op.db.Put(db_event)
        return

    good_by_attendees = event_attendee_classifier.is_good_event_by_attendees(
        fbl, db_event.fb_event, classified_event=classified_event)
    if good_by_attendees:
        if db_event.creating_method != eventdata.CM_AUTO_ATTENDEE:
            db_event.creating_method = eventdata.CM_AUTO_ATTENDEE
            yield op.db.Put(db_event)
        return

    # Neither check accepts the event any more.
    logging.info('Accidentally %s added event %s: %s: %s',
                 db_event.creating_method, db_event.fb_event_id,
                 db_event.country, db_event.name)
    mr.increment('deleting-bad-event')
    summary = '%s: %s: %s: %s\n' % (db_event.fb_event_id,
                                    db_event.creating_method,
                                    db_event.country, db_event.name)
    yield summary.encode('utf-8')

    if not allow_deletes:
        return
    from dancedeets.search import search
    search.delete_from_fulltext_search_index(db_event.fb_event_id)
    yield op.db.Delete(db_event)
    display_event = search.DisplayEvent.get_by_id(db_event.fb_event_id)
    if display_event:
        yield op.db.Delete(display_event)
def yield_maybe_delete_bad_event(fbl, db_event):
    """Validate an automatically added event and yield corrective operations.

    Events not created by CM_AUTO / CM_AUTO_ATTENDEE, events with empty
    fb_event data and events without a creation time on or after 2016-03-05
    are skipped. Remaining events are re-checked by text, then by attendees.

    Yields:
        ``op.db.Put`` when the event's creating method is corrected. For an
        event failing both checks: an utf-8 encoded description line, then
        (if the mapper param ``allow_deletes`` is set) ``op.db.Delete`` for
        the event and for its display event when one exists.
    """
    ctx = context.get()
    allow_deletes = False
    if ctx:
        allow_deletes = ctx.mapreduce_spec.mapper.params['allow_deletes']

    was_auto_added = db_event.creating_method in [eventdata.CM_AUTO_ATTENDEE, eventdata.CM_AUTO]
    if not was_auto_added:
        return

    if db_event.fb_event['empty']:
        return

    import datetime
    # This is when we started adding all sorts of "crap"
    earliest = datetime.datetime(2016, 3, 5)
    if not db_event.creation_time or db_event.creation_time < earliest:
        return

    logging.info('MDBE: Check on event %s: %s', db_event.id, db_event.creating_method)
    from dancedeets.event_scraper import auto_add
    from dancedeets.nlp import event_classifier
    classified_event = event_classifier.get_classified_event(db_event.fb_event)

    # Pick the creating method the event deserves, if any.
    if auto_add.is_good_event_by_text(db_event.fb_event, classified_event):
        deserved_method = eventdata.CM_AUTO
    elif event_attendee_classifier.is_good_event_by_attendees(fbl, db_event.fb_event, classified_event=classified_event):
        deserved_method = eventdata.CM_AUTO_ATTENDEE
    else:
        deserved_method = None

    if deserved_method is not None:
        if db_event.creating_method != deserved_method:
            db_event.creating_method = deserved_method
            yield op.db.Put(db_event)
        return

    logging.info(
        'Accidentally %s added event %s: %s: %s', db_event.creating_method, db_event.fb_event_id, db_event.country, db_event.name
    )
    mr.increment('deleting-bad-event')
    line = '%s: %s: %s: %s\n' % (db_event.fb_event_id, db_event.creating_method, db_event.country, db_event.name)
    yield line.encode('utf-8')

    if not allow_deletes:
        return
    from dancedeets.search import search
    search.delete_from_fulltext_search_index(db_event.fb_event_id)
    yield op.db.Delete(db_event)
    display_event = search.DisplayEvent.get_by_id(db_event.fb_event_id)
    if display_event:
        yield op.db.Delete(display_event)
Exemple #11
0
def basic_match(fb_event):
    e = event_classifier.get_classified_event(fb_event)
    if not full_run:
        print e.processed_text.get_tokenized_text()
    if positive_classifier:
        c = event_auto_classifier.is_auto_add_event(e)
        result = c.result
    else:
        result = event_auto_classifier.is_auto_notadd_event(e)
    # classified as good, but not supposed to be in the good set of ids:
    if result[0] and fb_event['info']['id'] not in training_data.good_ids:
        # false positive
        print fb_event['info']['id'], result
    if not full_run:
        print fb_event['info']['id'], result
    return result[0]
def get_matcher(fbl, fb_event, fb_event_attending_maybe=None, classified_event=None):
    """Build and run an ``EventAttendeeMatcher`` for ``fb_event``.

    Args:
        fbl: lookup object used to fetch attendees when none are supplied.
        fb_event: event dict; ``fb_event['info']['id']`` is read.
        fb_event_attending_maybe: attendee data; fetched via ``fbl`` if None.
        classified_event: classified event; computed from ``fb_event`` if None.

    Returns:
        The matcher after ``classify()`` has been called, or None when the
        attendee data could not be fetched.
    """
    if classified_event is None:
        classified_event = event_classifier.get_classified_event(fb_event)

    event_id = fb_event['info']['id']

    attendees = fb_event_attending_maybe
    if attendees is None:
        try:
            attendees = fbl.get(fb_api.LookupEventAttendingMaybe, event_id)
        except fb_api.NoFetchedDataException:
            logging.info('Event %s could not fetch event attendees, aborting.', event_id)
            return None

    result = EventAttendeeMatcher(fb_event, attendees, classified_event)
    result.classify()
    return result
Exemple #13
0
def basic_match(fb_event):
    e = event_classifier.get_classified_event(fb_event)
    if not full_run:
        print e.processed_text.get_tokenized_text()
    if positive_classifier:
        c = event_auto_classifier.is_auto_add_event(e)
        result = c.result
        #result = event_auto_classifier.has_good_djs_title(e)
        #result = event_auto_classifier.is_workshop(e)
    else:
        result = event_auto_classifier.is_auto_notadd_event(e)
        #result = event_auto_classifier.is_bad_classical_dance(e)
    # classified as good, but not supposed to be in the good set of ids:
    if result[0] and fb_event['info']['id'] not in good_ids:
        # false positive
        print fb_event['info']['id'], result
    if not full_run:
        print fb_event['info']['id'], result
    return result[0]
Exemple #14
0
def get_matcher(fbl,
                fb_event,
                fb_event_attending_maybe=None,
                classified_event=None):
    """Create an ``EventAttendeeMatcher`` for ``fb_event`` and classify it.

    Missing inputs are filled in: ``classified_event`` is computed from
    ``fb_event`` and ``fb_event_attending_maybe`` is fetched through ``fbl``.

    Returns:
        The classified matcher, or None if fetching the attendees raised
        ``fb_api.NoFetchedDataException``.
    """
    if classified_event is None:
        classified_event = event_classifier.get_classified_event(fb_event)

    event_id = fb_event['info']['id']

    attending = fb_event_attending_maybe
    if attending is None:
        try:
            attending = fbl.get(fb_api.LookupEventAttendingMaybe, event_id)
        except fb_api.NoFetchedDataException:
            logging.info('Event %s could not fetch event attendees, aborting.',
                         event_id)
            return None

    attendee_matcher = EventAttendeeMatcher(fb_event, attending,
                                            classified_event)
    attendee_matcher.classify()
    return attendee_matcher
 def _run_event(self, event_id):
     # Fetch the event, classify it, and return whatever the configured
     # classifier function reports for it.
     classified = event_classifier.get_classified_event(
         self.get_event(event_id))
     return self.classifier_func(classified)
    def get(self):
        """Render the admin listing of potential events not yet looked at.

        Query parameters:
            past_event: '1' or '0' restricts the listing to events whose
                ``past_event`` flag is True / False. Any other value, or no
                value, applies no restriction.
            number_of_events: maximum number of events to list (default 20).

        Events with a positive match score are listed first; if there are
        fewer than requested, zero-score events flagged
        ``show_even_if_no_score`` fill the remainder. Events that already
        exist as DBEvents are not displayed but their ids are still included
        in ``potential_ids``.
        """
        # Only the two documented values are honoured. The value ends up
        # inside GQL strings below, so an arbitrary request value must never
        # be interpolated verbatim (malformed or injected query text).
        past_event_param = self.request.get('past_event', None)
        if past_event_param == '1':
            past_event = True
        elif past_event_param == '0':
            past_event = False
        else:
            past_event = None

        if past_event is not None:
            past_event_query = 'AND past_event = %s' % past_event
        else:
            past_event_query = ''

        number_of_events = int(self.request.get('number_of_events', '20'))
        unseen_potential_events = list(
            potential_events.PotentialEvent.gql(
                "WHERE looked_at = NULL AND match_score > 0 %s ORDER BY match_score DESC LIMIT %s"
                % (past_event_query, number_of_events)))
        if len(unseen_potential_events) < number_of_events:
            # Top up with zero-score events that were explicitly flagged.
            unseen_potential_events += list(
                potential_events.PotentialEvent.gql(
                    "WHERE looked_at = NULL AND match_score = 0 AND show_even_if_no_score = True %s ORDER BY match_score DESC LIMIT %s"
                    % (past_event_query,
                       number_of_events - len(unseen_potential_events))))

        potential_event_dict = dict(
            (x.key().name(), x) for x in unseen_potential_events)
        already_added_event_ids = [
            x.string_id()
            for x in eventdata.DBEvent.get_by_ids(list(potential_event_dict),
                                                  keys_only=True) if x
        ]
        # construct a list of not-added ids for display, but keep the list of all ids around so we can still mark them as processed down below
        potential_event_notadded_ids = list(
            set(potential_event_dict).difference(already_added_event_ids))
        potential_event_notadded_ids.sort(
            key=lambda x: -(potential_event_dict[x].match_score or 0))

        # Totals for display; each count is capped at 20000.
        non_zero_events = potential_events.PotentialEvent.gql(
            "WHERE looked_at = NULL AND match_score > 0 %s" %
            past_event_query).count(20000)
        zero_events = potential_events.PotentialEvent.gql(
            "WHERE looked_at = NULL AND match_score = 0 AND show_even_if_no_score = True %s"
            % past_event_query).count(20000)
        total_potential_events = non_zero_events + zero_events

        has_more_events = total_potential_events > number_of_events
        # Limit what is shown so we don't overwhelm the user.
        potential_event_notadded_ids = potential_event_notadded_ids[:number_of_events]

        self.fbl.request_multi(fb_api.LookupEvent,
                               potential_event_notadded_ids)
        # self.fbl.request_multi(fb_api.LookupEventAttending, potential_event_notadded_ids)
        self.finish_preload()

        template_events = []
        for e in potential_event_notadded_ids:
            try:
                fb_event = self.fbl.fetched_data(fb_api.LookupEvent, e)
                fb_event_attending = None  # self.fbl.fetched_data(fb_api.LookupEventAttending, e)
            except KeyError:
                logging.error("Failed to load event id %s", e)
                continue
            if fb_event['empty']:
                continue
            # NOTE(review): the second positional argument here is the
            # PotentialEvent itself -- confirm this matches the signature of
            # event_classifier.get_classified_event in this code version.
            classified_event = event_classifier.get_classified_event(
                fb_event, potential_event_dict[e])
            if classified_event.is_dance_event():
                reason = classified_event.reason()
                dance_words_str = ', '.join(
                    list(classified_event.dance_matches()))
                event_words_str = ', '.join(
                    list(classified_event.event_matches()))
                wrong_words_str = ', '.join(
                    list(classified_event.wrong_matches()))
            else:
                reason = None
                dance_words_str = 'NONE'
                event_words_str = 'NONE'
                wrong_words_str = 'NONE'
            location_info = None  # event_locations.LocationInfo(fb_event, debug=True)
            potential_event_dict[
                e] = potential_events.update_scores_for_potential_event(
                    potential_event_dict[e], fb_event, fb_event_attending)
            template_events.append(
                dict(fb_event=fb_event,
                     classified_event=classified_event,
                     dance_words=dance_words_str,
                     event_words=event_words_str,
                     wrong_words=wrong_words_str,
                     keyword_reason=reason,
                     potential_event=potential_event_dict[e],
                     location_info=location_info))
        # Events with the most sources come first.
        template_events = sorted(
            template_events,
            key=lambda x: -len(x['potential_event'].sources()))
        self.display['number_of_events'] = number_of_events
        self.display['total_potential_events'] = '%s + %s' % (non_zero_events,
                                                              zero_events)
        self.display['has_more_events'] = has_more_events
        self.display['potential_events_listing'] = template_events
        self.display['potential_ids'] = ','.join(
            already_added_event_ids + potential_event_notadded_ids
        )  # use all ids, since we want to mark already-added ids as processed as well. but only the top N of the potential event ids that we're showing to the user.
        self.display['track_analytics'] = False
        self.render_template('admin_potential_events')
 def runTest(self):
     # "dance class" in the description should yield both a dance keyword
     # match and an event keyword match.
     event_info = dict(name="FB Event", description="more stuff here, dance class")
     classified = event_classifier.get_classified_event(dict(info=event_info))

     expected_dance = set(['dance'])
     expected_event = set(['dance class'])
     self.assertEqual(expected_dance, classified.dance_matches())
     self.assertEqual(expected_event, classified.event_matches())
def really_classify_events(fbl, new_pe_list, new_fb_list, allow_posting=True):
    """Classify fetched events and auto-add those judged to be dance events.

    Each event is first checked by text. If that check fails and attendee
    data could be fetched for it, the event is checked by its attendees.
    Accepted events are added through ``add_entities.add_update_fb_event``
    and counted in several ``mr`` counters.

    Args:
        fbl: lookup object used for the attendee batch fetch and passed on
            to the attendee classifier and the add helper.
        new_pe_list: potential events parallel to ``new_fb_list``. May be
            empty or None, in which case no invite ids are used.
        new_fb_list: event dicts; ``['info']['id']`` is read from each.
        allow_posting: forwarded to ``add_entities.add_update_fb_event``.

    Returns:
        A list with one '+<event id><TAB><event name>' line (newline
        terminated) per successfully added event.
    """
    if not new_pe_list:
        new_pe_list = [None] * len(new_fb_list)
    logging.info('Filtering out already-added events and others, have %s remaining events to run the classifier on', len(new_fb_list))
    fb_event_ids = [x['info']['id'] for x in new_fb_list]
    fb_attending_maybe_list = fbl.get_multi(fb_api.LookupEventAttendingMaybe, fb_event_ids, allow_fail=True)

    results = []
    for pe, fb_event, fb_event_attending_maybe in zip(new_pe_list, new_fb_list, fb_attending_maybe_list):
        event_id = fb_event['info']['id']
        logging.info('Is Good Event By Text: %s: Checking...', event_id)
        # And then classify it appropriately
        classified_event = event_classifier.get_classified_event(fb_event)
        auto_add_result = event_auto_classifier.is_auto_add_event(classified_event)
        logging.info('Is Good Event By Text: %s: %s', event_id, auto_add_result)
        good_event = False
        if auto_add_result.is_good_event():
            good_event = True
            method = eventdata.CM_AUTO
            verticals = auto_add_result.verticals()
        elif fb_event_attending_maybe:
            logging.info('Is Good Event By Attendees: %s: Checking...', event_id)
            good_event = event_attendee_classifier.is_good_event_by_attendees(
                fbl, fb_event, fb_event_attending_maybe=fb_event_attending_maybe, classified_event=classified_event
            )
            logging.info('Is Good Event By Attendees: %s: %s', event_id, good_event)
            method = eventdata.CM_AUTO_ATTENDEE
            verticals = [street.Style.get_name()]
        if not good_event:
            continue

        result = '+%s\n' % '\t'.join((event_id, fb_event['info'].get('name', '')))
        try:
            invite_ids = pe.get_invite_uids() if pe else []
            logging.info('VTFI %s: Adding event %s, due to pe-invite-ids: %s', event_id, event_id, invite_ids)
            e = add_entities.add_update_fb_event(
                fb_event,
                fbl,
                visible_to_fb_uids=invite_ids,
                creating_method=method,
                allow_posting=allow_posting,
                verticals=verticals,
            )
            pe2 = potential_events.PotentialEvent.get_by_key_name(event_id)
            # There may be no PotentialEvent stored for this id (the caller
            # is allowed to pass no potential events at all). Skip the
            # bookkeeping rather than fail the whole batch after the event
            # has already been added.
            if pe2:
                pe2.looked_at = True
                pe2.auto_looked_at = True
                pe2.put()
            # TODO(lambert): handle un-add-able events differently
            results.append(result)
            mr.increment('auto-added-dance-events')
            if e.start_time < datetime.datetime.now():
                mr.increment('auto-added-dance-events-past')
                # mr.increment('auto-added-dance-events-past-eventid-%s' % event_id)
                for vertical in e.verticals:
                    mr.increment('auto-added-dance-event-past-vertical-%s' % vertical)
            else:
                mr.increment('auto-added-dance-events-future')
                for vertical in e.verticals:
                    mr.increment('auto-added-dance-event-future-vertical-%s' % vertical)
            for vertical in e.verticals:
                mr.increment('auto-added-dance-event-vertical-%s' % vertical)
        # The exception gets its own name so it does not rebind the added
        # event ``e`` from the try body.
        except fb_api.NoFetchedDataException as err:
            logging.error("Error adding event %s, no fetched data: %s", event_id, err)
        except add_entities.AddEventException as err:
            logging.warning("Error adding event %s: %s", event_id, err)
    return results
 def runTest(self):
     # Event-type words alone (jam, battles, cyphers) should be reported as
     # event matches while producing no dance keyword match.
     event_info = dict(name="FB Event", description="sessions jam battles cyphers dj's")
     classified = event_classifier.get_classified_event(dict(info=event_info))

     no_dance_words = set([])
     expected_event_words = set(['jam', 'battles', 'cyphers'])
     self.assertEqual(no_dance_words, classified.dance_matches())
     self.assertEqual(expected_event_words, classified.event_matches())
 def runTest(self):
     # For this Italian text, the processed text must not contain 'poi'.
     # NOTE(review): presumably because 'poi' is an ordinary Italian word
     # rather than the dance prop here -- confirm against the classifier.
     event_info = dict(name=u'evento di danza', description=u'prima andiamo qui, poi andiamo lì')
     classified = event_classifier.get_classified_event(dict(info=event_info))
     processed = classified.processed_text.text
     self.assertNotIn('poi', processed)
def yield_cleanup_verticals(fbl, db_event):
    """Recompute the verticals of an auto-added event and record the changes.

    Only events whose ``creating_method`` is CM_AUTO or CM_AUTO_ATTENDEE and
    whose fb_event data is non-empty are processed. The verticals come from
    the text auto-add classifier; an existing 'STREET' vertical is always
    kept. Events left with verticals are saved if the set changed. Events
    left with none are counted as bad and, when the mapper param
    ``allow_deletes`` is set, deleted together with any admin source that
    has no remaining events.

    Despite its name this function contains no ``yield``; it returns None.
    ``fbl`` is not used in the body.
    """
    ctx = context.get()
    # Without a mapreduce context, never delete.
    allow_deletes = (
        ctx.mapreduce_spec.mapper.params['allow_deletes'] if ctx else False)

    auto_methods = (eventdata.CM_AUTO_ATTENDEE, eventdata.CM_AUTO)
    if db_event.creating_method not in auto_methods:
        return

    if db_event.fb_event['empty']:
        return

    had_street = 'STREET' in db_event.verticals

    logging.info('Is Good Event By Text: %s: Checking...', db_event.id)
    classified_event = event_classifier.get_classified_event(db_event.fb_event)
    auto_add_result = event_auto_classifier.is_auto_add_event(classified_event)
    logging.info('Is Good Event By Text: %s: %s', db_event.id, auto_add_result)

    if auto_add_result.is_good_event():
        verticals = auto_add_result.verticals()
    else:
        verticals = []

    # An event that already had STREET keeps it.
    if had_street and 'STREET' not in verticals:
        verticals += ['STREET']

    added = set(verticals).difference(db_event.verticals)
    for vertical in added:
        mr.increment('adding-new-vertical-%s' % vertical)
    removed = set(db_event.verticals).difference(verticals)
    for vertical in removed:
        mr.increment('removing-old-vertical-%s' % vertical)

    old_verticals = db_event.verticals
    db_event.verticals = verticals
    mr.increment('event-resave')
    for vertical in db_event.verticals:
        mr.increment('event-vertical-total-%s' % vertical)

    is_past = db_event.start_time < datetime.datetime.now()
    if is_past:
        mr.increment('event-resave-past')
        for vertical in db_event.verticals:
            mr.increment('event-vertical-past-%s' % vertical)
    else:
        mr.increment('event-resave-future')
        for vertical in db_event.verticals:
            mr.increment('event-vertical-future-%s' % vertical)

    changed = set(old_verticals) != set(verticals)

    if verticals:
        # Still a valid event: persist only when the verticals changed.
        if changed:
            db_event.put()
        return

    # No verticals left: treat the event as bad.
    admin_ids = [admin['id'] for admin in db_event.admins]
    if allow_deletes:
        db_event.key.delete()
    mr.increment('deleting-bad-event')
    for source in thing_db.Source.get_by_key_name(admin_ids):
        if not source:
            continue
        remaining = eventdata.DBEvent.query(
            eventdata.DBEvent.admin_fb_uids == source.graph_id).count(1000)
        if remaining != 0:
            continue
        # The source no longer administers any event.
        if allow_deletes:
            source.delete()
        mr.increment('deleting-bad-source')
Exemple #22
0
def really_classify_events(fbl, new_pe_list, new_fb_list, allow_posting=True):
    """Classify fetched events and auto-add those judged to be dance events.

    Each event is first checked by text. If that check fails and attendee
    data could be fetched for it, the event is checked by its attendees.
    Accepted events are added through ``add_entities.add_update_fb_event``
    and counted in several ``mr`` counters.

    Args:
        fbl: lookup object used for the attendee batch fetch and passed on
            to the attendee classifier and the add helper.
        new_pe_list: potential events parallel to ``new_fb_list``. May be
            empty or None, in which case no invite ids are used.
        new_fb_list: event dicts; ``['info']['id']`` is read from each.
        allow_posting: forwarded to ``add_entities.add_update_fb_event``.

    Returns:
        A list with one '+<event id><TAB><event name>' line (newline
        terminated) per successfully added event.
    """
    if not new_pe_list:
        new_pe_list = [None] * len(new_fb_list)
    logging.info(
        'Filtering out already-added events and others, have %s remaining events to run the classifier on',
        len(new_fb_list))
    fb_event_ids = [x['info']['id'] for x in new_fb_list]
    fb_attending_maybe_list = fbl.get_multi(fb_api.LookupEventAttendingMaybe,
                                            fb_event_ids,
                                            allow_fail=True)

    results = []
    for pe, fb_event, fb_event_attending_maybe in zip(new_pe_list, new_fb_list,
                                                      fb_attending_maybe_list):
        event_id = fb_event['info']['id']
        logging.info('Is Good Event By Text: %s: Checking...', event_id)
        # And then classify it appropriately
        classified_event = event_classifier.get_classified_event(fb_event)
        auto_add_result = event_auto_classifier.is_auto_add_event(
            classified_event)
        logging.info('Is Good Event By Text: %s: %s', event_id,
                     auto_add_result)
        good_event = False
        if auto_add_result.is_good_event():
            good_event = True
            method = eventdata.CM_AUTO
            verticals = auto_add_result.verticals()
        elif fb_event_attending_maybe:
            logging.info('Is Good Event By Attendees: %s: Checking...',
                         event_id)
            good_event = event_attendee_classifier.is_good_event_by_attendees(
                fbl,
                fb_event,
                fb_event_attending_maybe=fb_event_attending_maybe,
                classified_event=classified_event)
            logging.info('Is Good Event By Attendees: %s: %s', event_id,
                         good_event)
            method = eventdata.CM_AUTO_ATTENDEE
            verticals = [street.Style.get_name()]
        if not good_event:
            continue

        result = '+%s\n' % '\t'.join(
            (event_id, fb_event['info'].get('name', '')))
        try:
            invite_ids = pe.get_invite_uids() if pe else []
            logging.info(
                'VTFI %s: Adding event %s, due to pe-invite-ids: %s',
                event_id, event_id, invite_ids)
            e = add_entities.add_update_fb_event(
                fb_event,
                fbl,
                visible_to_fb_uids=invite_ids,
                creating_method=method,
                allow_posting=allow_posting,
                verticals=verticals,
            )
            pe2 = potential_events.PotentialEvent.get_by_key_name(event_id)
            # There may be no PotentialEvent stored for this id (the caller
            # is allowed to pass no potential events at all). Skip the
            # bookkeeping rather than fail the whole batch after the event
            # has already been added.
            if pe2:
                pe2.looked_at = True
                pe2.auto_looked_at = True
                pe2.put()
            # TODO(lambert): handle un-add-able events differently
            results.append(result)
            mr.increment('auto-added-dance-events')
            if e.start_time < datetime.datetime.now():
                mr.increment('auto-added-dance-events-past')
                # mr.increment('auto-added-dance-events-past-eventid-%s' % event_id)
                for vertical in e.verticals:
                    mr.increment(
                        'auto-added-dance-event-past-vertical-%s' %
                        vertical)
            else:
                mr.increment('auto-added-dance-events-future')
                for vertical in e.verticals:
                    mr.increment(
                        'auto-added-dance-event-future-vertical-%s' %
                        vertical)
            for vertical in e.verticals:
                mr.increment('auto-added-dance-event-vertical-%s' %
                             vertical)
        # The exception gets its own name so it does not rebind the added
        # event ``e`` from the try body.
        except fb_api.NoFetchedDataException as err:
            logging.error("Error adding event %s, no fetched data: %s",
                          event_id, err)
        except add_entities.AddEventException as err:
            logging.warning("Error adding event %s: %s", event_id, err)
    return results
def yield_cleanup_verticals(fbl, db_event):
    """Re-run the text classifier on an auto-added event and fix its verticals.

    Only events whose ``creating_method`` is ``CM_AUTO_ATTENDEE`` or
    ``CM_AUTO`` and whose ``fb_event`` is not flagged ``'empty'`` are
    touched. The freshly computed verticals replace the stored ones (an
    existing ``'STREET'`` vertical is always retained). If any verticals
    remain, the event is saved when the set changed; otherwise the event,
    and any admin Source left with no events, is deleted -- but only when
    the mapreduce params set ``allow_deletes``. Counters are incremented
    either way.

    Despite the name, nothing is yielded; the function returns None.

    Args:
        fbl: Not used by this function.
        db_event: The stored event to re-evaluate.
    """
    ctx = context.get()
    # Without a mapreduce context we never delete anything.
    allow_deletes = (ctx.mapreduce_spec.mapper.params['allow_deletes']
                     if ctx else False)

    auto_methods = (eventdata.CM_AUTO_ATTENDEE, eventdata.CM_AUTO)
    if db_event.creating_method not in auto_methods or db_event.fb_event['empty']:
        return

    previous_verticals = db_event.verticals
    had_street = 'STREET' in previous_verticals

    logging.info('Is Good Event By Text: %s: Checking...', db_event.id)
    classified = event_classifier.get_classified_event(db_event.fb_event)
    text_result = event_auto_classifier.is_auto_add_event(classified)
    logging.info('Is Good Event By Text: %s: %s', db_event.id, text_result)

    new_verticals = text_result.verticals() if text_result.is_good_event() else []

    # A STREET vertical that was already on the event is never dropped.
    if had_street and 'STREET' not in new_verticals:
        new_verticals += ['STREET']

    for added in set(new_verticals).difference(previous_verticals):
        mr.increment('adding-new-vertical-%s' % added)
    for removed in set(previous_verticals).difference(new_verticals):
        mr.increment('removing-old-vertical-%s' % removed)

    db_event.verticals = new_verticals
    mr.increment('event-resave')
    for vertical in db_event.verticals:
        mr.increment('event-vertical-total-%s' % vertical)

    # Break the same counts down by whether the event has already started.
    is_past = db_event.start_time < datetime.datetime.now()
    if is_past:
        mr.increment('event-resave-past')
        for vertical in db_event.verticals:
            mr.increment('event-vertical-past-%s' % vertical)
    else:
        mr.increment('event-resave-future')
        for vertical in db_event.verticals:
            mr.increment('event-vertical-future-%s' % vertical)

    if new_verticals:
        # Still a valid event: write it back only if the verticals changed.
        if set(previous_verticals) != set(new_verticals):
            db_event.put()
        return

    # No verticals left: the event no longer qualifies, so remove it.
    admin_ids = [admin['id'] for admin in db_event.admins]
    if allow_deletes:
        db_event.key.delete()
    mr.increment('deleting-bad-event')

    # Clean up any admin Source that now has no events attached.
    for source in thing_db.Source.get_by_key_name(admin_ids):
        if not source:
            continue
        owned_events = eventdata.DBEvent.query(
            eventdata.DBEvent.admin_fb_uids == source.graph_id)
        if owned_events.count(1000) != 0:
            continue
        if allow_deletes:
            source.delete()
        mr.increment('deleting-bad-source')
 def runTest(self):
     """Check that 'poi' is absent from the processed text of an Italian event.

     The description contains the word 'poi'; after classification the
     processed text must no longer contain it.
     """
     event_info = {
         'name': u'evento di danza',
         'description': u'prima andiamo qui, poi andiamo lì',
     }
     classified = event_classifier.get_classified_event({'info': event_info})
     processed = classified.processed_text.text
     self.assertNotIn('poi', processed)
    def get(self):
        """Render the 'admin_edit' page for a single event.

        The event is identified by the ``event_url`` request parameter (an
        id is extracted from the URL) or, failing that, by the ``event_id``
        parameter. The handler loads the FB event and any stored DBEvent,
        runs the text classifiers and the attendee classifier, resolves
        location and city information, and stores the results in
        ``self.display`` for the template.

        If no FB data is available for the event, a barebones error page is
        returned instead.
        """
        # Prefer an explicit URL; fall back to a raw event id.
        # NOTE(review): event_id stays None when neither parameter is given,
        # and is then passed to get_fb_event() as-is -- confirm that is handled.
        event_id = None
        if self.request.get('event_url'):
            event_id = urls.get_event_id_from_url(
                self.request.get('event_url'))
        elif self.request.get('event_id'):
            event_id = self.request.get('event_id')
        self.finish_preload()

        fb_event = get_fb_event(self.fbl, event_id)
        if not fb_event:
            logging.error('No fetched data for %s, showing error page',
                          event_id)
            return self.show_barebones_page(event_id, "No fetched data")

        # Stored copy of the event, if we already have one.
        e = eventdata.DBEvent.get_by_id(event_id)

        # For non-public events, fall back to the stored FB data when we have
        # it; otherwise record an error.
        if not fb_events.is_public_ish(fb_event):
            if e:
                fb_event = e.fb_event
            else:
                self.add_error(
                    'Cannot add secret/closed events to dancedeets!')

        # Presumably aborts the request if add_error() recorded anything --
        # TODO confirm against the base handler.
        self.errors_are_fatal()

        # Resolve the owner's city: try the owner as a profile first, then as
        # a page.
        owner_location = None
        if 'owner' in fb_event['info']:
            owner_id = fb_event['info']['owner']['id']
            location = self._get_location(owner_id, fb_api.LookupProfile,
                                          'profile') or self._get_location(
                                              owner_id, fb_api.LookupThingPage,
                                              'info')
            if location:
                owner_location = event_locations.city_for_fb_location(location)
        self.display['owner_location'] = owner_location

        display_event = search.DisplayEvent.get_by_id(event_id)
        # Don't insert object until we're ready to save it...
        if e and e.creating_fb_uid:
            #STR_ID_MIGRATE
            creating_user = self.fbl.get(fb_api.LookupProfile,
                                         str(e.creating_fb_uid))
            # An 'empty' lookup result is treated the same as no creator.
            if creating_user.get('empty'):
                logging.warning(
                    'Have creating-user %s...but it is not publicly visible, so treating as None: %s',
                    e.creating_fb_uid, creating_user)
                creating_user = None
        else:
            creating_user = None

        # Basic text classification (timed), using the potential event's
        # language.
        potential_event = potential_events.make_potential_event_without_source(
            event_id)
        a = time.time()
        classified_event = event_classifier.get_classified_event(
            fb_event, potential_event.language)
        timelog.log_time_since('Running BasicText Classifier', a)
        self.display['classified_event'] = classified_event
        dance_words_str = ', '.join(list(classified_event.dance_matches()))
        # Event keywords are only reported when the classifier considers this
        # a dance event; otherwise the literal 'NONE' is shown.
        if classified_event.is_dance_event():
            event_words_str = ', '.join(list(classified_event.event_matches()))
        else:
            event_words_str = 'NONE'
        self.display['classifier_dance_words'] = dance_words_str
        self.display['classifier_event_words'] = event_words_str
        self.display['creating_user'] = creating_user

        self.display['potential_event'] = potential_event
        self.display['display_event'] = display_event

        # Auto-add / auto-notadd text classifiers (timed together).
        start = time.time()
        add_result = event_auto_classifier.is_auto_add_event(classified_event)
        notadd_result = event_auto_classifier.is_auto_notadd_event(
            classified_event, auto_add_result=add_result)
        timelog.log_time_since('Running Text Classifier', start)

        # NOTE(review): auto_classified is assembled here but never read
        # again in this method; the raw results are what reach the template.
        auto_classified = ''
        if add_result.is_good_event():
            auto_classified += 'add: %s.\n' % add_result
        if notadd_result[0]:
            auto_classified += 'notadd: %s.\n' % notadd_result[1]

        self.display['auto_classified_add'] = add_result
        self.display['auto_classified_notadd'] = notadd_result
        # Combined list of detected styles and event types, shown by their
        # public names.
        styles = categories.find_styles(fb_event)
        event_types = styles + categories.find_event_types(fb_event)
        self.display['auto_categorized_types'] = ', '.join(
            x.public_name for x in event_types)

        a = time.time()
        fb_event_attending_maybe = get_fb_event(
            self.fbl, event_id, lookup_type=fb_api.LookupEventAttendingMaybe)
        timelog.log_time_since('Loading FB Event Attending Data', a)
        # Restart the timer: the 'Running Attendee Classifier' measurement
        # below also covers the location and geocode work that follows.
        a = time.time()

        location_info = event_locations.LocationInfo(
            fb_event,
            fb_event_attending_maybe=fb_event_attending_maybe,
            db_event=e,
            debug=True)
        self.display['location_info'] = location_info
        # Geocode the FB-provided address, if any, for display.
        if location_info.fb_address:
            fb_geocode = gmaps_api.lookup_address(location_info.fb_address)
            self.display['fb_geocoded_address'] = formatting.format_geocode(
                fb_geocode)
        else:
            self.display['fb_geocoded_address'] = ''
        # Name of the nearby city for the event's geocode; 'Unknown' when
        # there is no geocode or no city is found.
        city_name = 'Unknown'
        if location_info.geocode:
            city = cities_db.get_nearby_city(
                location_info.geocode.latlng(),
                country=location_info.geocode.country())
            if city:
                city_name = city.display_name()
        self.display['ranking_city_name'] = city_name

        # Attendee distance data requires a geocoded event location.
        person_ids = fb_events.get_event_attendee_ids(fb_event_attending_maybe)
        if location_info.geocode:
            data = person_city.get_data_fields(person_ids,
                                               location_info.geocode.latlng())
            self.display['attendee_distance_info'] = data
        else:
            self.display['attendee_distance_info'] = 'Unknown'

        matcher = event_attendee_classifier.get_matcher(
            self.fbl,
            fb_event,
            fb_event_attending_maybe=fb_event_attending_maybe,
            classified_event=classified_event)
        timelog.log_time_since('Running Attendee Classifier', a)
        # print '\n'.join(matcher.results)
        # Order matches by descending number of overlapping attendee ids; the
        # first entry is then the match with the largest overlap.
        sorted_matches = sorted(matcher.matches,
                                key=lambda x: -len(x.overlap_ids))
        matched_overlap_ids = sorted_matches[
            0].overlap_ids if matcher.matches else []
        self.display['auto_add_attendee_ids'] = sorted(matched_overlap_ids)
        self.display['overlap_results'] = [
            '%s %s: %s' % (x.top_n, x.name, x.reason) for x in sorted_matches
        ]

        self.display['overlap_attendee_ids'] = sorted(matcher.overlap_ids)

        # This display key is only set when there is at least one match.
        if matcher.matches:
            attendee_ids_to_admin_hash_and_event_ids = sorted_matches[
                0].get_attendee_lookups()
            self.display[
                'attendee_ids_to_admin_hash_and_event_ids'] = attendee_ids_to_admin_hash_and_event_ids

        self.display['event'] = e
        self.display['event_id'] = event_id
        self.display['fb_event'] = fb_event

        # Register the keyword-highlighting filter on the Jinja environment
        # before rendering.
        self.jinja_env.filters[
            'highlight_keywords'] = event_classifier.highlight_keywords

        self.display['track_analytics'] = False
        self.render_template('admin_edit')