Esempio n. 1
0
def scrape_sources_for_events(sources):
    """Discover events from ``sources`` and yield them as (shard, json) pairs.

    A lookup object is obtained from ``fb_mapreduce.get_fblookup()`` with its
    ``allow_cache`` flag switched off, and is handed together with ``sources``
    to ``thing_scraper.discover_events_from_sources``.

    Yields:
        ``(_shard_for(event_id), payload)`` tuples, where ``payload`` is the
        JSON encoding of ``(event_id, source_id, source_field,
        extra_source_id)`` for one discovered item.
    """
    lookup = fb_mapreduce.get_fblookup()
    lookup.allow_cache = False
    for found in thing_scraper.discover_events_from_sources(lookup, sources):
        fields = (
            found.event_id,
            found.source_id,
            found.source_field,
            found.extra_source_id,
        )
        # One counter tick per discovered item, before it is emitted.
        mr.increment('found-event-to-check')
        shard_key = _shard_for(found.event_id)
        yield (shard_key, json.dumps(fields))
Esempio n. 2
0
def classify_events(fbl, pe_list, fb_list):
    """Auto-classify potential events, adding or dismissing them where possible.

    ``pe_list`` and ``fb_list`` are walked in lockstep. Events accepted by the
    auto-add classifier are saved through ``add_entities.add_update_event``;
    events rejected by the auto-not-add classifier are only flagged as looked
    at.

    Args:
        fbl: lookup object passed through to ``add_entities.add_update_event``.
        pe_list: potential-event entities, parallel to ``fb_list``.
        fb_list: fetched event dicts (or falsy values), parallel to ``pe_list``.

    Returns:
        A list of tab-separated report lines: lines starting with '+' for
        events that were added and lines starting with '-' for events that
        were auto-rejected.
    """

    def _flag_auto_looked_at(key_name):
        # The entity is fetched again by key before being flagged, instead of
        # reusing the instance that came in through pe_list.
        refreshed = potential_events.PotentialEvent.get_by_key_name(key_name)
        refreshed.looked_at = True
        refreshed.auto_looked_at = True
        refreshed.put()

    report_lines = []
    for potential, fb_event in zip(pe_list, fb_list):
        # A fetched-but-empty event is handled exactly like a missing one.
        if fb_event and fb_event['empty']:
            fb_event = None

        # Save past events right away; the next run through this mapreduce
        # should then not need to process them.
        if potential.set_past_event(fb_event):
            potential.put()
        if not fb_event:
            continue

        # Skip anything already looked at, or not worth looking at. The
        # pre-filtering mapreduce never sends these, but scraping a user's
        # potential events and passing them all in does.
        if not (potential.should_look_at and not potential.looked_at):
            continue

        classified = event_classifier.classified_event_from_fb_event(fb_event)
        classified.classify()
        add_verdict = event_auto_classifier.is_auto_add_event(classified)
        if add_verdict[0]:
            logging.info("Found event %s, looking up location", potential.fb_event_id)
            location = event_locations.LocationInfo(fb_event)
            columns = (
                potential.fb_event_id,
                True,
                location.final_city,
                location.final_city is not None,
                location.fb_address,
                fb_event['info'].get('name', ''),
            )
            added_line = '+%s\n' % '\t'.join([unicode(column) for column in columns])
            try:
                logging.info(
                    'VTFI %s: Adding event %s, due to pe-invite-ids: %s',
                    potential.fb_event_id,
                    potential.fb_event_id,
                    potential.get_invite_uids(),
                )
                add_entities.add_update_event(
                    fb_event,
                    fbl,
                    visible_to_fb_uids=potential.get_invite_uids(),
                    creating_method=eventdata.CM_AUTO,
                )
                _flag_auto_looked_at(potential.fb_event_id)
                # TODO(lambert): handle un-add-able events differently
                report_lines.append(added_line)
                mr.increment('auto-added-dance-events')
            except fb_api.NoFetchedDataException as err:
                logging.error("Error adding event %s, no fetched data: %s", potential.fb_event_id, err)
            except add_entities.AddEventException as err:
                logging.warning("Error adding event %s, no fetched data: %s", potential.fb_event_id, err)

        # The not-add classifier is always consulted and is told what the
        # add classifier decided.
        notadd_verdict = event_auto_classifier.is_auto_notadd_event(classified, auto_add_result=add_verdict)
        if not notadd_verdict[0]:
            continue
        _flag_auto_looked_at(potential.fb_event_id)
        rejected_columns = (potential.fb_event_id, fb_event['info'].get('name', ''))
        report_lines.append('-%s\n' % '\t'.join([unicode(column) for column in rejected_columns]))
        mr.increment('auto-notadded-dance-events')
    return report_lines
Esempio n. 3
0
def yield_maybe_delete_bad_event(fbl, db_event):
    """Re-check an auto-added event and yield operations to relabel or delete it.

    Only events whose ``creating_method`` is ``CM_AUTO`` or
    ``CM_AUTO_ATTENDEE``, whose fetched data is not empty, and whose
    ``creation_time`` is on or after 2016-03-05 are examined. An event that
    passes the text check is relabelled ``CM_AUTO``; one that passes only the
    attendee check is relabelled ``CM_AUTO_ATTENDEE``. An event that passes
    neither is reported and, when the mapper params allow it, deleted.

    Args:
        fbl: lookup object forwarded to the attendee classifier.
        db_event: the stored event entity under review.

    Yields:
        ``op.db.Put`` / ``op.db.Delete`` operations, plus one UTF-8 encoded
        report line for every event judged bad.
    """
    mr_context = context.get()
    # Deleting is opt-in through the mapper params; without a mapreduce
    # context nothing is ever deleted.
    allow_deletes = mr_context.mapreduce_spec.mapper.params['allow_deletes'] if mr_context else False

    auto_methods = (eventdata.CM_AUTO_ATTENDEE, eventdata.CM_AUTO)
    if db_event.creating_method not in auto_methods:
        return

    if db_event.fb_event['empty']:
        return

    import datetime
    # This is when we started adding all sorts of "crap"
    cutoff = datetime.datetime(2016, 3, 5)
    if not db_event.creation_time or db_event.creation_time < cutoff:
        return

    logging.info('MDBE: Check on event %s: %s', db_event.id, db_event.creating_method)
    from event_scraper import auto_add
    from nlp import event_classifier
    classified = event_classifier.get_classified_event(db_event.fb_event)

    # First choice: the event text alone justifies keeping it.
    if auto_add.is_good_event_by_text(db_event.fb_event, classified):
        if db_event.creating_method != eventdata.CM_AUTO:
            db_event.creating_method = eventdata.CM_AUTO
            yield op.db.Put(db_event)
        return

    # Second choice: the attendee list justifies keeping it.
    if event_attendee_classifier.is_good_event_by_attendees(fbl, db_event.fb_event, classified_event=classified):
        if db_event.creating_method != eventdata.CM_AUTO_ATTENDEE:
            db_event.creating_method = eventdata.CM_AUTO_ATTENDEE
            yield op.db.Put(db_event)
        return

    # Neither check passed: report the event, and delete it if permitted.
    logging.info(
        'Accidentally %s added event %s: %s: %s', db_event.creating_method, db_event.fb_event_id, db_event.country, db_event.name
    )
    mr.increment('deleting-bad-event')
    report_line = '%s: %s: %s: %s\n' % (db_event.fb_event_id, db_event.creating_method, db_event.country, db_event.name)
    yield report_line.encode('utf-8')
    if not allow_deletes:
        return

    from search import search
    search.delete_from_fulltext_search_index(db_event.fb_event_id)
    yield op.db.Delete(db_event)
    display_event = search.DisplayEvent.get_by_id(db_event.fb_event_id)
    if display_event:
        yield op.db.Delete(display_event)
Esempio n. 4
0
def count_private_events(fbl, e_list):
    """Tally privacy counters for events and yield one report row per event.

    Args:
        fbl: unused here.
        e_list: iterable of event entities exposing ``fb_event``,
            ``fb_event_id`` and ``start_time``.

    Yields:
        Newline-terminated, tab-separated rows of
        ``fb_event_id, start_date, privacy, attendees``. Events whose fetched
        data lacks an 'info' key, or that raise
        ``fb_api.NoFetchedDataException``, are logged and skipped.
    """
    for event in e_list:
        try:
            fb_data = event.fb_event
            if 'info' in fb_data:
                member_count = fb_api.get_all_members_count(fb_data)
                # Count events that are not public yet still "public-ish".
                if not fb_events.is_public(fb_data) and fb_events.is_public_ish(fb_data):
                    mr.increment('nonpublic-and-large')
                privacy = fb_data['info'].get('privacy', 'UNKNOWN')
                mr.increment('privacy-%s' % privacy)

                if event.start_time:
                    start_date = event.start_time.strftime('%Y-%m-%d')
                else:
                    start_date = ''
                columns = [event.fb_event_id, start_date, privacy, member_count]
                yield '%s\n' % '\t'.join([str(column) for column in columns])
            else:
                logging.error("skipping row2 for event id %s", event.fb_event_id)
        except fb_api.NoFetchedDataException:
            logging.error("skipping row for event id %s", event.fb_event_id)
Esempio n. 5
0
def count_private_events(fbl, e_list):
    """Tally privacy counters for events and yield one report row per event.

    An event counts as 'nonpublic-and-large' when its privacy is anything
    other than 'OPEN' (the default when the field is absent) and its member
    count exceeds 60.

    Args:
        fbl: unused here.
        e_list: iterable of event entities exposing ``fb_event``,
            ``fb_event_id`` and ``start_time``.

    Yields:
        Newline-terminated, tab-separated rows of
        ``fb_event_id, start_date, privacy, attendees``. Events whose fetched
        data lacks an 'info' key, or that raise
        ``fb_api.NoFetchedDataException``, are logged and skipped.
    """
    for event in e_list:
        try:
            fb_data = event.fb_event
            if 'info' in fb_data:
                member_count = fb_api.get_all_members_count(fb_data)
                privacy = fb_data['info'].get('privacy', 'OPEN')
                if privacy != 'OPEN' and member_count > 60:
                    mr.increment('nonpublic-and-large')
                mr.increment('privacy-%s' % privacy)

                if event.start_time:
                    start_date = event.start_time.strftime('%Y-%m-%d')
                else:
                    start_date = ''
                columns = [event.fb_event_id, start_date, privacy, member_count]
                yield '%s\n' % '\t'.join([str(column) for column in columns])
            else:
                logging.error("skipping row2 for event id %s", event.fb_event_id)
        except fb_api.NoFetchedDataException:
            logging.error("skipping row for event id %s", event.fb_event_id)
def really_classify_events(fbl, new_pe_list, new_fb_list, allow_posting=True):
    """Run the text and attendee classifiers and add every event that passes.

    Args:
        fbl: lookup object; used to fetch attending/maybe data for all events
            in one ``get_multi`` call and forwarded to the helpers.
        new_pe_list: potential-event entities parallel to ``new_fb_list``. A
            falsy value stands for "no potential event for any of them".
        new_fb_list: fetched event dicts carrying ``['info']['id']``.
        allow_posting: forwarded unchanged to ``add_entities.add_update_event``.

    Returns:
        A list of '+'-prefixed, tab-separated report lines (event id and
        name), one per event that was added successfully.
    """
    pe_list = new_pe_list or [None] * len(new_fb_list)
    logging.info('Filtering out already-added events and others, have %s remaining events to run the classifier on', len(new_fb_list))
    all_ids = [fb_event['info']['id'] for fb_event in new_fb_list]
    attending_list = fbl.get_multi(fb_api.LookupEventAttendingMaybe, all_ids, allow_fail=True)

    added_lines = []
    for potential, fb_event, attending in zip(pe_list, new_fb_list, attending_list):
        event_id = fb_event['info']['id']
        logging.info('Is Good Event By Text: %s: Checking...', event_id)
        classified = event_classifier.get_classified_event(fb_event)
        text_verdict = event_auto_classifier.is_auto_add_event(classified)
        logging.info('Is Good Event By Text: %s: %s', event_id, text_verdict)

        # The text classifier is tried first; the attendee classifier is
        # consulted only when the text verdict is negative and attending
        # data is available.
        is_good = False
        if text_verdict and text_verdict[0]:
            is_good = text_verdict[0]
            method = eventdata.CM_AUTO
        elif attending:
            logging.info('Is Good Event By Attendees: %s: Checking...', event_id)
            is_good = event_attendee_classifier.is_good_event_by_attendees(
                fbl, fb_event, fb_event_attending_maybe=attending, classified_event=classified
            )
            logging.info('Is Good Event By Attendees: %s: %s', event_id, is_good)
            method = eventdata.CM_AUTO_ATTENDEE
        if not is_good:
            continue

        added_line = '+%s\n' % '\t'.join((event_id, fb_event['info'].get('name', '')))
        try:
            if potential:
                invite_ids = potential.get_invite_uids()
            else:
                invite_ids = []
            logging.info('VTFI %s: Adding event %s, due to pe-invite-ids: %s', event_id, event_id, invite_ids)
            added_event = add_entities.add_update_event(
                fb_event, fbl, visible_to_fb_uids=invite_ids, creating_method=method, allow_posting=allow_posting
            )
            # Flag the stored potential event (fetched again by key) as handled.
            stored = potential_events.PotentialEvent.get_by_key_name(event_id)
            stored.looked_at = True
            stored.auto_looked_at = True
            stored.put()
            # TODO(lambert): handle un-add-able events differently
            added_lines.append(added_line)
            mr.increment('auto-added-dance-events')
            if added_event.start_time < datetime.datetime.now():
                mr.increment('auto-added-dance-events-past')
                mr.increment('auto-added-dance-events-past-eventid-%s' % event_id)
            else:
                mr.increment('auto-added-dance-events-future')
        except fb_api.NoFetchedDataException as err:
            logging.error("Error adding event %s, no fetched data: %s", event_id, err)
        except add_entities.AddEventException as err:
            logging.warning("Error adding event %s, no fetched data: %s", event_id, err)
    return added_lines
def classify_events(fbl, pe_list, fb_list):
    """Filter potential events down to those worth classifying, then classify.

    Args:
        fbl: lookup object forwarded to ``really_classify_events``.
        pe_list: potential-event entities, parallel to ``fb_list``.
        fb_list: fetched event dicts (or falsy values), parallel to ``pe_list``.

    Returns:
        Whatever ``really_classify_events`` returns for the surviving events.
    """
    survivors = []
    # Go through and find all potential events we actually want to attempt to classify
    for potential, fb_event in zip(pe_list, fb_list):
        # Save past events right away; the next run through this mapreduce
        # should then not need to process them.
        if potential.set_past_event(fb_event):
            potential.put()

        if not fb_event or fb_event['empty']:
            mr.increment('skip-due-to-empty')
        elif potential.looked_at:
            # Already-handled events are not filtered out upstream when a
            # user's potential events are scraped and all passed in here.
            logging.info('Already looked at event (added, or manually discarded), so no need to re-process.')
            mr.increment('skip-due-to-looked-at')
        elif not re.match(r'^\d+$', potential.fb_event_id):
            logging.error('Found a very strange potential event id: %s', potential.fb_event_id)
            mr.increment('skip-due-to-bad-id')
        else:
            survivors.append((potential, fb_event))

    new_pe_list = [pair[0] for pair in survivors]
    new_fb_list = [pair[1] for pair in survivors]
    return really_classify_events(fbl, new_pe_list, new_fb_list)
Esempio n. 8
0
    def get_json(self, **kwargs):
        """Call this object's Google Maps endpoint and return the parsed JSON.

        The query string is built from ``kwargs``. When
        ``self.use_private_key`` is set, a ``client`` parameter is added and
        the path plus query is signed with HMAC-SHA1 using the decoded
        ``google_maps_private_key``. Otherwise the request carries
        ``google_server_key`` as its ``key`` parameter.

        Args:
            **kwargs: query parameters for the request. Note that ``client``
                is overwritten when the private key is used.

        Returns:
            The decoded JSON response, or None when the response body is not
            valid JSON.
        """
        mr.increment('gmaps-api-%s' % self.name)
        if self.use_private_key:
            kwargs['client'] = 'free-dancedeets'
            unsigned_url_path = "%s?%s" % (self.path, urls.urlencode(kwargs))
            private_key = google_maps_private_key
            decoded_key = base64.urlsafe_b64decode(private_key)
            # The signature covers the path and query string, not the host.
            signature = hmac.new(decoded_key, unsigned_url_path, hashlib.sha1)
            encoded_signature = base64.urlsafe_b64encode(signature.digest())
            url = "%s%s&signature=%s" % (self.protocol_host, unsigned_url_path,
                                         encoded_signature)
        else:
            unsigned_url_path = "%s?%s" % (self.path, urls.urlencode(kwargs))
            url = "%s%s&key=%s" % (self.protocol_host, unsigned_url_path,
                                   google_server_key)

        # NOTE(review): this logs the full URL, including the key/signature.
        logging.info('geocoding url: %s', url)
        response = urllib.urlopen(url)
        try:
            result = response.read()
        finally:
            # Always release the connection, even if read() raises.
            response.close()
        logging.info('geocoding results: %s', result)

        try:
            return json.loads(result)
        except ValueError:
            return None
Esempio n. 9
0
 def track_lookup(cls):
     """Increment the 'fb-lookups-source' counter by 1 via ``mr.increment``."""
     counter_name = 'fb-lookups-source'
     mr.increment(counter_name, 1)
Esempio n. 10
0
def update_mailchimp(user):
    """Push one user's profile to Mailchimp, syncing an email change first.

    The target list is the ``mailchimp_list_id`` mapper parameter when a
    mapreduce context supplies one, and ``mailchimp_api.LIST_ID`` otherwise.
    The same list is used both for the email-change call and for the member
    upsert, so an overridden list id is honoured throughout.

    Args:
        user: user entity. Reads its email, locale, name, login, creation-time
            and location fields, and sets ``user.mailchimp_email`` when the
            email changed. The entity is not saved here; presumably the
            caller persists it (verify against the caller).

    Returns:
        None. Outcomes are reported through ``mr`` counters and logging only.
    """
    ctx = context.get()
    # -1 is the "no override supplied" sentinel.
    mailchimp_list_id = -1
    if ctx:
        params = ctx.mapreduce_spec.mapper.params
        mailchimp_list_id = params.get('mailchimp_list_id', mailchimp_list_id)
    if mailchimp_list_id == -1:
        mailchimp_list_id = mailchimp_api.LIST_ID

    # Only the language part of a locale such as 'en_US' is sent.
    trimmed_locale = user.locale or ''
    if '_' in trimmed_locale:
        trimmed_locale = trimmed_locale.split('_')[0]

    if not user.email:
        mr.increment('mailchimp-error-no-email')
        logging.info('No email for user %s: %s', user.fb_uid, user.full_name)
        return

    if user.mailchimp_email != user.email:
        # When some old users are saved, their mailchimp email will be None,
        # so we don't really need to worry about them here.
        logging.info('Updating user email to %s with old mailchimp email %s', user.email, user.mailchimp_email)
        if user.mailchimp_email is not None:
            # NOTE(review): this bumps the "error-response" counter before the
            # call is even attempted, so it counts attempts rather than
            # errors. Left unchanged to keep existing counters stable; confirm
            # whether a separate attempt counter was intended.
            mr.increment('mailchimp-update-email-error-response')
            try:
                # Use the resolved list id so the email change goes to the
                # same list that add_members() writes to below.
                user_data = mailchimp_api.update_email(mailchimp_list_id, user.mailchimp_email, user.email)
            except mailchimp_api.UserNotFound:
                mr.increment('mailchimp-update-email-error-not-found')
                logging.error('Updating user %s email to mailchimp, returned not found', user.fb_uid)
            else:
                logging.info('Result: %s', user_data)
                if user_data['email_address'] == user.email:
                    logging.info('Updating user %s email to mailchimp, returned OK', user.fb_uid)
                else:
                    mr.increment('mailchimp-update-email-error-response')
                    logging.error('Updating user %s email to mailchimp, returned %s', user.fb_uid, user_data)
        # Mark our current mailchimp_email down, so we can update it properly later if desired.
        user.mailchimp_email = user.email
        # Now that Mailchimp knows about our new user email,
        # we can update/reference it using the normal add_members() below.

    member = {
        'email_address': user.email,
        # Mailchimp is the official store of 'are they subscribed', so let's not overwrite it here
        'status_if_new': 'subscribed',
        'language': trimmed_locale,
        'merge_fields': {
            'USER_ID': user.fb_uid,  # necessary so we can update our local datastore on callbacks
            'FIRSTNAME': user.first_name or '',
            'LASTNAME': user.last_name or '',
            'FULLNAME': user.full_name or '',
            'NAME': user.first_name or user.full_name or '',
            'WEEKLY': unicode(user.send_email),
            'EXPIRED': unicode(user.expired_oauth_token),
            'LASTLOGIN': user.last_login_time.strftime('%Y-%m-%d') if user.last_login_time else '',
        },
        'timestamp_signup': user.creation_time.strftime('%Y-%m-%dT%H:%M:%S'),
        'timestamp_opt': user.creation_time.strftime('%Y-%m-%dT%H:%M:%S'),
    }
    if user.location:
        # Attach coordinates only when the address can be geocoded.
        geocode = gmaps_api.lookup_address(user.location)
        if geocode:
            user_latlong = geocode.latlng()
            member['location'] = {
                'latitude': user_latlong[0],
                'longitude': user_latlong[1],
            }
        else:
            logging.warning('User %s (%s) had un-geocodable address: %s', user.fb_uid, user.full_name, user.location)

    mr.increment('mailchimp-api-call')
    result = mailchimp_api.add_members(mailchimp_list_id, [member])
    if result['errors']:
        mr.increment('mailchimp-error-response')
        logging.error('Writing user %s to mailchimp returned %s on input: %s', user.fb_uid, result['errors'], member)
    else:
        logging.info('Writing user %s to mailchimp returned OK', user.fb_uid)
Esempio n. 11
0
 def track_lookup(cls):
     """Increment the 'fb-lookups-comments' counter via ``mr.increment``."""
     counter_name = 'fb-lookups-comments'
     mr.increment(counter_name)
Esempio n. 12
0
 def track_lookup(cls):
     """Increment the 'fb-lookups-user-events' counter by 3 via ``mr.increment``."""
     counter_name = 'fb-lookups-user-events'
     mr.increment(counter_name, 3)
Esempio n. 13
0
 def track_lookup(cls):
     """Increment the 'fb-lookups-user' counter via ``mr.increment``."""
     counter_name = 'fb-lookups-user'
     mr.increment(counter_name)
Esempio n. 14
0
 def track_lookup(cls):
     """Increment the 'fb-lookups-profile' counter via ``mr.increment``."""
     counter_name = 'fb-lookups-profile'
     mr.increment(counter_name)
Esempio n. 15
0
def classify_events(fbl, pe_list, fb_list):
    """Auto-classify potential events, adding or dismissing them where possible.

    ``pe_list`` and ``fb_list`` are consumed pairwise. An event accepted by
    the auto-add classifier is stored via ``add_entities.add_update_event``;
    an event rejected by the auto-not-add classifier is only flagged as
    looked at.

    Args:
        fbl: lookup object passed through to ``add_entities.add_update_event``.
        pe_list: potential-event entities, parallel to ``fb_list``.
        fb_list: fetched event dicts (or falsy values), parallel to ``pe_list``.

    Returns:
        A list of tab-separated report lines, prefixed '+' for added events
        and '-' for auto-rejected ones.
    """

    def _mark_handled(key_name):
        # Fetch the entity afresh by key, then flag it as auto-processed.
        entity = potential_events.PotentialEvent.get_by_key_name(key_name)
        entity.looked_at = True
        entity.auto_looked_at = True
        entity.put()

    def _tab_line(prefix, values):
        # Render values as one prefixed, tab-separated, newline-ended line.
        return prefix + '%s\n' % '\t'.join([unicode(value) for value in values])

    lines = []
    for candidate, fb_event in zip(pe_list, fb_list):
        # Empty fetched data is treated the same as no data at all.
        if fb_event and fb_event['empty']:
            fb_event = None

        # Save past events right away; the next run through this mapreduce
        # should then not need to process them.
        if candidate.set_past_event(fb_event):
            candidate.put()
        if not fb_event:
            continue

        # Skip anything already looked at, or not worth looking at. The
        # pre-filtering mapreduce never sends these, but scraping a user's
        # potential events and passing them all in does.
        if not (candidate.should_look_at and not candidate.looked_at):
            continue

        classified = event_classifier.classified_event_from_fb_event(fb_event)
        classified.classify()
        add_verdict = event_auto_classifier.is_auto_add_event(classified)
        if add_verdict[0]:
            logging.info("Found event %s, looking up location", candidate.fb_event_id)
            place = event_locations.LocationInfo(fb_event)
            added_line = _tab_line('+', (
                candidate.fb_event_id,
                place.exact_from_event,
                place.final_city,
                place.final_city is not None,
                place.fb_address,
                fb_event['info'].get('name', ''),
            ))
            try:
                add_entities.add_update_event(
                    fb_event, fbl, visible_to_fb_uids=candidate.get_invite_uids(), creating_method=eventdata.CM_AUTO
                )
                _mark_handled(candidate.fb_event_id)
                # TODO(lambert): handle un-add-able events differently
                lines.append(added_line)
                mr.increment('auto-added-dance-events')
            except fb_api.NoFetchedDataException as err:
                logging.error("Error adding event %s, no fetched data: %s", candidate.fb_event_id, err)
            except add_entities.AddEventException as err:
                logging.warning("Error adding event %s, no fetched data: %s", candidate.fb_event_id, err)

        # The not-add classifier always runs and receives the add verdict.
        reject_verdict = event_auto_classifier.is_auto_notadd_event(classified, auto_add_result=add_verdict)
        if not reject_verdict[0]:
            continue
        _mark_handled(candidate.fb_event_id)
        lines.append(_tab_line('-', (candidate.fb_event_id, fb_event['info'].get('name', ''))))
        mr.increment('auto-notadded-dance-events')
    return lines
Esempio n. 16
0
 def track_lookup(cls):
     """Increment the 'fb-lookups-event-rsvp' counter by 4 via ``mr.increment``."""
     counter_name = 'fb-lookups-event-rsvp'
     mr.increment(counter_name, 4)
Esempio n. 17
0
 def track_lookup(cls):
     """Increment the 'fb-lookups-search-events' counter via ``mr.increment``."""
     counter_name = 'fb-lookups-search-events'
     mr.increment(counter_name)