def get(self):
    """Kick off the map_maybe_delete_bad_event mapreduce over DBEvents.

    Request params:
        time_period: optional; restrict to events with that search_time_period.
        queue: task queue for the job (default 'fast-queue').
        allow_deletes: '1' to allow real deletion; anything else is a dry run.
    """
    queue = self.request.get('queue', 'fast-queue')
    time_period = self.request.get('time_period', None)
    if time_period:
        filters = [('search_time_period', '=', time_period)]
        name = 'Delete %s Bad Autoadds' % time_period
    else:
        filters = []
        name = 'Delete All Bad Autoadds'
    # Deletion is opt-in; only the exact string '1' enables it.
    allow_deletes = self.request.get('allow_deletes', None) == '1'
    fb_mapreduce.start_map(
        fbl=self.fbl,
        name=name,
        handler_spec='dancedeets.events.event_reloading_tasks.map_maybe_delete_bad_event',
        entity_kind='dancedeets.events.eventdata.DBEvent',
        filters=filters,
        extra_mapper_params={'allow_deletes': allow_deletes},
        queue=queue,
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
# Example #2
# 0
    def get(self):
        """Kick off the map_sitemap_event mapreduce over DBEvents.

        Request params:
            queue: task queue for the job (default 'fast-queue').
            time_period: optional search_time_period filter.
            vertical: optional verticals filter; also included in the job name.
        """
        queue = self.request.get('queue', 'fast-queue')
        time_period = self.request.get('time_period', None)
        vertical = self.request.get('vertical', None)

        filters = []
        vertical_string = ''
        if vertical:
            filters.append(('verticals', '=', vertical))
            vertical_string = '%s ' % vertical

        if time_period:
            filters.append(('search_time_period', '=', time_period))
            name = 'Generate %s %sSitemaps' % (time_period, vertical_string)
        else:
            name = 'Generate %sSitemaps' % vertical_string

        fb_mapreduce.start_map(
            fbl=self.fbl,
            name=name,
            handler_spec='dancedeets.sitemaps.events.map_sitemap_event',
            entity_kind='dancedeets.events.eventdata.DBEvent',
            handle_batch_size=20,
            filters=filters,
            queue=queue,
            output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
            output_writer={
                'mime_type': 'text/plain',
                'bucket_name': 'dancedeets-hrd.appspot.com',
            },
        )
def mapreduce_create_sources_from_events(fbl):
    """Kick off the map_create_sources_from_event mapreduce over all DBEvents."""
    # Keyword form of start_map(fbl, name, handler_spec, entity_kind),
    # matching the style used by the other kickoff helpers in this file.
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Create Sources from Events',
        handler_spec='dancedeets.event_scraper.thing_db.map_create_sources_from_event',
        entity_kind='dancedeets.events.eventdata.DBEvent',
    )
 def get(self):
     """Kick off the map_maybe_delete_bad_event mapreduce over DBEvents.

     Request params:
         time_period: optional; restrict to events with that search_time_period.
         queue: task queue for the job (default 'fast-queue').
         allow_deletes: '1' to allow real deletion; anything else is a dry run.
     """
     time_period = self.request.get('time_period', None)
     queue = self.request.get('queue', 'fast-queue')
     filters = []
     if time_period:
         filters.append(('search_time_period', '=', time_period))
         name = 'Delete %s Bad Autoadds' % time_period
     else:
         name = 'Delete All Bad Autoadds'
     # Deletion is opt-in; only the exact string '1' enables it.
     allow_deletes = self.request.get('allow_deletes', None) == '1'
     extra_mapper_params = {
         'allow_deletes': allow_deletes,
     }
     fb_mapreduce.start_map(
         fbl=self.fbl,
         name=name,
         handler_spec=
         'dancedeets.events.event_reloading_tasks.map_maybe_delete_bad_event',
         entity_kind='dancedeets.events.eventdata.DBEvent',
         filters=filters,
         extra_mapper_params=extra_mapper_params,
         queue=queue,
         output_writer_spec=
         'mapreduce.output_writers.GoogleCloudStorageOutputWriter',
         output_writer={
             'mime_type': 'text/plain',
             'bucket_name': 'dancedeets-hrd.appspot.com',
         },
     )
def mapreduce_create_sources_from_events(fbl):
    """Kick off the map_create_sources_from_event mapreduce over all DBEvents."""
    # Positional args: (fbl, name, handler_spec, entity_kind).
    fb_mapreduce.start_map(
        fbl,
        'Create Sources from Events',
        'dancedeets.event_scraper.thing_db.map_create_sources_from_event',
        'dancedeets.events.eventdata.DBEvent',
    )
def mr_load_potential_events(fbl):
    """Kick off map_load_potential_events over Users with valid OAuth tokens."""
    # Skip users whose OAuth tokens have expired — their data can't be fetched.
    valid_token_filter = [('expired_oauth_token', '=', False)]
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Load Potential Events For Users',
        handler_spec='dancedeets.event_scraper.potential_events_reloading.map_load_potential_events',
        entity_kind='dancedeets.users.users.User',
        filters=valid_token_filter,
    )
# Example #7
# 0
def mr_email_user(fbl):
    """Kick off the map_email_user mapreduce over all User entities."""
    # TODO: MOVE
    email_handler = 'dancedeets.search.email_events.map_email_user'
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Email Users',
        handler_spec=email_handler,
        entity_kind='dancedeets.users.users.User',
    )
def mr_email_user(fbl):
    """Kick off the map_email_user mapreduce over all User entities."""
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Email Users',
        #TODO: MOVE
        handler_spec='dancedeets.search.email_events.map_email_user',
        entity_kind='dancedeets.users.users.User',
    )
def mr_load_potential_events(fbl):
    """Kick off map_load_potential_events over Users with valid OAuth tokens."""
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Load Potential Events For Users',
        handler_spec=
        'dancedeets.event_scraper.potential_events_reloading.map_load_potential_events',
        entity_kind='dancedeets.users.users.User',
        # Skip users whose OAuth tokens have expired.
        filters=[('expired_oauth_token', '=', False)],
    )
def mr_private_events(fbl):
    """Run map_dump_private_events over all DBEvents, writing text to GCS."""
    gcs_output = {
        'mime_type': 'text/plain',
        'bucket_name': 'dancedeets-hrd.appspot.com',
    }
    fb_mapreduce.start_map(
        fbl,
        'Dump Private Events',
        'dancedeets.servlets.tools.map_dump_private_events',
        'dancedeets.events.eventdata.DBEvent',
        handle_batch_size=80,
        queue=None,
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer=gcs_output,
    )
def mr_generate_training_data(fbl):
    """Write ML training data from PotentialEvents to a text file in GCS."""
    gcs_output = {
        'mime_type': 'text/plain',
        'bucket_name': 'dancedeets-hrd.appspot.com',
    }
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Write Training Data',
        handler_spec='dancedeets.ml.gprediction.map_training_data_for_pevents',
        entity_kind='dancedeets.event_scraper.potential_events.PotentialEvent',
        handle_batch_size=20,
        queue=None,
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer=gcs_output,
    )
# Example #12
# 0
def mr_private_events(fbl):
    """Run map_dump_private_events over all DBEvents, writing text to GCS."""
    fb_mapreduce.start_map(
        fbl,
        'Dump Private Events',
        'dancedeets.servlets.tools.map_dump_private_events',
        'dancedeets.events.eventdata.DBEvent',
        handle_batch_size=80,
        # NOTE(review): queue=None — presumably start_map falls back to a
        # default queue; confirm against fb_mapreduce.start_map.
        queue=None,
        output_writer_spec=
        'mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def mr_dump_events(fbl):
    """Dump raw FB JSON for unreviewed PotentialEvents to GCS."""
    # Only dump events that nobody has looked at yet.
    unreviewed_only = [('looked_at', '=', None)]
    fb_mapreduce.start_map(
        fbl,
        'Dump Potential FB Event Data',
        'dancedeets.logic.mr_dump.map_dump_fb_json',
        'dancedeets.event_scraper.potential_events.PotentialEvent',
        handle_batch_size=80,
        queue=None,
        filters=unreviewed_only,
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def mr_generate_training_data(fbl):
    """Write ML training data from PotentialEvents to a text file in GCS."""
    fb_mapreduce.start_map(
        fbl=fbl,
        name='Write Training Data',
        handler_spec='dancedeets.ml.gprediction.map_training_data_for_pevents',
        output_writer_spec=
        'mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        handle_batch_size=20,
        entity_kind='dancedeets.event_scraper.potential_events.PotentialEvent',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
        queue=None,
    )
def mr_classify_potential_events(fbl):
    """Auto-classify unreviewed PotentialEvents, writing results to GCS."""
    # Only classify events that nobody has looked at yet.
    unreviewed_only = [('looked_at', '=', None)]
    gcs_output = {
        'mime_type': 'text/plain',
        'bucket_name': 'dancedeets-hrd.appspot.com',
    }
    fb_mapreduce.start_map(
        fbl,
        'Auto-Classify Events',
        'dancedeets.ml.mr_prediction.map_classify_events',
        'dancedeets.event_scraper.potential_events.PotentialEvent',
        filters=unreviewed_only,
        handle_batch_size=20,
        queue='slow-queue',
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer=gcs_output,
    )
def mapreduce_scrape_all_sources(fbl, min_potential_events=None, queue='slow-queue'):
    """Scrape events from every Source entity.

    min_potential_events is handed to the mapper via extra_mapper_params
    rather than used as a datastore filter (see the note below).
    """
    # Do not do the min_potential_events>1 filter in the mapreduce filter,
    # or it will want to do a range-shard on that property. Instead, pass-it-down
    # and use it as an early-return in the per-Source processing.
    # TODO:....maybe we do want a range-shard filter? save on loading all the useless sources...
    mapper_params = {'min_potential_events': min_potential_events}
    fb_mapreduce.start_map(
        fbl,
        'Scrape All Sources',
        'dancedeets.event_scraper.thing_scraper.map_scrape_events_from_sources',
        'dancedeets.event_scraper.thing_db.Source',
        handle_batch_size=10,
        extra_mapper_params=mapper_params,
        queue=queue,
        randomize_tokens=True,
    )
def mr_dump_events(fbl):
    """Dump raw FB JSON for unreviewed PotentialEvents to GCS."""
    fb_mapreduce.start_map(
        fbl,
        'Dump Potential FB Event Data',
        'dancedeets.logic.mr_dump.map_dump_fb_json',
        'dancedeets.event_scraper.potential_events.PotentialEvent',
        handle_batch_size=80,
        queue=None,
        # Only dump events that nobody has looked at yet.
        filters=[('looked_at', '=', None)],
        output_writer_spec=
        'mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
# Example #18
# 0
def mr_classify_potential_events(fbl):
    """Auto-classify unreviewed PotentialEvents, writing results to GCS."""
    fb_mapreduce.start_map(
        fbl,
        'Auto-Classify Events',
        'dancedeets.ml.mr_prediction.map_classify_events',
        'dancedeets.event_scraper.potential_events.PotentialEvent',
        # Only classify events that nobody has looked at yet.
        filters=[('looked_at', '=', None)],
        handle_batch_size=20,
        queue='slow-queue',
        output_writer_spec=
        'mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def mapreduce_scrape_all_sources(fbl,
                                 min_potential_events=None,
                                 queue='slow-queue'):
    """Scrape events from every Source entity.

    min_potential_events is handed to the mapper via extra_mapper_params
    rather than used as a datastore filter (see the note below).
    """
    # Do not do the min_potential_events>1 filter in the mapreduce filter,
    # or it will want to do a range-shard on that property. Instead, pass-it-down
    # and use it as an early-return in the per-Source processing.
    # TODO:....maybe we do want a range-shard filter? save on loading all the useless sources...
    fb_mapreduce.start_map(
        fbl,
        'Scrape All Sources',
        'dancedeets.event_scraper.thing_scraper.map_scrape_events_from_sources',
        'dancedeets.event_scraper.thing_db.Source',
        handle_batch_size=10,
        extra_mapper_params={'min_potential_events': min_potential_events},
        queue=queue,
        randomize_tokens=True,
    )
 def get(self):
     """Kick off the map_cleanup_verticals mapreduce over all DBEvents.

     Request params:
         queue: task queue for the job (default 'fast-queue').
         allow_deletes: '1' to allow deletions; anything else is a dry run.
     """
     queue = self.request.get('queue', 'fast-queue')
     # Deletion is opt-in; only the exact string '1' enables it.
     allow_deletes = self.request.get('allow_deletes', None) == '1'
     fb_mapreduce.start_map(
         fbl=self.fbl,
         name='Cleanup Verticals',
         handler_spec='dancedeets.events.event_reloading_tasks.map_cleanup_verticals',
         entity_kind='dancedeets.events.eventdata.DBEvent',
         extra_mapper_params={'allow_deletes': allow_deletes},
         queue=queue,
         output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
         output_writer={
             'mime_type': 'text/plain',
             'bucket_name': 'dancedeets-hrd.appspot.com',
         },
     )
 def get(self):
     """Kick off the map_load_fb_user mapreduce over User entities.

     Request params:
         all_users: '1' to include users with expired OAuth tokens.
     """
     all_users = self.request.get('all_users', '0') == '1'
     # Unless reloading everyone, skip users whose OAuth tokens have expired.
     filters = [] if all_users else [('expired_oauth_token', '=', False)]
     # this calls a map function wrapped by mr_user_wrap, so it works correctly on a per-user basis
     mailchimp_list_id = mailchimp_api.get_list_id()
     fb_mapreduce.start_map(
         fbl=self.fbl,
         name='Load %sUsers' % ('All ' if all_users else ''),
         handler_spec='dancedeets.users.user_tasks.map_load_fb_user',
         entity_kind='dancedeets.users.users.User',
         filters=filters,
         extra_mapper_params={'mailchimp_list_id': mailchimp_list_id},
         queue='fast-queue'
     )
 def get(self):
     """Kick off the map_cleanup_verticals mapreduce over all DBEvents.

     Request params:
         queue: task queue for the job (default 'fast-queue').
         allow_deletes: '1' to allow deletions; anything else is a dry run.
     """
     queue = self.request.get('queue', 'fast-queue')
     # Deletion is opt-in; only the exact string '1' enables it.
     allow_deletes = self.request.get('allow_deletes', None) == '1'
     extra_mapper_params = {
         'allow_deletes': allow_deletes,
     }
     fb_mapreduce.start_map(
         fbl=self.fbl,
         name='Cleanup Verticals',
         handler_spec=
         'dancedeets.events.event_reloading_tasks.map_cleanup_verticals',
         entity_kind='dancedeets.events.eventdata.DBEvent',
         extra_mapper_params=extra_mapper_params,
         queue=queue,
         output_writer_spec=
         'mapreduce.output_writers.GoogleCloudStorageOutputWriter',
         output_writer={
             'mime_type': 'text/plain',
             'bucket_name': 'dancedeets-hrd.appspot.com',
         },
     )
def mr_classify_potential_events(fbl, past_event, dancey_only):
    """Run the auto-add classifier over PotentialEvents, writing output to GCS.

    Args:
        fbl: FB lookup object passed through to the mapreduce.
        past_event: if not None, filter on the past_event property.
        dancey_only: if truthy, only consider events flagged should_look_at.
    """
    conditions = []
    if dancey_only:
        conditions.append(('should_look_at', '=', True))
    if past_event is not None:
        conditions.append(('past_event', '=', past_event))
    fb_mapreduce.start_map(
        fbl,
        'Auto-Add Events',
        'dancedeets.event_scraper.auto_add.map_classify_events',
        'dancedeets.event_scraper.potential_events.PotentialEvent',
        filters=conditions,
        # Small batches, so we don't process so many that tasks time out.
        handle_batch_size=10,
        queue='fast-queue',
        output_writer_spec='mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
# Example #24
# 0
def mr_classify_potential_events(fbl, past_event, dancey_only):
    """Run the auto-add classifier over PotentialEvents, writing output to GCS.

    Args:
        fbl: FB lookup object passed through to the mapreduce.
        past_event: if not None, filter on the past_event property.
        dancey_only: if truthy, only consider events flagged should_look_at.
    """
    filters = []
    if dancey_only:
        filters.append(('should_look_at', '=', True))
    if past_event is not None:
        filters.append(('past_event', '=', past_event))
    fb_mapreduce.start_map(
        fbl,
        'Auto-Add Events',
        'dancedeets.event_scraper.auto_add.map_classify_events',
        'dancedeets.event_scraper.potential_events.PotentialEvent',
        filters=filters,
        # Make sure we don't process so many that we cause the tasks to time out
        handle_batch_size=10,
        queue='fast-queue',
        output_writer_spec=
        'mapreduce.output_writers.GoogleCloudStorageOutputWriter',
        output_writer={
            'mime_type': 'text/plain',
            'bucket_name': 'dancedeets-hrd.appspot.com',
        },
    )
def mr_load_fb_events(
    fbl,
    display_event=False,
    load_attending=False,
    time_period=None,
    disable_updates=None,
    only_if_updated=True,
    queue='slow-queue',
    vertical=None
):
    """Kick off a mapreduce that reloads FB data for DBEvents.

    Args:
        fbl: FB lookup object passed through to the mapreduce.
        display_event: if truthy, resave display events instead of reloading.
        load_attending: NOTE(review) — accepted but never read in this body;
            confirm whether it should select an attending-loading handler.
        time_period: optional search_time_period filter.
        disable_updates: forwarded to the mapper via extra_mapper_params.
        only_if_updated: forwarded to the mapper via extra_mapper_params.
        queue: task queue for the job.
        vertical: optional verticals filter; also included in the job name.
    """
    # Pick the mapper function and the noun used in the job name.
    if display_event:
        event_or_attending = 'Display Events'
        mr_func = 'map_resave_display_event'
    else:
        event_or_attending = 'Events'
        mr_func = 'map_load_fb_event'
    filters = []
    if vertical:
        filters.append(('verticals', '=', vertical))
        event_or_attending = '%s %s' % (vertical, event_or_attending)
    if time_period:
        filters.append(('search_time_period', '=', time_period))
        name = 'Load %s %s' % (time_period, event_or_attending)
    else:
        name = 'Load All %s' % (event_or_attending)
    fb_mapreduce.start_map(
        fbl=fbl,
        name=name,
        handler_spec='dancedeets.events.event_reloading_tasks.%s' % mr_func,
        entity_kind='dancedeets.events.eventdata.DBEvent',
        handle_batch_size=10,
        filters=filters,
        extra_mapper_params={
            'disable_updates': disable_updates,
            'only_if_updated': only_if_updated
        },
        queue=queue,
    )
def mr_load_fb_events(fbl,
                      display_event=False,
                      load_attending=False,
                      time_period=None,
                      disable_updates=None,
                      only_if_updated=True,
                      queue='slow-queue',
                      vertical=None):
    """Kick off a mapreduce that reloads FB data for DBEvents.

    display_event selects the resave-display-event mapper; vertical and
    time_period narrow the datastore query and are reflected in the job name.
    disable_updates and only_if_updated are forwarded to the mapper.
    """
    # Pick the mapper function and the noun used in the job name.
    if display_event:
        mr_func = 'map_resave_display_event'
        event_or_attending = 'Display Events'
    else:
        mr_func = 'map_load_fb_event'
        event_or_attending = 'Events'

    filters = []
    if vertical:
        filters.append(('verticals', '=', vertical))
        event_or_attending = '%s %s' % (vertical, event_or_attending)

    if time_period:
        filters.append(('search_time_period', '=', time_period))
        name = 'Load %s %s' % (time_period, event_or_attending)
    else:
        name = 'Load All %s' % (event_or_attending)

    mapper_params = {
        'disable_updates': disable_updates,
        'only_if_updated': only_if_updated
    }
    fb_mapreduce.start_map(
        fbl=fbl,
        name=name,
        handler_spec='dancedeets.events.event_reloading_tasks.%s' % mr_func,
        entity_kind='dancedeets.events.eventdata.DBEvent',
        handle_batch_size=10,
        filters=filters,
        extra_mapper_params=mapper_params,
        queue=queue,
    )