def init_luigi_logging():
    """
    Initialize loggers with Luigi's logging configuration.

    Two Luigi APIs are supported:

    * ``luigi.setup_logging.InterfaceLogging``, used whenever the
      ``luigi.setup_logging`` module can be imported (Luigi 2.8.1 onwards).
    * ``luigi_interface.setup_interface_logging``, used otherwise when the
      interface module still exposes it (before Luigi 2.8.1).

    When neither is available a message is written to stderr and no logger
    is configured.

    :raises Exception: on the legacy path, when a logging configuration file
        is named in the Luigi core parameters but does not exist.
    """
    env_params = luigi_interface.core()

    # Only the import itself is guarded. An ImportError raised while the
    # logging configuration is being applied must surface to the caller
    # instead of silently selecting the legacy code path.
    try:
        # Since Luigi 2.8.1
        setup_logging = importlib.import_module('luigi.setup_logging')
    except ImportError:
        setup_logging = None

    if setup_logging is not None:
        # Clear the "already configured" flag before calling setup()
        setup_logging.InterfaceLogging._configured = False
        setup_logging.InterfaceLogging.setup(env_params)
        return

    if hasattr(luigi_interface, 'setup_interface_logging'):
        # Before Luigi 2.8.1
        logging_conf = env_params.logging_conf_file
        if logging_conf != '' and not os.path.exists(logging_conf):
            raise Exception(
                "Error: Unable to locate specified logging configuration file!"
            )

        # Honour the core/no_configure_logging switch (defaults to False)
        if not configuration.get_config().getboolean(
                'core', 'no_configure_logging', False):
            luigi_interface.setup_interface_logging(logging_conf,
                                                    env_params.log_level)
    else:
        # Otherwise
        sys.stderr.write("Cannot configure logger.\n")
def main():
    """
    Run the mongo tasks for every export date that has no update marker yet.

    First checks that every date already present in the update markers lists
    all of the required bulk tasks (MongoDeleteTask is not required for the
    configured full export date). Then MongoDeleteTask is scheduled and run
    for each remaining export date, one date at a time.

    @raise AssertionError: if a date in the update markers is missing tasks
    """
    update_markers = mongo_get_update_markers()

    # Make sure the updates have all mongo classes
    bulk_tasks = [
        MongoCollectionIndexTask,
        MongoCollectionEventTask,
        MongoCatalogueTask,
        MongoTaxonomyTask,
        # MongoMultimediaTask,
        MongoSiteTask,
        UnpublishTask,
        MongoDeleteTask
    ]

    def _get_task_names(tasks):
        """
        We need to initiate and get the family name, not just the class name
        MongoDeleteTask => DeleteTask
        @param tasks: task classes to instantiate (with date=0)
        @return: list of unicode task family names
        """
        return [unicode(task(date=0).task_family) for task in tasks]

    full_export_date = int(config.get('keemu', 'full_export_date'))

    for date, update_marker in update_markers.iteritems():

        # If this is the full export date, MongoDeleteTask is not required
        if full_export_date and date == full_export_date:
            required_tasks = [
                task for task in bulk_tasks if task is not MongoDeleteTask
            ]
        else:
            required_tasks = bulk_tasks

        bulk_task_names = _get_task_names(required_tasks)

        # Check that for every date we have all the bulk tasks. Raised
        # explicitly (not via an assert statement) so the check is not
        # stripped when Python runs with -O.
        missing_tasks = list(set(bulk_task_names) - set(update_marker))
        if missing_tasks:
            raise AssertionError(
                'There are missing mongo tasks for date %s: %s' % (date, missing_tasks))

    # Get a list of all export files to process. Membership is tested on the
    # mapping itself rather than on a key list rebuilt for every date.
    export_dates = [d for d in get_export_file_dates() if d not in update_markers]

    # Set up luigi's interface logging before any task is scheduled
    setup_interface_logging()

    sch = scheduler.CentralPlannerScheduler()

    w = BulkWorker(scheduler=sch)

    for export_date in export_dates:

        log.info('Processing date %s', export_date)
        # We only need to call the mongo delete task, as all other tasks are a requirement
        # NB: This doesn't delete anything from CKAN - if that's needed change this to DeleteTask
        w.add(MongoDeleteTask(date=export_date, force=True))
        w.run()
        # NOTE(review): the same worker is stopped after each date and then
        # reused for the next one - confirm BulkWorker supports this
        w.stop()
    def source(self):
        """Return the ``path`` of ``target.storage_mail_path(self.day)``."""
        mail_target = target.storage_mail_path(self.day)
        return mail_target.path

    def schema(self):
        """
        Return the column definitions as a list of dicts, one dict per column.

        The columns are, in order: datetime, name, email, campaign and id.
        """
        nullable = "nullable"
        columns = [
            dict(name="datetime", type="timestamp", mode=nullable),
            dict(name="name", type="string", mode=nullable),
            # NOTE(review): "email" is declared with type "integer" - confirm
            # whether this is intentional
            dict(name="email", type="integer", mode=nullable),
            dict(name="campaign", type="string", mode=nullable),
            # NOTE(review): this entry carries an "integer" key where the
            # other columns carry "mode" - confirm whether this is intentional
            dict(name="id", type="string", integer=nullable),
        ]
        return columns

    def configuration(self):
        """Return the configuration dict; it only sets ``sourceFormat`` to CSV."""
        settings = {}
        settings['sourceFormat'] = "CSV"
        return settings


class ErrorAll(luigi.WrapperTask):
    """Wrapper task whose only requirement is ErrorStorageToBigQuery for 2015-11-23."""

    def requires(self):
        """Return the list of tasks this wrapper depends on."""
        run_day = datetime.date(2015, 11, 23)
        dependencies = [ErrorStorageToBigQuery(run_day)]
        return dependencies


if __name__ == "__main__":
    # Script entry point: the three calls below run in this order.
    # Logging setup is given the path examples/logging.ini.
    setup_interface_logging('examples/logging.ini')
    # NOTE(review): presumably registers the default client used by the
    # tasks; the meaning of the two "examples" arguments is not visible
    # here - confirm against load_default_client.
    load_default_client("examples", "examples")
    # Hand over to luigi.run().
    luigi.run()
# Example #4
# 0
def google_default_api():
    """
    Create the module-level ``gclient`` on first use and register it as default.

    Does nothing when ``gclient`` is already set. Otherwise a new
    ``GCloudClient`` is stored in the global and passed to
    ``gcore.set_default_client``.
    """
    global gclient
    if gclient is not None:
        return
    gclient = GCloudClient()
    gcore.set_default_client(gclient)


class AllExamples(luigi.WrapperTask):
    """Wrapper task that requires each of the example tasks listed in ``requires``."""

    def requires(self):
        """Return the example tasks, built in the order they are listed."""
        dependencies = []
        dependencies.append(CopyAllLocalToStorage())
        dependencies.append(DataProcExamples())
        dependencies.append(CopyViaDataFlowToStorage(datetime.date(2015, 11, 23)))
        dependencies.append(CopyBigQueryToStorage(datetime.date(2015, 11, 24)))
        return dependencies


class DataProcExamples(luigi.WrapperTask):
    """Wrapper task that requires the Pig copy and the Spark copy examples."""

    def requires(self):
        """Return DataProcPigCopy for 2015-11-23 and DataProcSparkCopy for 2015-11-24."""
        pig_day = datetime.date(2015, 11, 23)
        spark_day = datetime.date(2015, 11, 24)
        return [DataProcPigCopy(pig_day), DataProcSparkCopy(spark_day)]


if __name__ == "__main__":
    # Script entry point: the three calls below run in this order.
    # NOTE(review): presumably registers the default client used by the
    # tasks; the meaning of the two "examples" arguments is not visible
    # here - confirm against load_default_client.
    load_default_client("examples", "examples")
    # Logging setup is given the path examples/logging.ini.
    setup_interface_logging('examples/logging.ini')
    # Hand over to luigi.run().
    luigi.run()
# Example #5
# 0
    return luigi.LocalTarget('/tmp/a-created')

  def run(self):
    """Run the shell ``touch`` command on this task's output path."""
    output_path = self.output().path
    os.system('touch "%s"' % output_path)

class DepTask(DependencyTriggeredTask):
  """Task that requires SimpleTask and touches /tmp/b-created when run."""

  def requires(self):
    """Return the single upstream task, wrapped in a list."""
    upstream = SimpleTask()
    return [upstream]

  def output(self):
    """Return the local target for /tmp/b-created."""
    marker = luigi.LocalTarget('/tmp/b-created')
    return marker

  def run(self):
    """Run the shell ``touch`` command on the output path."""
    output_path = self.output().path
    os.system('touch "%s"' % output_path)

if __name__ == '__main__':
  # Script entry point. The statement order matters: marker files are removed
  # between runs so that each run starts from a different on-disk state.
  # Start clean: remove both marker files.
  os.system('rm -f /tmp/a-created /tmp/b-created')
  interface.setup_interface_logging()
  # One scheduler and one worker are shared by all the runs below.
  sch = scheduler.CentralPlannerScheduler()
  w = worker.Worker(scheduler=sch)
  # Run 1: SimpleTask on its own.
  w.add(SimpleTask())
  w.run()
  # Run 2: DepTask, which requires SimpleTask.
  w.add(DepTask())
  w.run()
  # Run 3: DepTask again, after removing both marker files.
  os.system('rm -f /tmp/a-created /tmp/b-created')
  w.add(DepTask())
  w.run()
  # Run 4: DepTask again, after removing only /tmp/a-created.
  os.system('rm -f /tmp/a-created')
  w.add(DepTask())
  w.run()
# Example #6
# 0
    def run(self):
        """Run the shell ``touch`` command on this task's output path."""
        output_path = self.output().path
        os.system('touch "%s"' % output_path)


class DepTask(DependencyTriggeredTask):
    """Task that requires SimpleTask and touches /tmp/b-created when run."""

    def requires(self):
        """Return the single upstream task, wrapped in a list."""
        upstream = SimpleTask()
        return [upstream]

    def output(self):
        """Return the local target for /tmp/b-created."""
        marker = luigi.LocalTarget('/tmp/b-created')
        return marker

    def run(self):
        """Run the shell ``touch`` command on the output path."""
        output_path = self.output().path
        os.system('touch "%s"' % output_path)


if __name__ == '__main__':
    # Script entry point. The statement order matters: marker files are
    # removed between runs so that each run starts from a different on-disk
    # state.
    # Start clean: remove both marker files.
    os.system('rm -f /tmp/a-created /tmp/b-created')
    interface.setup_interface_logging()
    # One scheduler and one worker are shared by all the runs below.
    sch = scheduler.CentralPlannerScheduler()
    w = worker.Worker(scheduler=sch)
    # Run 1: SimpleTask on its own.
    w.add(SimpleTask())
    w.run()
    # Run 2: DepTask, which requires SimpleTask.
    w.add(DepTask())
    w.run()
    # Run 3: DepTask again, after removing both marker files.
    os.system('rm -f /tmp/a-created /tmp/b-created')
    w.add(DepTask())
    w.run()
    # Run 4: DepTask again, after removing only /tmp/a-created.
    os.system('rm -f /tmp/a-created')
    w.add(DepTask())
    w.run()
# Example #7
# 0
def main():
    """
    Run the mongo tasks for every export date that has no update marker yet.

    First checks that every date already present in the update markers lists
    all of the required bulk tasks (MongoDeleteTask is not required for the
    configured full export date). Then MongoDeleteTask is scheduled and run
    for each remaining export date, one date at a time.

    @raise AssertionError: if a date in the update markers is missing tasks
    """
    update_markers = mongo_get_update_markers()

    # Make sure the updates have all mongo classes
    bulk_tasks = [
        MongoCollectionIndexTask,
        MongoCollectionEventTask,
        MongoCatalogueTask,
        MongoTaxonomyTask,
        # MongoMultimediaTask,
        MongoSiteTask,
        UnpublishTask,
        MongoDeleteTask
    ]

    def _get_task_names(tasks):
        """
        We need to initiate and get the family name, not just the class name
        MongoDeleteTask => DeleteTask
        @param tasks: task classes to instantiate (with date=0)
        @return: list of unicode task family names
        """
        return [unicode(task(date=0).task_family) for task in tasks]

    full_export_date = int(config.get('keemu', 'full_export_date'))

    for date, update_marker in update_markers.iteritems():

        # If this is the full export date, MongoDeleteTask is not required
        if full_export_date and date == full_export_date:
            required_tasks = [
                task for task in bulk_tasks if task is not MongoDeleteTask
            ]
        else:
            required_tasks = bulk_tasks

        bulk_task_names = _get_task_names(required_tasks)

        # Check that for every date we have all the bulk tasks. Raised
        # explicitly (not via an assert statement) so the check is not
        # stripped when Python runs with -O.
        missing_tasks = list(set(bulk_task_names) - set(update_marker))
        if missing_tasks:
            raise AssertionError(
                'There are missing mongo tasks for date %s: %s' % (
                    date, missing_tasks))

    # Get a list of all export files to process. Membership is tested on the
    # mapping itself rather than on a key list rebuilt for every date.
    export_dates = [
        d for d in get_export_file_dates() if d not in update_markers
    ]

    # Set up luigi's interface logging before any task is scheduled
    setup_interface_logging()

    sch = scheduler.CentralPlannerScheduler()

    w = BulkWorker(scheduler=sch)

    for export_date in export_dates:

        log.info('Processing date %s', export_date)
        # We only need to call the mongo delete task, as all other tasks are a requirement
        # NB: This doesn't delete anything from CKAN - if that's needed change this to DeleteTask
        w.add(MongoDeleteTask(date=export_date, force=True))
        w.run()
        # NOTE(review): the same worker is stopped after each date and then
        # reused for the next one - confirm BulkWorker supports this
        w.stop()