Example #1
    @classmethod
    def run(cls, filename, dry_run, **kwargs):
        super(MongoTask, cls).run(filename, dry_run, **kwargs)

        query = clean_command(cls.QUERY).format(**kwargs)

        log.debug(query)

        cmd = clean_command(cls.CMD)
        cmd = cmd.format(filename=filename, query=query, **kwargs)

        if dry_run:
            print('MONGO: {0}'.format(query))
        else:
            execute_shell(cmd, **kwargs)
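
The clean_command helper is not shown in these excerpts. A minimal sketch, assuming it only collapses a multi-line class-level template into a single shell line (the name matches the calls above, the body is an assumption):

import re

def clean_command(template):
    # Assumed behavior: collapse newlines and runs of whitespace so a
    # multi-line CMD/QUERY template becomes a single shell command line.
    return re.sub(r'\s+', ' ', template).strip()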
Example #2
def upload_data(config, filepath):
    bucket = config['output_bucket']
    prefix = config['output_prefix'] or ''
    name = os.path.basename(filepath)
    target = 's3://{bucket}/{prefix}{name}'.format(bucket=bucket,
                                                   prefix=prefix,
                                                   name=name)

    log.info('Uploading file %s to %s', filepath, target)

    cmd = 'aws s3 cp --acl bucket-owner-full-control {filepath} {target}'
    cmd = cmd.format(filepath=filepath, target=target)

    if not config['dry_run']:
        local_kwargs = {'max_tries': MAX_TRIES_FOR_DATA_UPLOAD}
        execute_shell(cmd, **local_kwargs)
    else:
        log.info('cmd: %s', cmd)

    return target
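
Every example delegates to execute_shell, sometimes with a max_tries keyword (MAX_TRIES_FOR_DATA_UPLOAD above). That helper is not part of the excerpts; a minimal sketch, assuming subprocess-based execution with simple retries, could look like this:

import logging
import subprocess
import time

log = logging.getLogger(__name__)

def execute_shell(cmd, max_tries=1, **kwargs):
    # Assumed behavior: run cmd through the shell, retrying failed runs
    # up to max_tries times before letting CalledProcessError propagate.
    # Extra kwargs are accepted and ignored here, since the callers above
    # pass their whole configuration through.
    for attempt in range(1, max_tries + 1):
        try:
            subprocess.check_call(cmd, shell=True)
            return
        except subprocess.CalledProcessError:
            log.warning('Attempt %d/%d failed: %s', attempt, max_tries, cmd)
            if attempt == max_tries:
                raise
            time.sleep(2 ** attempt)  # simple exponential backoff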
Example #3
    @classmethod
    def run(cls, filename, dry_run, **kwargs):
        super(DjangoAdminTask, cls).run(filename, dry_run, **kwargs)

        command = cls.COMMAND
        arguments = cls.ARGS.format(filename=filename, **kwargs)
        output = cls.OUT.format(filename=filename, **kwargs)
        variables = cls.VARS.format(**kwargs)

        # Optionally default the target database for the admin command,
        # e.g. by passing --database={django_database}:
        # if 'django_database' not in kwargs:
        #     kwargs['django_database'] = 'default'

        cmd = clean_command(cls.CMD)
        cmd = cmd.format(command=command,
                         output=output,
                         arguments=arguments,
                         variables=variables,
                         **kwargs)

        log.info("Running django command %s.", cmd)
        if not dry_run:
            execute_shell(cmd, **kwargs)
        return cmd
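
The run method only consumes class-level templates, so a subclass is all that is needed to define a concrete task. A hypothetical subclass (every attribute value below is an illustrative placeholder, not taken from the source):

class DumpAuthUsersTask(DjangoAdminTask):
    # Illustrative placeholders showing how the templates compose.
    COMMAND = 'dumpdata'
    ARGS = 'auth.User'
    OUT = '{filename}'
    VARS = 'DJANGO_SETTINGS_MODULE={django_settings}'
    CMD = '{variables} django-admin.py {command} {arguments} > {output}'

# A dry run formats and logs the command without executing it:
# DumpAuthUsersTask.run('users.json', dry_run=True,
#                       django_settings='prod.settings')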
Example #4
    @classmethod
    def run(cls, filename, dry_run, **kwargs):
        super(CopyS3FileTask, cls).run(filename, dry_run, **kwargs)

        if not distutils.spawn.find_executable("aws"):
            raise FatalTaskError("The {0} task requires the awscli".format(
                cls.__name__))

        file_basename = os.path.basename(filename)
        s3_source_filename = '{prefix}/{env}/{filename}'.format(
            prefix=kwargs['external_prefix'],
            env=kwargs['environment'],
            filename=file_basename)
        s3_marker_filename = '{prefix}/{env}/{filename}'.format(
            prefix=kwargs['external_prefix'],
            env=kwargs['environment'],
            filename='job_success/_SUCCESS')

        if dry_run:
            print('Copy S3 File: {0} to {1}'.format(s3_source_filename,
                                                    filename))
        else:
            # First check to see that the export data was successfully generated
            # by looking for a marker file for that run. Return a more severe failure,
            # so that the overall environment dump fails, rather than just the particular
            # file being copied.
            head_command = "aws s3api head-object --bucket {bucket} --key {key}"

            marker_command = head_command.format(
                bucket=kwargs['pipeline_bucket'], key=s3_marker_filename)

            source_command = head_command.format(
                bucket=kwargs['pipeline_bucket'], key=s3_source_filename)

            # This code block is commented out because, the first time we ran
            # it, there was no data from a previous run, so it always raised a
            # FatalTaskError, which we don't want.

            # try:
            #     log.info("Running command with retries: %s.", marker_command)
            #     # Define retries here, to recover from temporary outages when calling S3 to find files.
            #     local_kwargs = dict(**kwargs)
            #     local_kwargs['max_tries'] = MAX_TRIES_FOR_MARKER_FILE_CHECK
            #     execute_shell(marker_command, **local_kwargs)
            # except subprocess.CalledProcessError:
            #     error_message = 'Unable to find success marker for export {0}'.format(s3_marker_filename)
            #     log.error(error_message)
            #     raise FatalTaskError(error_message)

            # Then check that the source file exists.  It's okay if it doesn't,
            # as that will happen when a particular database table is empty.
            try:
                log.info("Running command %s.", source_command)
                execute_shell(source_command, **kwargs)
            except subprocess.CalledProcessError:
                log.info('Unable to find %s to copy.', s3_source_filename)
            else:
                try:
                    cmd = 'aws s3 cp s3://{bucket}/{src} {dest}'.format(
                        bucket=kwargs['pipeline_bucket'],
                        src=s3_source_filename,
                        dest=filename)
                    # Define retries here, to recover from temporary outages when calling S3 to copy files.
                    local_kwargs = dict(**kwargs)
                    local_kwargs['max_tries'] = MAX_TRIES_FOR_COPY_FILE_FROM_S3
                    execute_shell(cmd, **local_kwargs)
                except subprocess.CalledProcessError:
                    log.error('Unable to copy %s to %s', s3_source_filename,
                              filename)
                    raise
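
The keyword arguments this task requires can be read off the body above. A hypothetical dry-run invocation (all names are placeholders):

CopyS3FileTask.run(
    '/tmp/auth_user.sql',
    dry_run=True,  # only prints the copy that would be performed
    external_prefix='exports',
    environment='prod',
    pipeline_bucket='analytics-pipeline',
)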