@classmethod
def run(cls, filename, dry_run, **kwargs):
    super(MongoTask, cls).run(filename, dry_run, **kwargs)

    query = clean_command(cls.QUERY).format(**kwargs)
    log.debug(query)

    cmd = clean_command(cls.CMD)
    cmd = cmd.format(filename=filename, query=query, **kwargs)

    if dry_run:
        print('MONGO: {0}'.format(query))
    else:
        execute_shell(cmd, **kwargs)
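# A minimal sketch (not from this module) of what a concrete MongoTask
# subclass might look like, showing how the QUERY and CMD templates consumed
# by run() above could be filled in. The class name, template values, and
# kwargs keys ({course}, {db_host}, {db_name}) are all hypothetical.
class CourseCommentsTask(MongoTask):
    # {course} is interpolated from kwargs; doubled braces are literal JSON braces.
    QUERY = 'db.contents.find({{"course_id": "{course}"}})'
    # {query} and {filename} are supplied by run(); {db_host} and {db_name} come from kwargs.
    CMD = "mongo {db_host}/{db_name} --eval '{query}' > {filename}"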
def upload_data(config, filepath):
    bucket = config['output_bucket']
    prefix = config['output_prefix'] or ''
    name = os.path.basename(filepath)
    target = 's3://{bucket}/{prefix}{name}'.format(bucket=bucket, prefix=prefix, name=name)
    log.info('Uploading file %s to %s', filepath, target)

    cmd = 'aws s3 cp --acl bucket-owner-full-control {filepath} {target}'
    cmd = cmd.format(filepath=filepath, target=target)

    if not config['dry_run']:
        local_kwargs = {'max_tries': MAX_TRIES_FOR_DATA_UPLOAD}
        execute_shell(cmd, **local_kwargs)
    else:
        log.info('cmd: %s', cmd)

    return target
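# Example call with hypothetical values, covering only the config keys that
# upload_data() actually reads; the bucket name and file path are illustrative.
#
#     config = {
#         'output_bucket': 'example-exports-bucket',
#         'output_prefix': 'weekly/',
#         'dry_run': True,
#     }
#     upload_data(config, '/tmp/example-dump.sql')
#     # -> 's3://example-exports-bucket/weekly/example-dump.sql'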
@classmethod
def run(cls, filename, dry_run, **kwargs):
    super(DjangoAdminTask, cls).run(filename, dry_run, **kwargs)

    command = cls.COMMAND
    arguments = cls.ARGS.format(filename=filename, **kwargs)
    output = cls.OUT.format(filename=filename, **kwargs)
    variables = cls.VARS.format(**kwargs)

    # --database={django_database}
    # if 'django_database' not in kwargs:
    #     kwargs['django_database'] = 'default'

    cmd = clean_command(cls.CMD)
    cmd = cmd.format(command=command, output=output, arguments=arguments,
                     variables=variables, **kwargs)

    log.info("Running django command %s.", cmd)
    if not dry_run:
        execute_shell(cmd, **kwargs)

    return cmd
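# Hypothetical subclass sketch, showing the class attributes that run() above
# interpolates into CMD. None of these names or template values come from this
# module; they only illustrate the expected shape.
class AuthUserDumpTask(DjangoAdminTask):
    COMMAND = 'dumpdata'
    ARGS = 'auth.User --indent 2'
    OUT = '> {filename}'
    VARS = 'DJANGO_SETTINGS_MODULE={django_settings}'
    CMD = '{variables} django-admin.py {command} {arguments} {output}'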
@classmethod
def run(cls, filename, dry_run, **kwargs):
    super(CopyS3FileTask, cls).run(filename, dry_run, **kwargs)

    if not distutils.spawn.find_executable("aws"):
        raise FatalTaskError("The {0} task requires the awscli".format(cls.__name__))

    file_basename = os.path.basename(filename)
    s3_source_filename = '{prefix}/{env}/{filename}'.format(
        prefix=kwargs['external_prefix'],
        env=kwargs['environment'],
        filename=file_basename,
    )
    s3_marker_filename = '{prefix}/{env}/{filename}'.format(
        prefix=kwargs['external_prefix'],
        env=kwargs['environment'],
        filename='job_success/_SUCCESS',
    )

    if dry_run:
        print('Copy S3 File: {0} to {1}'.format(s3_source_filename, filename))
    else:
        # First check that the export data was successfully generated by looking
        # for a marker file for that run. Raise a more severe failure, so that
        # the overall environment dump fails rather than just the particular
        # file being copied.
        head_command = "aws s3api head-object --bucket {bucket} --key {key}"
        marker_command = head_command.format(bucket=kwargs['pipeline_bucket'], key=s3_marker_filename)
        source_command = head_command.format(bucket=kwargs['pipeline_bucket'], key=s3_source_filename)

        # This block is commented out because, on the first run, there is no
        # data from a previous run, so it always raised a FatalTaskError,
        # which we don't want.
        # try:
        #     log.info("Running command with retries: %s.", marker_command)
        #     # Define retries here, to recover from temporary outages when calling S3 to find files.
        #     local_kwargs = dict(**kwargs)
        #     local_kwargs['max_tries'] = MAX_TRIES_FOR_MARKER_FILE_CHECK
        #     execute_shell(marker_command, **local_kwargs)
        # except subprocess.CalledProcessError:
        #     error_message = 'Unable to find success marker for export {0}'.format(s3_marker_filename)
        #     log.error(error_message)
        #     raise FatalTaskError(error_message)

        # Then check that the source file exists. It's okay if it doesn't,
        # as that will happen when a particular database table is empty.
        try:
            log.info("Running command %s.", source_command)
            execute_shell(source_command, **kwargs)
        except subprocess.CalledProcessError:
            log.info('Unable to find %s to copy.', s3_source_filename)
        else:
            try:
                cmd = 'aws s3 cp s3://{bucket}/{src} {dest}'.format(
                    bucket=kwargs['pipeline_bucket'],
                    src=s3_source_filename,
                    dest=filename,
                )
                # Define retries here, to recover from temporary outages when calling S3 to copy files.
                local_kwargs = dict(**kwargs)
                local_kwargs['max_tries'] = MAX_TRIES_FOR_COPY_FILE_FROM_S3
                execute_shell(cmd, **local_kwargs)
            except subprocess.CalledProcessError:
                log.error('Unable to copy %s to %s', s3_source_filename, filename)
                raise
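# Example invocation with hypothetical values, covering the kwargs that
# run() above reads (external_prefix, environment, pipeline_bucket); the
# bucket, prefix, and file names are illustrative only.
#
#     CopyS3FileTask.run(
#         '/tmp/courseware_studentmodule.sql',
#         dry_run=True,
#         external_prefix='databases',
#         environment='prod',
#         pipeline_bucket='example-pipeline-bucket',
#     )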