Example #1
    def create_local_file(self, award_type, source, agency_code,
                          generate_since):
        """ Generate complete file from SQL query and S3 bucket deletion files, then zip it locally """
        logger.info('Generating CSV file with creations and modifications')

        # Create file paths and working directory
        timestamp = datetime.strftime(datetime.now(), '%Y%m%d%H%M%S%f')
        working_dir = '{}_{}_delta_gen_{}/'.format(settings.CSV_LOCAL_PATH,
                                                   agency_code, timestamp)
        if not os.path.exists(working_dir):
            os.mkdir(working_dir)
        source_name = '{}_{}_delta'.format(
            award_type, VALUE_MAPPINGS['transactions']['download_name'])
        source_path = os.path.join(working_dir, '{}.csv'.format(source_name))

        # Create a unique temporary file with the raw query
        raw_quoted_query = generate_raw_quoted_query(
            source.row_emitter(None))  # None requests all headers
        csv_query_annotated = self.apply_annotations_to_sql(
            raw_quoted_query, source.human_names)
        (temp_sql_file,
         temp_sql_file_path) = tempfile.mkstemp(prefix='bd_sql_', dir='/tmp')
        with open(temp_sql_file_path, 'w') as file:
            file.write('\\copy ({}) To STDOUT with CSV HEADER'.format(
                csv_query_annotated))

        # Generate the csv with \copy: feed the SQL file to psql on stdin and
        # write psql's query output (the CSV rows) to source_path
        with open(temp_sql_file_path) as sql_file:
            subprocess.check_output(
                ['psql', '-o', source_path, os.environ['DOWNLOAD_DATABASE_URL'],
                 '-v', 'ON_ERROR_STOP=1'],
                stdin=sql_file, stderr=subprocess.STDOUT)

        # Append deleted rows to the end of the file
        self.add_deletion_records(source_path, working_dir, award_type,
                                  agency_code, source, generate_since)
        if csv_row_count(source_path, has_header=True) > 0:
            # Split the CSV into multiple files and zip it up
            zipfile_path = '{}{}_{}_Delta_{}.zip'.format(
                settings.CSV_LOCAL_PATH, agency_code, award_type,
                datetime.strftime(date.today(), '%Y%m%d'))
            logger.info('Creating compressed file: {}'.format(
                os.path.basename(zipfile_path)))
            split_and_zip_csvs(zipfile_path, source_path, source_name)
        else:
            zipfile_path = None

        os.close(temp_sql_file)
        os.remove(temp_sql_file_path)
        shutil.rmtree(working_dir)

        return zipfile_path
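
Both examples build on the same pattern: write a psql \copy meta-command for the annotated query into a temporary file, then pipe that file into psql so the server streams the result set straight to a local CSV. A minimal, self-contained sketch of that pattern follows; the function name and parameters here are placeholders for illustration, not part of the project's API:

import os
import subprocess
import tempfile


def dump_query_to_csv(query, output_path, database_url):
    """Stream the results of `query` into `output_path` as CSV via \\copy."""
    (fd, sql_path) = tempfile.mkstemp(prefix='bd_sql_', dir='/tmp')
    try:
        with open(sql_path, 'w') as f:
            f.write('\\copy ({}) To STDOUT with CSV HEADER'.format(query))
        # -o redirects psql's query output (the CSV rows) to output_path;
        # ON_ERROR_STOP=1 makes psql exit non-zero if the SQL fails.
        with open(sql_path) as f:
            subprocess.check_output(
                ['psql', '-o', output_path, database_url,
                 '-v', 'ON_ERROR_STOP=1'],
                stdin=f, stderr=subprocess.STDOUT)
    finally:
        os.close(fd)
        os.remove(sql_path)

For example, dump_query_to_csv('SELECT 1 AS answer', '/tmp/out.csv', os.environ['DOWNLOAD_DATABASE_URL']) would write a two-line CSV: the header row and one data row.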
Example #2
def generate_temp_query_file(source_query, limit, source, download_job, columns):
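    # Slicing the queryset applies a SQL LIMIT before the query is rendered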
    if limit:
        source_query = source_query[:limit]
    csv_query_annotated = apply_annotations_to_sql(generate_raw_quoted_query(source_query), source.columns(columns))

    write_to_log(message='Creating PSQL Query: {}'.format(csv_query_annotated), download_job=download_job,
                 is_debug=True)

    # Create a unique temporary file to hold the raw query, using \copy
    (temp_sql_file, temp_sql_file_path) = tempfile.mkstemp(prefix='bd_sql_', dir='/tmp')
    with open(temp_sql_file_path, 'w') as file:
        file.write('\\copy ({}) To STDOUT with CSV HEADER'.format(csv_query_annotated))

    return temp_sql_file, temp_sql_file_path
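
Both snippets shell out to psql for the actual export. For comparison, the same server-side streaming can be done in-process with psycopg2's copy_expert; this is a sketch under the assumption that psycopg2 is available, not how these examples do it:

import psycopg2


def dump_query_with_copy_expert(query, output_path, dsn):
    conn = psycopg2.connect(dsn)
    try:
        with conn.cursor() as cur, open(output_path, 'w') as out:
            # COPY ... TO STDOUT streams rows from the server; copy_expert
            # writes them directly into the open file object.
            cur.copy_expert('COPY ({}) TO STDOUT WITH CSV HEADER'.format(query), out)
    finally:
        conn.close()

The subprocess approach in the examples keeps the export outside the Python process, while copy_expert keeps everything in one process and one connection; both stream row by row rather than materializing the result set in memory.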