Example #1
def generate_export_query(source_query, limit, source, columns, file_format):
    if limit:
        source_query = source_query[:limit]
    query_annotated = apply_annotations_to_sql(
        generate_raw_quoted_query(source_query), source.columns(columns))
    options = FILE_FORMATS[file_format]["options"]
    return r"\COPY ({}) TO STDOUT {}".format(query_annotated, options)
    def create_local_file(self, award_type, source, agency_code, generate_since):
        """ Generate complete file from SQL query and S3 bucket deletion files, then zip it locally """
        logger.info("Generating CSV file with creations and modifications")

        # Create file paths and working directory
        timestamp = datetime.strftime(datetime.now(), "%Y%m%d%H%M%S%f")
        working_dir = f"{settings.CSV_LOCAL_PATH}_{agency_code}_delta_gen_{timestamp}/"
        if not os.path.exists(working_dir):
            os.mkdir(working_dir)
        agency_str = "All" if agency_code == "all" else agency_code
        source_name = f"FY(All)_{agency_str}_{award_type}_Delta_{datetime.strftime(date.today(), '%Y%m%d')}"
        source_path = os.path.join(working_dir, "{}.csv".format(source_name))

        # Create a unique temporary file with the raw query
        raw_quoted_query = generate_raw_quoted_query(source.row_emitter(None))  # None requests all headers

        # The raw query is a union of two other queries, each in parentheses. To do replacement we need to split out
        # each query, apply annotations to each of those, then recombine in a UNION
        csv_query_annotated = (
            "("
            + apply_annotations_to_sql(_top_level_split(raw_quoted_query, "UNION")[0].strip()[1:-1], source.human_names)
            + ") UNION ("
            + apply_annotations_to_sql(_top_level_split(raw_quoted_query, "UNION")[1].strip()[1:-1], source.human_names)
            + ")"
        )
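        # For illustration (the query text below is hypothetical): _top_level_split is assumed
        # to split only on the UNION at parenthesis depth zero, so a raw query of the form
        #   "(SELECT ... FROM table_a) UNION (SELECT ... FROM table_b)"
        # yields the two parenthesized halves, whose outer parentheses are trimmed with [1:-1]
        # before each half is annotated.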

        (temp_sql_file, temp_sql_file_path) = tempfile.mkstemp(prefix="bd_sql_", dir="/tmp")
        with open(temp_sql_file_path, "w") as file:
            file.write("\\copy ({}) To STDOUT with CSV HEADER".format(csv_query_annotated))

        logger.info("Generated temp SQL file {}".format(temp_sql_file_path))
        # Generate the csv with \copy
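        # The temp SQL file is streamed to psql on stdin; "-o" writes the query output to
        # source_path, and "-v ON_ERROR_STOP=1" makes psql exit non-zero on a SQL error,
        # which surfaces below as CalledProcessError.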
        cat_command = subprocess.Popen(["cat", temp_sql_file_path], stdout=subprocess.PIPE)
        try:
            subprocess.check_output(
                ["psql", "-o", source_path, os.environ["DOWNLOAD_DATABASE_URL"], "-v", "ON_ERROR_STOP=1"],
                stdin=cat_command.stdout,
                stderr=subprocess.STDOUT,
            )
        except subprocess.CalledProcessError as e:
            logger.exception(e.output)
            raise e

        # Append deleted rows to the end of the file
        if not self.debugging_skip_deleted:
            self.add_deletion_records(source_path, working_dir, award_type, agency_code, source, generate_since)
        if count_rows_in_delimited_file(source_path, has_header=True, safe=True) > 0:
            # Split the CSV into multiple files and zip it up
            zipfile_path = "{}{}.zip".format(settings.CSV_LOCAL_PATH, source_name)

            logger.info("Creating compressed file: {}".format(os.path.basename(zipfile_path)))
            split_and_zip_data_files(zipfile_path, source_path, source_name, "csv")
        else:
            zipfile_path = None

        os.close(temp_sql_file)
        os.remove(temp_sql_file_path)
        shutil.rmtree(working_dir)

        return zipfile_path
Example #3
def generate_export_query(source_query, limit, source, columns, file_format,
                          generate_export_query_function):
    if limit:
        source_query = source_query[:limit]
    query_annotated = apply_annotations_to_sql(
        generate_raw_quoted_query(source_query), source.columns(columns))
    options = FILE_FORMATS[file_format]["options"]
    return generate_export_query_function(source, query_annotated, options)
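Example #3's variant injects the statement construction as a callable. A hypothetical implementation of that callable, modeled on the return value in Example #1:

def copy_to_stdout_export_query(source, query_annotated, options):
    # Illustrative only: source is accepted to match the expected signature but is unused;
    # this builds the same \COPY statement that Example #1 returns directly.
    return r"\COPY ({}) TO STDOUT {}".format(query_annotated, options)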
Example #4
def generate_temp_query_file(source_query, limit, source, download_job, columns):
    if limit:
        source_query = source_query[:limit]
    csv_query_annotated = apply_annotations_to_sql(generate_raw_quoted_query(source_query), source.columns(columns))

    write_to_log(
        message="Creating PSQL Query: {}".format(csv_query_annotated), download_job=download_job, is_debug=True
    )

    # Create a unique temporary file to hold the raw query, using \copy
    (temp_sql_file, temp_sql_file_path) = tempfile.mkstemp(prefix="bd_sql_", dir="/tmp")
    with open(temp_sql_file_path, "w") as file:
        file.write(r"\copy ({}) To STDOUT with CSV HEADER".format(csv_query_annotated))

    return temp_sql_file, temp_sql_file_path
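A possible way to consume the returned temp file, mirroring the psql invocation used in the create_local_file examples (the output path here is illustrative):

import os
import subprocess

temp_sql_file, temp_sql_file_path = generate_temp_query_file(source_query, limit, source, download_job, columns)
try:
    with open(temp_sql_file_path) as sql_file:
        # psql reads the \copy statement from stdin and writes the CSV to the "-o" path
        subprocess.check_output(
            ["psql", "-o", "/tmp/output.csv", os.environ["DOWNLOAD_DATABASE_URL"], "-v", "ON_ERROR_STOP=1"],
            stdin=sql_file,
            stderr=subprocess.STDOUT,
        )
finally:
    os.close(temp_sql_file)
    os.remove(temp_sql_file_path)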
Example #5
def fetch_all_category_counts(filters, category_to_model_dict):
    loop = asyncio.new_event_loop()
    results = {}
    for k, v in category_to_model_dict.items():
        queryset = matview_search_filter(
            filters, v).annotate(count=Count("*")).values("count")
        sql = generate_raw_quoted_query(queryset)

        # Django refuses to provide a viable option to exclude "GROUP BY ..." so it is stripped before running the SQL
        remove_groupby_string_index = sql.find("GROUP BY")
        results[k] = asyncio.ensure_future(
            async_run_select(sql[:remove_groupby_string_index]), loop=loop
        )

    all_statements = asyncio.gather(*[value for value in results.values()])
    loop.run_until_complete(all_statements)
    loop.close()

    return {k: v.result()[0]["count"] for k, v in results.items()}
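fetch_all_category_counts assumes an async_run_select helper that runs a single SELECT and returns dict-like rows (result()[0]["count"] above relies on that shape). A hypothetical sketch using asyncpg, which may differ from the project's actual helper:

import os
import asyncpg

async def async_run_select(sql):
    # Hypothetical helper: the driver and connection string are assumptions, not project code
    conn = await asyncpg.connect(os.environ["DOWNLOAD_DATABASE_URL"])
    try:
        rows = await conn.fetch(sql)
        return [dict(row) for row in rows]
    finally:
        await conn.close()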
Example #6
    def create_local_file(self, award_type, source, agency_code,
                          generate_since):
        """ Generate complete file from SQL query and S3 bucket deletion files, then zip it locally """
        logger.info("Generating CSV file with creations and modifications")

        # Create file paths and working directory
        timestamp = datetime.strftime(datetime.now(), "%Y%m%d%H%M%S%f")
        working_dir = "{}_{}_delta_gen_{}/".format(settings.CSV_LOCAL_PATH,
                                                   agency_code, timestamp)
        if not os.path.exists(working_dir):
            os.mkdir(working_dir)
        source_name = "{}_{}_Delta_{}".format(
            agency_code, award_type, datetime.strftime(date.today(), "%Y%m%d"))
        source_path = os.path.join(working_dir, "{}.csv".format(source_name))

        # Create a unique temporary file with the raw query
        raw_quoted_query = generate_raw_quoted_query(
            source.row_emitter(None))  # None requests all headers
        csv_query_annotated = self.apply_annotations_to_sql(
            raw_quoted_query, source.human_names)
        (temp_sql_file,
         temp_sql_file_path) = tempfile.mkstemp(prefix="bd_sql_", dir="/tmp")
        with open(temp_sql_file_path, "w") as file:
            file.write("\\copy ({}) To STDOUT with CSV HEADER".format(
                csv_query_annotated))

        logger.info("Generated temp SQL file {}".format(temp_sql_file_path))
        # Generate the csv with \copy
        cat_command = subprocess.Popen(["cat", temp_sql_file_path],
                                       stdout=subprocess.PIPE)
        try:
            subprocess.check_output(
                [
                    "psql", "-o", source_path,
                    os.environ["DOWNLOAD_DATABASE_URL"], "-v",
                    "ON_ERROR_STOP=1"
                ],
                stdin=cat_command.stdout,
                stderr=subprocess.STDOUT,
            )
        except subprocess.CalledProcessError as e:
            logger.exception(e.output)
            raise e

        # Append deleted rows to the end of the file
        self.add_deletion_records(source_path, working_dir, award_type,
                                  agency_code, source, generate_since)
        if count_rows_in_csv_file(source_path, has_header=True, safe=True) > 0:
            # Split the CSV into multiple files and zip it up
            zipfile_path = "{}{}.zip".format(settings.CSV_LOCAL_PATH,
                                             source_name)

            logger.info("Creating compressed file: {}".format(
                os.path.basename(zipfile_path)))
            split_and_zip_csvs(zipfile_path, source_path, source_name)
        else:
            zipfile_path = None

        os.close(temp_sql_file)
        os.remove(temp_sql_file_path)
        shutil.rmtree(working_dir)

        return zipfile_path