def create_local_file(self, award_type, source, agency_code, generate_since):
    """Generate complete file from SQL query and S3 bucket deletion files, then zip it locally.

    The annotated query is written to a temporary SQL file and run through ``psql`` so that the
    result lands in a CSV inside a per-run working directory. Unless
    ``self.debugging_skip_deleted`` is set, deletion records are then appended through
    ``self.add_deletion_records``. The temporary SQL file and the working directory are removed
    before this method exits, whether it returns normally or raises.

    Args:
        award_type: Award type label used in the output file name and forwarded to
            ``add_deletion_records``.
        source: Object providing ``row_emitter(None)`` for the query and ``human_names`` for
            the column aliases.
        agency_code: Agency code used in the file names; ``"all"`` is rendered as ``"All"``.
        generate_since: Forwarded unchanged to ``add_deletion_records``.

    Returns:
        Path of the zip file that was created, or ``None`` when
        ``count_rows_in_delimited_file`` reports no rows for the generated CSV.

    Raises:
        subprocess.CalledProcessError: If ``psql`` exits with a non-zero status (its combined
            output is logged before the error propagates).
    """
    logger.info("Generating CSV file with creations and modifications")

    # Create file paths and working directory
    timestamp = datetime.strftime(datetime.now(), "%Y%m%d%H%M%S%f")
    working_dir = f"{settings.CSV_LOCAL_PATH}_{agency_code}_delta_gen_{timestamp}/"
    if not os.path.exists(working_dir):
        os.mkdir(working_dir)
    agency_str = "All" if agency_code == "all" else agency_code
    source_name = f"FY(All)_{agency_str}_{award_type}_Delta_{datetime.strftime(date.today(), '%Y%m%d')}"
    source_path = os.path.join(working_dir, "{}.csv".format(source_name))

    # Stays None until the temp file exists so the cleanup below knows whether to remove it
    temp_sql_file_path = None
    try:
        raw_quoted_query = generate_raw_quoted_query(source.row_emitter(None))  # None requests all headers

        # The raw query is a union of two other queries, each in parentheses. To do replacement we need to
        # split out each query, apply annotations to each of those, then recombine in a UNION
        union_parts = _top_level_split(raw_quoted_query, "UNION")
        csv_query_annotated = (
            "("
            + apply_annotations_to_sql(union_parts[0].strip()[1:-1], source.human_names)
            + ") UNION ("
            + apply_annotations_to_sql(union_parts[1].strip()[1:-1], source.human_names)
            + ")"
        )

        # Create a unique temporary file with the annotated query. Wrapping the descriptor returned by
        # mkstemp means it is closed as soon as the write finishes instead of lingering until the end.
        temp_sql_fd, temp_sql_file_path = tempfile.mkstemp(prefix="bd_sql_", dir="/tmp")
        with os.fdopen(temp_sql_fd, "w") as sql_file:
            sql_file.write("\\copy ({}) To STDOUT with CSV HEADER".format(csv_query_annotated))

        logger.info("Generated temp SQL file {}".format(temp_sql_file_path))

        # Generate the csv with \copy; psql reads the SQL file directly on stdin, so no separate
        # `cat` process has to be spawned (and later reaped)
        with open(temp_sql_file_path, "rb") as sql_input:
            try:
                subprocess.check_output(
                    ["psql", "-o", source_path, os.environ["DOWNLOAD_DATABASE_URL"], "-v", "ON_ERROR_STOP=1"],
                    stdin=sql_input,
                    stderr=subprocess.STDOUT,
                )
            except subprocess.CalledProcessError as e:
                logger.exception(e.output)
                raise

        # Append deleted rows to the end of the file
        if not self.debugging_skip_deleted:
            self.add_deletion_records(source_path, working_dir, award_type, agency_code, source, generate_since)

        if count_rows_in_delimited_file(source_path, has_header=True, safe=True) > 0:
            # Split the CSV into multiple files and zip it up
            zipfile_path = "{}{}.zip".format(settings.CSV_LOCAL_PATH, source_name)

            logger.info("Creating compressed file: {}".format(os.path.basename(zipfile_path)))
            split_and_zip_data_files(zipfile_path, source_path, source_name, "csv")
        else:
            zipfile_path = None

        return zipfile_path
    finally:
        # Always remove the scratch artifacts, even when a step above raised
        try:
            if temp_sql_file_path is not None:
                os.remove(temp_sql_file_path)
        finally:
            shutil.rmtree(working_dir)
def test_apply_annotations_to_sql_just_values():
    """Check that plain selected columns are each given their alias, in alias order."""
    column_aliases = ["alias_one", "alias_two", "alias_three", "alias_four", "alias_five"]
    raw_sql = "SELECT one, two, three, four, five FROM table WHERE six = 'something'"

    # Expected output: every column annotated with AS "<alias>"; the rest of the query untouched
    expected_sql = (
        'SELECT one AS "alias_one", two AS "alias_two", three AS "alias_three", four AS '
        '"alias_four", five AS "alias_five" FROM table WHERE six = \'something\''
    )

    assert download_generation.apply_annotations_to_sql(raw_sql, column_aliases) == expected_sql
# Example no. 3
# 0
def test_apply_annotations_to_sql():
    """Check annotation of table-qualified columns alongside an already-aliased subquery."""
    column_aliases = ["alias_one", "col1", "col2"]
    raw_sql = (
        'SELECT "table"."col1", "table"."col2", (SELECT table2."three" FROM table_two table2 WHERE '
        'table2."code" = table."othercode") AS "alias_one" FROM table WHERE six = \'something\''
    )

    # NOTE(review): the expected text has no space between "col2" and FROM - confirm this
    # matches what apply_annotations_to_sql actually emits for this input.
    expected_sql = (
        'SELECT (SELECT table2."three" FROM table_two table2 WHERE table2."code" = table."othercode") AS "alias_one", '
        '"table"."col1" AS "col1", "table"."col2" AS "col2"FROM table WHERE six = \'something\''
    )

    assert apply_annotations_to_sql(raw_sql, column_aliases) == expected_sql
def test_apply_annotations_to_sql_subquery():
    """Check that an aliased subquery is placed at its alias position among plain columns."""
    column_aliases = ["alias_one", "alias_two", "alias_three", "alias_four", "alias_five"]

    # The subquery is listed last in the input and carries a single-quoted alias
    raw_sql = (
        'SELECT two, three, four, five, (SELECT table2."three" FROM table_two table2 WHERE '
        "table2.\"code\" = table.\"othercode\") AS 'alias_one' FROM table WHERE six = 'something'"
    )

    # In the expected output it comes first, matching the position of alias_one in the alias list
    expected_sql = (
        'SELECT (SELECT table2."three" FROM table_two table2 WHERE table2."code" = '
        'table."othercode") AS "alias_one", two AS "alias_two", three AS "alias_three", '
        'four AS "alias_four", five AS "alias_five" FROM table WHERE six = \'something\''
    )

    assert download_generation.apply_annotations_to_sql(raw_sql, column_aliases) == expected_sql
def test_apply_annotations_to_sql_concat_then_case():
    """Check a query holding both an aliased CONCAT and an aliased CASE expression."""
    column_aliases = ["alias_one", "alias_two", "alias_three", "alias_four", "alias_five"]

    # Input lists the plain columns first, then the CONCAT, then the CASE
    raw_sql = (
        "SELECT two, four, five, CONCAT(three, '-', not_three, '-', yes_three) AS \"alias_three\", CASE "
        "WHEN one = TRUE THEN '1' ELSE NULL END AS \"alias_one\" FROM table WHERE six = 'something'"
    )

    # Expected output follows the alias order, so the CASE (alias_one) moves to the front
    expected_sql = (
        'SELECT CASE WHEN one = TRUE THEN \'1\' ELSE NULL END AS "alias_one", two AS "alias_two", '
        "CONCAT(three, '-', not_three, '-', yes_three) AS \"alias_three\", four AS \"alias_four\", "
        "five AS \"alias_five\" FROM table WHERE six = 'something'"
    )

    assert download_generation.apply_annotations_to_sql(raw_sql, column_aliases) == expected_sql
def test_apply_annotations_to_sql_multilevel_concat():
    """Check that a CONCAT nested inside another CONCAT is kept intact as one aliased column."""
    column_aliases = ["alias_one", "alias_two", "alias_three", "alias_four", "alias_five"]
    raw_sql = (
        "SELECT one, two, four, five, CONCAT(three, '-', CONCAT(not_three, '-', yes_three)) AS "
        "\"alias_three\" FROM table WHERE six = 'something'"
    )

    # The nested CONCAT is expected at the alias_three position, with its inner commas preserved
    expected_sql = (
        'SELECT one AS "alias_one", two AS "alias_two", CONCAT(three, \'-\', CONCAT(not_three, '
        '\'-\', yes_three)) AS "alias_three", four AS "alias_four", five AS "alias_five" FROM '
        "table WHERE six = 'something'"
    )

    assert download_generation.apply_annotations_to_sql(raw_sql, column_aliases) == expected_sql
def test_apply_annotations_to_sql_just_concat():
    """Check that a single aliased CONCAT is slotted in among plain columns by alias order."""
    column_aliases = ["alias_one", "alias_two", "alias_three", "alias_four", "alias_five"]
    raw_sql = (
        "SELECT one, two, four, five, CONCAT(three, '-', not_three, '-', yes_three) AS \"alias_three\" "
        "FROM table WHERE six = 'something'"
    )

    # The CONCAT trails the plain columns in the input but is expected third in the output
    expected_sql = (
        "SELECT one AS \"alias_one\", two AS \"alias_two\", CONCAT(three, '-', not_three, '-', "
        'yes_three) AS "alias_three", four AS "alias_four", five AS "alias_five" FROM table '
        "WHERE six = 'something'"
    )

    assert download_generation.apply_annotations_to_sql(raw_sql, column_aliases) == expected_sql
def test_apply_annotations_to_sql_just_case():
    """Check that a single aliased CASE expression is slotted in among plain columns by alias order."""
    column_aliases = ["alias_one", "alias_two", "alias_three", "alias_four", "alias_five"]
    raw_sql = (
        "SELECT one, two, four, five, CASE WHEN three = TRUE THEN '3' ELSE NULL END AS \"alias_three\" "
        "FROM table WHERE six = 'something'"
    )

    # The CASE trails the plain columns in the input but is expected third in the output
    expected_sql = (
        'SELECT one AS "alias_one", two AS "alias_two", CASE WHEN three = TRUE THEN \'3\' ELSE '
        'NULL END AS "alias_three", four AS "alias_four", five AS "alias_five" FROM table '
        "WHERE six = 'something'"
    )

    assert download_generation.apply_annotations_to_sql(raw_sql, column_aliases) == expected_sql