def create_local_file(self, award_type, source, agency_code, generate_since):
        """ Generate complete file from SQL query and S3 bucket deletion files, then zip it locally """
        logger.info("Generating CSV file with creations and modifications")

        # Create file paths and working directory
        timestamp = datetime.strftime(datetime.now(), "%Y%m%d%H%M%S%f")
        working_dir = f"{settings.CSV_LOCAL_PATH}_{agency_code}_delta_gen_{timestamp}/"
        if not os.path.exists(working_dir):
            os.mkdir(working_dir)
        agency_str = "All" if agency_code == "all" else agency_code
        source_name = f"FY(All)_{agency_str}_{award_type}_Delta_{datetime.strftime(date.today(), '%Y%m%d')}"
        source_path = os.path.join(working_dir, "{}.csv".format(source_name))

        # Create a unique temporary file with the raw query
        raw_quoted_query = generate_raw_quoted_query(source.row_emitter(None))  # None requests all headers

        # The raw query is a union of two other queries, each in parentheses. To do replacement we need to split out
        # each query, apply annotations to each of those, then recombine in a UNION
        csv_query_annotated = (
            "("
            + apply_annotations_to_sql(_top_level_split(raw_quoted_query, "UNION")[0].strip()[1:-1], source.human_names)
            + ") UNION ("
            + apply_annotations_to_sql(_top_level_split(raw_quoted_query, "UNION")[1].strip()[1:-1], source.human_names)
            + ")"
        )

        (temp_sql_file, temp_sql_file_path) = tempfile.mkstemp(prefix="bd_sql_", dir="/tmp")
        with open(temp_sql_file_path, "w") as file:
            file.write("\\copy ({}) To STDOUT with CSV HEADER".format(csv_query_annotated))

        logger.info("Generated temp SQL file {}".format(temp_sql_file_path))
        # Generate the csv with \copy
        cat_command = subprocess.Popen(["cat", temp_sql_file_path], stdout=subprocess.PIPE)
        try:
            subprocess.check_output(
                ["psql", "-o", source_path, os.environ["DOWNLOAD_DATABASE_URL"], "-v", "ON_ERROR_STOP=1"],
                stdin=cat_command.stdout,
                stderr=subprocess.STDOUT,
            )
        except subprocess.CalledProcessError as e:
            logger.exception(e.output)
            raise e

        # Append deleted rows to the end of the file
        if not self.debugging_skip_deleted:
            self.add_deletion_records(source_path, working_dir, award_type, agency_code, source, generate_since)
        if count_rows_in_delimited_file(source_path, has_header=True, safe=True) > 0:
            # Split the CSV into multiple files and zip it up
            zipfile_path = "{}{}.zip".format(settings.CSV_LOCAL_PATH, source_name)

            logger.info("Creating compressed file: {}".format(os.path.basename(zipfile_path)))
            split_and_zip_data_files(zipfile_path, source_path, source_name, "csv")
        else:
            zipfile_path = None

        os.close(temp_sql_file)
        os.remove(temp_sql_file_path)
        shutil.rmtree(working_dir)

        return zipfile_path
Exemple #2
0
def test_top_level_split_nesting_logic():
    unnested_string = "sdfoing SPLT noivijf"
    assert _top_level_split(unnested_string, "SPLT") == ["sdfoing ", " noivijf"]
    nested_string_1 = "() blah SP pojf"
    assert _top_level_split(nested_string_1, "SP") == ["() blah ", " pojf"]
    nested_string_2 = "(SP) blah SP pojf"
    assert _top_level_split(nested_string_2, "SP") == ["(SP) blah ", " pojf"]
    nested_string_3 = "(SP) blah SP (pojf SP) SP"
    assert _top_level_split(nested_string_3, "SP") == ["(SP) blah ", " (pojf SP) SP"]
    nested_string_4 = "((SP)) blah ()SP (pojf SP) SP"
    assert _top_level_split(nested_string_4, "SP") == ["((SP)) blah ()", " (pojf SP) SP"]
Exemple #3
0
def test_top_level_split_unsplittable():
    unsplittable = "sdoiaghioweg oijfewiwje efoiwnovisper"
    with pytest.raises(Exception):
        _top_level_split(unsplittable, "not in this")
Exemple #4
0
def test_top_level_split_quote_logic():
    unquoted_string = "oinodsg *** vsdoij"
    assert _top_level_split(unquoted_string, "***") == ["oinodsg ", " vsdoij"]
    quoted_string_1 = 'oinodsg "***"*** vsdoij'
    assert _top_level_split(quoted_string_1,
                            "***") == ['oinodsg "***"', " vsdoij"]