def execute_callable(self):
        """
        Run the python callable against a temporary output location.

        The callable is invoked with ``self.op_args`` / ``self.op_kwargs`` and receives
        a temporary path through its ``output_path`` keyword argument in place of
        ``self.output_path``. An ``output_path`` ending in ``os.sep`` is treated as a
        directory and wrapped in ``atomic_dir_create``; anything else is treated as a
        file and wrapped in ``atomic_write``. Publishing the temporary location to
        ``self.output_path`` once the callable finishes is left to those context
        managers. Missing parent directories of the target are created first.

        :return: whatever ``self.python_callable`` returns
        :rtype: any
        """

        is_directory = self.output_path.endswith(os.sep)

        # Strip any trailing separator so dirname() yields the parent of the target
        # rather than the target itself (this is a no-op for file paths).
        parent_dir = os.path.dirname(self.output_path.rstrip(os.sep))

        # dirname() is "" for a bare relative name such as "out.csv"; makedirs("")
        # would raise, and the current directory never needs creating.
        if parent_dir:
            # exist_ok avoids failing if the directory appears between a separate
            # existence check and the creation call.
            os.makedirs(parent_dir, exist_ok=True)

        if is_directory:
            temp_target = atomic_dir_create(self.output_path)
        else:
            # as_file=False: hand the callable a path, not an open file object
            temp_target = atomic_write(self.output_path, as_file=False)

        with temp_target as temp_path:
            return self.python_callable(*self.op_args,
                                        output_path=temp_path,
                                        **self.op_kwargs)
    def test_file_exists(self):
        """atomic_write must raise FileExistsError when the target is already present."""

        with TemporaryDirectory() as workdir:
            target = os.path.join(workdir, "asdf.txt")

            # Put an empty file at the target location before attempting the write.
            with open(target, "w+"):
                pass

            with self.assertRaises(FileExistsError):
                with atomic_write(target):
                    print("Running test...")
Beispiel #3
0
def parquet_conv(filename, cwd=None, datasourceformat=".xlsx"):
    """Convert ``data/<filename><datasourceformat>`` to parquet and return one column.

    The source file is looked up in the ``data`` directory below ``cwd``. It is
    first parsed as CSV and, if that fails, as an Excel workbook. The resulting
    frame is written to ``<filename>.parquet`` (a path relative to the process
    working directory), then the ``hashed_id`` column is read back, printed and
    returned.

    :param filename: base filename (without extension) to be converted to .parquet
    :param cwd: directory that contains the ``data`` folder; defaults to the
        working directory at call time
    :param datasourceformat: extension of the data source, including the dot
    :return: the ``hashed_id`` column read back from the parquet file
    """
    if cwd is None:
        # Resolve per call: a ``cwd=os.getcwd()`` default would be evaluated once,
        # when the function is defined, and go stale after any chdir.
        cwd = os.getcwd()

    parquetfilename = filename + ".parquet"
    data_wd = os.path.abspath(os.path.join(cwd, "data"))
    data_source = os.path.join(data_wd, filename + datasourceformat)

    try:
        df = pd.read_csv(data_source)
    except Exception:
        # Not parseable as CSV: fall back to Excel. Catching Exception instead of
        # using a bare ``except`` lets KeyboardInterrupt/SystemExit propagate.
        df = pd.read_excel(data_source)

    # Let fastparquet write to the temporary path handed out by atomic_write so
    # the final file only appears once the write has completed.
    # NOTE(review): assumes atomic_write accepts ``as_file=False`` and then yields
    # a path string, and that it refuses to overwrite an existing target --
    # confirm against its definition.
    with atomic_write(parquetfilename, as_file=False) as tmp_path:
        fastparquet.write(tmp_path, df, compression=None)

    result = pd.read_parquet(parquetfilename,
                             engine="fastparquet",
                             columns=["hashed_id"])
    print(result)
    return result
Beispiel #4
0
    def test_atomic_failure(self):
        """A failure inside the write block must leave neither temp nor target file."""

        with TemporaryDirectory() as workdir:
            target = os.path.join(workdir, "asdf.txt")

            with self.assertRaises(FakeFileFailure):
                with atomic_write(target, "w") as handle:
                    temp_name = handle.name
                    # The temporary file is on disk while the block is running.
                    assert os.path.exists(temp_name)
                    raise FakeFileFailure()

            # After the failure both the temporary file and the target are absent.
            assert not os.path.exists(temp_name)
            assert not os.path.exists(target)
Beispiel #5
0
def convert_xls_to_parquet(xls_file, sheet):
    """
    Convert one sheet of an Excel workbook to a parquet file.

    :param xls_file: path of the workbook to read
    :param sheet: sheet to load; forwarded to ``pandas.read_excel`` as ``sheet_name``
    :return: parquet file name, as produced by ``get_parquet_file_name(xls_file)``
    """
    # The workbook is only read, so read-only binary mode is sufficient and also
    # works for inputs the process has no write permission on.
    with open(xls_file, "rb") as fp:
        df = pd.read_excel(fp, sheet_name=sheet)

    # The source handle is closed at this point; only the DataFrame is needed.
    parquet_file = get_parquet_file_name(xls_file)
    # as_file=False: to_parquet receives what atomic_write yields in that mode
    # instead of an open file object.
    with atomic_write(parquet_file, as_file=False) as pf:
        df.to_parquet(pf)

    return parquet_file
    def test_atomic_write(self):
        """A successful write leaves the target in place and no temp file behind."""

        with TemporaryDirectory() as workdir:
            target = os.path.join(workdir, "asdf.txt")

            with atomic_write(target) as writer:
                # Nothing may exist at the target while the block is still running.
                assert not os.path.exists(target)
                temp_name = writer.name
                writer.write("asdf")

            # On exit the temporary file is gone and the target has appeared.
            assert not os.path.exists(temp_name)
            assert os.path.exists(target)

            with open(target) as reader:
                contents = reader.read()
            self.assertEqual(contents, "asdf")
    def test_check_suffix(self):
        """The temporary file carries the same extension as the target file."""

        expected_ext = ".txt"
        target_name = "asdf" + expected_ext

        with TemporaryDirectory() as workdir:
            target = os.path.join(workdir, target_name)

            with atomic_write(target) as handle:
                assert not os.path.exists(target)
                # Only the extension of the temporary file name is of interest.
                actual_ext = os.path.splitext(handle.name)[1]

        self.assertEqual(actual_ext, expected_ext)
Beispiel #8
0
    def test_read_target(self):
        """A CSV placed in the target directory is read back via ``read_dask``."""
        with TemporaryDirectory() as workdir:
            csv_path = os.path.join(workdir, "test.csv")
            # Header row plus two data rows, four columns each.
            csv_bytes = b"a,b,c,d\n1,2,3,4\n5,6,7,8\n"

            class MockTargetOutputTask(ExternalTask):
                # Output is configured on the temp directory itself, with a glob
                # selecting the CSV files inside it.
                output = TargetOutput(
                    file_pattern=workdir,
                    ext="",
                    target_class=CSVTarget,
                    flag=None,
                    glob="*.csv",
                )

            with atomic_write(csv_path, "wb") as handle:
                handle.write(csv_bytes)

            task = MockTargetOutputTask()
            output_target = task.output()
            self.assertTrue(isinstance(output_target, CSVTarget))

            frame = output_target.read_dask()
            n_rows, n_cols = frame.compute().shape
            self.assertEqual(n_rows, 2)
            self.assertEqual(n_cols, 4)
Beispiel #9
0
def write_data(spec: dict, data_frame: DataFrame):
    """Write ``data_frame`` to the file named by ``spec['output']['file']``.

    The writer is looked up in ``write_funcs`` by the output file's suffix and is
    called with the name of the file object yielded by ``atomic_write``, plus the
    keyword arguments built by ``build_kwargs_write``.
    """
    destination = spec['output']['file']
    suffix = Path(destination).suffix
    writer_options = build_kwargs_write(spec, suffix)
    with atomic_write(destination, "w") as handle:
        # Look the writer up inside the block so an unknown suffix fails while
        # atomic_write is active, exactly as before.
        writer = write_funcs[suffix]
        writer(data_frame, handle.name, **writer_options)
Beispiel #10
0
 def test_yield_temp_path_when_as_file_false(self):
     """Ensure atomic_write yields temp path when as_file is False."""
     # Imported locally so the test does not rely on module-level imports.
     import os
     from tempfile import TemporaryDirectory

     # Use a throwaway directory instead of the process working directory, so the
     # test leaves no "test.txt" behind and cannot collide with an existing file
     # on a later run.
     with TemporaryDirectory() as workdir:
         target = os.path.join(workdir, "test.txt")
         # Third positional argument is the as_file flag under test.
         with atomic_write(target, "w", False) as f:
             self.assertIsInstance(f, str)