def execute_callable(self):
    """
    Calls the python callable with the given arguments. Replaces the real path
    with a temp path, then moves the temp file to self.output_path once the
    write has completed.

    :return: the return value of the python callable
    :rtype: any
    """
    # if the path is a file
    if not self.output_path.endswith(os.sep):
        # create all directories above the output file
        parent_dir = os.path.dirname(self.output_path)
        if not os.path.exists(parent_dir):
            os.makedirs(parent_dir)
        # write atomically and pass the temp path to python_callable
        with atomic_write(self.output_path, as_file=False) as f:
            return self.python_callable(*self.op_args, output_path=f, **self.op_kwargs)
    # if the path is a directory
    else:
        # create all directories above the output dir (the trailing separator
        # must be removed to get the expected behavior from os.path.dirname)
        parent_dir = os.path.dirname(self.output_path.rstrip(os.sep))
        if not os.path.exists(parent_dir):
            os.makedirs(parent_dir)
        with atomic_dir_create(self.output_path) as d:
            return self.python_callable(*self.op_args, output_path=d, **self.op_kwargs)

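# atomic_dir_create is used above but not shown in these snippets. Below is a
# minimal sketch of one way it could work, assuming the same publish-by-rename
# contract as atomic_write (yield a temp path, rename it onto the target on
# success, clean up on failure); this is an illustration, not the project's
# actual implementation.
import contextlib
import os
import shutil
import tempfile


@contextlib.contextmanager
def atomic_dir_create(target_dir):
    # create the temp dir next to the target so the final rename stays on one filesystem
    parent = os.path.dirname(os.path.normpath(target_dir)) or "."
    tmp_dir = tempfile.mkdtemp(dir=parent)
    try:
        yield tmp_dir
    except Exception:
        # the directory was never published, so discard the partial output
        shutil.rmtree(tmp_dir, ignore_errors=True)
        raise
    os.rename(tmp_dir, target_dir)  # atomically publish the finished directory
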
def test_file_exists(self):
    """Ensure an error is raised when the target file already exists"""
    with TemporaryDirectory() as tmp:
        fp = os.path.join(tmp, "asdf.txt")
        # create the target ahead of time so atomic_write must refuse to overwrite it
        existing_file = open(fp, "w+")
        existing_file.close()
        with self.assertRaises(FileExistsError):
            with atomic_write(fp) as f:
                print("Running test...")

def parquet_conv(filename, cwd=os.getcwd(), datasourceformat=".xlsx"):
    """Converts a .xlsx or .csv file into .parquet, then reads, prints, and
    returns the first column

    :param filename: base filename to be converted to .parquet
    :param cwd: current working directory
    :param datasourceformat: what format the data source comes in
    :return: the requested column from the pset instructions
    """
    parquetfilename = filename + ".parquet"
    data_wd = os.path.abspath(os.path.join(cwd, "data"))
    data_source = os.path.join(data_wd, filename + datasourceformat)
    # try csv first, then fall back to excel
    try:
        df = pd.read_csv(data_source)
    except Exception:
        df = pd.read_excel(data_source)
    # write the parquet file atomically: fastparquet writes to the temp path,
    # which atomic_write then moves to parquetfilename
    with atomic_write(parquetfilename, as_file=False) as f:
        fastparquet.write(f, df, compression=None)
    result = pd.read_parquet(parquetfilename, engine="fastparquet", columns=["hashed_id"])
    print(result)
    return result

def test_atomic_failure(self):
    """Ensure that file does not exist after failure during write"""
    with TemporaryDirectory() as tmp:
        fp = os.path.join(tmp, "asdf.txt")
        with self.assertRaises(FakeFileFailure):
            with atomic_write(fp, "w") as f:
                tmpfile = f.name
                assert os.path.exists(tmpfile)
                raise FakeFileFailure()
        assert not os.path.exists(tmpfile)
        assert not os.path.exists(fp)

def convert_xls_to_parquet(xls_file, sheet):
    """
    Convert the provided xls sheet to a parquet file

    :param xls_file: path to the source Excel workbook
    :param sheet: name of the sheet to convert
    :return: parquet file name
    """
    with open(xls_file, "r+b") as fp:
        df = pd.read_excel(fp, sheet_name=sheet)
    parquet_file = get_parquet_file_name(xls_file)
    # write to a temp path and move it into place once the parquet is complete
    with atomic_write(parquet_file, as_file=False) as pf:
        df.to_parquet(pf)
    return parquet_file

def test_atomic_write(self):
    """Ensure file exists after being written successfully"""
    with TemporaryDirectory() as tmp:
        fp = os.path.join(tmp, "asdf.txt")
        with atomic_write(fp) as f:
            assert not os.path.exists(fp)
            tmpfile = f.name
            f.write("asdf")
        assert not os.path.exists(tmpfile)
        assert os.path.exists(fp)
        with open(fp) as f:
            self.assertEqual(f.read(), "asdf")

def test_check_suffix(self):
    """Check that the temp file keeps the target file's suffix"""
    file_suffix = ".txt"
    file_name = "asdf"
    full_file_name = file_name + file_suffix
    with TemporaryDirectory() as tmp:
        fp = os.path.join(tmp, full_file_name)
        with atomic_write(fp) as f:
            assert not os.path.exists(fp)
            tmpfile = f.name
            root, ext = os.path.splitext(tmpfile)
            self.assertEqual(ext, file_suffix)

def test_read_target(self):
    with TemporaryDirectory() as tmp:
        fp = os.path.join(tmp, "test.csv")
        tempFileContents = b"a,b,c,d\n" b"1,2,3,4\n" b"5,6,7,8\n"

        class MockTargetOutputTask(ExternalTask):
            output = TargetOutput(
                target_class=CSVTarget,
                file_pattern=tmp,
                ext="",
                flag=None,
                glob="*.csv",
            )

        with atomic_write(fp, "wb") as f:
            f.write(tempFileContents)

        target = MockTargetOutputTask()
        csv_target = target.output()
        self.assertTrue(isinstance(csv_target, CSVTarget))
        df = csv_target.read_dask()
        rows, cols = df.compute().shape
        self.assertEqual(rows, 2)
        self.assertEqual(cols, 4)

def write_data(spec: dict, data_frame: DataFrame):
    """Write the data frame to the output file named in the spec, atomically."""
    output_path = spec['output']['file']
    ext = Path(output_path).suffix
    kwargs = build_kwargs_write(spec, ext)
    # the writer for this extension gets the temp file's name; atomic_write
    # moves the finished file to output_path on exit
    with atomic_write(output_path, "w") as out:
        write_funcs[ext](data_frame, out.name, **kwargs)

def test_yield_temp_path_when_as_file_false(self):
    """Ensure atomic_write yields a temp path when as_file is False."""
    with atomic_write("test.txt", "w", False) as f:
        self.assertIsInstance(f, str)

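# The snippets above share an atomic_write context manager that is not shown
# here. The sketch below is one way to satisfy the behavior the tests exercise
# (refuse to overwrite an existing target, keep the target's suffix on the temp
# file, yield a raw temp path when as_file is False, and leave nothing behind
# on failure). The signature and keyword handling are inferred from usage, not
# taken from the project's actual implementation.
import contextlib
import os
import tempfile


@contextlib.contextmanager
def atomic_write(file, mode="w", as_file=True, **kwargs):
    if os.path.exists(file):
        # matches test_file_exists: never clobber an existing target
        raise FileExistsError(file)
    parent = os.path.dirname(os.path.abspath(file))
    suffix = os.path.splitext(file)[1]  # matches test_check_suffix
    fd, tmp_path = tempfile.mkstemp(suffix=suffix, dir=parent)
    os.close(fd)
    try:
        if as_file:
            # yield an open file handle whose .name is the temp path
            with open(tmp_path, mode, **kwargs) as f:
                yield f
        else:
            # yield the temp path itself (the as_file=False pattern used above)
            yield tmp_path
        os.replace(tmp_path, file)  # atomic rename onto the final path
    finally:
        # on failure the rename never ran, so remove the orphaned temp file
        if os.path.exists(tmp_path):
            os.remove(tmp_path)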