def ofmvol2csv_main(
    volfiles: Union[str, List[str]], output: str, includefileorigin: bool = False
) -> None:
    """Convert a set of volfiles (or wildcard patterns) into one CSV file.

    Args:
        volfiles: A string or a list of strings, with filenames and/or
            wildcard patterns.
        output: Filename to write to, in CSV format.
        includefileorigin: Whether to add a column with the originating
            volfile filename for each row of data.
    """
    if isinstance(volfiles, str):
        volfiles = [volfiles]

    globbed = glob_patterns(volfiles)
    if set(globbed) != set(volfiles):
        logger.info("Wildcards expanded to: %s", str(globbed))

    # Nothing matched any pattern: warn and bail out without writing a file.
    if not globbed:
        logger.warning("Filename(s) %s not found", str(volfiles))
        return

    # Parse every matched volfile, keeping only the non-empty frames.
    frames = []
    for volfile in globbed:
        frame = process_volfile(volfile)
        if includefileorigin:
            # Tag each row with the file it came from.
            frame["OFMVOLFILE"] = volfile
        if not frame.empty:
            frames.append(frame)

    if not frames:
        logger.warning("No data was extracted")
        return

    merged = pd.concat(frames, sort=False).sort_index()
    merged.to_csv(output)
    logger.info("Wrote %s rows to %s", str(len(merged)), output)
def run(self, *args):  # pylint: disable=no-self-use
    """Parse with a simplified command line parser, for ERT only,
    call csv_merge_main()"""
    # Reparse the raw ERT-supplied argument strings with the simplified parser.
    parsed = get_ertwf_parser().parse_args(args)
    logger.setLevel(logging.INFO)
    # Expand wildcards here; csv_merge_main() then receives concrete filenames.
    expanded = glob_patterns(parsed.csvfiles)
    csv_merge_main(csvfiles=expanded, output=parsed.output)
def csv2ofmvol_main(csvfilepatterns, output):
    """Convert a list of CSV files into one OFM vol-file.

    Arguments:
        csvfilepatterns (list): strings of filenames or filename wildcards.
            Can also be a single string.
        output (str): Filename to write to.

    Returns:
        bool: True if successful
    """
    if isinstance(csvfilepatterns, str):
        csvfilepatterns = [csvfilepatterns]
    csvfiles = glob_patterns(csvfilepatterns)
    if set(csvfiles) != set(csvfilepatterns):
        logger.info("Wildcards used: %s", str(csvfilepatterns))
    if not csvfiles:
        logger.error("No filenames found")
        return False
    logger.info("Input files: %s", " ".join(csvfiles))
    data = read_pdm_csv_files(csvfiles)

    # Print warnings for suspicious data. Perhaps we should fail but difficult
    # to ascertain how downstream tools will react.
    check_consecutive_dates(data)

    # Convert dataframes to a multiline string:
    volstr = df2vol(data)

    # Explicit encoding to avoid a platform-dependent default.
    with open(output, "w", encoding="utf8") as outfile:
        outfile.write(
            "-- Data printed by csv2ofmvol at " + str(datetime.datetime.now()) + "\n"
        )
        outfile.write("-- Input files: " + str(csvfiles) + "\n")
        outfile.write("\n")
        outfile.write(volstr)

    # data has a (well, date) MultiIndex; level 0 is wells, level 1 is dates.
    logger.info("Well count: %s", str(len(data.index.levels[0])))
    logger.info("Date count: %s", str(len(data.index.levels[1])))
    if len(data) > 1:
        startdate = data.index.levels[1].min()
        enddate = data.index.levels[1].max()
        delta = relativedelta(enddate, startdate)
        logger.info("Date range: %s --> %s", str(startdate.date()), str(enddate.date()))
        logger.info(
            " %s years, %s months, %s days.",
            str(delta.years),
            str(delta.months),
            str(delta.days),
        )
    # BUGFIX: len(volstr) counted *characters*, not lines. Count actual lines
    # in the vol-string, plus the 3 header lines written above.
    logger.info("Written %s lines to %s.", str(len(volstr.splitlines()) + 3), output)
    return True
def test_glob_patterns(tmp_path):
    """Test globbing filepatterns from a file with patterns"""
    os.chdir(tmp_path)
    dummyfiles = ["perm.grdecl", "poro.grdecl"]
    for dummyfile in dummyfiles:
        Path(dummyfile).write_text("", encoding="utf8")

    Path("filelist").write_text("*.grdecl", encoding="utf8")
    assert set(glob_patterns(parse_wildcardfile("filelist"))) == set(dummyfiles)

    # Duplicates and overlapping patterns must be deduplicated:
    Path("filelist_dups").write_text(
        """
*.grdecl
poro.grdecl
p*ecl
perm.grdecl""",
        encoding="utf8",
    )
    assert set(glob_patterns(parse_wildcardfile("filelist_dups"))) == set(dummyfiles)

    # BUGFIX: the three assertions below previously parsed "filelist_dups"
    # (copy-paste error), so comment handling was never actually tested.
    Path("filelist_comments").write_text(
        "-- this is a comment\n*.grdecl\n# some comment", encoding="utf8"
    )
    assert set(glob_patterns(parse_wildcardfile("filelist_comments"))) == set(
        dummyfiles
    )
    Path("filelist_comments").write_text(
        "# this is a comment\n*.grdecl\n# some comment", encoding="utf8"
    )
    assert set(glob_patterns(parse_wildcardfile("filelist_comments"))) == set(
        dummyfiles
    )
    Path("filelist_comments").write_text(
        " # this is a comment\n*.grdecl # comment along pattern", encoding="utf8"
    )
    assert set(glob_patterns(parse_wildcardfile("filelist_comments"))) == set(
        dummyfiles
    )

    with pytest.raises(IOError):
        parse_wildcardfile("")

    with pytest.raises(IOError):
        parse_wildcardfile("notthere")
def test_glob_patterns(tmpdir):
    """Test globbing filepatterns from a file with patterns"""
    tmpdir.chdir()
    dummyfiles = ["perm.grdecl", "poro.grdecl"]
    # Use context managers so file handles are closed deterministically
    # (the previous bare open(...).write(...) leaked handles).
    for dummyfile in dummyfiles:
        with open(dummyfile, "w") as fhandle:
            fhandle.write("")
    with open("filelist", "w") as fhandle:
        fhandle.write("*.grdecl")
    assert set(glob_patterns(parse_wildcardfile("filelist"))) == set(dummyfiles)

    # Duplicates and overlapping patterns must be deduplicated:
    with open("filelist_dups", "w") as fhandle:
        fhandle.write(
            """
*.grdecl
poro.grdecl
p*ecl
perm.grdecl"""
        )
    assert set(glob_patterns(parse_wildcardfile("filelist_dups"))) == set(dummyfiles)

    # BUGFIX: the three assertions below previously parsed "filelist_dups"
    # (copy-paste error), so comment handling was never actually tested.
    with open("filelist_comments", "w") as fhandle:
        fhandle.write("-- this is a comment\n*.grdecl\n# some comment")
    assert set(glob_patterns(parse_wildcardfile("filelist_comments"))) == set(
        dummyfiles
    )
    with open("filelist_comments", "w") as fhandle:
        fhandle.write("# this is a comment\n*.grdecl\n# some comment")
    assert set(glob_patterns(parse_wildcardfile("filelist_comments"))) == set(
        dummyfiles
    )
    with open("filelist_comments", "w") as fhandle:
        fhandle.write(" # this is a comment\n*.grdecl # comment along pattern")
    assert set(glob_patterns(parse_wildcardfile("filelist_comments"))) == set(
        dummyfiles
    )

    with pytest.raises(IOError):
        parse_wildcardfile("")

    with pytest.raises(IOError):
        parse_wildcardfile("notthere")