def test_input_dataset_files(self):
    """Verify that InputDatasetDesc collects exactly the expected file names."""
    dataset = InputDatasetDesc('myinds', self.filenames.values())
    expected = sorted(self.filenames.values())
    actual = sorted(dataset.files.keys())
    print_test_message('InputDatasetDesc.files', actual=actual, expected=expected)
    self.assertEqual(actual, expected, 'InputDatasetDesc has wrong files')
def test_input_dataset_type(self):
    """Verify that constructing an InputDatasetDesc yields that exact type."""
    dataset = InputDatasetDesc('myinds', self.filenames.values())
    expected = InputDatasetDesc
    actual = type(dataset)
    print_test_message('type(InputDatasetDesc)', actual=actual, expected=expected)
    self.assertEqual(actual, expected, 'InputDatasetDesc has wrong type')
def test_input_dataset_dimensions(self):
    """Verify that InputDatasetDesc discovers the expected dimension names."""
    dataset = InputDatasetDesc("myinds", self.filenames.values())
    expected = sorted(self.dims.keys())
    actual = sorted(dataset.dimensions.keys())
    print_test_message(
        "InputDatasetDesc.dimensions", actual=actual, expected=expected
    )
    self.assertEqual(actual, expected, "InputDatasetDesc has wrong dimensions")
def test_input_dataset_variable_files(self):
    """Verify that each variable in InputDatasetDesc maps to its source files.

    Fixed for Python 3: ``dict.itervalues()`` does not exist on Python 3
    (AttributeError), and ``dict.keys()`` returns a view that never compares
    equal to a list, so the original assertion could not pass.  This now
    matches the Python 3 idiom used by the sibling test of the same name.
    """
    inds = InputDatasetDesc('myinds', self.filenames.values())
    # list(...) makes the keys comparable to the literal lists below
    actual = {v.name: list(v.files.keys()) for v in inds.variables.values()}
    expected = {'lat': ['u1.nc', 'u2.nc'], 'lon': ['u1.nc', 'u2.nc'],
                'time': ['u1.nc', 'u2.nc'], 'u1': ['u1.nc'], 'u2': ['u2.nc']}
    print_test_message('InputDatasetDesc.variables.files', actual=actual, expected=expected)
    self.assertEqual(actual, expected, 'InputDatasetDesc has wrong variable files')
def run_PyConform(spec, file_glob, comm):
    """Run one PyConform data flow serially and return the failure count.

    Parameters:
        spec (str): a packed string of the form "specfile>>specdir"; only the
            spec file path before ">>" is used here.
        file_glob (iterable): paths of the input NetCDF files.
        comm: the communicator object passed through to ``dataflow.execute``.

    Returns:
        int: 0 on success, 1 if any known exception was raised during the run.
    """
    failures = 0
    # The spec argument packs the spec-file path and a directory together.
    spec_fn = spec.split(">>")[0]
    # Input files are processed in sorted order for determinism.
    infiles = sorted(file_glob)
    # Load the spec JSON, preserving key order; the context manager ensures
    # the file handle is closed (the original leaked it).
    with open(spec_fn, 'r') as specfile:
        dsdict = json.load(specfile, object_pairs_hook=OrderedDict)
    try:
        # Parse the output dataset
        outds = OutputDatasetDesc(dsdict=dsdict)
        # Parse the input dataset
        inpds = InputDatasetDesc(filenames=infiles)
        # Setup the PyConform data flow
        dataflow = DataFlow(inpds, outds)
        # Execute
        dataflow.execute(serial=True, scomm=comm)
    # The original had eight byte-identical handlers; one tuple clause is
    # equivalent and keeps the exact same message and counting behavior.
    except (UnitsError, IndexError, ValueError, KeyError, IOError,
            NameError, RuntimeError, TypeError) as e:
        print("ooo ERROR IN ", os.path.basename(spec_fn), str(e))
        failures = failures + 1
    return failures
def test_input_dataset_variable_files(self):
    """Verify that each variable in InputDatasetDesc maps to its source files."""
    dataset = InputDatasetDesc("myinds", self.filenames.values())
    expected = {
        "lat": ["u1.nc", "u2.nc"],
        "lon": ["u1.nc", "u2.nc"],
        "time": ["u1.nc", "u2.nc"],
        "u1": ["u1.nc"],
        "u2": ["u2.nc"],
    }
    actual = {}
    for var in dataset.variables.values():
        actual[var.name] = list(var.files.keys())
    print_test_message(
        "InputDatasetDesc.variables.files", actual=actual, expected=expected
    )
    self.assertEqual(actual, expected, "InputDatasetDesc has wrong variable files")
def main(argv=None): args = cli(argv) # Create the necessary SimpleComm scomm = create_comm(serial=args.serial) # Do setup only on manager node if scomm.is_manager(): # Check that the specfile exists if not exists(args.stdfile): raise OSError(("Output specification file {!r} not " "found").format(args.stdfile)) # Read the specfile into a dictionary print("Reading standardization file: {}".format(args.stdfile)) dsdict = json_load(open(args.stdfile, "r"), object_pairs_hook=OrderedDict) # Parse the output Dataset print( "Creating output dataset descriptor from standardization file...") outds = OutputDatasetDesc(dsdict=dsdict) else: outds = None # Send the output descriptor to all nodes outds = scomm.partition(outds, func=Duplicate(), involved=True) # Sync scomm.sync() # Continue setup only on manager node if scomm.is_manager(): # Gather the list of input files infiles = [] for infile in args.infiles: infiles.extend(glob(infile)) # If no input files, stop here if len(infiles) == 0: print("Standardization file validated.") return # Parse the input Dataset print( "Creating input dataset descriptor from {} input files...".format( len(infiles))) inpds = InputDatasetDesc(filenames=infiles) else: inpds = None # Send the input descriptor to all nodes inpds = scomm.partition(inpds, func=Duplicate(), involved=True) # Sync and continue process on all nodes scomm.sync() # Check for warn/error if args.error: simplefilter("error", ValidationWarning) # Try importing all of the necessary user-defined modules if args.module is not None: for i, modpath in enumerate(args.module): load_source("user{}".format(i), modpath) # Setup the PyConform data flow on all nodes if scomm.is_manager(): print("Creating the data flow...") dataflow = DataFlow(inpds, outds) # Execute the data flow (write to files) history = not args.no_history dataflow.execute( chunks=dict(args.chunks), scomm=scomm, history=history, deflate=args.deflate, debug=args.debug, )