def test_resolve_catalogue_id(self):
    """Integration test for resolving catalogue ID"""

    # Make a catalogue with an ID
    catalogue = Catalogue(identifier='12345678-1234-5678-1234-567812345678')
    tempfile = NamedTemporaryFile(prefix='eumopps.catalogue.test.test_operation.TestOperation.', suffix='.nc')
    CatalogueWriterNetCDF().save(tempfile.name, catalogue)

    # A class with nested member structures including catalogue ID
    opmodule = OperationExample()
    opmodule.information = {
        'apples': 3,
        'some_details': {'my_identity': OperationCatalogueID()}
    }

    # Operation defined on the example class
    op = Operation(runmodule=opmodule, step=StepDaily(start='20171112000000', end='20171118000000'))

    # Resolve to list of operations
    op.resolve_operation_references(catalogue)

    # The static info should be unchanged
    # and the catalogue ID should still be an unresolved placeholder at this point
    self.assertEqual(3, op.runmodule.information['apples'])
    self.assertIsInstance(op.runmodule.information['some_details']['my_identity'], OperationCatalogueID)

    # Then resolve to individual operation
    op.resolve_single_operation(tempfile.name, 5, 'timenow')

    # Static info should still be unchanged but should have ID now
    self.assertEqual(3, op.runmodule.information['apples'])
    self.assertEqual('12345678-1234-5678-1234-567812345678', op.runmodule.information['some_details']['my_identity'])
def test_resolve_single_operation(self):

    # Test catalogue with known ID
    catalogue = Catalogue(identifier=str(UUID('{12345678-1234-5678-1234-567812345678}')))
    tempfile = NamedTemporaryFile(
        prefix='eumopps.catalogue.test.test_operationparameters.TestOperationCatalogueID.',
        suffix='.nc')
    CatalogueWriterNetCDF().save(tempfile.name, catalogue)

    # Check the resolved ID matches (and operation index should be ignored here)
    result = OperationCatalogueID().resolve_single_operation(tempfile.name, 527)
    self.assertEqual('12345678-1234-5678-1234-567812345678', result)
def test_operation_output_resolve(self):

    # Make a catalogue with one data set and one subset to hold the outputs
    catalogue = Catalogue([
        CatalogueDataSet(
            name='MyExampleDataset',
            path='/some/path',
            subsets=[
                CatalogueDataSubset(layout=DataStorageFiles(
                    patterns=['%Y', 'examplefile_%Y%m%d.bin']))
            ],
        )
    ])

    # Annual stepper spanning the year boundary
    step = StepAnnual(start=datetime(2001, 12, 30), end=datetime(2002, 1, 2))

    # Output placeholder referring to subset 0 of the data set
    output_placeholder = AnnualBatchDaysOutput('MyExampleDataset', 0)
    print output_placeholder.subsetindex

    # Resolve outputs for the two annual operations
    operation_indices = [0, 1]
    output_entries = output_placeholder.operation_output_resolve(catalogue, step, operation_indices)
    print output_entries
    print [(entry.name, entry.time) for entry in catalogue.datasets[0].subsets[0].matches]

    # Check the details appended to catalogue
    self.assertEqual(('2001', 'examplefile_20011230.bin'),
                     os.path.split(catalogue.datasets[0].subsets[0].matches[0].name))
    self.assertEqual(('2001', 'examplefile_20011231.bin'),
                     os.path.split(catalogue.datasets[0].subsets[0].matches[1].name))
    self.assertEqual(('2002', 'examplefile_20020101.bin'),
                     os.path.split(catalogue.datasets[0].subsets[0].matches[2].name))
    self.assertEqual(('2002', 'examplefile_20020102.bin'),
                     os.path.split(catalogue.datasets[0].subsets[0].matches[3].name))
    self.assertEqual(datetime(2001, 12, 30), catalogue.datasets[0].subsets[0].matches[0].time)
    self.assertEqual(datetime(2001, 12, 31), catalogue.datasets[0].subsets[0].matches[1].time)
    self.assertEqual(datetime(2002, 1, 1), catalogue.datasets[0].subsets[0].matches[2].time)
    self.assertEqual(datetime(2002, 1, 2), catalogue.datasets[0].subsets[0].matches[3].time)
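# Illustrative sketch only (assumed behaviour, not the EUMOPPS implementation): the expected
# names asserted above follow from treating each DataStorageFiles pattern as a strftime
# template for the step time and joining the formatted pieces into a relative path.
def _example_expand_patterns(patterns, steptime):
    """Hypothetical helper reproducing the output name layout asserted in the test above."""

    import os

    # Format every pattern component with the step time, then join as a path
    return os.path.join(*[steptime.strftime(pattern) for pattern in patterns])

# For example, _example_expand_patterns(['%Y', 'examplefile_%Y%m%d.bin'], datetime(2001, 12, 30))
# gives '2001/examplefile_20011230.bin', matching the first catalogue entry checked above.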
def test_resolve_fail_multiple(self):
    """Check it fails if there are multiple inputs"""

    # Make a catalogue with one data set and two subsets
    catalogue = Catalogue([
        CatalogueDataSet(
            name='MyExample',
            path='/some/path',
            subsets=[
                CatalogueDataSubset(layout=DataStorageFiles(
                    patterns=['allthesame'])),
                CatalogueDataSubset(
                    layout=DataStorageFiles(
                        patterns=['%Y', 'examplefile_%Y%m%d_%H.bin']),
                    matches=[
                        CatalogueFileEntry(
                            name='2017/examplefile_20171113_12.bin',
                            time=datetime(2017, 11, 13, 12)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171114_12.bin',
                            time=datetime(2017, 11, 14, 12)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171114_18.bin',
                            time=datetime(2017, 11, 14, 18)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171115_12.bin',
                            time=datetime(2017, 11, 15, 12)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171117_12.bin',
                            time=datetime(2017, 11, 17, 12))
                    ])
            ])
    ])

    # Make an input stepper that works across 6 days (of which the catalogue has data for 4)
    step = StepDaily(start='20171113000000', end='20171118000000')
    self.assertEqual(6, step.count())

    # Build class
    example = InputFile('MyExample', 1, InputFileList.MISSING_DATA_SKIP)

    # Attempt find
    request_skip = [False, False, False, False, False, False]
    with self.assertRaises(OperationException):
        example.operation_input_resolve(request_skip, catalogue, step)
def test_resolve_fail_missing(self):

    # Make a catalogue with one data set and two subsets
    catalogue = Catalogue([
        CatalogueDataSet(
            name='MyExample',
            path='/some/path',
            subsets=[
                CatalogueDataSubset(layout=DataStorageFiles(
                    patterns=['allthesame'])),
                CatalogueDataSubset(
                    layout=DataStorageFiles(
                        patterns=['%Y', 'examplefile_%Y%m%d_%H.bin']),
                    matches=[
                        CatalogueFileEntry(
                            name='2017/examplefile_20171113_12.bin',
                            time=datetime(2017, 11, 13, 12)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171114_12.bin',
                            time=datetime(2017, 11, 14, 12)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171114_18.bin',
                            time=datetime(2017, 11, 14, 18)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171115_12.bin',
                            time=datetime(2017, 11, 15, 12)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171117_12.bin',
                            time=datetime(2017, 11, 17, 12))
                    ])
            ])
    ])

    # Make an input stepper that works across 6 days (of which the catalogue has data for 4)
    step = StepDaily(start='20171113000000', end='20171118000000')
    self.assertEqual(6, step.count())

    # Build class - default behaviour should be to disallow missing data
    example = InputFileList('MyExample', 1)

    # Attempt find - should raise exception
    with self.assertRaises(OperationException):
        request_skip = [False, False, False, False, False, False]
        example.operation_input_resolve(request_skip, catalogue, step)
def test_find_references(self):

    # Make a catalogue with one data set and two subsets
    # (this first definition is immediately superseded by the replacement below)
    catalogue = Catalogue([
        CatalogueDataSet(
            name='MyExampleDataset',
            path='/some/path',
            subsets=[
                CatalogueDataSubset(
                    layout=DataStorageFiles(patterns=['allthesame.bin']),
                    matches=[
                        CatalogueFileEntry(name='allthesame.bin', time=datetime(2000, 1, 1))
                    ],
                ),
                CatalogueDataSubset(
                    layout=DataStorageFiles(
                        patterns=['%Y', 'examplefile_%Y.bin']),
                    matches=[
                        CatalogueFileEntry(
                            name='2000/examplefile_2000.bin',
                            time=datetime(2000, 1, 1)),
                        CatalogueFileEntry(
                            name='2001/examplefile_2001.bin',
                            time=datetime(2001, 1, 1)),
                        CatalogueFileEntry(
                            name='2002/examplefile_2002.bin',
                            time=datetime(2002, 1, 1)),
                        CatalogueFileEntry(
                            name='2003/examplefile_2003.bin',
                            time=datetime(2003, 1, 1)),
                    ])
            ])
    ])

    # Replacement catalogue actually used by the test: daily files in subset 1
    catalogue = Catalogue([
        CatalogueDataSet(
            name='MyExampleDataset',
            path='/some/path',
            subsets=[
                CatalogueDataSubset(
                    layout=DataStorageFiles(patterns=['allthesame.bin']),
                    matches=[
                        CatalogueFileEntry(name='allthesame.bin', time=None)
                    ],
                ),
                CatalogueDataSubset(
                    layout=DataStorageFiles(
                        patterns=['%Y', 'examplefile_%Y%m%d.bin']),
                    matches=[
                        CatalogueFileEntry(
                            name='2001/examplefile_20010101.bin',
                            time=datetime(2001, 1, 1)),
                        CatalogueFileEntry(
                            name='2001/examplefile_20010101.bin',
                            time=datetime(2001, 1, 2)),
                        CatalogueFileEntry(
                            name='2001/examplefile_20010101.bin',
                            time=datetime(2001, 1, 3)),
                        CatalogueFileEntry(
                            name='2001/examplefile_20010101.bin',
                            time=datetime(2002, 1, 2)),
                    ])
            ])
    ])

    # Annual stepper covering 2001 and 2002
    step = StepAnnual(start=datetime(2001, 1, 1), end=datetime(2002, 2, 1))
    print step.count()

    # Input handler batching daily files per year, allowing missing data
    inputhandler = AnnualBatchDays(
        'MyExampleDataset',
        subsetindex=1,
        missing_data=AnnualBatchDays.MISSING_DATA_ALLOWED)
    request_skip = [False] * step.count()
    print inputhandler.find_references(request_skip, catalogue, step)
    print inputhandler
def test_resolve_span_days(self):

    # Make a catalogue with one data set and two subsets
    catalogue = Catalogue([
        CatalogueDataSet(
            name='MyExample',
            path='/some/path',
            subsets=[
                CatalogueDataSubset(layout=DataStorageFiles(
                    patterns=['allthesame'])),
                CatalogueDataSubset(
                    layout=DataStorageFiles(
                        patterns=['%Y', 'examplefile_%Y.bin']),
                    matches=[
                        CatalogueFileEntry(
                            name='2000/examplefile_2000.bin',
                            time=datetime(2000, 1, 1)),
                        CatalogueFileEntry(
                            name='2001/examplefile_2001.bin',
                            time=datetime(2001, 1, 1)),
                        CatalogueFileEntry(
                            name='2002/examplefile_2002.bin',
                            time=datetime(2002, 1, 1)),
                        CatalogueFileEntry(
                            name='2003/examplefile_2003.bin',
                            time=datetime(2003, 1, 1)),
                    ])
            ])
    ])

    # Make an input stepper that works across 3 days spanning the year boundary
    step = StepDaily(start='20011231000000', end='20020102000000')
    self.assertEqual(3, step.count())

    # Build class
    example = InputFileList('MyExample', 1, InputFileList.MISSING_DATA_SKIP)

    # Attempt find
    request_skip = [False, False, False]
    dataref = example.operation_input_resolve(request_skip, catalogue, step)

    # Should be a parameters object (which can later be resolved to single operation)
    self.assertIsInstance(dataref, OperationParameter)
    self.assertIsInstance(dataref, OperationFileListReference)

    # Check skip requests worked ok
    self.assertEqual([False, False, False], request_skip)

    # Should have one on each step
    self.assertEqual(3, len(dataref.operation_parameters))
    self.assertEqual([3, 3, 3],
                     [len(operation_refs) for operation_refs in dataref.operation_parameters])

    # Should refer to given dataset
    self.assertEqual([0, 1, 1], dataref.operation_parameters[0])
    self.assertEqual([0, 1, 2], dataref.operation_parameters[1])
    self.assertEqual([0, 1, 2], dataref.operation_parameters[2])
def test_operation_output_resolve(self):

    # Make a catalogue with one non-empty data set (as in input test)
    # and one empty data set ready for outputs
    catalogue = Catalogue([
        CatalogueDataSet(
            name='MyExample',
            path='/some/path',
            subsets=[
                CatalogueDataSubset(layout=DataStorageFiles(
                    patterns=['allthesame'])),
                CatalogueDataSubset(
                    layout=DataStorageFiles(
                        patterns=['%Y', 'examplefile_%Y%m%d_%H.bin']),
                    matches=[
                        CatalogueFileEntry(
                            name='2017/examplefile_20171113_12.bin',
                            time=datetime(2017, 11, 13, 12)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171114_12.bin',
                            time=datetime(2017, 11, 14, 12)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171114_18.bin',
                            time=datetime(2017, 11, 14, 18)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171115_12.bin',
                            time=datetime(2017, 11, 15, 12)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171117_12.bin',
                            time=datetime(2017, 11, 17, 12))
                    ])
            ]),
        CatalogueDataSet(
            name='SomeOutput',
            path='/new/path',
            subsets=[
                CatalogueDataSubset(layout=DataStorageFiles(
                    patterns=['notused'])),
                CatalogueDataSubset(layout=DataStorageFiles(
                    patterns=['alsonotused'])),
                CatalogueDataSubset(layout=DataStorageFiles(
                    patterns=['%Y', 'my_output_%Y%m%d.bin']))
            ])
    ])

    # Stepper across 6 days
    step = StepDaily(start='20171113000000', end='20171118000000')

    # This should output to subset 2 of second dataset
    example_output = OutputFile('SomeOutput', 2)

    # Request indices 2, 3, 5
    result = example_output.operation_output_resolve(catalogue, step, [2, 3, 5])

    # Should populate the catalogue with matches
    self.assertEqual(3, len(catalogue.datasets[1].subsets[2].matches))

    # Check the details appended to catalogue
    self.assertEqual(('2017', 'my_output_20171115.bin'),
                     os.path.split(catalogue.datasets[1].subsets[2].matches[0].name))
    self.assertEqual(('2017', 'my_output_20171116.bin'),
                     os.path.split(catalogue.datasets[1].subsets[2].matches[1].name))
    self.assertEqual(('2017', 'my_output_20171118.bin'),
                     os.path.split(catalogue.datasets[1].subsets[2].matches[2].name))
    self.assertEqual(datetime(2017, 11, 15), catalogue.datasets[1].subsets[2].matches[0].time)
    self.assertEqual(datetime(2017, 11, 16), catalogue.datasets[1].subsets[2].matches[1].time)
    self.assertEqual(datetime(2017, 11, 18), catalogue.datasets[1].subsets[2].matches[2].time)

    # Also check the reference list correctly describes these
    self.assertEqual(3, len(result.operation_parameters))
    self.assertEqual([3, 3, 3],
                     [len(operation_refs) for operation_refs in result.operation_parameters])
    self.assertEqual([1, 2, 0], result.operation_parameters[0])
    self.assertEqual([1, 2, 1], result.operation_parameters[1])
    self.assertEqual([1, 2, 2], result.operation_parameters[2])
def test_resolve_allow_missing(self):
    """Check resolution succeeds when missing data is allowed for single input files"""

    # Make a catalogue with one data set and two subsets
    catalogue = Catalogue([
        CatalogueDataSet(
            name='MyExample',
            path='/some/path',
            subsets=[
                CatalogueDataSubset(layout=DataStorageFiles(
                    patterns=['allthesame'])),
                CatalogueDataSubset(
                    layout=DataStorageFiles(
                        patterns=['%Y', 'examplefile_%Y%m%d_%H.bin']),
                    matches=[
                        CatalogueFileEntry(
                            name='2017/examplefile_20171110_12.bin',
                            time=datetime(2017, 10, 13, 12)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171114_12.bin',
                            time=datetime(2017, 11, 14, 12)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171115_06.bin',
                            time=datetime(2017, 11, 15, 6)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171117_12.bin',
                            time=datetime(2017, 11, 17, 12))
                    ])
            ])
    ])

    # Make an input stepper that works across 6 days (of which the catalogue has data for 3)
    step = StepDaily(start='20171113000000', end='20171118000000')
    self.assertEqual(6, step.count())

    # Build class
    example = InputFile('MyExample', 1, InputFileList.MISSING_DATA_ALLOWED)

    # Attempt find
    request_skip = [False, False, False, False, False, False]
    dataref = example.operation_input_resolve(request_skip, catalogue, step)

    # Should be a parameters object (which can later be resolved to single operation)
    self.assertIsInstance(dataref, OperationParameter)
    self.assertIsInstance(dataref, OperationFileReference)

    # Check no skip requested
    self.assertEqual([False, False, False, False, False, False], request_skip)

    # Should have one on 14th, 15th, 17th
    # There's also one on 10th but that isn't used
    self.assertEqual(6, len(dataref.operation_parameters))
    # self.assertEqual([0, 3, 3, 0, 3, 0], [len(operation_refs) for operation_refs in dataref.operation_parameters])
    self.assertEqual(
        [3, 3, 3, 3, 3, 3],
        [len(operation_refs) for operation_refs in dataref.operation_parameters]
    )  # There should now always be at least 3 operation refs for MISSING_DATA_ALLOWED

    # Should refer to given dataset
    self.assertEqual([0, 1, 1], dataref.operation_parameters[1])
    self.assertEqual([0, 1, 2], dataref.operation_parameters[2])
    self.assertEqual([0, 1, 3], dataref.operation_parameters[4])
def test_resolve_skip(self):

    # Make a catalogue with one data set and two subsets
    catalogue = Catalogue([
        CatalogueDataSet(
            name='MyExample',
            path='/some/path',
            subsets=[
                CatalogueDataSubset(layout=DataStorageFiles(
                    patterns=['allthesame'])),
                CatalogueDataSubset(
                    layout=DataStorageFiles(
                        patterns=['%Y', 'examplefile_%Y%m%d_%H.bin']),
                    matches=[
                        CatalogueFileEntry(
                            name='2017/examplefile_20171113_12.bin',
                            time=datetime(2017, 11, 13, 12)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171114_12.bin',
                            time=datetime(2017, 11, 14, 12)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171114_18.bin',
                            time=datetime(2017, 11, 14, 18)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171115_12.bin',
                            time=datetime(2017, 11, 15, 12)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171117_12.bin',
                            time=datetime(2017, 11, 17, 12))
                    ])
            ])
    ])

    # Make an input stepper that works across 6 days (of which the catalogue has data for 4)
    step = StepDaily(start='20171113000000', end='20171118000000')
    self.assertEqual(6, step.count())

    # Build class
    example = InputFileList('MyExample', 1, InputFileList.MISSING_DATA_SKIP)

    # Attempt find
    request_skip = [False, False, False, False, False, False]
    dataref = example.operation_input_resolve(request_skip, catalogue, step)

    # Should be a parameters object (which can later be resolved to single operation)
    self.assertIsInstance(dataref, OperationParameter)
    self.assertIsInstance(dataref, OperationFileListReference)

    # Check skip requests worked ok
    self.assertEqual([False, False, False, True, False, True], request_skip)

    # Should have one on 13th, 15th, 17th and two on 14th
    self.assertEqual(6, len(dataref.operation_parameters))
    self.assertEqual([3, 6, 3, 0, 3, 0],
                     [len(operation_refs) for operation_refs in dataref.operation_parameters])

    # Should refer to given dataset
    self.assertEqual([0, 1, 0], dataref.operation_parameters[0])
    self.assertEqual([0, 1, 1, 0, 1, 2], dataref.operation_parameters[1])
    self.assertEqual([0, 1, 3], dataref.operation_parameters[2])
    self.assertEqual([0, 1, 4], dataref.operation_parameters[4])
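# Illustrative sketch only (assumed encoding, inferred from the assertions above and below):
# each operation_parameters entry appears to be a flat list of
# [dataset_index, subset_index, entry_index] triplets, so [0, 1, 1, 0, 1, 2] means two files,
# both from dataset 0 / subset 1, catalogue entries 1 and 2.
def _example_split_triplets(flat_refs):
    """Hypothetical helper: group a flat reference list into (dataset, subset, entry) triplets."""

    return [tuple(flat_refs[i:i + 3]) for i in range(0, len(flat_refs), 3)]

# For example, _example_split_triplets([0, 1, 1, 0, 1, 2]) gives [(0, 1, 1), (0, 1, 2)],
# the two matches found for 14th November in the test above.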
def test_resolve_allow_missing(self):

    # Make a catalogue with one data set and two subsets
    catalogue = Catalogue([
        CatalogueDataSet(
            name='MyExample',
            path='/some/path',
            subsets=[
                CatalogueDataSubset(layout=DataStorageFiles(
                    patterns=['allthesame'])),
                CatalogueDataSubset(
                    layout=DataStorageFiles(
                        patterns=['%Y', 'examplefile_%Y%m%d_%H.bin']),
                    matches=[
                        CatalogueFileEntry(
                            name='2017/examplefile_20171113_12.bin',
                            time=datetime(2017, 11, 13, 12)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171114_12.bin',
                            time=datetime(2017, 11, 14, 12)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171114_18.bin',
                            time=datetime(2017, 11, 14, 18)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171115_12.bin',
                            time=datetime(2017, 11, 15, 12)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171117_12.bin',
                            time=datetime(2017, 11, 17, 12))
                    ])
            ])
    ])

    # Make an input stepper that works across 6 days (of which the catalogue has data for 4)
    step = StepDaily(start='20171113000000', end='20171118000000')
    self.assertEqual(6, step.count())

    # Build class - explicitly allow missing data
    example = InputFileList('MyExample', 1, missing_data=InputFileList.MISSING_DATA_ALLOWED)

    # Attempt find
    request_skip = [False, False, False, False, False, False]
    dataref = example.operation_input_resolve(request_skip, catalogue, step)

    # Should be a parameters object (which can later be resolved to single operation)
    self.assertIsInstance(dataref, OperationParameter)
    self.assertIsInstance(dataref, OperationFileListReference)

    # Should not request any skip even though some items lack all data
    self.assertEqual([False, False, False, False, False, False], request_skip)

    # Should have one on 13th, 15th, 17th and two on 14th
    self.assertEqual(6, len(dataref.operation_parameters))
    # self.assertEqual([3, 6, 3, 0, 3, 0], [len(operation_refs) for operation_refs in dataref.operation_parameters])
    self.assertEqual(
        [3, 6, 3, 3, 3, 3],
        [len(operation_refs) for operation_refs in dataref.operation_parameters]
    )  # now valid operation refs (albeit using -1 to indicate a missing input file)

    # Should refer to given dataset
    self.assertEqual([0, 1, 0], dataref.operation_parameters[0])
    self.assertEqual([0, 1, 1, 0, 1, 2], dataref.operation_parameters[1])
    self.assertEqual([0, 1, 3], dataref.operation_parameters[2])
    self.assertEqual([0, 1, 4], dataref.operation_parameters[4])

    # We now use -1 to indicate missing inputs to prevent operations with no inputs
    # from being excluded from the catalogue on NetCDF write
    self.assertEqual([0, 1, -1], dataref.operation_parameters[3])
    self.assertEqual([0, 1, -1], dataref.operation_parameters[5])
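# Illustrative sketch only (assumed convention): with MISSING_DATA_ALLOWED the test above
# expects an entry index of -1 where no catalogue match exists, rather than an empty list,
# so that every step keeps a fixed-size reference and none are dropped on NetCDF write.
# A hypothetical consumer of such a triplet might translate -1 back to "no input":
def _example_entry_or_none(catalogue, dataset_index, subset_index, entry_index):
    """Hypothetical helper: return the matched catalogue entry, or None when the index is -1."""

    if entry_index < 0:
        return None
    return catalogue.datasets[dataset_index].subsets[subset_index].matches[entry_index]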
def test_resolve_operation_references_single_filelist(self):
    """Example resolving references for one input data set, one filelist parameter and one output."""

    # Make a catalogue with one non-empty data set (as in input test)
    catalogue = Catalogue([
        CatalogueDataSet(
            name='MyExample',
            path='/some/path',
            subsets=[
                CatalogueDataSubset(layout=DataStorageFiles(patterns=['allthesame'])),
                CatalogueDataSubset(
                    layout=DataStorageFiles(patterns=['%Y', 'examplefile_%Y%m%d_%H.bin']),
                    matches=[
                        CatalogueFileEntry(name='2017/examplefile_20171113_12.bin', time=datetime(2017, 11, 13, 12)),
                        CatalogueFileEntry(name='2017/examplefile_20171114_12.bin', time=datetime(2017, 11, 14, 12)),
                        CatalogueFileEntry(name='2017/examplefile_20171114_18.bin', time=datetime(2017, 11, 14, 18)),
                        CatalogueFileEntry(name='2017/examplefile_20171115_12.bin', time=datetime(2017, 11, 15, 12)),
                        CatalogueFileEntry(name='2017/examplefile_20171117_12.bin', time=datetime(2017, 11, 17, 12))
                    ])
            ]),
    ])

    # New datasets to be built by the operation
    newdatasets = [
        CatalogueDataSet(
            name='SomeOutput',
            path='/new/path',
            subsets=[
                CatalogueDataSubset(layout=DataStorageFiles(patterns=['notused'])),
                CatalogueDataSubset(layout=DataStorageFiles(patterns=['alsonotused'])),
                CatalogueDataSubset(layout=DataStorageFiles(patterns=['%Y', 'my_output_%Y%m%d.bin']))
            ])
    ]

    # Stepper across 6 days
    step = StepDaily(start='20171113000000', end='20171118000000')

    # A class with nested member structures
    op = OperationExample()
    op.information = {
        'apples': 3,
        'something_nested': {'not_a_bird': OutputFile('SomeOutput', 2)},
        'my_inputs': [InputFileList('MyExample', 1, 'skip')]
    }

    # Build operation run object
    oprun = Operation(runmodule=op, step=step, newdatasets=newdatasets)

    # Resolve the example class
    result = oprun.resolve_operation_references(catalogue)

    # The static info should be unchanged
    self.assertEqual(3, result.information['apples'])

    # Get input and output refs
    self.assertEqual(1, len(result.information['my_inputs']))
    inputref = result.information['my_inputs'][0]
    self.assertIsInstance(inputref, OperationFileListReference)
    self.assertEqual(4, len(inputref.operation_parameters))
    self.assertEqual([3, 6, 3, 3],
                     [len(operation_refs) for operation_refs in inputref.operation_parameters])
    self.assertEqual([0, 1, 0], inputref.operation_parameters[0])
    self.assertEqual([0, 1, 1, 0, 1, 2], inputref.operation_parameters[1])
    self.assertEqual([0, 1, 3], inputref.operation_parameters[2])
    self.assertEqual([0, 1, 4], inputref.operation_parameters[3])
def test_resolve_operation_references_list_of_files(self):
    """Example resolving references for one input data set, a list of individual files, and one output."""

    # Make a catalogue with two data sets and presence of data like:
    # Date      : 12 13 14 15 16 17 18
    # ExampleOne:  -  +  +  +  -  +  -
    # ExampleTwo:  +  -  +  +  -  +  -
    catalogue = Catalogue([
        CatalogueDataSet(
            name='ExampleOne',
            path='/some/path',
            subsets=[
                CatalogueDataSubset(layout=DataStorageFiles(patterns=['allthesame'])),
                CatalogueDataSubset(
                    layout=DataStorageFiles(patterns=['%Y', 'exampleone_%Y%m%d.bin']),
                    matches=[
                        CatalogueFileEntry(name='2017/exampleone_20171113.bin', time=datetime(2017, 11, 13)),
                        CatalogueFileEntry(name='2017/exampleone_20171114.bin', time=datetime(2017, 11, 14)),
                        CatalogueFileEntry(name='2017/exampleone_20171115.bin', time=datetime(2017, 11, 15)),
                        CatalogueFileEntry(name='2017/exampleone_20171117.bin', time=datetime(2017, 11, 17))
                    ])
            ]),
        CatalogueDataSet(
            name='ExampleTwo',
            path='/another/path',
            subsets=[
                CatalogueDataSubset(layout=DataStorageFiles(patterns=['allthesame'])),
                CatalogueDataSubset(
                    layout=DataStorageFiles(patterns=['%Y', 'exampletwo_%Y%m%d.bin']),
                    matches=[
                        CatalogueFileEntry(name='2017/exampletwo_20171112.bin', time=datetime(2017, 11, 12)),
                        CatalogueFileEntry(name='2017/exampletwo_20171114.bin', time=datetime(2017, 11, 14)),
                        CatalogueFileEntry(name='2017/exampletwo_20171115.bin', time=datetime(2017, 11, 15)),
                        CatalogueFileEntry(name='2017/exampletwo_20171117.bin', time=datetime(2017, 11, 17))
                    ])
            ])
    ])

    # New datasets to be built by the operation
    newdatasets = [
        CatalogueDataSet(
            name='SomeOutput',
            path='/new/path',
            subsets=[
                CatalogueDataSubset(layout=DataStorageFiles(patterns=['%Y', 'my_output_%Y%m%d.bin']))
            ])
    ]

    # Stepper across 7 days
    step = StepDaily(start='20171112000000', end='20171118000000')

    # A class with nested member structures
    op = OperationExample()
    op.information = {
        'apples': 3,
        'the_result': OutputFile('SomeOutput'),
        'my_inputs': [
            InputFile('ExampleOne', 1, 'allowed'),
            InputFile('ExampleTwo', 1, 'allowed')
        ]
    }

    # Build operation run object
    oprun = Operation(runmodule=op, step=step, newdatasets=newdatasets)

    # Resolve the example class
    result = oprun.resolve_operation_references(catalogue)

    # The static info should be unchanged
    self.assertEqual(3, result.information['apples'])

    # Get input refs
    self.assertEqual(2, len(result.information['my_inputs']))
    refone = result.information['my_inputs'][0]
    reftwo = result.information['my_inputs'][1]

    # Check types and sizes
    self.assertIsInstance(refone, OperationFileReference)
    self.assertIsInstance(reftwo, OperationFileReference)
    self.assertEqual(7, len(refone.operation_parameters))
    self.assertEqual(7, len(reftwo.operation_parameters))

    # Expected values prior to the missing indicator change:
    # Pattern of first data
    # self.assertEqual([0, 3, 3, 3, 0, 3, 0], [len(operation_refs) for operation_refs in refone.operation_parameters])
    # self.assertEqual([0, 1, 0], refone.operation_parameters[1])
    # self.assertEqual([0, 1, 1], refone.operation_parameters[2])
    # self.assertEqual([0, 1, 2], refone.operation_parameters[3])
    # self.assertEqual([0, 1, 3], refone.operation_parameters[5])
    # Pattern of second data
    # self.assertEqual([3, 0, 3, 3, 0, 3, 0], [len(operation_refs) for operation_refs in reftwo.operation_parameters])
    # self.assertEqual([1, 1, 0], reftwo.operation_parameters[0])
    # self.assertEqual([1, 1, 1], reftwo.operation_parameters[2])
    # self.assertEqual([1, 1, 2], reftwo.operation_parameters[3])
    # self.assertEqual([1, 1, 3], reftwo.operation_parameters[5])

    # Updated for the missing indicator (-1) when missing data is allowed

    # Pattern of first data
    self.assertEqual([3, 3, 3, 3, 3, 3, 3],
                     [len(operation_refs) for operation_refs in refone.operation_parameters])
    self.assertEqual([0, 1, -1], refone.operation_parameters[0])
    self.assertEqual([0, 1, 0], refone.operation_parameters[1])
    self.assertEqual([0, 1, 1], refone.operation_parameters[2])
    self.assertEqual([0, 1, 2], refone.operation_parameters[3])
    self.assertEqual([0, 1, -1], refone.operation_parameters[4])
    self.assertEqual([0, 1, 3], refone.operation_parameters[5])

    # Pattern of second data
    self.assertEqual([3, 3, 3, 3, 3, 3, 3],
                     [len(operation_refs) for operation_refs in reftwo.operation_parameters])
    self.assertEqual([1, 1, 0], reftwo.operation_parameters[0])
    self.assertEqual([1, 1, -1], reftwo.operation_parameters[1])
    self.assertEqual([1, 1, 1], reftwo.operation_parameters[2])
    self.assertEqual([1, 1, 2], reftwo.operation_parameters[3])
    self.assertEqual([1, 1, -1], reftwo.operation_parameters[4])
    self.assertEqual([1, 1, 3], reftwo.operation_parameters[5])
def test_resolve_single_operation(self):

    # Make a catalogue with one data set and two subsets
    catalogue = Catalogue([
        CatalogueDataSet(
            name='MyExample',
            path='/some/path',
            subsets=[
                CatalogueDataSubset(layout=DataStorageFiles(
                    patterns=['allthesame'])),
                CatalogueDataSubset(
                    layout=DataStorageFiles(
                        patterns=['%Y', 'examplefile_%Y%m%d_%H.bin']),
                    matches=[
                        CatalogueFileEntry(
                            name='2017/examplefile_20171110_12.bin',
                            time=datetime(2017, 10, 13, 12)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171114_12.bin',
                            time=datetime(2017, 11, 14, 12)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171115_06.bin',
                            time=datetime(2017, 11, 15, 6)),
                        CatalogueFileEntry(
                            name='2017/examplefile_20171117_12.bin',
                            time=datetime(2017, 11, 17, 12))
                    ])
            ])
    ])

    # Store it to temp file
    tempfile = NamedTemporaryFile(
        prefix='eumopps.catalogue.test.test_operationparameters.TestOperationFileReference.',
        suffix='.nc')
    CatalogueWriterNetCDF().save(tempfile.name, catalogue)

    # Test parameters (as if loaded from file)
    operation_parameters = [[], [0, 1, 1], [0, 1, 2], [], [0, 1, 3], []]

    # Check resolution
    self.assertIsNone(
        OperationFileReference(operation_parameters).resolve_single_operation(tempfile.name, 0))
    self.assertEqual(
        '/some/path/2017/examplefile_20171114_12.bin',
        OperationFileReference(operation_parameters).resolve_single_operation(tempfile.name, 1))
    self.assertEqual(
        '/some/path/2017/examplefile_20171115_06.bin',
        OperationFileReference(operation_parameters).resolve_single_operation(tempfile.name, 2))
    self.assertIsNone(
        OperationFileReference(operation_parameters).resolve_single_operation(tempfile.name, 3))
    self.assertEqual(
        '/some/path/2017/examplefile_20171117_12.bin',
        OperationFileReference(operation_parameters).resolve_single_operation(tempfile.name, 4))
    self.assertIsNone(
        OperationFileReference(operation_parameters).resolve_single_operation(tempfile.name, 5))
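# Illustrative sketch only (assumed behaviour, inferred from the expected values above):
# resolving a (dataset, subset, entry) triplet appears to join the data set path with the
# matched entry name, e.g. '/some/path' + '2017/examplefile_20171114_12.bin'.
def _example_resolve_full_path(catalogue, dataset_index, subset_index, entry_index):
    """Hypothetical helper reproducing the full path layout asserted in the test above."""

    import os

    dataset = catalogue.datasets[dataset_index]
    entry = dataset.subsets[subset_index].matches[entry_index]
    return os.path.join(dataset.path, entry.name)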