def setUp(self):
    """Create the two Institute fixtures used by the queries under test."""
    for abbrev, title in (('t', 'test'), ('s', 'test')):
        dbapi.get_or_create(models.Institute, short_name=abbrev,
                            full_name=title)
def setUp(self):
    """Create a user and a DataSubmission in the EXPECTED state."""
    submitter = get_or_create(User, username='******')
    get_or_create(
        models.DataSubmission,
        status=STATUS_VALUES['EXPECTED'],
        incoming_directory='/some/dir',
        directory='/other/dir',
        user=submitter,
    )
def setUp(self):
    """Create a reporter and a single DataIssue fixture."""
    issue_author = get_or_create(User, username='******')
    # Timezone-aware timestamp for the issue (UTC)
    reported_at = make_aware(
        datetime.datetime(2016, 8, 8, 8, 42, 37, 0), pytz.UTC, None)
    get_or_create(models.DataIssue, issue='test',
                  reporter=issue_author, date_time=reported_at)
def test_times_are_none(self):
    """A submission with no files has neither a start nor an end time."""
    submitter = get_or_create(User, username='******')
    empty_submission = get_or_create(
        models.DataSubmission,
        status=STATUS_VALUES.ARRIVED,
        incoming_directory='/some/dir',
        directory='/some/dir',
        user=submitter,
    )
    for accessor in (empty_submission.start_time, empty_submission.end_time):
        self.assertIsNone(accessor())
def setUp(self):
    """Attach an MD5 and a SHA256 checksum to a single data file."""
    target_file = _create_file_object()
    checksum_specs = (
        ('ABCD1234', CHECKSUM_TYPES['MD5']),
        ('4321DCBA', CHECKSUM_TYPES['SHA256']),
    )
    for value, algorithm in checksum_specs:
        dbapi.get_or_create(models.Checksum, data_file=target_file,
                            checksum_value=value, checksum_type=algorithm)
def setUp(self, mock_create_file):
    """Create a pending submission and run update_database_submission on it."""
    self.mock_create_file = mock_create_file
    submitter = get_or_create(User, username='******')
    self.ds = get_or_create(
        DataSubmission,
        incoming_directory='/dir',
        directory='/dir',
        user=submitter,
        status=STATUS_VALUES['PENDING_PROCESSING'],
    )
    # A single-file metadata list is enough to drive the update
    self.metadata = [{'file': 'file1'}]
    update_database_submission(self.metadata, self.ds)
def test_assign_data_issue(self):
    """assign_data_issue attaches the issue to every file in the submission."""
    issue_author = get_or_create(User, username='******')
    self.dsub.assign_data_issue('all files', issue_author)
    for data_file in self.dsub.get_data_files():
        matching_issue = data_file.dataissue_set.filter(issue='all files')[0]
        self.assertEqual(matching_issue.reporter.username, 'Lewis')
def test_get(self):
    """get_or_create returns the existing object instead of a duplicate."""
    fetched = dbapi.get_or_create(models.ClimateModel, short_name='t',
                                  full_name='test')
    # Still only one ClimateModel, and it carries the original values
    self.assertEqual(models.ClimateModel.objects.count(), 1)
    self.assertEqual(fetched.full_name, 'test')
    self.assertEqual(fetched.short_name, 't')
def _make_data_submission():
    """
    Create files and a data submission.

    Creates the test files on disk, a DataSubmission covering them and,
    for each file, the full set of related objects (project, model,
    institute, experiment, variable request, data request, activity id)
    plus the DataFile row itself.

    Returns an DataSubmissionForTests object.
    """
    test_dsub = datasets.test_data_submission
    test_dsub.create_test_files()
    user = get_or_create(User, username='******')
    dsub = get_or_create(DataSubmission, status=STATUS_VALUES.ARRIVED,
                         incoming_directory=test_dsub.INCOMING_DIR,
                         directory=test_dsub.INCOMING_DIR, user=user)
    for dfile_name in test_dsub.files:
        path = os.path.join(test_dsub.INCOMING_DIR, dfile_name)
        # `m` is a short alias for the same metadata dict
        m = metadata = _extract_file_metadata(path)
        proj = get_or_create(Project, short_name="CMIP6",
                             full_name="6th Coupled Model Intercomparison Project")
        climate_model = get_or_create(ClimateModel,
                                      short_name=m["climate_model"],
                                      full_name="Really good model")
        institute = get_or_create(Institute, short_name='MOHC',
                                  full_name='Met Office Hadley Centre')
        experiment = get_or_create(Experiment, short_name=m["experiment"],
                                   full_name="Really good experiment")
        var = get_or_create(VariableRequest, table_name=metadata['table'],
                            long_name='very descriptive', units='1',
                            var_name=metadata['var_id'],
                            standard_name='var_name',
                            cell_methods='time: mean',
                            positive='optimistic',
                            variable_type=VARIABLE_TYPES['real'],
                            dimensions='massive',
                            cmor_name=metadata['var_id'],
                            modeling_realm='atmos',
                            frequency=FREQUENCY_VALUES['ann'],
                            cell_measures='', uid='123abc')
        dreq = get_or_create(DataRequest, project=proj,
                             institute=institute,
                             climate_model=climate_model,
                             experiment=experiment,
                             variable_request=var,
                             rip_code='r1i1p1f1',
                             request_start_time=0.0,
                             request_end_time=23400.0,
                             time_units='days since 1950-01-01',
                             calendar='360_day')
        act_id = get_or_create(ActivityId, short_name='HighResMIP',
                               full_name='High Resolution Model Intercomparison Project')
        # The DataFile row ties the on-disk file to all of the objects above
        dfile = DataFile.objects.create(
            name=dfile_name,
            incoming_directory=test_dsub.INCOMING_DIR,
            directory=test_dsub.INCOMING_DIR,
            size=os.path.getsize(path),
            project=proj,
            climate_model=climate_model,
            institute=institute,
            experiment=experiment,
            variable_request=var,
            data_request=dreq,
            frequency=FREQUENCY_VALUES['ann'],
            rip_code=m["ensemble"],
            activity_id=act_id,
            start_time=m["start_time"],
            end_time=m["end_time"],
            time_units=m["time_units"],
            calendar=m["calendar"],
            data_submission=dsub,
            online=True,
            grid='gn')
    return test_dsub
def main():
    """
    Run the processing.

    Reads every sheet of the variable-request spreadsheet via the Google
    Sheets v4 API and creates a VariableRequest object for each data row.

    :raises KeyError, IndexError: re-raised after printing context if a
        row is malformed.
    """
    credentials = get_credentials()
    http = credentials.authorize(httplib2.Http())
    discovery_url = ('https://sheets.googleapis.com/$discovery/rest?'
                     'version=v4')
    service = discovery.build('sheets', 'v4', http=http,
                              discoveryServiceUrl=discovery_url)
    sheet_names = [
        'Amon', 'LImon', 'Lmon', 'Omon', 'SImon', 'AERmon', 'CFmon',
        'Emon', 'EmonZ', 'Primmon', 'PrimmonZ', 'PrimOmon', 'Oday',
        'CFday', 'day', 'Eday', 'EdayZ', 'SIday', 'PrimdayPt', 'Primday',
        'PrimOday', 'PrimSIday', '6hrPlev', '6hrPlevPt', 'PrimO6hr',
        'Prim6hr', 'Prim6hrPt', '3hr', 'E3hr', 'E3hrPt', 'Prim3hr',
        'Prim3hrPt', 'E1hr', 'Esubhr', 'Prim1hr', 'fx'
    ]
    for sheet in sheet_names:
        range_name = '{}!A2:AI'.format(sheet)
        # Bug fix: the values resource must be used directly.  The
        # previous list(...) wrapper produced an object whose get() does
        # not accept spreadsheetId/range keywords and has no execute().
        result = service.spreadsheets().values().get(
            spreadsheetId=SPREADSHEET_ID, range=range_name).execute()
        values = result.get('values', [])
        if not values:
            print('No data found.')
        else:
            for row in values:
                # Prefer the CMOR name; fall back to the variable name
                cmor_name = row[11] if row[11] else row[5]
                try:
                    _vr = get_or_create(
                        VariableRequest, table_name=sheet,
                        long_name=row[1], units=row[2], var_name=row[5],
                        standard_name=row[6], cell_methods=row[7],
                        positive=row[8],
                        variable_type=VARIABLE_TYPES[row[9]],
                        dimensions=row[10], cmor_name=cmor_name,
                        modeling_realm=row[12],
                        frequency=FREQUENCY_VALUES[row[13]],
                        cell_measures=row[14], uid=row[18])
                except (KeyError, IndexError):
                    # display some information to work out where the error
                    # happened and then re-raise the exception to crash out
                    print('cmor_name: {} table: {}'.format(row[11], sheet))
                    print(row)
                    raise
def test_01_data_request(self):
    """A DataRequest can be created and its related objects read back."""
    project = get_or_create(
        Project, short_name='CMIP6',
        full_name='Coupled Model Intercomparison Project Phase 6')
    institute = get_or_create(Institute, short_name='u',
                              full_name='University')
    climate_model = get_or_create(ClimateModel, short_name='my_model',
                                  full_name='Really big model')
    experiment = get_or_create(Experiment, short_name='my_expt',
                               full_name='Really detailed experiment')
    variable_attrs = {
        'table_name': 'Amon', 'long_name': 'very descriptive',
        'units': '1', 'var_name': 'var1', 'standard_name': 'var_name',
        'cell_methods': 'time:mean', 'positive': 'optimistic',
        'variable_type': VARIABLE_TYPES['real'], 'dimensions': 'massive',
        'cmor_name': 'var1', 'modeling_realm': 'atmos',
        'frequency': FREQUENCY_VALUES['ann'], 'cell_measures': '',
        'uid': '123abc',
    }
    var_req = get_or_create(VariableRequest, **variable_attrs)
    get_or_create(
        DataRequest, project=project, institute=institute,
        climate_model=climate_model, experiment=experiment,
        variable_request=var_req,
        request_start_time=_cmpts2num(1900, 1, 1, 0, 0, 0, 0,
                                      TIME_UNITS, CALENDAR),
        request_end_time=_cmpts2num(2000, 1, 1, 0, 0, 0, 0,
                                    TIME_UNITS, CALENDAR))
    # Make some assertions
    data_req = DataRequest.objects.all()[0]
    self.assertEqual(data_req.institute.full_name, 'University')
    self.assertEqual(data_req.climate_model.short_name, 'my_model')
    self.assertEqual(data_req.experiment.short_name, 'my_expt')
    self.assertEqual(data_req.variable_request.cmor_name, 'var1')
    self.assertEqual(data_req.variable_request.modeling_realm, 'atmos')
def restore_files(queryset):
    """
    Move ReplacedFile entries from `queryset` to a DataFile type

    Each ReplacedFile is copied field-by-field into a new DataFile (with
    no directory, marked offline and pointing at the original tape URL),
    its checksum is re-created and the ReplacedFile is deleted.

    :param django.db.models.query.QuerySet queryset: the DataFile objects
        to move.
    :raises TypeError: if a queryset entry is not a ReplacedFile.
    :raises ValueError: if unable to create a DataFile object
    """
    num_files_restored = 0
    for rep_file in queryset:
        if not isinstance(rep_file, ReplacedFile):
            raise TypeError('queryset entries must be of type ReplacedFile')
        # directory/tape_size are cleared and online is False because the
        # restored file exists only on tape at this point
        data_file = DataFile.objects.create(
            name=rep_file.name,
            incoming_directory=rep_file.incoming_directory,
            directory=None,
            size=rep_file.size,
            tape_size=None,
            version=rep_file.version,
            project=rep_file.project,
            institute=rep_file.institute,
            climate_model=rep_file.climate_model,
            activity_id=rep_file.activity_id,
            experiment=rep_file.experiment,
            variable_request=rep_file.variable_request,
            data_request=rep_file.data_request,
            frequency=rep_file.frequency,
            rip_code=rep_file.rip_code,
            grid=rep_file.grid,
            start_time=rep_file.start_time,
            end_time=rep_file.end_time,
            time_units=rep_file.time_units,
            calendar=rep_file.calendar,
            data_submission=rep_file.data_submission,
            online=False,
            tape_url=rep_file.tape_url)
        if data_file:
            # Carry the checksum across before removing the old record
            checksum = get_or_create(Checksum, data_file=data_file,
                                     checksum_value=rep_file.checksum_value,
                                     checksum_type=rep_file.checksum_type)
            rep_file.delete()
            num_files_restored += 1
        else:
            raise ValueError(
                'No DataFile object created for {}.'.format(rep_file))
    logger.debug('{} files moved.'.format(num_files_restored))
def test_07_publish_to_esgf(self):
    """Files ingested to CEDA can then be linked to an ESGF dataset."""
    # Create a data submission to start with
    test_dsub = _make_data_submission()
    data_submission = DataSubmission.objects.all()[0]
    # Ingest into CEDA
    ceda_ds = get_or_create(
        CEDADataset,
        doi='doi:10.2514/1.A32039',
        catalogue_url='http://catalogue.ceda.ac.uk/uuid/'
                      '85c7d0b09c974bd6abb07a324c2f427b',
        directory='/badc/some/dir')
    for data_file in data_submission.get_data_files():
        data_file.ceda_dataset = ceda_ds
        data_file.ceda_opendap_url = (
            'http://dap.ceda.ac.uk/data/badc/cmip5/some/dir/' +
            data_file.name)
        data_file.ceda_download_url = (
            'http://browse.ceda.ac.uk/browse/badc/cmip5/' + data_file.name)
        data_file.save()
    # Create an ESGF data set
    esgf_ds = get_or_create(
        ESGFDataset,
        version='v20160720',
        directory='/some/dir',
        ceda_dataset=ceda_ds,
        data_request=DataRequest.objects.first())
    # Update each file
    for data_file in data_submission.get_data_files():
        data_file.esgf_dataset = esgf_ds
        data_file.esgf_opendap_url = (
            'http://esgf.ceda.ac.uk/data/badc/cmip5/some/dir/' +
            data_file.name)
        data_file.esgf_download_url = (
            'http://esgf.ceda.ac.uk/browse/badc/cmip5/' + data_file.name)
        data_file.save()
    # Make some assertions
    for dfile_name in test_dsub.files:
        data_file = DataFile.objects.get(name=dfile_name)
        self.assertEqual(data_file.esgf_dataset.get_full_id(),
                         'CMIP6.HighResMIP.MOHC.Python.amip4K.r1i1p1f1.'
                         'day.beans.gn.v20160720')
        self.assertEqual(
            data_file.esgf_download_url,
            'http://esgf.ceda.ac.uk/browse/badc/cmip5/' + data_file.name)
def test_multiple_tapes(self):
    """
    Retrieve files spread across three tape URLs and check that each
    file ends up online in one of the two expected DRS directories.
    """
    ret_req = get_or_create(RetrievalRequest, requester=self.user,
                            start_year=1000, end_year=3000)
    ret_req.data_request.add(self.dreq1, self.dreq2)
    ret_req.save()

    # Minimal stand-in for the argparse namespace that get_tape_url expects
    class ArgparseNamespace(object):
        retrieval_id = ret_req.id
        no_restore = False
        skip_checksums = True
        alternative = None
        incoming = False

    # One group of four answers per tape_url, in the exact call order of
    # the os.path.exists() checks inside the code under test
    self.mock_exists.side_effect = [
        # first tape_url
        False,  # if os.path.exists(retrieval_dir):
        True,  # if not os.path.exists(extracted_file_path):
        True,  # if not os.path.exists(drs_dir):
        False,  # if os.path.exists(dest_file_path):
        # second tape_url
        False,  # if os.path.exists(retrieval_dir):
        True,  # if not os.path.exists(extracted_file_path):
        True,  # if not os.path.exists(drs_dir):
        False,  # if os.path.exists(dest_file_path):
        # third tape_url
        False,  # if os.path.exists(retrieval_dir):
        True,  # if not os.path.exists(extracted_file_path):
        True,  # if not os.path.exists(drs_dir):
        False  # if os.path.exists(dest_file_path):
    ]
    ns = ArgparseNamespace()
    get_tape_url('et:1234', [self.df1], ns)
    get_tape_url('et:5678', [self.df2], ns)
    get_tape_url('et:8765', [self.df3], ns)
    # One rename per file retrieved
    self.assertEqual(self.mock_rename.call_count, 3)
    for data_file in DataFile.objects.all():
        self.assertTrue(data_file.online)
        self.assertIn(data_file.directory, [
            '/gws/nopw/j04/primavera5/stream1/CMIP6/'
            'HighResMIP/MOHC/MY-MODEL/experiment/r1i1p1f1/my-table/'
            'my-var/gn/v12345678',
            '/gws/nopw/j04/primavera5/stream1/CMIP6/HighResMIP/'
            'MOHC/MY-MODEL/experiment/r1i1p1f1/your-table/your-var/'
            'gn/v12345678'
        ])
def test_dates(self):
    """Request size is computed over the 1950-1975 retrieval window."""
    retrieval = dbapi.get_or_create(models.RetrievalRequest,
                                    requester=self.user,
                                    start_year=1950, end_year=1975)
    for data_request in (self.dreq1, self.dreq2):
        retrieval.data_request.add(data_request)
        retrieval.save()
    self.assertEqual(
        7,
        get_request_size(retrieval.data_request.all(),
                         retrieval.start_year, retrieval.end_year))
def test_05_create_data_issue(self):
    """A DataIssue can be attached to every file in a submission."""
    # Create a data submission to start with
    test_dsub = _make_data_submission()
    data_submission = DataSubmission.objects.all()[0]
    # Now, create the data issue
    issue_author = get_or_create(User, username='******')
    issue_time = make_aware(datetime.datetime(1978, 7, 19, 0, 0, 0, 0),
                            pytz.UTC, None)
    data_issue = get_or_create(DataIssue, issue='test issue',
                               reporter=issue_author, date_time=issue_time)
    # Add the issue to all files in the submission
    for data_file in data_submission.get_data_files():
        data_file.dataissue_set.add(data_issue)
        data_file.save()
    # Make some assertions
    for dfile_name in test_dsub.files:
        data_file = DataFile.objects.filter(name=dfile_name).first()
        self.assertEqual(data_file.dataissue_set.count(), 1)
        self.assertEqual(data_file.dataissue_set.first().issue,
                         'test issue')
def test_get_data_issues_with_single_issue_on_all_files(self):
    """An issue shared by every file is returned exactly once."""
    issue_author = get_or_create(User, username='******')
    shared_issue = models.DataIssue(
        issue='unit test', reporter=issue_author,
        date_time=datetime.datetime(1950, 12, 13, 0, 0, 0, 0))
    shared_issue.save()
    for data_file in self.dsub.get_data_files():
        data_file.dataissue_set.add(shared_issue)
    self.assertEqual(self.dsub.get_data_issues(), [shared_issue])
def test_no_files(self):
    """With no offline files in the window the request size is zero."""
    retrieval = dbapi.get_or_create(models.RetrievalRequest,
                                    requester=self.user,
                                    start_year=2001, end_year=2014)
    for data_request in (self.dreq1, self.dreq2):
        retrieval.data_request.add(data_request)
        retrieval.save()
    self.assertEqual(
        0,
        get_request_size(retrieval.data_request.all(),
                         retrieval.start_year, retrieval.end_year,
                         offline=True))
def setUp(self):
    """Create the VariableRequest fixture shared by these tests."""
    variable_attrs = {
        'table_name': 'Amon',
        'long_name': 'very descriptive',
        'units': '1',
        'var_name': 'var1',
        'standard_name': 'var_name',
        'cell_methods': 'time:mean',
        'positive': 'optimistic',
        'variable_type': VARIABLE_TYPES['real'],
        'dimensions': 'massive',
        'cmor_name': 'var1',
        'modeling_realm': 'atmos',
        'frequency': FREQUENCY_VALUES['ann'],
        'cell_measures': '',
        'uid': '123abc',
    }
    get_or_create(models.VariableRequest, **variable_attrs)
def make_data_issue():
    """
    Create a dummy data issue and attach it to every beans_* data file.
    """
    issue = get_or_create(
        DataIssue,
        issue='Beans are good for your heart',
        reporter='Jon Seddon',
        date_time=_cmpts2num(2016, 9, 2, 14, 38, 49, 0,
                             TIME_UNITS, CALENDAR),
        time_units=TIME_UNITS,
        calendar=CALENDAR)
    for beans_file in DataFile.objects.filter(name__istartswith='beans_'):
        issue.data_file.add(beans_file)
    issue.save()
def test_both_options(self):
    """Asking for both online and offline sizes raises ValueError."""
    retrieval = dbapi.get_or_create(models.RetrievalRequest,
                                    requester=self.user,
                                    start_year=1950, end_year=2014)
    for data_request in (self.dreq1, self.dreq2):
        retrieval.data_request.add(data_request)
        retrieval.save()
    self.assertRaises(ValueError, get_request_size,
                      retrieval.data_request.all(),
                      retrieval.start_year, retrieval.end_year,
                      online=True, offline=True)
def test_simplest(self):
    """
    Retrieve a single file from one tape URL and check that it is
    renamed into the DRS directory structure and marked online.
    """
    ret_req = get_or_create(RetrievalRequest, requester=self.user,
                            start_year=1000, end_year=3000, id=999999)
    ret_req.data_request.add(self.dreq1)
    ret_req.save()

    # Minimal stand-in for the argparse namespace that get_tape_url expects
    class ArgparseNamespace(object):
        retrieval_id = ret_req.id
        no_restore = False
        skip_checksums = True
        alternative = None
        incoming = False

    # Answers for the os.path.exists() checks, in call order
    self.mock_exists.side_effect = [
        False,  # if os.path.exists(retrieval_dir):
        True,  # if not os.path.exists(extracted_file_path):
        True,  # if not os.path.exists(drs_dir):
        False  # if os.path.exists(dest_file_path):
    ]
    ns = ArgparseNamespace()
    get_tape_url('et:1234', [self.df1], ns)
    df = match_one(DataFile, name='file_one.nc')
    self.assertIsNotNone(df)
    # The file is moved from the retrieval cache into the DRS tree
    self.mock_rename.assert_called_once_with(
        '/gws/nopw/j04/primavera5/.et_retrievals/ret_999999/'
        'batch_01234/gws/MOHC/MY-MODEL/incoming/v12345678/file_one.nc',
        '/gws/nopw/j04/primavera5/stream1/CMIP6/HighResMIP/'
        'MOHC/MY-MODEL/experiment/r1i1p1f1/my-table/my-var/gn/v12345678/'
        'file_one.nc')
    self.assertTrue(df.online)
    self.assertEqual(df.directory, '/gws/nopw/j04/primavera5/'
                                   'stream1/CMIP6/HighResMIP/MOHC/'
                                   'MY-MODEL/experiment/r1i1p1f1/'
                                   'my-table/my-var/gn/v12345678')
def test_06_ingest_to_ceda(self):
    """Each file in a submission records its CEDA dataset and URLs."""
    # Create a data submission to start with
    test_dsub = _make_data_submission()
    data_submission = DataSubmission.objects.all()[0]
    # Create a CEDA data set and link every file to it
    ceda_ds = get_or_create(
        CEDADataset,
        doi='doi:10.2514/1.A32039',
        catalogue_url='http://catalogue.ceda.ac.uk/uuid/'
                      '85c7d0b09c974bd6abb07a324c2f427b',
        directory='/badc/some/dir')
    for data_file in data_submission.get_data_files():
        data_file.ceda_dataset = ceda_ds
        data_file.ceda_opendap_url = (
            'http://dap.ceda.ac.uk/data/badc/cmip5/some/dir/' +
            data_file.name)
        data_file.ceda_download_url = (
            'http://browse.ceda.ac.uk/browse/badc/cmip5/' + data_file.name)
        data_file.save()
    # Make some assertions
    for dfile_name in test_dsub.files:
        data_file = DataFile.objects.filter(name=dfile_name).first()
        self.assertEqual(data_file.ceda_dataset.doi, 'doi:10.2514/1.A32039')
        self.assertEqual(
            data_file.ceda_download_url,
            'http://browse.ceda.ac.uk/browse/badc/cmip5/' + data_file.name)
def test_get_data_issues_with_many_issues(self):
    """Issues on different files are all returned, most recent first."""
    issue_author = get_or_create(User, username='******')
    older_issue = models.DataIssue(
        issue='2nd test', reporter=issue_author,
        date_time=datetime.datetime(1805, 7, 5, 0, 0, 0))
    older_issue.save()
    newer_issue = models.DataIssue(
        issue='unit test', reporter=issue_author,
        date_time=datetime.datetime(1950, 12, 13, 0, 0, 0, 0))
    newer_issue.save()
    # Attach a different issue to each of the first two files
    self.dsub.get_data_files()[0].dataissue_set.add(newer_issue)
    self.dsub.get_data_files()[1].dataissue_set.add(older_issue)
    self.assertEqual(self.dsub.get_data_issues(),
                     [newer_issue, older_issue])
def test_retrieval_already_complete(self):
    """main exits with an error when the retrieval was already completed."""
    completion_time = make_aware(
        datetime.datetime(2017, 10, 31, 23, 59, 59))
    ret_req = get_or_create(RetrievalRequest, requester=self.user,
                            date_complete=completion_time,
                            start_year=1000, end_year=3000)

    # Minimal stand-in for the argparse namespace that main() expects
    class ArgparseNamespace(object):
        retrieval_id = ret_req.id
        no_restore = False
        skip_checksums = True
        alternative = None
        incoming = False

    self.assertRaises(SystemExit, main, ArgparseNamespace())
    self.mock_logger.error.assert_called_with(
        'Retrieval {} was already '
        'completed, at {}.'.format(
            ret_req.id, completion_time.strftime('%Y-%m-%d %H:%M')))
def main(args):
    """
    Main entry point

    Create the four additional Eday variable requests and then an
    EC-Earth DataRequest for every entry in `new_dreqs`, for every
    experiment and for both EC-Earth model configurations.

    :raises ValueError: if an experiment, institute, climate model or
        variable request cannot be found in the database.
    """
    # new_vreqs = ['lwsnl_Eday', 'mrros_Eday', 'snm_Eday', 'snd_Eday']
    new_vreqs = [
        {'table_name': 'Eday',
         'long_name': 'Liquid Water Content of Snow Layer',
         'units': 'kg m-2',
         'var_name': 'lwsnl',
         'standard_name': 'liquid_water_content_of_snow_layer',
         'cell_methods': 'area: mean where land time: mean',
         'positive': '',
         'variable_type': 'real',
         'dimensions': 'longitude latitude time',
         'cmor_name': 'lwsnl',
         'modeling_realm': 'landIce land',
         'frequency': 'day',
         'cell_measures': 'area: areacella',
         'uid': 'd228925a-4a9f-11e6-b84e-ac72891c3257'},
        {'table_name': 'Eday',
         'long_name': 'Surface Runoff',
         'units': 'kg m-2 s-1',
         'var_name': 'mrros',
         'standard_name': 'surface_runoff_flux',
         'cell_methods': 'area: mean where land time: mean',
         'positive': '',
         'variable_type': 'real',
         'dimensions': 'longitude latitude time',
         'cmor_name': 'mrros',
         'modeling_realm': 'land',
         'frequency': 'day',
         'cell_measures': 'area: areacella',
         'uid': 'd2284048-4a9f-11e6-b84e-ac72891c3257'},
        {'table_name': 'Eday',
         'long_name': 'Surface Snow Melt',
         'units': 'kg m-2 s-1',
         'var_name': 'snm',
         'standard_name': 'surface_snow_melt_flux',
         'cell_methods': 'area: mean where land time: mean',
         'positive': '',
         'variable_type': 'real',
         'dimensions': 'longitude latitude time',
         'cmor_name': 'snm',
         'modeling_realm': 'landIce land',
         'frequency': 'day',
         'cell_measures': 'area: areacella',
         'uid': 'd22848ea-4a9f-11e6-b84e-ac72891c3257'},
        {'table_name': 'Eday',
         'long_name': 'Snow Depth',
         'units': 'm',
         'var_name': 'snd',
         'standard_name': 'surface_snow_thickness',
         'cell_methods': 'area: mean where land time: mean',
         'positive': '',
         'variable_type': 'real',
         'dimensions': 'longitude latitude time',
         'cmor_name': 'snd',
         'modeling_realm': 'landIce land',
         'frequency': 'day',
         'cell_measures': 'area: areacella',
         'uid': 'b7ccdf0a-7c00-11e6-bcdf-ac72891c3257'}
    ]
    new_dreqs = [
        'wap_Emon', 'hfdsn_LImon', 'snc_day', 'va27_Emon', 'rsuscs_CFday',
        'vt_Emon', 'hursmin_day', 'uv_Emon', 'cl_Amon', 'rldscs_CFday',
        'lwp_Primmon', 'uwap_Emon', 'rsdscs_Amon', 'zg_day',
        'evspsblsoi_Lmon', 'snc_LImon', 'v2_Emon', 'ta27_Emon',
        'tsl_Lmon', 'lwsnl_LImon', 'sbl_LImon', 'wap2_Emon', 'ut_Emon',
        'hursmax_day', 't2_Emon', 'rsuscs_Amon', 'rsdscs_CFday',
        'mrsos_day', 'hus27_Emon', 'snm_LImon', 'tsn_LImon',
        'rldscs_Amon', 'vwap_Emon', 'zg27_Emon', 'twap_Emon', 'u2_Emon',
        'ua27_Emon', 'lwsnl_Eday', 'mrros_Eday', 'snm_Eday', 'snd_Eday'
    ]
    institute_details = {
        'id': 'EC-Earth-Consortium',
        'model_ids': ['EC-Earth3-LR', 'EC-Earth3-HR'],
        'calendar': CALENDAR_GREGORIAN
    }
    experiments = {
        'control-1950': {'start_date': datetime(1950, 1, 1),
                         'end_date': datetime(2050, 1, 1)},
        'highres-future': {'start_date': datetime(2015, 1, 1),
                           'end_date': datetime(2051, 1, 1)},
        'hist-1950': {'start_date': datetime(1950, 1, 1),
                      'end_date': datetime(2015, 1, 1)},
        'highresSST-present': {'start_date': datetime(1950, 1, 1),
                               'end_date': datetime(2015, 1, 1)},
        'highresSST-future': {'start_date': datetime(2015, 1, 1),
                              'end_date': datetime(2051, 1, 1)},
        'highresSST-LAI': {'start_date': datetime(1950, 1, 1),
                           'end_date': datetime(2015, 1, 1)},
        'highresSST-smoothed': {'start_date': datetime(1950, 1, 1),
                                'end_date': datetime(2015, 1, 1)},
        'highresSST-p4K': {'start_date': datetime(1950, 1, 1),
                           'end_date': datetime(2015, 1, 1)},
        'highresSST-4co2': {'start_date': datetime(1950, 1, 1),
                            'end_date': datetime(2015, 1, 1)},
        'spinup-1950': {'start_date': datetime(1950, 1, 1),
                        'end_date': datetime(1980, 1, 1)},
    }
    # Experiment
    experiment_objs = []
    for expt in experiments:
        expt_obj = match_one(Experiment, short_name=expt)
        if expt_obj:
            experiment_objs.append(expt_obj)
        else:
            msg = 'experiment {} not found in the database.'.format(expt)
            print(msg)
            raise ValueError(msg)
    # Institute
    result = match_one(Institute, short_name=institute_details['id'])
    if result:
        institute = result
    else:
        msg = 'institute_id {} not found in the database.'.format(
            institute_details['id']
        )
        print(msg)
        raise ValueError(msg)
    # Look up the ClimateModel object for each institute_id and save the
    # results to a dictionary for quick look up later
    model_objs = []
    for clim_model in institute_details['model_ids']:
        result = match_one(ClimateModel, short_name=clim_model)
        if result:
            model_objs.append(result)
        else:
            msg = ('climate_model {} not found in the database.'.
                   format(clim_model))
            print(msg)
            raise ValueError(msg)
    # The standard reference time
    std_units = Settings.get_solo().standard_time_units
    # create the additional variable requests
    for new_vreq in new_vreqs:
        _vr = get_or_create(VariableRequest, **new_vreq)
    # create the new data requests
    for new_dreq in new_dreqs:
        cmor_name, table_name = new_dreq.split('_')
        # Prim* tables belong to the PRIMAVERA project; others to CMIP6
        if table_name.startswith('Prim'):
            project = match_one(Project, short_name='PRIMAVERA')
        else:
            project = match_one(Project, short_name='CMIP6')
        var_req_obj = match_one(VariableRequest, cmor_name=cmor_name,
                                table_name=table_name)
        if var_req_obj:
            for expt in experiment_objs:
                for clim_model in model_objs:
                    _dr = get_or_create(
                        DataRequest,
                        project=project,
                        institute=institute,
                        climate_model=clim_model,
                        experiment=expt,
                        variable_request=var_req_obj,
                        request_start_time=date2num(
                            experiments[expt.short_name]['start_date'],
                            std_units, institute_details['calendar']
                        ),
                        request_end_time=date2num(
                            experiments[expt.short_name]['end_date'],
                            std_units, institute_details['calendar']
                        ),
                        time_units=std_units,
                        calendar=institute_details['calendar']
                    )
        else:
            msg = ('Unable to find variable request matching '
                   'cmor_name {} and table_name {} in the '
                   'database.'.format(cmor_name, table_name))
            print(msg)
            raise ValueError(msg)
def main(args):
    """
    Main entry point

    Create an AWI DataRequest (variant r1i1p1f2) for every entry in
    `new_dreqs`, for every experiment and for both AWI-CM model
    configurations.

    :raises ValueError: if an experiment, institute, climate model or
        variable request cannot be found in the database.
    """
    institute_details = {
        'id': 'AWI',
        'model_ids': ['AWI-CM-1-1-LR', 'AWI-CM-1-1-HR'],
        'calendar': CALENDAR_STANDARD
    }
    experiments = {
        'control-1950': {
            'start_date': datetime(1950, 1, 1),
            'end_date': datetime(2050, 1, 1)
        },
        'highres-future': {
            'start_date': datetime(2015, 1, 1),
            'end_date': datetime(2051, 1, 1)
        },
        'hist-1950': {
            'start_date': datetime(1950, 1, 1),
            'end_date': datetime(2015, 1, 1)
        },
        'spinup-1950': {
            'start_date': datetime(1950, 1, 1),
            'end_date': datetime(1980, 1, 1)
        }
    }
    variant_label = 'r1i1p1f2'
    # Experiment
    new_dreqs = [
        'rsut_E3hr',
    ]
    experiment_objs = []
    for expt in experiments:
        expt_obj = match_one(Experiment, short_name=expt)
        if expt_obj:
            experiment_objs.append(expt_obj)
        else:
            msg = 'experiment {} not found in the database.'.format(expt)
            print(msg)
            raise ValueError(msg)
    # Institute
    result = match_one(Institute, short_name=institute_details['id'])
    if result:
        institute = result
    else:
        msg = 'institute_id {} not found in the database.'.format(
            institute_details['id'])
        print(msg)
        raise ValueError(msg)
    # Look up the ClimateModel object for each institute_id and save the
    # results to a dictionary for quick look up later
    model_objs = []
    for clim_model in institute_details['model_ids']:
        result = match_one(ClimateModel, short_name=clim_model)
        if result:
            model_objs.append(result)
        else:
            msg = ('climate_model {} not found in the database.'.format(
                clim_model))
            print(msg)
            raise ValueError(msg)
    # The standard reference time
    std_units = Settings.get_solo().standard_time_units
    # create the new data requests
    for new_dreq in new_dreqs:
        cmor_name, table_name = new_dreq.split('_')
        # Prim* tables belong to the PRIMAVERA project; others to CMIP6
        if table_name.startswith('Prim'):
            project = match_one(Project, short_name='PRIMAVERA')
        else:
            project = match_one(Project, short_name='CMIP6')
        var_req_obj = match_one(VariableRequest, cmor_name=cmor_name,
                                table_name=table_name)
        if var_req_obj:
            for expt in experiment_objs:
                for clim_model in model_objs:
                    try:
                        _dr = get_or_create(
                            DataRequest,
                            project=project,
                            institute=institute,
                            climate_model=clim_model,
                            experiment=expt,
                            variable_request=var_req_obj,
                            request_start_time=date2num(
                                experiments[expt.short_name]['start_date'],
                                std_units, institute_details['calendar']),
                            request_end_time=date2num(
                                experiments[expt.short_name]['end_date'],
                                std_units, institute_details['calendar']),
                            time_units=std_units,
                            calendar=institute_details['calendar'],
                            rip_code=variant_label)
                    except django.core.exceptions.MultipleObjectsReturned:
                        # log which variable caused the ambiguity before
                        # crashing out
                        logger.error('{}'.format(var_req_obj))
                        raise
        else:
            msg = ('Unable to find variable request matching '
                   'cmor_name {} and table_name {} in the '
                   'database.'.format(cmor_name, table_name))
            print(msg)
            raise ValueError(msg)
def main(args):
    """
    Main entry point

    Create an EC-Earth DataRequest for every (mostly SImon sea-ice)
    entry in `new_dreqs`, for every experiment and for both model
    configurations.

    :raises ValueError: if an experiment, institute, climate model or
        variable request cannot be found in the database.
    """
    new_dreqs = [
        'siage_SImon',
        'sisnthick_SImon',
        'sicompstren_SImon',
        'sisali_SImon',
        'hus1000_Prim6hrPt',
        'siconc_SImon',
        'sistrxdtop_SImon',
        'siv_SImon',
        'sivol_SImon',
        'sistrydtop_SImon',
        'va1000_Prim6hrPt',
        'sidmassevapsubl_SImon',
        'sithick_SImon',
        'sispeed_SImon',
        'sndmasssnf_SImon',
        'siflswdtop_SImon',
        'sitemptop_SImon',
        'ua1000_Prim6hrPt',
        'siu_SImon'
    ]
    institute_details = {
        'id': 'EC-Earth-Consortium',
        'model_ids': ['EC-Earth3', 'EC-Earth3-HR'],
        'calendar': CALENDAR_GREGORIAN
    }
    experiments = {
        'control-1950': {
            'start_date': datetime(1950, 1, 1),
            'end_date': datetime(2050, 1, 1)
        },
        'highres-future': {
            'start_date': datetime(2015, 1, 1),
            'end_date': datetime(2051, 1, 1)
        },
        'hist-1950': {
            'start_date': datetime(1950, 1, 1),
            'end_date': datetime(2015, 1, 1)
        },
        'highresSST-present': {
            'start_date': datetime(1950, 1, 1),
            'end_date': datetime(2015, 1, 1)
        },
        'highresSST-future': {
            'start_date': datetime(2015, 1, 1),
            'end_date': datetime(2051, 1, 1)
        },
        'highresSST-LAI': {
            'start_date': datetime(1950, 1, 1),
            'end_date': datetime(2015, 1, 1)
        },
        'highresSST-smoothed': {
            'start_date': datetime(1950, 1, 1),
            'end_date': datetime(2015, 1, 1)
        },
        'highresSST-p4K': {
            'start_date': datetime(1950, 1, 1),
            'end_date': datetime(2015, 1, 1)
        },
        'highresSST-4co2': {
            'start_date': datetime(1950, 1, 1),
            'end_date': datetime(2015, 1, 1)
        },
        'spinup-1950': {
            'start_date': datetime(1950, 1, 1),
            'end_date': datetime(1980, 1, 1)
        },
    }
    # Experiment
    experiment_objs = []
    for expt in experiments:
        expt_obj = match_one(Experiment, short_name=expt)
        if expt_obj:
            experiment_objs.append(expt_obj)
        else:
            msg = 'experiment {} not found in the database.'.format(expt)
            print(msg)
            raise ValueError(msg)
    # Institute
    result = match_one(Institute, short_name=institute_details['id'])
    if result:
        institute = result
    else:
        msg = 'institute_id {} not found in the database.'.format(
            institute_details['id'])
        print(msg)
        raise ValueError(msg)
    # Look up the ClimateModel object for each institute_id and save the
    # results to a dictionary for quick look up later
    model_objs = []
    for clim_model in institute_details['model_ids']:
        result = match_one(ClimateModel, short_name=clim_model)
        if result:
            model_objs.append(result)
        else:
            msg = ('climate_model {} not found in the database.'.format(
                clim_model))
            print(msg)
            raise ValueError(msg)
    # The standard reference time
    std_units = Settings.get_solo().standard_time_units
    # create the new data requests
    for new_dreq in new_dreqs:
        cmor_name, table_name = new_dreq.split('_')
        # Prim* tables belong to the PRIMAVERA project; others to CMIP6
        if table_name.startswith('Prim'):
            project = match_one(Project, short_name='PRIMAVERA')
        else:
            project = match_one(Project, short_name='CMIP6')
        var_req_obj = match_one(VariableRequest, cmor_name=cmor_name,
                                table_name=table_name)
        if var_req_obj:
            for expt in experiment_objs:
                for clim_model in model_objs:
                    _dr = get_or_create(
                        DataRequest,
                        project=project,
                        institute=institute,
                        climate_model=clim_model,
                        experiment=expt,
                        variable_request=var_req_obj,
                        request_start_time=date2num(
                            experiments[expt.short_name]['start_date'],
                            std_units, institute_details['calendar']),
                        request_end_time=date2num(
                            experiments[expt.short_name]['end_date'],
                            std_units, institute_details['calendar']),
                        time_units=std_units,
                        calendar=institute_details['calendar'])
        else:
            msg = ('Unable to find variable request matching '
                   'cmor_name {} and table_name {} in the '
                   'database.'.format(cmor_name, table_name))
            print(msg)
            raise ValueError(msg)
def main(args):
    """
    Main entry point

    Create an EC-Earth DataRequest for every (mostly 6hrPlevPt/E3hr)
    entry in `new_dreqs`, for every experiment and for both EC-Earth
    model configurations.

    :raises ValueError: if an experiment, institute, climate model or
        variable request cannot be found in the database.
    """
    # new_vreqs = ['lwsnl_Eday', 'mrros_Eday', 'snm_Eday', 'snd_Eday']
    new_dreqs = [
        'mrlsl_6hrPlevPt', 'snw_6hrPlevPt', 'psl_6hrPlevPt', 'rsdt_E3hr',
        'prw_E3hr', 'tas_6hrPlevPt', 'clivi_E3hr', 'mrsos_6hrPlevPt',
        'ta7h_6hrPlevPt', 'clwvi_E3hr', 'pr_Prim6hr', 'rsdscs_3hr',
        'va7h_6hrPlevPt', 'ts_6hrPlevPt', 'ua7h_6hrPlevPt', 'rsut_E3hr',
        'clt_Prim6hr', 'ta27_6hrPlevPt', 'tsl_6hrPlevPt', 'rldscs_3hr',
        'rlut_E3hr', 'huss_6hrPlevPt', 'hus27_6hrPlevPt', 'rlutcs_E3hr',
        'hus7h_6hrPlevPt', 'rsuscs_3hr', 'ps_Prim6hr',
        'sfcWind_6hrPlevPt', 'zg27_6hrPlevPt', 'rsds_Prim6hr'
    ]
    institute_details = {
        'id': 'EC-Earth-Consortium',
        'model_ids': ['EC-Earth3-LR', 'EC-Earth3-HR'],
        'calendar': CALENDAR_GREGORIAN
    }
    experiments = {
        'control-1950': {
            'start_date': datetime(1950, 1, 1),
            'end_date': datetime(2050, 1, 1)
        },
        'highres-future': {
            'start_date': datetime(2015, 1, 1),
            'end_date': datetime(2051, 1, 1)
        },
        'hist-1950': {
            'start_date': datetime(1950, 1, 1),
            'end_date': datetime(2015, 1, 1)
        },
        'highresSST-present': {
            'start_date': datetime(1950, 1, 1),
            'end_date': datetime(2015, 1, 1)
        },
        'highresSST-future': {
            'start_date': datetime(2015, 1, 1),
            'end_date': datetime(2051, 1, 1)
        },
        'highresSST-LAI': {
            'start_date': datetime(1950, 1, 1),
            'end_date': datetime(2015, 1, 1)
        },
        'highresSST-smoothed': {
            'start_date': datetime(1950, 1, 1),
            'end_date': datetime(2015, 1, 1)
        },
        'highresSST-p4K': {
            'start_date': datetime(1950, 1, 1),
            'end_date': datetime(2015, 1, 1)
        },
        'highresSST-4co2': {
            'start_date': datetime(1950, 1, 1),
            'end_date': datetime(2015, 1, 1)
        },
        'spinup-1950': {
            'start_date': datetime(1950, 1, 1),
            'end_date': datetime(1980, 1, 1)
        },
    }
    # Experiment
    experiment_objs = []
    for expt in experiments:
        expt_obj = match_one(Experiment, short_name=expt)
        if expt_obj:
            experiment_objs.append(expt_obj)
        else:
            msg = 'experiment {} not found in the database.'.format(expt)
            print(msg)
            raise ValueError(msg)
    # Institute
    result = match_one(Institute, short_name=institute_details['id'])
    if result:
        institute = result
    else:
        msg = 'institute_id {} not found in the database.'.format(
            institute_details['id'])
        print(msg)
        raise ValueError(msg)
    # Look up the ClimateModel object for each institute_id and save the
    # results to a dictionary for quick look up later
    model_objs = []
    for clim_model in institute_details['model_ids']:
        result = match_one(ClimateModel, short_name=clim_model)
        if result:
            model_objs.append(result)
        else:
            msg = ('climate_model {} not found in the database.'.format(
                clim_model))
            print(msg)
            raise ValueError(msg)
    # The standard reference time
    std_units = Settings.get_solo().standard_time_units
    # create the new data requests
    for new_dreq in new_dreqs:
        cmor_name, table_name = new_dreq.split('_')
        # Prim* tables belong to the PRIMAVERA project; others to CMIP6
        if table_name.startswith('Prim'):
            project = match_one(Project, short_name='PRIMAVERA')
        else:
            project = match_one(Project, short_name='CMIP6')
        var_req_obj = match_one(VariableRequest, cmor_name=cmor_name,
                                table_name=table_name)
        if var_req_obj:
            for expt in experiment_objs:
                for clim_model in model_objs:
                    _dr = get_or_create(
                        DataRequest,
                        project=project,
                        institute=institute,
                        climate_model=clim_model,
                        experiment=expt,
                        variable_request=var_req_obj,
                        request_start_time=date2num(
                            experiments[expt.short_name]['start_date'],
                            std_units, institute_details['calendar']),
                        request_end_time=date2num(
                            experiments[expt.short_name]['end_date'],
                            std_units, institute_details['calendar']),
                        time_units=std_units,
                        calendar=institute_details['calendar'])
        else:
            msg = ('Unable to find variable request matching '
                   'cmor_name {} and table_name {} in the '
                   'database.'.format(cmor_name, table_name))
            print(msg)
            raise ValueError(msg)
def make_data_request(): """ Create a DataRequest that matches files in the later submission """ # Make the variable chips from the Monty model for which all data is available project = get_or_create( Project, short_name='CMIP6', full_name='Coupled Model Intercomparison Project Phase 6') institute = get_or_create(Institute, short_name='MOHC', full_name='Met Office Hadley Centre') climate_model = get_or_create(ClimateModel, short_name='Monty', full_name='Really good model') experiment = get_or_create(Experiment, short_name='rcp45', full_name='Really good experiment') var_req = get_or_create(VariableRequest, table_name='cfDay', long_name='Really good variable', units='1', var_name='chips', standard_name='really_good_variable', cell_methods='time:mean', positive='optimistic', variable_type=VARIABLE_TYPES['real'], dimensions='massive', cmor_name='chips', modeling_realm='atmos', frequency=FREQUENCY_VALUES['day'], cell_measures='', uid='123abc') data_req = get_or_create(DataRequest, project=project, institute=institute, climate_model=climate_model, experiment=experiment, variable_request=var_req, start_time=_cmpts2num(1991, 1, 1, 0, 0, 0, 0, TIME_UNITS, CALENDAR), end_time=_cmpts2num(1993, 12, 30, 0, 0, 0, 0, TIME_UNITS, CALENDAR), time_units=TIME_UNITS, calendar=CALENDAR) # Make the variable spam from the Python model for which one year is missing institute = get_or_create(Institute, short_name='IPSL', full_name='Institut Pierre Simon Laplace') climate_model = get_or_create(ClimateModel, short_name='Python', full_name='Really good model') experiment = get_or_create(Experiment, short_name='abrupt4xCO2', full_name='Really good experiment') var_req = get_or_create(VariableRequest, table_name='cfDay', long_name='Really good variable', units='1', var_name='spam', standard_name='really_good_variable', cell_methods='time:mean', positive='optimistic', variable_type=VARIABLE_TYPES['real'], dimensions='massive', cmor_name='spam', modeling_realm='atmos', 
frequency=FREQUENCY_VALUES['day'], cell_measures='', uid='123abc') data_req = get_or_create(DataRequest, project=project, institute=institute, climate_model=climate_model, experiment=experiment, variable_request=var_req, start_time=_cmpts2num(1991, 1, 1, 0, 0, 0, 0, TIME_UNITS, CALENDAR), end_time=_cmpts2num(1994, 12, 30, 0, 0, 0, 0, TIME_UNITS, CALENDAR), time_units=TIME_UNITS, calendar=CALENDAR) # Make two requests that are entirely missing var_req = get_or_create(VariableRequest, table_name='Aday', long_name='Really good variable', units='1', var_name='pie', standard_name='really_good_variable', cell_methods='time:mean', positive='optimistic', variable_type=VARIABLE_TYPES['real'], dimensions='massive', cmor_name='pie', modeling_realm='atmos', frequency=FREQUENCY_VALUES['day'], cell_measures='', uid='123abc') data_req = get_or_create(DataRequest, project=project, institute=institute, climate_model=climate_model, experiment=experiment, variable_request=var_req, start_time=_cmpts2num(1991, 1, 1, 0, 0, 0, 0, TIME_UNITS, CALENDAR), end_time=_cmpts2num(1994, 12, 30, 0, 0, 0, 0, TIME_UNITS, CALENDAR), time_units=TIME_UNITS, calendar=CALENDAR) var_req = get_or_create(VariableRequest, table_name='Aday', long_name='Really good variable', units='1', var_name='cake', standard_name='really_good_variable', cell_methods='time:mean', positive='optimistic', variable_type=VARIABLE_TYPES['real'], dimensions='massive', cmor_name='cake', modeling_realm='atmos', frequency=FREQUENCY_VALUES['day'], cell_measures='', uid='123abc') data_req = get_or_create(DataRequest, project=project, institute=institute, climate_model=climate_model, experiment=experiment, variable_request=var_req, start_time=_cmpts2num(1991, 1, 1, 0, 0, 0, 0, TIME_UNITS, CALENDAR), end_time=_cmpts2num(1994, 12, 30, 0, 0, 0, 0, TIME_UNITS, CALENDAR), time_units=TIME_UNITS, calendar=CALENDAR) # generate variable requests for the remaining files in the later submission var_req = get_or_create(VariableRequest, 
table_name='day', long_name='Really good variable', units='1', var_name='beans', standard_name='really_good_variable', cell_methods='time:mean', positive='smelly', variable_type=VARIABLE_TYPES['real'], dimensions='massive', cmor_name='beans', modeling_realm='atmos', frequency=FREQUENCY_VALUES['day'], cell_measures='', uid='123abc')