def main():
    """
    Main entry point.

    Collect the HadGEM3-GC31-HH hist-1950 and control-1950 data requests
    of interest and remove each one with _remove_hh_dreq().
    """
    hh_model = 'HadGEM3-GC31-HH'

    # MOHC hist-1950 monthly/daily/3-hourly requests (stream-2 filtered)
    hist_mon_day = filter_hadgem_stream2(DataRequest.objects.filter(
        institute__short_name='MOHC',
        climate_model__short_name=hh_model,
        experiment__short_name='hist-1950',
        variable_request__frequency__in=['mon', 'day', '3hr'],
        rip_code='r1i1p1f1'))

    # NERC control-1950 3-hourly tables (no stream-2 filtering applied)
    ctrl_3hr = DataRequest.objects.filter(
        institute__short_name='NERC',
        climate_model__short_name=hh_model,
        experiment__short_name='control-1950',
        variable_request__table_name__in=['3hr', 'E3hrPt']).distinct()

    # NERC control-1950 monthly/daily requests (stream-2 filtered)
    ctrl_mon_day = filter_hadgem_stream2(DataRequest.objects.filter(
        institute__short_name='NERC',
        climate_model__short_name=hh_model,
        experiment__short_name='control-1950',
        variable_request__frequency__in=['mon', 'day'],
        rip_code='r1i1p1f1'))

    # Querysets can be combined with the | operator
    dreqs = hist_mon_day | ctrl_mon_day | ctrl_3hr
    logger.info('%s data requests found', dreqs.count())

    for dreq in dreqs:
        _remove_hh_dreq(dreq)
def calc_item_size(path):
    """
    Calculate the size in bytes of all datasets below the specified `path`.

    :param str path: the path to the item
    :returns: the size in bytes, or None if no matching files exist
    """
    filter_terms = {'datafile__isnull': False, 'project__short_name': 'CMIP6'}
    components = path.split('/')
    # Map the position of each path component after the seven fixed leading
    # directories onto the corresponding DataRequest filter field.
    index_to_str = {
        0: 'climate_model__short_name',
        1: 'experiment__short_name',
        2: 'rip_code',
        3: 'variable_request__table_name',
        4: 'variable_request__cmor_name'
    }
    for index, cmpnt in enumerate(components[7:]):
        # Components deeper than the CMOR name (e.g. version directories)
        # carry no extra filter information. The original code raised
        # KeyError for such paths.
        if index not in index_to_str:
            break
        filter_terms[index_to_str[index]] = cmpnt

    # Handle the EC-Earth AMIP special cases. Both the model and experiment
    # components must be present in the path (the original code raised
    # IndexError on shorter paths).
    if len(components) > 8:
        if (components[7] == 'EC-Earth3P'
                and components[8] == 'highresSST-present'):
            del filter_terms['climate_model__short_name']
            filter_terms['climate_model__short_name__in'] = [
                'EC-Earth3P', 'EC-Earth3'
            ]
        if (components[7] == 'EC-Earth3P-HR'
                and components[8] == 'highresSST-present'):
            del filter_terms['climate_model__short_name']
            filter_terms['climate_model__short_name__in'] = [
                'EC-Earth3P-HR', 'EC-Earth3-HR'
            ]

    # Find the data requests
    data_reqs = DataRequest.objects.filter(**filter_terms).distinct()

    # Handle HadGEM special cases: stream-2 members need the
    # filter_hadgem_stream2() reduction applied; guard against paths that
    # do not include a model component.
    if len(components) > 7 and components[7].startswith('HadGEM'):
        stream1 = data_reqs.filter(rip_code='r1i1p1f1')
        stream2 = data_reqs.exclude(rip_code='r1i1p1f1')
        data_reqs = stream1 | filter_hadgem_stream2(stream2)

    # Calculate the volumes. aggregate() yields None for the sum when no
    # files match.
    total_file_size = (DataFile.objects.filter(
        data_request__in=data_reqs).distinct().aggregate(
            Sum('size'))['size__sum'])
    return total_file_size
def main(args):
    """
    Main entry point.

    Report the total and offline data volumes of the selected data
    requests and, when ``args.create`` is set, create a retrieval request
    for them.
    """
    start_year = 1948
    end_year = 2051

    # A HadGEM3-GC31-HH highres-future query (excluding the O*, SI* and
    # Prim* tables) was previously used here.
    selected = DataRequest.objects.filter(
        climate_model__short_name='HadGEM3-GC31-MH',
        experiment__short_name='spinup-1950',
        variable_request__table_name__in=['SImon', 'SIday', 'PrimSIday'],
        datafile__isnull=False).distinct()
    data_reqs = filter_hadgem_stream2(selected)

    total_bytes = get_request_size(data_reqs, start_year, end_year)
    offline_bytes = get_request_size(data_reqs, start_year, end_year,
                                     offline=True)
    logger.info('Total data volume: {} Volume to restore: {}'.format(
        filesizeformat(total_bytes).replace('\xa0', ' '),
        filesizeformat(offline_bytes).replace('\xa0', ' ')))

    if args.create:
        jon = User.objects.get(username='******')
        rr = RetrievalRequest.objects.create(requester=jon,
                                             start_year=start_year,
                                             end_year=end_year)
        # Back-date the request -- presumably so it is not expired
        # automatically; confirm against the retention policy.
        utc = datetime.timezone(datetime.timedelta())
        rr.date_created = datetime.datetime(2000, 1, 1, 0, 0, tzinfo=utc)
        rr.save()
        rr.data_request.add(*data_reqs)
        logger.info('Retrieval request {} created.'.format(rr.id))
def main(args):
    """
    Main entry point.

    Report the total and offline data volumes of the HadGEM3-GC31-LL
    hist-1950 Prim* data requests and, when ``args.create`` is set,
    create a retrieval request for them.
    """
    start_year = 1948
    end_year = 2051

    # Stream-2 members (r1i2p1f1 to r1i8p1f1) were previously selected
    # here instead of r1i1p1f1.
    selected = DataRequest.objects.filter(
        climate_model__short_name='HadGEM3-GC31-LL',
        experiment__short_name='hist-1950',
        rip_code='r1i1p1f1',
        variable_request__table_name__startswith='Prim',
        datafile__isnull=False).distinct()
    data_reqs = filter_hadgem_stream2(selected)

    total_bytes = get_request_size(data_reqs, start_year, end_year)
    offline_bytes = get_request_size(data_reqs, start_year, end_year,
                                     offline=True)
    logger.debug('Total data volume: {} Volume to restore: {}'.format(
        filesizeformat(total_bytes).replace('\xa0', ' '),
        filesizeformat(offline_bytes).replace('\xa0', ' ')))

    if args.create:
        jon = User.objects.get(username='******')
        rr = RetrievalRequest.objects.create(requester=jon,
                                             start_year=start_year,
                                             end_year=end_year)
        # Back-date the request -- presumably so it is not expired
        # automatically; confirm against the retention policy.
        utc = datetime.timezone(datetime.timedelta())
        rr.date_created = datetime.datetime(2000, 1, 1, 0, 0, tzinfo=utc)
        rr.save()
        rr.data_request.add(*data_reqs)
        logger.debug('Retrieval request {} created.'.format(rr.id))
def main(args):
    """
    Main entry point.

    Task names in the output JSON file are in the form:

    <climate-model>_<experiment>_<variant-label>_<table>_<variable>

    e.g.:

    HadGEM3-GC31-LM_highresSST-present_r1i1p1f1_Amon_psl
    """
    # Load any previously generated task list so that the list of just
    # the new tasks can be written at the end.
    existing_tasks = []
    if os.path.exists(args.json_file):
        with open(args.json_file) as handle:
            existing_tasks = json.load(handle)
        logger.debug('{} existing tasks loaded from file'.format(
            len(existing_tasks)))

    # HH control/hist, every non-Prim table (stream-2 filtered)
    hh_pres_day = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name='HadGEM3-GC31-HH',
            experiment__short_name__in=['control-1950', 'hist-1950'],
            datafile__isnull=False
        ).exclude(
            variable_request__table_name__startswith='Prim'
        ).distinct())

    # HH control, 3hr table only (no stream-2 filtering applied)
    hh_ctrl_3hr = DataRequest.objects.filter(
        climate_model__short_name='HadGEM3-GC31-HH',
        experiment__short_name='control-1950',
        variable_request__table_name='3hr',
        datafile__isnull=False).distinct()

    # HH highres-future, every non-Prim table
    hh_future = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name='HadGEM3-GC31-HH',
            experiment__short_name='highres-future',
            datafile__isnull=False
        ).exclude(
            variable_request__table_name__startswith='Prim'
        ).distinct())

    # MH spinup, ocean monthly and daily tables
    mh_spinup = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name='HadGEM3-GC31-MH',
            experiment__short_name='spinup-1950',
            variable_request__table_name__in=[
                'Omon', 'Oday', 'PrimOmon', 'PrimOday'
            ],
            datafile__isnull=False).distinct())

    # HM epfy/epfz variables for the three coupled experiments
    hm_epfyz = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name='HadGEM3-GC31-HM',
            experiment__short_name__in=[
                'control-1950', 'highres-future', 'hist-1950'
            ],
            rip_code='r1i1p1f1',
            variable_request__cmor_name__in=['epfy', 'epfz'],
            datafile__isnull=False).distinct())

    # HH sea-ice tables for the three coupled experiments
    orca12_cice = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name__in=['HadGEM3-GC31-HH'],
            experiment__short_name__in=[
                'control-1950', 'highres-future', 'hist-1950'
            ],
            variable_request__table_name__in=['SImon', 'SIday',
                                              'PrimSIday'],
            datafile__isnull=False).distinct())

    # MH spinup sea-ice tables
    orca12_cice_spinup = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name='HadGEM3-GC31-MH',
            experiment__short_name='spinup-1950',
            variable_request__table_name__in=['SImon', 'SIday',
                                              'PrimSIday'],
            datafile__isnull=False).distinct())

    # Task querysets can be ORed together with |
    components = [hh_pres_day, hh_ctrl_3hr, hh_future, mh_spinup,
                  hm_epfyz, orca12_cice, orca12_cice_spinup]
    all_tasks = components[0]
    for component in components[1:]:
        all_tasks = all_tasks | component

    task_name_list = [
        '{}_{}_{}_{}_{}'.format(dr.climate_model.short_name,
                                dr.experiment.short_name, dr.rip_code,
                                dr.variable_request.table_name,
                                dr.variable_request.cmor_name)
        for dr in all_tasks
    ]
    logger.debug('{} tasks in total'.format(len(all_tasks)))

    with open(args.json_file, 'w') as handle:
        json.dump(task_name_list, handle, indent=4)

    # Write the tasks that were not in the previous file, moving any
    # existing new-tasks file out of the way first.
    if existing_tasks:
        new_tasks_list = list(set(task_name_list) - set(existing_tasks))
        new_tasks_file = args.json_file.replace('.json', '_new.json')
        if os.path.exists(new_tasks_file):
            suffix = datetime.datetime.utcnow().strftime('%Y%m%d%H%M')
            os.rename(new_tasks_file, new_tasks_file + '.' + suffix)
        with open(new_tasks_file, 'w') as handle:
            json.dump(new_tasks_list, handle, indent=4)
        logger.debug('{} new tasks'.format(len(new_tasks_list)))
def main():
    """
    Run the processing.

    Write to FILENAME a CSV listing, for each unique
    institute/model/experiment/variant/table combination in the selected
    PRIMAVERA data requests, the DRS id and its data volume in tebibytes.
    """
    amip_expts = ['highresSST-present', 'highresSST-future']
    coupled_expts = [
        'spinup-1950', 'hist-1950', 'control-1950', 'highres-future'
    ]
    stream1_2_expts = amip_expts + coupled_expts
    # MOHC stream 2 is members r1i2p2f1 to r1i15p1f1
    hadgem_stream2_members = [
        f'r1i{init_index}p1f1' for init_index in range(2, 16)
    ]
    other_models = DataRequest.objects.filter(
        project__short_name='PRIMAVERA',
        experiment__short_name__in=stream1_2_expts,
        variable_request__table_name__startswith='Prim',
        datafile__isnull=False).exclude(
            # Exclude HadGEM stream 2 here; it is handled separately
            # below via filter_hadgem_stream2. Bug fix: the prefix was
            # misspelt 'HadeGEM', which matched no models and made this
            # exclude a no-op.
            climate_model__short_name__startswith='HadGEM',
            rip_code__in=hadgem_stream2_members).exclude(
                # Exclude EC-Earth coupled r1i1p1f1
                institute__short_name='EC-Earth-Consortium',
                experiment__short_name__in=coupled_expts,
                rip_code='r1i1p1f1').distinct()
    hadgem_s2 = filter_hadgem_stream2(
        DataRequest.objects.filter(
            project__short_name='PRIMAVERA',
            experiment__short_name__in=stream1_2_expts,
            variable_request__table_name__startswith='Prim',
            # Bug fix: 'HadeGEM' -> 'HadGEM' (the typo made this queryset
            # permanently empty).
            climate_model__short_name__startswith='HadGEM',
            rip_code__in=hadgem_stream2_members,
            datafile__isnull=False)).distinct()
    ec_earth_s1 = DataRequest.objects.filter(
        institute__short_name='EC-Earth-Consortium',
        experiment__short_name__in=coupled_expts,
        rip_code='r1i1p1f1',
        datafile__isnull=False).distinct()
    wp5 = DataRequest.objects.filter(experiment__short_name__in=[
        'primWP5-amv-neg', 'primWP5-amv-pos', 'dcppc-amv-neg',
        'dcppc-amv-pos'
    ],
                                     datafile__isnull=False).distinct()
    prim_reqs = other_models | hadgem_s2 | ec_earth_s1 | wp5
    # Unique combinations, ordered so the output file is stable
    unique_expts = (prim_reqs.values_list(
        'institute__short_name', 'climate_model__short_name',
        'experiment__short_name', 'rip_code',
        'variable_request__table_name').distinct().order_by(
            'institute__short_name', 'climate_model__short_name',
            'experiment__short_name', 'rip_code',
            'variable_request__table_name'))
    with open(FILENAME, 'w') as fh:
        fh.write('drs_id, Volume (TB)\n')
        for (inst_name, model_name, expt_name, rip_code,
                table_name) in unique_expts:
            dreqs = prim_reqs.filter(
                institute__short_name=inst_name,
                climate_model__short_name=model_name,
                experiment__short_name=expt_name,
                rip_code=rip_code,
                variable_request__table_name=table_name)
            if dreqs:
                # Total size in bytes of all files in these requests
                dreq_size = (DataFile.objects.filter(
                    data_request__in=dreqs).distinct().aggregate(
                        Sum('size'))['size__sum'])
                df = dreqs.first().datafile_set.first()
                drs_id = (f'PRIMAVERA.'
                          f'{df.activity_id.short_name}.'
                          f'{df.institute.short_name}.'
                          f'{df.climate_model.short_name}.'
                          f'{df.experiment.short_name}.'
                          f'{df.rip_code}.'
                          f'{df.variable_request.table_name}')
                # NOTE(review): MPI DCPP ids are rewritten to primWP5 --
                # presumably WP5 data published under DCPP names; confirm.
                if 'MPI' in drs_id and 'DCPP' in drs_id:
                    drs_id = (drs_id.replace('DCPP', 'primWP5').replace(
                        'dcppc', 'primWP5'))
                if 'NCAS' in drs_id:
                    drs_id = drs_id.replace('NCAS', 'NERC')
                # 1024**4 converts bytes to tebibytes
                fh.write(f'{drs_id}, {dreq_size / 1024**4}\n')
def main(args):
    """
    Main entry point.

    Report the total and offline data volumes of the HadGEM3-GC31-MM
    highres-future stream-2 data requests and, when ``args.create`` is
    set, create a retrieval request for them.
    """
    start_year = 1948
    end_year = 2051

    # Equivalent queries for HadGEM3-GC31-LL (members r1i2-r1i4) and
    # HadGEM3-GC31-HM (member r1i3) were previously used here.
    wanted_tables = [
        '3hr', '6hrPlev', '6hrPlevPt', 'AERday', 'AERmon', 'Amon',
        'CF3hr', 'CFday', 'CFmon', 'E1hr', 'E3hr', 'E3hrPt', 'Eday',
        'EdayZ', 'Emon', 'EmonZ', 'Esubhr', 'LImon', 'Lmon', 'day'
    ]
    data_reqs = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name='HadGEM3-GC31-MM',
            experiment__short_name='highres-future',
            rip_code__in=['r1i2p1f1', 'r1i3p1f1'],
            variable_request__table_name__in=wanted_tables,
            datafile__isnull=False).exclude(
                variable_request__table_name__startswith='Prim'
            ).distinct())

    total_bytes = get_request_size(data_reqs, start_year, end_year)
    offline_bytes = get_request_size(data_reqs, start_year, end_year,
                                     offline=True)
    logger.debug('Total data volume: {} Volume to restore: {}'.format(
        filesizeformat(total_bytes).replace('\xa0', ' '),
        filesizeformat(offline_bytes).replace('\xa0', ' ')))

    if args.create:
        jon = User.objects.get(username='******')
        rr = RetrievalRequest.objects.create(requester=jon,
                                             start_year=start_year,
                                             end_year=end_year)
        # Back-date the request -- presumably so it is not expired
        # automatically; confirm against the retention policy.
        utc = datetime.timezone(datetime.timedelta())
        rr.date_created = datetime.datetime(2000, 1, 1, 0, 0, tzinfo=utc)
        rr.save()
        rr.data_request.add(*data_reqs)
        logger.debug('Retrieval request {} created.'.format(rr.id))
def main(args):
    """
    Main entry point.

    Task names in the output JSON file are in the form:

    <climate-model>_<experiment>_<variant-label>_<table>_<variable>

    e.g.:

    HadGEM3-GC31-LM_highresSST-present_r1i1p1f1_Amon_psl
    """
    # Load any previously generated task list so that the list of just
    # the new tasks can be written at the end.
    existing_tasks = []
    if os.path.exists(args.json_file):
        with open(args.json_file) as handle:
            existing_tasks = json.load(handle)
        logger.debug('{} existing tasks loaded from file'.format(
            len(existing_tasks)))

    # Atmospheric tables wanted from each hist-1950 stream-2 member
    wanted_tables = [
        '3hr', '6hrPlev', '6hrPlevPt', 'AERday', 'AERmon', 'Amon',
        'CF3hr', 'CFday', 'CFmon', 'E1hr', 'E3hr', 'E3hrPt', 'Eday',
        'EdayZ', 'Emon', 'EmonZ', 'Esubhr', 'LImon', 'Lmon', 'day'
    ]

    def _hist_stream2(model, members):
        # hist-1950 stream-2 data requests for a single model
        return filter_hadgem_stream2(
            DataRequest.objects.filter(
                climate_model__short_name=model,
                experiment__short_name='hist-1950',
                rip_code__in=members,
                variable_request__table_name__in=wanted_tables,
                datafile__isnull=False).exclude(
                    variable_request__table_name__startswith='Prim'
                ).distinct())

    ll_hist = _hist_stream2('HadGEM3-GC31-LL',
                            [f'r1i{i}p1f1' for i in range(2, 9)])
    mm_hist = _hist_stream2('HadGEM3-GC31-MM', ['r1i2p1f1', 'r1i3p1f1'])
    hm_hist = _hist_stream2('HadGEM3-GC31-HM', ['r1i2p1f1', 'r1i3p1f1'])

    # Task querysets can be ORed together with |
    all_tasks = ll_hist | mm_hist | hm_hist

    task_name_list = [
        '{}_{}_{}_{}_{}'.format(dr.climate_model.short_name,
                                dr.experiment.short_name, dr.rip_code,
                                dr.variable_request.table_name,
                                dr.variable_request.cmor_name)
        for dr in all_tasks
    ]
    logger.debug('{} tasks in total'.format(len(all_tasks)))

    with open(args.json_file, 'w') as handle:
        json.dump(task_name_list, handle, indent=4)

    # Write the tasks that were not in the previous file, moving any
    # existing new-tasks file out of the way first.
    if existing_tasks:
        new_tasks_list = list(set(task_name_list) - set(existing_tasks))
        new_tasks_file = args.json_file.replace('.json', '_new.json')
        if os.path.exists(new_tasks_file):
            suffix = datetime.datetime.utcnow().strftime('%Y%m%d%H%M')
            os.rename(new_tasks_file, new_tasks_file + '.' + suffix)
        with open(new_tasks_file, 'w') as handle:
            json.dump(new_tasks_list, handle, indent=4)
        logger.debug('{} new tasks'.format(len(new_tasks_list)))
def main(args):
    """
    Main entry point.

    Collect the HadGEM3-GC31 data requests (mostly the Prim* tables plus
    the fixed-field tables) for the coupled and AMIP experiments and
    write their task names to the JSON file named in ``args.json_file``.

    Task names in the output JSON file are in the form:

    <climate-model>_<experiment>_<variant-label>_<table>_<variable>

    e.g.:

    HadGEM3-GC31-LM_highresSST-present_r1i1p1f1_Amon_psl
    """
    # Load any previously generated task list so that the list of just
    # the new tasks can be written at the end.
    existing_tasks = []
    if os.path.exists(args.json_file):
        with open(args.json_file) as fh:
            existing_tasks = json.load(fh)
        logger.debug('{} existing tasks loaded from file'.format(
            len(existing_tasks)))

    # Fixed-field tables for every HadGEM3-GC31 model
    fx = DataRequest.objects.filter(
        climate_model__short_name__startswith='HadGEM3-GC31',
        variable_request__table_name__in=['fx', 'Ofx'],
        datafile__isnull=False).distinct()

    # LL hist-1950 stream 1, Prim* tables except PrimSIday
    ll_hist = DataRequest.objects.filter(
        climate_model__short_name='HadGEM3-GC31-LL',
        experiment__short_name='hist-1950',
        rip_code='r1i1p1f1',
        variable_request__table_name__startswith='Prim',
        datafile__isnull=False).exclude(
            variable_request__table_name='PrimSIday').distinct()

    # LL hist-1950 stream 2 (members r1i2p1f1 to r1i8p1f1)
    ll_hist_s2 = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name='HadGEM3-GC31-LL',
            experiment__short_name='hist-1950',
            rip_code__in=[f'r1i{i}p1f1' for i in range(2, 9)],
            variable_request__table_name__startswith='Prim',
            datafile__isnull=False).exclude(
                variable_request__table_name='PrimSIday').distinct())

    # MM hist-1950 stream 1
    mm_hist = DataRequest.objects.filter(
        climate_model__short_name='HadGEM3-GC31-MM',
        experiment__short_name='hist-1950',
        rip_code='r1i1p1f1',
        variable_request__table_name__startswith='Prim',
        datafile__isnull=False).exclude(
            variable_request__table_name='PrimSIday').distinct()

    # MM hist-1950 stream 2 (members r1i2p1f1 and r1i3p1f1)
    mm_hist_s2 = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name='HadGEM3-GC31-MM',
            experiment__short_name='hist-1950',
            rip_code__in=[f'r1i{i}p1f1' for i in range(2, 4)],
            variable_request__table_name__startswith='Prim',
            datafile__isnull=False).exclude(
                variable_request__table_name='PrimSIday').distinct())

    # HM hist-1950 stream 1
    hm_hist = DataRequest.objects.filter(
        climate_model__short_name='HadGEM3-GC31-HM',
        experiment__short_name='hist-1950',
        rip_code='r1i1p1f1',
        variable_request__table_name__startswith='Prim',
        datafile__isnull=False).exclude(
            variable_request__table_name='PrimSIday').distinct()

    # HM hist-1950 stream 2 (members r1i2p1f1 and r1i3p1f1)
    hm_hist_s2 = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name='HadGEM3-GC31-HM',
            experiment__short_name='hist-1950',
            rip_code__in=[f'r1i{i}p1f1' for i in range(2, 4)],
            variable_request__table_name__startswith='Prim',
            datafile__isnull=False).exclude(
                variable_request__table_name='PrimSIday').distinct())

    # LL control and spinup
    ll_ctrl = DataRequest.objects.filter(
        climate_model__short_name='HadGEM3-GC31-LL',
        experiment__short_name__in=['control-1950', 'spinup-1950'],
        rip_code='r1i1p1f1',
        variable_request__table_name__startswith='Prim',
        datafile__isnull=False).exclude(
            variable_request__table_name='PrimSIday').distinct()

    # MM control and spinup
    mm_ctrl = DataRequest.objects.filter(
        climate_model__short_name='HadGEM3-GC31-MM',
        experiment__short_name__in=['control-1950', 'spinup-1950'],
        rip_code='r1i1p1f1',
        variable_request__table_name__startswith='Prim',
        datafile__isnull=False).exclude(
            variable_request__table_name='PrimSIday').distinct()

    # HM control (spinup deliberately commented out)
    hm_ctrl = DataRequest.objects.filter(
        climate_model__short_name='HadGEM3-GC31-HM',
        experiment__short_name__in=['control-1950'],  # , 'spinup-1950'],
        rip_code='r1i1p1f1',
        variable_request__table_name__startswith='Prim',
        datafile__isnull=False).exclude(
            variable_request__table_name='PrimSIday').distinct()

    # HH control. NOTE(review): unlike the LL/MM/HM cases above there is
    # no PrimSIday exclusion here, and r1i1p1f1 (stream 1) is passed
    # through filter_hadgem_stream2 -- confirm both are intended.
    hh_ctrl = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name='HadGEM3-GC31-HH',
            experiment__short_name__in=['control-1950'],  # , 'spinup-1950'],
            rip_code='r1i1p1f1',
            variable_request__table_name__startswith='Prim',
            datafile__isnull=False).distinct())

    # HH hist-1950 (same NOTE as hh_ctrl applies)
    hh_hist = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name='HadGEM3-GC31-HH',
            experiment__short_name='hist-1950',
            rip_code='r1i1p1f1',
            variable_request__table_name__startswith='Prim',
            datafile__isnull=False).distinct())

    # HH highres-future, an explicit subset of the Prim* tables
    hh_future = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name='HadGEM3-GC31-HH',
            experiment__short_name='highres-future',
            rip_code='r1i1p1f1',
            variable_request__table_name__in=[
                'Primday', 'PrimdayPt', 'Prim3hr', 'Prim3hrPt',
                'Prim6hr', 'PrimOday', 'PrimOmon'
            ],
            datafile__isnull=False).distinct())

    # AMIP present-day stream 1 for all three atmosphere-only models
    ll_mm_hm_amip_s1 = DataRequest.objects.filter(
        climate_model__short_name__in=[
            'HadGEM3-GC31-LM', 'HadGEM3-GC31-MM', 'HadGEM3-GC31-HM'
        ],
        experiment__short_name='highresSST-present',
        rip_code='r1i1p1f1',
        variable_request__table_name__startswith='Prim',
        datafile__isnull=False).exclude(
            variable_request__table_name='PrimSIday', ).distinct()

    # AMIP present-day stream 2 for LM and MM
    ll_mm_amip_s2 = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name__in=[
                'HadGEM3-GC31-LM', 'HadGEM3-GC31-MM'
            ],
            experiment__short_name='highresSST-present',
            rip_code__in=['r1i2p1f1', 'r1i3p1f1', 'r1i14p1f1',
                          'r1i15p1f1'],
            variable_request__table_name__startswith='Prim',
            datafile__isnull=False).exclude(
                variable_request__table_name='PrimSIday', ).distinct())

    # AMIP present-day stream 2 for HM (fewer members than LM/MM)
    hm_amip_s2 = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name='HadGEM3-GC31-HM',
            experiment__short_name='highresSST-present',
            rip_code__in=['r1i2p1f1', 'r1i3p1f1'],
            variable_request__table_name__startswith='Prim',
            datafile__isnull=False).exclude(
                variable_request__table_name='PrimSIday', ).distinct())

    # AMIP future stream 1 for all three atmosphere-only models
    ll_mm_hm_famip_s1 = DataRequest.objects.filter(
        climate_model__short_name__in=[
            'HadGEM3-GC31-LM', 'HadGEM3-GC31-MM', 'HadGEM3-GC31-HM'
        ],
        experiment__short_name='highresSST-future',
        rip_code='r1i1p1f1',
        variable_request__table_name__startswith='Prim',
        datafile__isnull=False).exclude(
            variable_request__table_name='PrimSIday', ).distinct()

    # AMIP future stream 2. NOTE(review): despite the ll_mm name this
    # list also includes HadGEM3-GC31-HM (with members r1i14/r1i15 that
    # the present-day hm_amip_s2 above does not use) -- confirm intended.
    ll_mm_famip_s2 = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name__in=[
                'HadGEM3-GC31-LM', 'HadGEM3-GC31-MM', 'HadGEM3-GC31-HM'
            ],
            experiment__short_name='highresSST-future',
            rip_code__in=['r1i2p1f1', 'r1i3p1f1', 'r1i14p1f1',
                          'r1i15p1f1'],
            variable_request__table_name__startswith='Prim',
            datafile__isnull=False).exclude(
                variable_request__table_name='PrimSIday', ).distinct())

    # Coupled highres-future stream 1
    fcoup_s1 = DataRequest.objects.filter(
        climate_model__short_name__in=[
            'HadGEM3-GC31-LL', 'HadGEM3-GC31-MM', 'HadGEM3-GC31-HM'
        ],
        experiment__short_name='highres-future',
        rip_code='r1i1p1f1',
        variable_request__table_name__startswith='Prim',
        datafile__isnull=False).exclude(
            variable_request__table_name='PrimSIday', ).distinct()

    # Coupled highres-future stream 2
    fcoup_s2 = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name__in=[
                'HadGEM3-GC31-LL', 'HadGEM3-GC31-MM', 'HadGEM3-GC31-HM'
            ],
            experiment__short_name='highres-future',
            rip_code__in=['r1i2p1f1', 'r1i3p1f1', 'r1i4p1f1'],
            variable_request__table_name__startswith='Prim',
            datafile__isnull=False).exclude(
                variable_request__table_name='PrimSIday', ).distinct())

    # Spinup for every HadGEM3-GC31 model
    spinup = DataRequest.objects.filter(
        climate_model__short_name__startswith='HadGEM3-GC31',
        experiment__short_name='spinup-1950',
        variable_request__table_name__startswith='Prim',
        datafile__isnull=False).exclude(
            variable_request__table_name='PrimSIday', ).distinct()

    # NOTE(review): __contains='fx' also matches the tables already in
    # `fx` above; the OR below makes the duplication harmless.
    fx_Ofx = DataRequest.objects.filter(
        climate_model__short_name__startswith='HadGEM3-GC31',
        variable_request__table_name__contains='fx',
        datafile__isnull=False).distinct()

    # task querysets can be ORed together with |
    all_tasks = (fx | ll_hist | ll_hist_s2 | mm_hist | mm_hist_s2
                 | hm_hist | hm_hist_s2 | ll_ctrl | mm_ctrl | hm_ctrl
                 | hh_ctrl | hh_hist | hh_future | ll_mm_hm_amip_s1
                 | ll_mm_amip_s2 | hm_amip_s2 | ll_mm_hm_famip_s1
                 | ll_mm_famip_s2 | fcoup_s1 | fcoup_s2 | spinup
                 | fx_Ofx)

    task_name_list = [
        '{}_{}_{}_{}_{}'.format(dr.climate_model.short_name,
                                dr.experiment.short_name, dr.rip_code,
                                dr.variable_request.table_name,
                                dr.variable_request.cmor_name)
        for dr in all_tasks
    ]
    logger.debug('{} tasks in total'.format(len(all_tasks)))

    with open(args.json_file, 'w') as fh:
        json.dump(task_name_list, fh, indent=4)

    # Write the tasks that were not in the previous file, moving any
    # existing new-tasks file out of the way first.
    if existing_tasks:
        new_tasks_list = list(set(task_name_list) - set(existing_tasks))
        new_tasks_file = args.json_file.replace('.json', '_new.json')
        if os.path.exists(new_tasks_file):
            suffix = datetime.datetime.utcnow().strftime('%Y%m%d%H%M')
            os.rename(new_tasks_file, new_tasks_file + '.' + suffix)
        with open(new_tasks_file, 'w') as fh:
            json.dump(new_tasks_list, fh, indent=4)
        logger.debug('{} new tasks'.format(len(new_tasks_list)))