コード例 #1
0
def main():
    """
    Main entry point

    Build three selections of HadGEM3-GC31-HH data requests, log how many
    requests the combined selection holds and pass each request in turn to
    `_remove_hh_dreq`.
    """
    model = 'HadGEM3-GC31-HH'
    first_member = 'r1i1p1f1'

    # MOHC hist-1950 requests with monthly, daily or 3-hourly frequency
    historic = filter_hadgem_stream2(
        DataRequest.objects.filter(
            institute__short_name='MOHC',
            climate_model__short_name=model,
            experiment__short_name='hist-1950',
            variable_request__frequency__in=['mon', 'day', '3hr'],
            rip_code=first_member))

    # NERC control-1950 requests with monthly or daily frequency
    control = filter_hadgem_stream2(
        DataRequest.objects.filter(
            institute__short_name='NERC',
            climate_model__short_name=model,
            experiment__short_name='control-1950',
            variable_request__frequency__in=['mon', 'day'],
            rip_code=first_member))

    # NERC control-1950 requests in the 3hr and E3hrPt tables. These are not
    # restricted to one variant label and are not passed through
    # filter_hadgem_stream2.
    control_3hr = DataRequest.objects.filter(
        institute__short_name='NERC',
        climate_model__short_name=model,
        experiment__short_name='control-1950',
        variable_request__table_name__in=['3hr', 'E3hrPt']).distinct()

    selected = historic | control | control_3hr

    logger.info('%s data requests found', selected.count())

    for data_req in selected:
        _remove_hh_dreq(data_req)
コード例 #2
0
def calc_item_size(path):
    """
    Calculate the size in bytes of all datasets below the specified `path`.

    The path is split on ``/`` and the components from index 7 onwards are
    read, in order, as climate model, experiment, variant label (rip code),
    table name and CMOR variable name. Only the components that are present
    are used as filter terms, so the path may stop at any of those levels.

    :param str path: the path to the item
    :returns: the size in bytes; the value is whatever the ``Sum('size')``
        aggregate yields, which is ``None`` when no files match
    :raises KeyError: if more than five components follow the first seven
    """
    filter_terms = {'datafile__isnull': False, 'project__short_name': 'CMIP6'}
    components = path.split('/')
    index_to_str = {
        0: 'climate_model__short_name',
        1: 'experiment__short_name',
        2: 'rip_code',
        3: 'variable_request__table_name',
        4: 'variable_request__cmor_name'
    }
    for index, cmpnt in enumerate(components[7:]):
        filter_terms[index_to_str[index]] = cmpnt

    # Read the model and experiment back from the parsed terms rather than
    # indexing `components` directly: a path that stops at the model level
    # (or above it) has no experiment component and must not raise
    # IndexError.
    model_name = filter_terms.get('climate_model__short_name', '')
    expt_name = filter_terms.get('experiment__short_name')

    # Handle the EC-Earth AMIP special cases: for highresSST-present each of
    # these models is matched under either of two model names.
    ec_earth_amip_models = {
        'EC-Earth3P': ['EC-Earth3P', 'EC-Earth3'],
        'EC-Earth3P-HR': ['EC-Earth3P-HR', 'EC-Earth3-HR'],
    }
    if (expt_name == 'highresSST-present'
            and model_name in ec_earth_amip_models):
        del filter_terms['climate_model__short_name']
        filter_terms['climate_model__short_name__in'] = (
            ec_earth_amip_models[model_name])

    # Find the data requests
    data_reqs = DataRequest.objects.filter(**filter_terms).distinct()

    # Handle HadGEM special cases: r1i1p1f1 requests are used as they are,
    # all other variant labels go through filter_hadgem_stream2.
    if model_name.startswith('HadGEM'):
        stream1 = data_reqs.filter(rip_code='r1i1p1f1')
        stream2 = data_reqs.exclude(rip_code='r1i1p1f1')
        data_reqs = stream1 | filter_hadgem_stream2(stream2)

    # Calculate the volumes
    total_file_size = (DataFile.objects.filter(
        data_request__in=data_reqs).distinct().aggregate(
            Sum('size'))['size__sum'])

    return total_file_size
コード例 #3
0
def main(args):
    """
    Main entry point

    Log the total volume, and the volume flagged as offline, of a selection
    of data requests for the years 1948 to 2051. If `args.create` is set,
    also create a retrieval request covering the selection.

    :param args: object whose `create` attribute enables creation of the
        retrieval request (presumably the parsed command-line arguments)
    """
    first_year, last_year = 1948, 2051

    # Alternative selection, kept for reference:
    # data_reqs = filter_hadgem_stream2(DataRequest.objects.filter(
    #     climate_model__short_name='HadGEM3-GC31-HH',
    #     experiment__short_name='highres-future',
    #     # variable_request__frequency__in=['3hr'],
    #     datafile__isnull=False
    # ).exclude(
    #     variable_request__table_name__startswith='O'
    # ).exclude(
    #     variable_request__table_name__startswith='SI'
    # ).exclude(
    #     variable_request__table_name__startswith='Prim'
    # ).distinct())

    # HadGEM3-GC31-MH spinup-1950 sea-ice tables that have files
    candidates = DataRequest.objects.filter(
        climate_model__short_name='HadGEM3-GC31-MH',
        experiment__short_name='spinup-1950',
        variable_request__table_name__in=['SImon', 'SIday', 'PrimSIday'],
        datafile__isnull=False).distinct()
    selected = filter_hadgem_stream2(candidates)

    # filesizeformat output has its non-breaking spaces replaced with plain
    # spaces before it is logged.
    total_text = filesizeformat(
        get_request_size(selected, first_year, last_year)
    ).replace('\xa0', ' ')
    offline_text = filesizeformat(
        get_request_size(selected, first_year, last_year, offline=True)
    ).replace('\xa0', ' ')
    logger.info('Total data volume: {} Volume to restore: {}'.format(
        total_text, offline_text))

    if not args.create:
        return

    requester = User.objects.get(username='******')
    retrieval = RetrievalRequest.objects.create(requester=requester,
                                                start_year=first_year,
                                                end_year=last_year)
    # NOTE(review): the creation date is overwritten with a fixed date of
    # 2000-01-01 00:00 UTC; the reason is not visible in this function.
    retrieval.date_created = datetime.datetime(2000, 1, 1, 0, 0,
                                               tzinfo=datetime.timezone.utc)
    retrieval.save()

    retrieval.data_request.add(*selected)

    logger.info('Retrieval request {} created.'.format(retrieval.id))
コード例 #4
0
def main(args):
    """
    Main entry point

    Log the total volume, and the volume flagged as offline, of a selection
    of data requests for the years 1948 to 2051. If `args.create` is set,
    also create a retrieval request covering the selection.

    :param args: object whose `create` attribute enables creation of the
        retrieval request (presumably the parsed command-line arguments)
    """
    first_year, last_year = 1948, 2051

    # HadGEM3-GC31-LL hist-1950 r1i1p1f1 requests in tables whose name
    # starts with 'Prim' and that have files
    candidates = DataRequest.objects.filter(
        climate_model__short_name='HadGEM3-GC31-LL',
        experiment__short_name='hist-1950',
        # rip_code__in=[f'r1i{i}p1f1' for i in range(2,9)],
        rip_code='r1i1p1f1',
        variable_request__table_name__startswith='Prim',
        datafile__isnull=False).distinct()
    selected = filter_hadgem_stream2(candidates)

    # filesizeformat output has its non-breaking spaces replaced with plain
    # spaces before it is logged.
    total_text = filesizeformat(
        get_request_size(selected, first_year, last_year)
    ).replace('\xa0', ' ')
    offline_text = filesizeformat(
        get_request_size(selected, first_year, last_year, offline=True)
    ).replace('\xa0', ' ')
    logger.debug('Total data volume: {} Volume to restore: {}'.format(
        total_text, offline_text))

    if not args.create:
        return

    requester = User.objects.get(username='******')
    retrieval = RetrievalRequest.objects.create(requester=requester,
                                                start_year=first_year,
                                                end_year=last_year)
    # NOTE(review): the creation date is overwritten with a fixed date of
    # 2000-01-01 00:00 UTC; the reason is not visible in this function.
    retrieval.date_created = datetime.datetime(2000, 1, 1, 0, 0,
                                               tzinfo=datetime.timezone.utc)
    retrieval.save()

    retrieval.data_request.add(*selected)

    logger.debug('Retrieval request {} created.'.format(retrieval.id))
コード例 #5
0
def main(args):
    """
    Main entry point

    Build the list of task names for a set of data request selections and
    write it, as JSON, to `args.json_file`. If that file already existed and
    held a non-empty list, the names that were not in it are additionally
    written to a sibling file whose name has `_new` inserted before the
    extension; an existing `_new` file is first renamed with a UTC
    timestamp suffix.

    Task names in the output JSON file are in the form:

    <climate-model>_<experiment>_<variant-label>_<table>_<variable>

    e.g.:

    HadGEM3-GC31-LM_highresSST-present_r1i1p1f1_Amon_psl

    :param args: object providing the `json_file` path (presumably the
        parsed command-line arguments)
    """
    existing_tasks = []
    if os.path.exists(args.json_file):
        with open(args.json_file) as fh:
            existing_tasks = json.load(fh)

        logger.debug('{} existing tasks loaded from file'.format(
            len(existing_tasks)))

    hh_pres_day = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name='HadGEM3-GC31-HH',
            experiment__short_name__in=['control-1950', 'hist-1950'],
            datafile__isnull=False).exclude(
                variable_request__table_name__startswith='Prim').distinct())

    # Not passed through filter_hadgem_stream2, unlike the other selections
    hh_ctrl_3hr = DataRequest.objects.filter(
        climate_model__short_name='HadGEM3-GC31-HH',
        experiment__short_name='control-1950',
        variable_request__table_name='3hr',
        datafile__isnull=False).distinct()

    hh_future = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name='HadGEM3-GC31-HH',
            experiment__short_name='highres-future',
            datafile__isnull=False).exclude(
                variable_request__table_name__startswith='Prim').distinct())

    mh_spinup = filter_hadgem_stream2(
        DataRequest.objects.filter(climate_model__short_name='HadGEM3-GC31-MH',
                                   experiment__short_name='spinup-1950',
                                   variable_request__table_name__in=[
                                       'Omon', 'Oday', 'PrimOmon', 'PrimOday'
                                   ],
                                   datafile__isnull=False).distinct())

    hm_epfyz = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name='HadGEM3-GC31-HM',
            experiment__short_name__in=[
                'control-1950', 'highres-future', 'hist-1950'
            ],
            rip_code='r1i1p1f1',
            variable_request__cmor_name__in=['epfy', 'epfz'],
            datafile__isnull=False).distinct())

    orca12_cice = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name__in=['HadGEM3-GC31-HH'],
            experiment__short_name__in=[
                'control-1950', 'highres-future', 'hist-1950'
            ],
            variable_request__table_name__in=['SImon', 'SIday', 'PrimSIday'],
            datafile__isnull=False).distinct())

    orca12_cice_spinup = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name='HadGEM3-GC31-MH',
            experiment__short_name='spinup-1950',
            variable_request__table_name__in=['SImon', 'SIday', 'PrimSIday'],
            datafile__isnull=False).distinct())

    # task querysets can be ORed together with |

    all_tasks = (hh_pres_day | hh_ctrl_3hr | hh_future | mh_spinup | hm_epfyz
                 | orca12_cice | orca12_cice_spinup)

    task_name_list = [
        '{}_{}_{}_{}_{}'.format(dr.climate_model.short_name,
                                dr.experiment.short_name, dr.rip_code,
                                dr.variable_request.table_name,
                                dr.variable_request.cmor_name)
        for dr in all_tasks
    ]
    # One name is produced per data request, so this is the task count
    logger.debug('{} tasks in total'.format(len(task_name_list)))

    with open(args.json_file, 'w') as fh:
        json.dump(task_name_list, fh, indent=4)

    if existing_tasks:
        # Sorted so that the new-tasks file is the same from run to run
        new_tasks_list = sorted(set(task_name_list) - set(existing_tasks))

        # Insert '_new' before the extension only. A plain str.replace of
        # '.json' would also rewrite directory names containing '.json' and,
        # for a file name without '.json', would return the task file's own
        # path so that the dump below overwrote the task list.
        root, ext = os.path.splitext(args.json_file)
        new_tasks_file = root + '_new' + ext
        if os.path.exists(new_tasks_file):
            # Keep the previous new-tasks file under a timestamped name
            suffix = datetime.datetime.now(
                datetime.timezone.utc).strftime('%Y%m%d%H%M')
            os.rename(new_tasks_file, new_tasks_file + '.' + suffix)
        with open(new_tasks_file, 'w') as fh:
            json.dump(new_tasks_list, fh, indent=4)
        logger.debug('{} new tasks'.format(len(new_tasks_list)))
コード例 #6
0
def main():
    """
    Run the processing.

    Select the PRIMAVERA-specific data requests and write a CSV file to
    `FILENAME` with one line per unique combination of institute, climate
    model, experiment, variant label and table. Each line holds a DRS id
    built from the first file of the first matching data request, followed
    by the summed file size divided by 1024**4.
    """
    amip_expts = ['highresSST-present', 'highresSST-future']
    coupled_expts = [
        'spinup-1950', 'hist-1950', 'control-1950', 'highres-future'
    ]
    stream1_2_expts = amip_expts + coupled_expts

    # MOHC stream 2 is members r1i2p1f1 to r1i15p1f1
    hadgem_stream2_members = [
        f'r1i{init_index}p1f1' for init_index in range(2, 16)
    ]

    other_models = DataRequest.objects.filter(
        project__short_name='PRIMAVERA',
        experiment__short_name__in=stream1_2_expts,
        variable_request__table_name__startswith='Prim',
        datafile__isnull=False).exclude(
            # Exclude HadGEM stream 2 here; it is selected separately as
            # hadgem_s2 so that it goes through filter_hadgem_stream2.
            # (The prefix was previously misspelt 'HadeGEM', which matches
            # no model name.)
            climate_model__short_name__startswith='HadGEM',
            rip_code__in=hadgem_stream2_members).exclude(
                # Exclude EC-Earth coupled r1i1p1f1
                institute__short_name='EC-Earth-Consortium',
                experiment__short_name__in=coupled_expts,
                rip_code='r1i1p1f1').distinct()

    hadgem_s2 = filter_hadgem_stream2(
        DataRequest.objects.filter(
            project__short_name='PRIMAVERA',
            experiment__short_name__in=stream1_2_expts,
            variable_request__table_name__startswith='Prim',
            climate_model__short_name__startswith='HadGEM',
            rip_code__in=hadgem_stream2_members,
            datafile__isnull=False)).distinct()

    # All tables, not just Prim*, for EC-Earth coupled r1i1p1f1
    ec_earth_s1 = DataRequest.objects.filter(
        institute__short_name='EC-Earth-Consortium',
        experiment__short_name__in=coupled_expts,
        rip_code='r1i1p1f1',
        datafile__isnull=False).distinct()

    wp5 = DataRequest.objects.filter(experiment__short_name__in=[
        'primWP5-amv-neg', 'primWP5-amv-pos', 'dcppc-amv-neg', 'dcppc-amv-pos'
    ],
                                     datafile__isnull=False).distinct()

    prim_reqs = other_models | hadgem_s2 | ec_earth_s1 | wp5

    unique_expts = (prim_reqs.values_list(
        'institute__short_name', 'climate_model__short_name',
        'experiment__short_name', 'rip_code',
        'variable_request__table_name').distinct().order_by(
            'institute__short_name', 'climate_model__short_name',
            'experiment__short_name', 'rip_code',
            'variable_request__table_name'))

    with open(FILENAME, 'w') as fh:
        fh.write('drs_id, Volume (TB)\n')
        for inst_name, model_name, expt_name, rip_code, table_name in unique_expts:
            dreqs = prim_reqs.filter(institute__short_name=inst_name,
                                     climate_model__short_name=model_name,
                                     experiment__short_name=expt_name,
                                     rip_code=rip_code,
                                     variable_request__table_name=table_name)
            # Only an emptiness check is needed, so avoid fetching every row
            if not dreqs.exists():
                continue

            dreq_size = (DataFile.objects.filter(
                data_request__in=dreqs).distinct().aggregate(
                    Sum('size'))['size__sum'])
            # The DRS id components are read from one representative file
            df = dreqs.first().datafile_set.first()
            drs_id = (f'PRIMAVERA.'
                      f'{df.activity_id.short_name}.'
                      f'{df.institute.short_name}.'
                      f'{df.climate_model.short_name}.'
                      f'{df.experiment.short_name}.'
                      f'{df.rip_code}.'
                      f'{df.variable_request.table_name}')
            # Rewrite the names used in the id for MPI DCPP data and for NCAS
            if 'MPI' in drs_id and 'DCPP' in drs_id:
                drs_id = (drs_id.replace('DCPP', 'primWP5').replace(
                    'dcppc', 'primWP5'))
            if 'NCAS' in drs_id:
                drs_id = drs_id.replace('NCAS', 'NERC')
            fh.write(f'{drs_id}, {dreq_size / 1024**4}\n')
コード例 #7
0
def main(args):
    """
    Main entry point

    Log the total volume, and the volume flagged as offline, of a selection
    of data requests for the years 1948 to 2051. If `args.create` is set,
    also create a retrieval request covering the selection.

    :param args: object whose `create` attribute enables creation of the
        retrieval request (presumably the parsed command-line arguments)
    """
    first_year, last_year = 1948, 2051

    # Alternative selection, kept for reference:
    # data_reqs = filter_hadgem_stream2(DataRequest.objects.filter(
    #     climate_model__short_name='HadGEM3-GC31-LL',
    #     experiment__short_name='highres-future',
    #     rip_code__in=['r1i2p1f1', 'r1i3p1f1', 'r1i4p1f1'],
    #     variable_request__table_name__in=[
    #         '3hr', '6hrPlev', '6hrPlevPt', 'AERday', 'AERmon', 'Amon',
    #         'CF3hr', 'CFday', 'CFmon', 'E1hr', 'E3hr', 'E3hrPt', 'Eday',
    #         'EdayZ', 'Emon', 'EmonZ', 'Esubhr', 'LImon', 'Lmon', 'day'
    #     ],
    #     datafile__isnull=False
    # ).exclude(
    #     variable_request__table_name__startswith='Prim'
    # ).distinct())

    # HadGEM3-GC31-MM highres-future, members r1i2p1f1 and r1i3p1f1, in the
    # listed tables and with files
    tables = [
        '3hr', '6hrPlev', '6hrPlevPt', 'AERday', 'AERmon', 'Amon',
        'CF3hr', 'CFday', 'CFmon', 'E1hr', 'E3hr', 'E3hrPt', 'Eday',
        'EdayZ', 'Emon', 'EmonZ', 'Esubhr', 'LImon', 'Lmon', 'day'
    ]
    candidates = DataRequest.objects.filter(
        climate_model__short_name='HadGEM3-GC31-MM',
        experiment__short_name='highres-future',
        rip_code__in=['r1i2p1f1', 'r1i3p1f1'],
        variable_request__table_name__in=tables,
        datafile__isnull=False).exclude(
            variable_request__table_name__startswith='Prim').distinct()
    selected = filter_hadgem_stream2(candidates)

    # Alternative selection, kept for reference:
    # data_reqs = filter_hadgem_stream2(DataRequest.objects.filter(
    #     climate_model__short_name='HadGEM3-GC31-HM',
    #     experiment__short_name='highres-future',
    #     rip_code='r1i3p1f1',
    #     variable_request__table_name__in=[
    #         '3hr', '6hrPlev', '6hrPlevPt', 'AERday', 'AERmon', 'Amon',
    #         'CF3hr', 'CFday', 'CFmon', 'E1hr', 'E3hr', 'E3hrPt', 'Eday',
    #         'EdayZ', 'Emon', 'EmonZ', 'Esubhr', 'LImon', 'Lmon', 'day'
    #     ],
    #     datafile__isnull=False
    # ).exclude(
    #     variable_request__table_name__startswith='Prim'
    # ).distinct())

    # filesizeformat output has its non-breaking spaces replaced with plain
    # spaces before it is logged.
    total_text = filesizeformat(
        get_request_size(selected, first_year, last_year)
    ).replace('\xa0', ' ')
    offline_text = filesizeformat(
        get_request_size(selected, first_year, last_year, offline=True)
    ).replace('\xa0', ' ')
    logger.debug('Total data volume: {} Volume to restore: {}'.format(
        total_text, offline_text))

    if not args.create:
        return

    requester = User.objects.get(username='******')
    retrieval = RetrievalRequest.objects.create(requester=requester,
                                                start_year=first_year,
                                                end_year=last_year)
    # NOTE(review): the creation date is overwritten with a fixed date of
    # 2000-01-01 00:00 UTC; the reason is not visible in this function.
    retrieval.date_created = datetime.datetime(2000, 1, 1, 0, 0,
                                               tzinfo=datetime.timezone.utc)
    retrieval.save()

    retrieval.data_request.add(*selected)

    logger.debug('Retrieval request {} created.'.format(retrieval.id))
コード例 #8
0
def main(args):
    """
    Main entry point

    Build the list of task names for the hist-1950 selections of three
    HadGEM3-GC31 models and write it, as JSON, to `args.json_file`. If that
    file already existed and held a non-empty list, the names that were not
    in it are additionally written to a sibling file whose name has `_new`
    inserted before the extension; an existing `_new` file is first renamed
    with a UTC timestamp suffix.

    Task names in the output JSON file are in the form:

    <climate-model>_<experiment>_<variant-label>_<table>_<variable>

    e.g.:

    HadGEM3-GC31-LM_highresSST-present_r1i1p1f1_Amon_psl

    :param args: object providing the `json_file` path (presumably the
        parsed command-line arguments)
    """
    existing_tasks = []
    if os.path.exists(args.json_file):
        with open(args.json_file) as fh:
            existing_tasks = json.load(fh)

        logger.debug('{} existing tasks loaded from file'.format(
            len(existing_tasks)))

    # Tables shared by all three selections
    tables = [
        '3hr', '6hrPlev', '6hrPlevPt', 'AERday', 'AERmon', 'Amon',
        'CF3hr', 'CFday', 'CFmon', 'E1hr', 'E3hr', 'E3hrPt', 'Eday',
        'EdayZ', 'Emon', 'EmonZ', 'Esubhr', 'LImon', 'Lmon', 'day'
    ]

    def _hist_requests(model_name, rip_codes):
        """Select one model's hist-1950 requests for the given members."""
        return filter_hadgem_stream2(
            DataRequest.objects.filter(
                climate_model__short_name=model_name,
                experiment__short_name='hist-1950',
                rip_code__in=rip_codes,
                variable_request__table_name__in=tables,
                datafile__isnull=False).exclude(
                    variable_request__table_name__startswith='Prim'
                ).distinct())

    ll_hist = _hist_requests('HadGEM3-GC31-LL',
                             [f'r1i{i}p1f1' for i in range(2, 9)])
    mm_hist = _hist_requests('HadGEM3-GC31-MM', ['r1i2p1f1', 'r1i3p1f1'])
    hm_hist = _hist_requests('HadGEM3-GC31-HM', ['r1i2p1f1', 'r1i3p1f1'])

    # task querysets can be ORed together with |

    all_tasks = (ll_hist | mm_hist | hm_hist)

    task_name_list = [
        '{}_{}_{}_{}_{}'.format(dr.climate_model.short_name,
                                dr.experiment.short_name, dr.rip_code,
                                dr.variable_request.table_name,
                                dr.variable_request.cmor_name)
        for dr in all_tasks
    ]
    # One name is produced per data request, so this is the task count
    logger.debug('{} tasks in total'.format(len(task_name_list)))

    with open(args.json_file, 'w') as fh:
        json.dump(task_name_list, fh, indent=4)

    if existing_tasks:
        # Sorted so that the new-tasks file is the same from run to run
        new_tasks_list = sorted(set(task_name_list) - set(existing_tasks))

        # Insert '_new' before the extension only. A plain str.replace of
        # '.json' would also rewrite directory names containing '.json' and,
        # for a file name without '.json', would return the task file's own
        # path so that the dump below overwrote the task list.
        root, ext = os.path.splitext(args.json_file)
        new_tasks_file = root + '_new' + ext
        if os.path.exists(new_tasks_file):
            # Keep the previous new-tasks file under a timestamped name
            suffix = datetime.datetime.now(
                datetime.timezone.utc).strftime('%Y%m%d%H%M')
            os.rename(new_tasks_file, new_tasks_file + '.' + suffix)
        with open(new_tasks_file, 'w') as fh:
            json.dump(new_tasks_list, fh, indent=4)
        logger.debug('{} new tasks'.format(len(new_tasks_list)))
コード例 #9
0
def main(args):
    """
    Main entry point

    Build the list of task names for a set of HadGEM3-GC31 data request
    selections and write it, as JSON, to `args.json_file`. If that file
    already existed and held a non-empty list, the names that were not in
    it are additionally written to a sibling file whose name has `_new`
    inserted before the extension; an existing `_new` file is first renamed
    with a UTC timestamp suffix.

    Task names in the output JSON file are in the form:

    <climate-model>_<experiment>_<variant-label>_<table>_<variable>

    e.g.:

    HadGEM3-GC31-LM_highresSST-present_r1i1p1f1_Amon_psl

    :param args: object providing the `json_file` path (presumably the
        parsed command-line arguments)
    """
    existing_tasks = []
    if os.path.exists(args.json_file):
        with open(args.json_file) as fh:
            existing_tasks = json.load(fh)

        logger.debug('{} existing tasks loaded from file'.format(
            len(existing_tasks)))

    def _prim_requests(**terms):
        """
        Select requests that have files and sit in a table whose name starts
        with 'Prim', other than PrimSIday, narrowed by the given terms.
        """
        return DataRequest.objects.filter(
            **terms,
            variable_request__table_name__startswith='Prim',
            datafile__isnull=False).exclude(
                variable_request__table_name='PrimSIday').distinct()

    stream1_member = 'r1i1p1f1'
    members_2_to_3 = [f'r1i{i}p1f1' for i in range(2, 4)]
    members_2_to_8 = [f'r1i{i}p1f1' for i in range(2, 9)]
    amip_s2_members = ['r1i2p1f1', 'r1i3p1f1', 'r1i14p1f1', 'r1i15p1f1']

    fx = DataRequest.objects.filter(
        climate_model__short_name__startswith='HadGEM3-GC31',
        variable_request__table_name__in=['fx', 'Ofx'],
        datafile__isnull=False).distinct()

    # hist-1950: r1i1p1f1 is used as selected, the other members go through
    # filter_hadgem_stream2
    ll_hist = _prim_requests(
        climate_model__short_name='HadGEM3-GC31-LL',
        experiment__short_name='hist-1950',
        rip_code=stream1_member)

    ll_hist_s2 = filter_hadgem_stream2(_prim_requests(
        climate_model__short_name='HadGEM3-GC31-LL',
        experiment__short_name='hist-1950',
        rip_code__in=members_2_to_8))

    mm_hist = _prim_requests(
        climate_model__short_name='HadGEM3-GC31-MM',
        experiment__short_name='hist-1950',
        rip_code=stream1_member)

    mm_hist_s2 = filter_hadgem_stream2(_prim_requests(
        climate_model__short_name='HadGEM3-GC31-MM',
        experiment__short_name='hist-1950',
        rip_code__in=members_2_to_3))

    hm_hist = _prim_requests(
        climate_model__short_name='HadGEM3-GC31-HM',
        experiment__short_name='hist-1950',
        rip_code=stream1_member)

    hm_hist_s2 = filter_hadgem_stream2(_prim_requests(
        climate_model__short_name='HadGEM3-GC31-HM',
        experiment__short_name='hist-1950',
        rip_code__in=members_2_to_3))

    # control-1950 (and, for LL and MM, spinup-1950)
    ll_ctrl = _prim_requests(
        climate_model__short_name='HadGEM3-GC31-LL',
        experiment__short_name__in=['control-1950', 'spinup-1950'],
        rip_code=stream1_member)

    mm_ctrl = _prim_requests(
        climate_model__short_name='HadGEM3-GC31-MM',
        experiment__short_name__in=['control-1950', 'spinup-1950'],
        rip_code=stream1_member)

    hm_ctrl = _prim_requests(
        climate_model__short_name='HadGEM3-GC31-HM',
        experiment__short_name__in=['control-1950'],  # , 'spinup-1950'],
        rip_code=stream1_member)

    # The HH selections keep PrimSIday and are passed through
    # filter_hadgem_stream2 even though they are r1i1p1f1
    hh_ctrl = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name='HadGEM3-GC31-HH',
            experiment__short_name__in=['control-1950'],  # , 'spinup-1950'],
            rip_code=stream1_member,
            variable_request__table_name__startswith='Prim',
            datafile__isnull=False).distinct())

    hh_hist = filter_hadgem_stream2(
        DataRequest.objects.filter(
            climate_model__short_name='HadGEM3-GC31-HH',
            experiment__short_name='hist-1950',
            rip_code=stream1_member,
            variable_request__table_name__startswith='Prim',
            datafile__isnull=False).distinct())

    hh_future = filter_hadgem_stream2(
        DataRequest.objects.filter(climate_model__short_name='HadGEM3-GC31-HH',
                                   experiment__short_name='highres-future',
                                   rip_code=stream1_member,
                                   variable_request__table_name__in=[
                                       'Primday', 'PrimdayPt', 'Prim3hr',
                                       'Prim3hrPt', 'Prim6hr', 'PrimOday',
                                       'PrimOmon'
                                   ],
                                   datafile__isnull=False).distinct())

    # highresSST-present
    ll_mm_hm_amip_s1 = _prim_requests(
        climate_model__short_name__in=[
            'HadGEM3-GC31-LM', 'HadGEM3-GC31-MM', 'HadGEM3-GC31-HM'
        ],
        experiment__short_name='highresSST-present',
        rip_code=stream1_member)

    ll_mm_amip_s2 = filter_hadgem_stream2(_prim_requests(
        climate_model__short_name__in=[
            'HadGEM3-GC31-LM', 'HadGEM3-GC31-MM'
        ],
        experiment__short_name='highresSST-present',
        rip_code__in=amip_s2_members))

    hm_amip_s2 = filter_hadgem_stream2(_prim_requests(
        climate_model__short_name='HadGEM3-GC31-HM',
        experiment__short_name='highresSST-present',
        rip_code__in=['r1i2p1f1', 'r1i3p1f1']))

    # highresSST-future
    ll_mm_hm_famip_s1 = _prim_requests(
        climate_model__short_name__in=[
            'HadGEM3-GC31-LM', 'HadGEM3-GC31-MM', 'HadGEM3-GC31-HM'
        ],
        experiment__short_name='highresSST-future',
        rip_code=stream1_member)

    # Despite the name, this selection also lists HadGEM3-GC31-HM
    ll_mm_famip_s2 = filter_hadgem_stream2(_prim_requests(
        climate_model__short_name__in=[
            'HadGEM3-GC31-LM', 'HadGEM3-GC31-MM', 'HadGEM3-GC31-HM'
        ],
        experiment__short_name='highresSST-future',
        rip_code__in=amip_s2_members))

    # highres-future
    fcoup_s1 = _prim_requests(
        climate_model__short_name__in=[
            'HadGEM3-GC31-LL', 'HadGEM3-GC31-MM', 'HadGEM3-GC31-HM'
        ],
        experiment__short_name='highres-future',
        rip_code=stream1_member)

    fcoup_s2 = filter_hadgem_stream2(_prim_requests(
        climate_model__short_name__in=[
            'HadGEM3-GC31-LL', 'HadGEM3-GC31-MM', 'HadGEM3-GC31-HM'
        ],
        experiment__short_name='highres-future',
        rip_code__in=['r1i2p1f1', 'r1i3p1f1', 'r1i4p1f1']))

    # spinup-1950 for every HadGEM3-GC31 model and any variant label
    spinup = _prim_requests(
        climate_model__short_name__startswith='HadGEM3-GC31',
        experiment__short_name='spinup-1950')

    # Any table whose name contains 'fx'; this also matches the 'fx' and
    # 'Ofx' tables already selected by `fx` above
    fx_Ofx = DataRequest.objects.filter(
        climate_model__short_name__startswith='HadGEM3-GC31',
        variable_request__table_name__contains='fx',
        datafile__isnull=False).distinct()

    # task querysets can be ORed together with |

    all_tasks = (fx | ll_hist | ll_hist_s2 | mm_hist | mm_hist_s2 | hm_hist
                 | hm_hist_s2 | ll_ctrl | mm_ctrl | hm_ctrl | hh_ctrl | hh_hist
                 | hh_future | ll_mm_hm_amip_s1 | ll_mm_amip_s2 | hm_amip_s2
                 | ll_mm_hm_famip_s1 | ll_mm_famip_s2 | fcoup_s1 | fcoup_s2
                 | spinup | fx_Ofx)

    task_name_list = [
        '{}_{}_{}_{}_{}'.format(dr.climate_model.short_name,
                                dr.experiment.short_name, dr.rip_code,
                                dr.variable_request.table_name,
                                dr.variable_request.cmor_name)
        for dr in all_tasks
    ]
    # One name is produced per data request, so this is the task count
    logger.debug('{} tasks in total'.format(len(task_name_list)))

    with open(args.json_file, 'w') as fh:
        json.dump(task_name_list, fh, indent=4)

    if existing_tasks:
        # Sorted so that the new-tasks file is the same from run to run
        new_tasks_list = sorted(set(task_name_list) - set(existing_tasks))

        # Insert '_new' before the extension only. A plain str.replace of
        # '.json' would also rewrite directory names containing '.json' and,
        # for a file name without '.json', would return the task file's own
        # path so that the dump below overwrote the task list.
        root, ext = os.path.splitext(args.json_file)
        new_tasks_file = root + '_new' + ext
        if os.path.exists(new_tasks_file):
            # Keep the previous new-tasks file under a timestamped name
            suffix = datetime.datetime.now(
                datetime.timezone.utc).strftime('%Y%m%d%H%M')
            os.rename(new_tasks_file, new_tasks_file + '.' + suffix)
        with open(new_tasks_file, 'w') as fh:
            json.dump(new_tasks_list, fh, indent=4)
        logger.debug('{} new tasks'.format(len(new_tasks_list)))