Example #1
def main(args):
    """
    Main entry point
    """
    base_output_dir = Settings.get_solo().base_output_dir
    ceda_base = '/badc/cmip6/data'

    for ed in ESGFDataset.objects.filter(status='PUBLISHED'):
        for df in ed.data_request.datafile_set.order_by('name'):
            ceda_dir = os.path.join(ceda_base, construct_drs_path(df))
            ceda_path = os.path.join(ceda_dir, df.name)
            if df.directory:
                logger.error('Directory given {}'.format(
                    os.path.join(df.directory, df.name)))
            if os.path.exists(ceda_path):
                df.online = True
                df.directory = ceda_dir
                df.save()
            else:
                logger.error('Not in archive {}'.format(ceda_path))
                continue
            base_dir = os.path.join(base_output_dir, construct_drs_path(df))
            base_path = os.path.join(base_dir, df.name)
            if os.path.exists(base_path):
                os.remove(base_path)
                logger.error('Deleted {}'.format(base_path))
            if not os.path.exists(base_dir):
                os.makedirs(base_dir)
            os.symlink(ceda_path, base_path)
Example #2
def main(args):
    """
    Main entry point
    """
    dfs = DataFile.objects.filter(climate_model__short_name='MPI-ESM1-2-XR',
                                  experiment__short_name='highres-future',
                                  version='v20190617')

    prim_gws = '/gws/nopw/j04/primavera5/stream1'

    old_dirs = []

    for df in dfs:
        old_drs_path = construct_drs_path(df)
        df.version = 'v20190517'
        df.save()
        if df.online:
            # file itself
            gws = get_gws(df.directory)
            old_dir = df.directory
            new_dir = os.path.join(gws, construct_drs_path(df))
            if not os.path.exists(new_dir):
                os.makedirs(new_dir)
            os.rename(os.path.join(df.directory, df.name),
                      os.path.join(new_dir, df.name))
            df.directory = new_dir
            df.save()
            if old_dir not in old_dirs:
                old_dirs.append(old_dir)

            # sym link
            if not is_same_gws(df.directory, prim_gws):
                old_sym_dir = os.path.join(prim_gws, old_drs_path)
                old_sym = os.path.join(old_sym_dir, df.name)
                # use lexists() because the link target has already been
                # moved, so the old path is a broken symlink and exists()
                # would return False
                if os.path.lexists(old_sym):
                    if os.path.islink(old_sym):
                        os.remove(old_sym)
                    else:
                        logger.warning(f'Not symlink as expected: {old_sym}')
                new_sym_dir = os.path.join(prim_gws, construct_drs_path(df))
                if not os.path.exists(new_sym_dir):
                    os.makedirs(new_sym_dir)
                os.symlink(os.path.join(new_dir, df.name),
                           os.path.join(new_sym_dir, df.name))
                if old_sym_dir not in old_dirs:
                    old_dirs.append(old_sym_dir)

    logger.debug(f'Removing {len(old_dirs)} old dirs')
    for old_dir in old_dirs:
        delete_drs_dir(old_dir)
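
delete_drs_dir() is used in Example #2 and throughout these excerpts but is never shown. A minimal sketch, assuming it removes an empty DRS directory and then prunes any parents that become empty (the stop condition is a guess):

import os

def delete_drs_dir(directory, stop_name='stream1'):
    """Delete a DRS directory, then prune parents left empty (sketch)."""
    os.rmdir(directory)
    parent = os.path.dirname(directory)
    # the stop_name guard is an assumption to avoid climbing above the
    # workspace root; the real helper's stop condition may differ
    while os.path.basename(parent) != stop_name and not os.listdir(parent):
        os.rmdir(parent)
        parent = os.path.dirname(parent)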
Example #3
def main(args):
    """
    Main entry point
    """
    base_output_dir = Settings.get_solo().base_output_dir

    for data_file in DataFile.objects.filter(online=True):
        gws_pattern = r'^/group_workspaces/jasmin2/primavera(\d)/(\S*)'
        gws = re.match(gws_pattern, data_file.directory)
        if not gws:
            logger.error('No GWS match for {}'.format(data_file.name))
            continue
        new_gws = '/gws/nopw/j04/primavera' + gws.group(1)
        new_dir = os.path.join(new_gws, gws.group(2))
        new_path = os.path.join(new_dir, data_file.name)
        if not os.path.exists(new_path):
            logger.error('Cannot find {}'.format(new_path))
            continue
        data_file.directory = new_dir
        data_file.save()

        if not is_same_gws(data_file.directory, base_output_dir):
            link_path = os.path.join(base_output_dir,
                                     construct_drs_path(data_file),
                                     data_file.name)
            # it's got to be a link but check anyway
            if os.path.islink(link_path):
                os.remove(link_path)
                os.symlink(os.path.join(data_file.directory, data_file.name),
                           link_path)
            else:
                logger.error('Expected a link but found a file at {}'.
                             format(link_path))
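
get_gws() and is_same_gws() appear in most of these excerpts. Judging by the paths in Examples #2 and #3 (e.g. /gws/nopw/j04/primavera5/stream1), a plausible sketch is below; the exact pattern, and whether the root includes the stream component, are assumptions:

import re

GWS_PATTERN = r'^/gws/nopw/j04/primavera\d+/stream\d+'

def get_gws(path):
    """Return the group-workspace root of a path (sketch)."""
    match = re.match(GWS_PATTERN, path)
    if not match:
        raise ValueError(f'Cannot find a group workspace in {path}')
    return match.group(0)

def is_same_gws(path1, path2):
    """Return True if two paths are in the same group workspace (sketch)."""
    return get_gws(path1) == get_gws(path2)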
Example #4
def main():
    """
    Main entry point
    """
    affected_files = DataFile.objects.filter(
        climate_model__short_name='EC-Earth3P-HR',
        experiment__short_name='highresSST-present',
        rip_code='r1i1p1f1',
        variable_request__table_name='E3hr',
        variable_request__cmor_name__in=['clivi', 'rsdt']).distinct().order_by(
            'variable_request__table_name', 'variable_request__cmor_name')

    num_files = affected_files.count()
    logger.debug(f'{num_files} affected files found')

    for df in affected_files:
        if not df.directory.startswith(ARCHIVE_BASE):
            logger.error(f'{df.name} not in {ARCHIVE_BASE}')
            continue
        new_dir = os.path.join(BASE_GWS, construct_drs_path(df))
        new_path = os.path.join(new_dir, df.name)
        old_path = os.path.join(df.directory, df.name)
        if not os.path.exists(new_dir):
            os.makedirs(new_dir)
        shutil.copy(old_path, new_path)
        df.directory = new_dir
        df.save()
Example #5
def main():
    """
    Main entry point
    """
    affected_files = DataFile.objects.filter(
        climate_model__short_name='HadGEM3-GC31-HH',
        variable_request__table_name__in=['SImon', 'SIday',
                                          'PrimSIday']).distinct().order_by(
                                              'variable_request__table_name',
                                              'variable_request__cmor_name')

    num_files = affected_files.count()
    logger.debug(f'{num_files} affected files found')

    for df in affected_files:
        if not df.directory.startswith(ARCHIVE_BASE):
            logger.error(f'{df.name} not in {ARCHIVE_BASE}')
            continue
        new_dir = os.path.join(BASE_GWS, construct_drs_path(df))
        new_path = os.path.join(new_dir, df.name)
        old_path = os.path.join(df.directory, df.name)
        if not os.path.exists(new_dir):
            os.makedirs(new_dir)
        shutil.copy(old_path, new_path)
        df.directory = new_dir
        df.save()
Example #6
def main():
    """Main entry point"""
    data_reqs = DataRequest.objects.filter(
        climate_model__short_name='EC-Earth3',
        experiment__short_name='highresSST-present',
        rip_code__in=['r1i1p1f1'],
        variable_request__table_name='Amon',
        variable_request__cmor_name__in=[
            'clt', 'hus', 'pr', 'rlut', 'rlutcs', 'rsut', 'rsutcs', 'ta',
            'tas', 'ts', 'ua', 'va', 'zg'
        ])
    logger.debug('{} data requests found'.format(data_reqs.count()))

    for data_req in data_reqs:
        for data_file in data_req.datafile_set.all():
            if not data_file.online:
                raise ValueError('{} is not online'.format(data_file.name))
            src_path = os.path.join(data_file.directory, data_file.name)
            dest_dir = os.path.join(TEST_DATA_DIR,
                                    construct_drs_path(data_file))
            if not os.path.exists(dest_dir):
                os.makedirs(dest_dir)
            dest_path = os.path.join(dest_dir, data_file.name)
            shutil.copyfile(src_path, dest_path)
            data_file.directory = dest_dir
            data_file.save()
Example #7
def main(args):
    """
    Main entry point
    """
    logger.debug('Starting file structure scan.')

    for nc_file in ilist_files(args.directory):
        nc_file_name = os.path.basename(nc_file)
        db_files = DataFile.objects.filter(name=nc_file_name)

        if db_files.count() == 0:
            logger.error('File not found in database: {}'.format(nc_file))
            continue
        elif db_files.count() > 1:
            logger.error('{} entries found in database for file: {}'.
                         format(db_files.count(), nc_file))
            continue
        db_file = db_files.first()

        act_size = os.path.getsize(nc_file)
        if act_size != db_file.size:
            logger.info('File %s has size %d', db_file.name, act_size)
            db_file.online = False
            db_file.directory = None
            db_file.save()

            os.remove(nc_file)
            if not is_same_gws(nc_file, BASE_OUTPUT_DIR):
                sym_link_path = os.path.join(BASE_OUTPUT_DIR,
                                             construct_drs_path(db_file),
                                             db_file.name)
                try:
                    if os.path.exists(sym_link_path):
                        os.remove(sym_link_path)
                except OSError:
                    logger.error('Unable to delete sym link %s', sym_link_path)
Example #8
def scan_database():
    """
    Start the scan of the database.
    """
    logger.debug('Starting database scan.')

    for data_file in DataFile.objects.filter(online=True).iterator():
        full_path = os.path.join(data_file.directory, data_file.name)
        if not os.path.exists(full_path):
            logger.warning('File cannot be found on disk, status changed to '
                           'offline: {}'.format(full_path))
            data_file.online = False
            data_file.directory = None
            data_file.save()
            continue

        if not is_same_gws(data_file.directory, BASE_OUTPUT_DIR):
            sym_link_dir = os.path.join(BASE_OUTPUT_DIR,
                                        construct_drs_path(data_file))
            sym_link_path = os.path.join(sym_link_dir,
                                         data_file.name)
            if not os.path.exists(sym_link_path):
                if not os.path.exists(sym_link_dir):
                    os.makedirs(sym_link_dir)
                os.symlink(full_path, sym_link_path)
                logger.warning('Created symlink for file {} from {}'.
                               format(data_file.name, sym_link_path))

    logger.debug('Completed database scan.')
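
ilist_files(), used in Example #7 above and Example #12 below, is not shown in these excerpts. A plausible sketch, assuming it lazily yields the netCDF files below a directory:

import os

def ilist_files(directory, suffix='.nc'):
    """Yield the full path of each matching file under directory (sketch)."""
    for root, _dirs, files in os.walk(directory):
        for file_name in files:
            if file_name.endswith(suffix):
                yield os.path.join(root, file_name)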
Example #9
def main(args):
    """
    Main entry point
    """
    affected_files = DataFile.objects.filter(
        climate_model__short_name='CMCC-CM2-VHR4',
        experiment__short_name='control-1950',
        variable_request__table_name=args.table_name
    ).distinct().order_by(
        'variable_request__table_name', 'variable_request__cmor_name'
    )

    num_files = affected_files.count()
    logger.debug(f'{num_files} affected files found')

    for df in affected_files:
        if not df.directory.startswith(ARCHIVE_BASE):
            logger.error(f'{df.name} not in {ARCHIVE_BASE}')
            continue
        new_dir = os.path.join(BASE_GWS, construct_drs_path(df))
        new_path = os.path.join(new_dir, df.name)
        old_path = os.path.join(df.directory, df.name)
        if not os.path.exists(new_dir):
            os.makedirs(new_dir)
        shutil.copy(old_path, new_path)
        df.directory = new_dir
        df.save()
Example #10
def main(args):
    """
    Main entry point
    """
    for ret_req in RET_REQS:
        rr = RetrievalRequest.objects.get(id=ret_req)
        logger.debug('Starting retrieval request {}'.format(ret_req))
        for dr in rr.data_request.all():
            logger.debug('Starting data request {}'.format(dr))
            num_files_moved = 0
            for df in dr.datafile_set.filter(online=True):
                if df.directory.startswith(INCOMING_DIR):
                    drs_path = construct_drs_path(df)
                    dest_dir = os.path.join(NEW_BASE_OUTPUT_DIR, drs_path)
                    if not os.path.exists(dest_dir):
                        os.makedirs(dest_dir)
                    dest_path = os.path.join(dest_dir, df.name)
                    src_path = os.path.join(df.directory, df.name)
                    # copy the file
                    shutil.copy(src_path, dest_path)
                    # check its checksum
                    checksum = adler32(dest_path)
                    if checksum != df.checksum_set.first().checksum_value:
                        msg = 'Checksum does not match for {}'.format(df.name)
                        raise ValueError(msg)
                    # construct a sym link
                    primary_path = os.path.join(BASE_OUTPUT_DIR, drs_path)
                    if not os.path.exists(primary_path):
                        os.makedirs(primary_path)
                    os.symlink(dest_path, os.path.join(primary_path, df.name))
                    # update the DB
                    df.directory = dest_dir
                    df.save()
                    num_files_moved += 1
            logger.debug('{} files moved'.format(num_files_moved))
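
Example #10 verifies each copy with adler32(), a helper that is not shown here (it also appears in Examples #12 and #23). A minimal sketch using zlib, assuming the database stores the checksum as a decimal string:

import zlib

def adler32(file_path, chunk_size=1024 * 1024):
    """Return the Adler-32 checksum of a file as a string (sketch)."""
    checksum = 1  # Adler-32 is seeded with 1
    with open(file_path, 'rb') as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b''):
            checksum = zlib.adler32(chunk, checksum)
    return str(checksum)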
Example #11
def main(args):
    """
    Main entry point
    """
    copy_dir = '/gws/nopw/j04/primavera5/upload/CMCC/fluxes'

    dreqs = DataRequest.objects.filter(
        institute__short_name='CMCC',
        experiment__short_name__in=[
            'highres-future', 'highresSST-future', 'hist-1950'
        ],
        variable_request__cmor_name__in=['rlut', 'rlutcs', 'rsutcs'],
        datafile__isnull=False).distinct()

    num_dreqs = dreqs.count()
    expected_dreqs = 18
    if num_dreqs != expected_dreqs:
        logger.error(f'Found {num_dreqs} but was expecting {expected_dreqs}.')
        sys.exit(1)

    for dreq in dreqs:
        logger.info(dreq)
        for df in dreq.datafile_set.order_by('name'):
            new_dir = os.path.join(copy_dir, construct_drs_path(df))
            if not os.path.exists(new_dir):
                os.makedirs(new_dir)
            shutil.copyfile(os.path.join(df.directory, df.name),
                            os.path.join(new_dir, df.name))
Example #12
def main(args):
    """Main entry point"""
    base_dir = Settings.get_solo().base_output_dir

    for extracted_file in ilist_files(args.top_dir):
        found_name = os.path.basename(extracted_file)

        try:
            data_file = DataFile.objects.get(name=found_name)
        except django.core.exceptions.ObjectDoesNotExist:
            logger.warning('Cannot find DMT entry. Skipping {}'.
                           format(extracted_file))
            continue

        found_checksum = adler32(extracted_file)
        if found_checksum != data_file.checksum_set.first().checksum_value:
            logger.warning("Checksum doesn't match. Skipping {}".
                           format(found_name))
            continue

        dest_dir = os.path.join(get_gws_any_dir(extracted_file), 'stream1',
                                construct_drs_path(data_file))
        dest_path = os.path.join(dest_dir, found_name)
        if os.path.exists(dest_path):
            logger.warning('Skipping {} as it already exists at {}'.
                           format(found_name, dest_path))
            continue
        # create the directory if it doesn't exist
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)

        os.rename(extracted_file, dest_path)

        # create a link from the base dir
        if not is_same_gws(dest_path, base_dir):
            link_dir = os.path.join(base_dir, construct_drs_path(data_file))
            link_path = os.path.join(link_dir, data_file.name)
            if not os.path.exists(link_dir):
                os.makedirs(link_dir)
            os.symlink(dest_path, link_path)

        data_file.online = True
        data_file.directory = dest_dir
        data_file.save()
Example #13
def main(args):
    """
    Main entry point
    """
    dreqs = DataRequest.objects.filter(
        institute__short_name='MPI-M',
        experiment__short_name__in=['control-1950', 'hist-1950'],
        variable_request__cmor_name='tos',
        datafile__isnull=False).distinct()

    logger.debug(f'Found {dreqs.count()} datasets')

    for dreq in dreqs:
        if dreq.esgfdataset_set.all():
            # ESGF dataset's been created...
            esgf = dreq.esgfdataset_set.first()
            if esgf.status == 'PUBLISHED':
                # ... and published so the data's in the CEDA archive
                # and symlinked from the PRIMAVERA data structure
                # All sym links will be in one directory
                set_dir = os.path.join(
                    BASE_OUTPUT_DIR,
                    construct_drs_path(dreq.datafile_set.first()))
                for df in dreq.datafile_set.all():
                    file_path = os.path.join(set_dir, df.name)
                    if not os.path.islink(file_path):
                        logger.warning(f'Expected a sym link {file_path}')
                        continue
                    try:
                        os.remove(file_path)
                    except OSError as exc:
                        logger.error(str(exc))
                    df.online = False
                    df.directory = None
                    df.save()
                delete_drs_dir(set_dir)
                logger.debug(f'Removed files for ESGFDataset {esgf}')
                esgf.status = 'CREATED'
                esgf.save()
                continue
        # The data's not been published so delete the files and their sym links
        delete_files(dreq.datafile_set.all(), BASE_OUTPUT_DIR)
        logger.debug(f'Removed files for DataRequest {dreq}')
        dreq.datafile_set.update(directory=None, online=False)

    for dreq in dreqs:
        dreq.datafile_set.update(version='v20191129')
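
delete_files() in Example #13 is another helper that is not shown. A hypothetical reconstruction based on the symlink handling in Examples #24 and #26; the real implementation may differ:

import os

def delete_files(data_files, base_dir):
    """Delete each file and any symlink to it from base_dir (sketch)."""
    for df in data_files:
        if not df.directory:
            continue
        file_path = os.path.join(df.directory, df.name)
        if os.path.exists(file_path):
            os.remove(file_path)
        # remove the symlink from the primary structure if there is one
        if not df.directory.startswith(base_dir):
            sym_path = os.path.join(base_dir, construct_drs_path(df), df.name)
            if os.path.islink(sym_path):
                os.remove(sym_path)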
Example #14
def main(args):
    """
    Main entry point
    """
    dreqs = DataRequest.objects.filter(
        climate_model__short_name='CMCC-CM2-VHR4',
        experiment__short_name='control-1950',
        datafile__isnull=False).exclude(variable_request__table_name__in=[
            'LImon', 'Lmon', 'Oday', 'Omon', 'PrimOday', 'PrimOmon', 'SIday',
            'SImon'
        ]).distinct().order_by('variable_request__table_name',
                               'variable_request__cmor_name')

    num_dreqs = dreqs.count()
    logger.info(f'{num_dreqs} data requests found')

    for dreq in dreqs:
        try:
            df = dreq.datafile_set.get(name__contains='198207')
        except django.core.exceptions.ObjectDoesNotExist:
            logger.error(f'{dreq} no files found in DMT')
            continue
        logger.debug(f'Replacing {df.name}')
        file_name = df.name
        old_dir = df.directory
        old_path = os.path.join(old_dir, file_name)
        drs_path = construct_drs_path(df)
        incoming_dir = os.path.join(BASE_INCOMING_DIR,
                                    drs_path).replace(df.version, 'v20200401')
        incoming_path = os.path.join(incoming_dir, file_name)
        if not os.path.exists(incoming_path):
            logger.error(f'{incoming_path} not found')
            continue
        # Copy
        os.remove(old_path)
        shutil.copy(incoming_path, old_path)
        df.tape_url = 'et:21500'
        df.incoming_directory = incoming_dir
        df.save()
        checksum = adler32(old_path)
        df.checksum_set.all().delete()
        df.tapechecksum_set.all().delete()
        Checksum.objects.create(data_file=df,
                                checksum_value=checksum,
                                checksum_type='ADLER32')
        TapeChecksum.objects.create(data_file=df,
                                    checksum_value=checksum,
                                    checksum_type='ADLER32')
Example #15
    def __init__(self, datafile, new_value, update_file_only=False):
        """
        Initialise the class

        :param pdata_app.models.DataFile datafile: the file to update
        :param str new_value: the new value to apply
        :param bool update_file_only: if true then update just the file and
            don't make any changes to the database.
        """
        self.datafile = datafile
        self.new_value = new_value
        self.old_filename = self.datafile.name
        self.old_directory = self.datafile.directory
        self.old_sym_link_dir = os.path.join(BASE_OUTPUT_DIR,
                                             construct_drs_path(self.datafile))
        self.new_filename = None
        self.new_directory = None

        self.update_file_only = update_file_only
Example #16
def main(args):
    """
    Main entry point
    """
    datasets = ESGFDataset.objects.filter(
        data_request__institute__short_name='MPI-M',
        data_request__experiment__short_name='highresSST-present',
        status='PUBLISHED')

    logger.debug(f'Found {datasets.count()} datasets')

    for dataset in datasets:
        for datafile in dataset.data_request.datafile_set.all():
            dest_dir = os.path.join(TAPE_WRITE_DIR,
                                    construct_drs_path(datafile))
            if not os.path.exists(dest_dir):
                os.makedirs(dest_dir)
            shutil.copy(os.path.join(datafile.directory, datafile.name),
                        dest_dir)
        logger.debug(f'Copied {dataset}')
Example #17
def main(args):
    """
    Main entry point
    """
    data_req = DataRequest.objects.get(
        climate_model__short_name='EC-Earth3',
        experiment__short_name='highresSST-present',
        variable_request__table_name='day',
        variable_request__cmor_name='va'
    )

    links_created = 0
    for data_file in data_req.datafile_set.all():
        drs_path = construct_drs_path(data_file)
        stream1_dir = os.path.join(BASE_OUTPUT_DIR, drs_path)
        stream1_file = os.path.join(stream1_dir, data_file.name)
        dest_file = os.path.join(NEW_BASE_OUTPUT_DIR, drs_path, data_file.name)
        if not os.path.exists(stream1_file):
            os.symlink(dest_file, stream1_file)
            links_created += 1

    logger.debug('{} links created'.format(links_created))
Example #18
def main(args):
    """
    Main entry point
    """
    bad_files = list_files(BAD_DIR)

    logger.debug(f'{len(bad_files)} files found')

    for bf in bad_files:
        df = DataFile.objects.get(name=os.path.basename(bf))
        new_dir = os.path.join(BASE_OUTPUT_DIR, construct_drs_path(df))
        new_path = os.path.join(new_dir, df.name)
        if not os.path.exists(new_dir):
            os.makedirs(new_dir)
        if os.path.exists(new_path):
            if os.path.islink(new_path):
                os.remove(new_path)
            else:
                logger.error(f'{new_path} is not a link')
                continue
        os.rename(bf, new_path)
        df.directory = new_dir
        df.save()
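
list_files() in Example #18 is called with len(), so unlike ilist_files() it presumably returns a list. A one-line sketch in terms of the ilist_files() sketch given after Example #8:

def list_files(directory, suffix='.nc'):
    """Return a list of the matching files under directory (sketch)."""
    return list(ilist_files(directory, suffix))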
Example #19
    def _rename_file(self):
        """
        Rename the file on disk and move to its new directory. Update the link
        from the primary directory.
        """
        if not os.path.exists(self.new_directory):
            os.makedirs(self.new_directory)

        os.rename(os.path.join(self.old_directory, self.old_filename),
                  os.path.join(self.new_directory, self.new_filename))

        # check for empty directory
        if not os.listdir(self.old_directory):
            delete_drs_dir(self.old_directory)

        # Update the symbolic link if required
        if not is_same_gws(self.old_directory, BASE_OUTPUT_DIR):
            old_link_path = os.path.join(self.old_sym_link_dir,
                                         self.old_filename)
            if os.path.lexists(old_link_path):
                if not os.path.islink(old_link_path):
                    logger.error("{} exists and isn't a symbolic link.".format(
                        old_link_path))
                    raise SymLinkIsFileError(old_link_path)
                else:
                    # it is a link so remove it
                    os.remove(old_link_path)
                    # check for empty directory
                    if not os.listdir(self.old_sym_link_dir):
                        delete_drs_dir(self.old_sym_link_dir)

            new_link_dir = os.path.join(BASE_OUTPUT_DIR,
                                        construct_drs_path(self.datafile))
            if not os.path.exists(new_link_dir):
                os.makedirs(new_link_dir)
            os.symlink(os.path.join(self.new_directory, self.new_filename),
                       os.path.join(new_link_dir, self.new_filename))
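
Examples #15, #19, and #27 look like methods of one file-updater class. A hypothetical driver, assuming those methods live on a class called, say, FileUpdater; the function below is an illustration, not the project's actual API:

def rename_data_file(datafile, new_name, update_file_only=False):
    """Drive the updater class sketched in Examples #15/#19/#27."""
    updater = FileUpdater(datafile, new_name, update_file_only)
    updater.new_filename = new_name   # a subclass would normally derive this
    updater._construct_directory()    # Example #27: sets new_directory
    updater._rename_file()            # Example #19: moves the file and links
    if not update_file_only:
        datafile.name = new_name
        datafile.directory = updater.new_directory
        datafile.save()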
Example #20
def main(args):
    """
    Main entry point
    """
    dreqs = DataRequest.objects.filter(
        climate_model__short_name='CMCC-CM2-VHR4',
        experiment__short_name='control-1950',
        datafile__isnull=False
    ).distinct().order_by(
        'variable_request__table_name', 'variable_request__cmor_name'
    )

    num_dreqs = dreqs.count()
    logger.info(f'{num_dreqs} data requests found')

    for dreq in dreqs:
        dreq.datafile_set.update(version='v20200917')
        for df in dreq.datafile_set.filter(online=True).order_by('name'):
            old_dir = df.directory
            old_path = os.path.join(old_dir, df.name)
            if not os.path.exists(old_path):
                logger.error(f'{old_path} not found')
                continue
            new_dir = os.path.join(get_gws(df.directory),
                                   construct_drs_path(df))
            if df.directory != new_dir:
                if not os.path.exists(new_dir):
                    os.makedirs(new_dir)

                os.rename(old_path,
                          os.path.join(new_dir, df.name))
                df.directory = new_dir
                df.save()

            # Delete original dir if it's now empty
            if not os.listdir(old_dir):
                delete_drs_dir(old_dir)
Example #21
def main(args):
    """
    Main entry point
    """
    dreqs = DataRequest.objects.filter(
        climate_model__short_name='EC-Earth3P',
        experiment__short_name__in=['primWP5-amv-pos', 'primWP5-amv-neg'],
        variable_request__cmor_name__in=['rsus', 'rlus']
    ).distinct()

    num_dreqs = dreqs.count()
    if num_dreqs != 100:
        logger.error(f'Found {num_dreqs} data requests but was expecting 100.')
        sys.exit(1)

    for dreq in dreqs:
        for df in dreq.datafile_set.all():
            new_dir = os.path.join(BASE_GWS, construct_drs_path(df))
            new_path = os.path.join(new_dir, df.name)
            old_path = os.path.join(df.directory, df.name)
            if not os.path.exists(new_dir):
                os.makedirs(new_dir)
            shutil.copy(old_path, new_path)
        delete_files(dreq.datafile_set.all(), BASE_OUTPUT_DIR)
Example #22
def main(args):
    """
    Main entry point
    """
    ret_req = RetrievalRequest.objects.get(id=args.retrieval_id)

    all_files = DataFile.objects.filter(
        data_request__in=ret_req.data_request.all())
    time_units = all_files[0].time_units
    calendar = all_files[0].calendar
    start_float = cf_units.date2num(
        datetime.datetime(ret_req.start_year, 1, 1), time_units, calendar)
    end_float = cf_units.date2num(
        datetime.datetime(ret_req.end_year + 1, 1, 1), time_units, calendar)
    data_files = all_files.filter(start_time__gte=start_float,
                                  end_time__lt=end_float)

    num_files = 0
    for data_file in data_files:
        drs_path = construct_drs_path(data_file)
        dest_dir = os.path.join(NEW_BASE_OUTPUT_DIR, drs_path)
        if dest_dir == data_file.directory:
            logger.warning('Skipping file as already in destination directory '
                           '{}'.format(data_file.name))
            continue
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)
        shutil.move(os.path.join(data_file.directory, data_file.name),
                    dest_dir)
        os.symlink(os.path.join(dest_dir, data_file.name),
                   os.path.join(data_file.directory, data_file.name))
        data_file.directory = dest_dir
        data_file.save()
        num_files += 1

    logger.debug('Moved {} files'.format(num_files))
Example #23
def move_dirs(data_req, new_gws):
    """
    Move the files

    :param pdata_app.models.DataRequest data_req: the data request to move
    :param int new_gws: the number of the gws to move to
    """
    single_dir = '{}{}'.format(COMMON_GWS_NAME, new_gws)
    existing_dirs = data_req.directories()
    # ignore data that is offline
    if None in existing_dirs:
        existing_dirs.remove(None)
    use_single_dir = False
    for exist_dir in existing_dirs:
        if exist_dir.startswith(single_dir):
            use_single_dir = True
            break
    if not use_single_dir:
        # As a quick sanity check, generate an error if there is no
        # data already in the requested output directory
        logger.error('The new output directory is {} but no data from '
                     'this variable is currently in this directory.'.
                     format(single_dir))
        sys.exit(1)

    for exist_dir in existing_dirs:
        if exist_dir.startswith(single_dir):
            continue
        files_to_move = data_req.datafile_set.filter(directory=exist_dir)
        logger.debug('Moving {} files from {}'.format(
            files_to_move.count(), exist_dir))
        for file_to_move in files_to_move:
            # Move the file
            src = os.path.join(exist_dir, file_to_move.name)
            dest_path = os.path.join(single_dir, 'stream1',
                                     construct_drs_path(file_to_move))
            if not os.path.exists(dest_path):
                os.makedirs(dest_path)
            dest = os.path.join(dest_path, file_to_move.name)
            # remove existing link if about to write over it
            if dest.startswith(BASE_OUTPUT_DIR):
                if os.path.exists(dest):
                    if os.path.islink(dest):
                        os.remove(dest)
            # Move the file
            shutil.move(src, dest)
            # Update the file's location in the DB
            file_to_move.directory = dest_path
            file_to_move.save()
            # Check that it was safely copied
            actual_checksum = adler32(dest)
            db_checksum = file_to_move.checksum_set.first().checksum_value
            if actual_checksum != db_checksum:
                logger.error('For {}\ndatabase checksum: {}\n'
                             'actual checksum: {}'.
                             format(dest, db_checksum, actual_checksum))
                sys.exit(1)
            # Update the symlink
            if not is_same_gws(dest_path, BASE_OUTPUT_DIR):
                primary_path_dir = os.path.join(
                    BASE_OUTPUT_DIR,
                    construct_drs_path(file_to_move))
                primary_path = os.path.join(primary_path_dir,
                                            file_to_move.name)
                if os.path.lexists(primary_path):
                    if not os.path.islink(primary_path):
                        logger.error("{} exists and isn't a symbolic "
                                     "link.".format(primary_path))
                        sys.exit(1)
                    else:
                        # it is a link so remove it
                        os.remove(primary_path)
                if not os.path.exists(primary_path_dir):
                    os.makedirs(primary_path_dir)
                os.symlink(dest, primary_path)

        delete_drs_dir(exist_dir)
Example #24
def main(args):
    """
    Main entry point
    """
    dreqs_hr = DataRequest.objects.filter(
        climate_model__short_name='CMCC-CM2-HR4',
        experiment__short_name__in=['hist-1950', 'control-1950'],
        variable_request__table_name__startswith='SI',
        datafile__isnull=False).distinct()

    dreqs_vhr = DataRequest.objects.filter(
        climate_model__short_name='CMCC-CM2-VHR4',
        experiment__short_name='hist-1950',
        variable_request__table_name__startswith='SI',
        datafile__isnull=False).distinct()

    dreqs = dreqs_hr | dreqs_vhr

    logger.debug(f'Found {dreqs.count()} data requests')

    for dreq in dreqs:
        logger.debug(f'Processing {dreq}')
        old_directories = []
        for df in dreq.datafile_set.order_by('name'):
            if not df.online:
                logger.error(f'Not online {df.name}')
                continue
            if df.version == NEW_VERSION:
                logger.warning(f'Already at {NEW_VERSION} {df.name}')
                continue
            # save the sym link directory before we make any changes
            if not is_same_gws(BASE_OUTPUT_DIR, df.directory):
                old_sym_link_dir = os.path.join(BASE_OUTPUT_DIR,
                                                construct_drs_path(df))
            # now get back to updating the version
            df.version = NEW_VERSION
            gws = get_gws(df.directory)
            new_dir = os.path.join(gws, construct_drs_path(df))
            old_directory = df.directory
            if not os.path.exists(new_dir):
                os.makedirs(new_dir)
            os.rename(os.path.join(df.directory, df.name),
                      os.path.join(new_dir, df.name))
            df.directory = new_dir
            df.save()
            if old_directory not in old_directories:
                old_directories.append(old_directory)

            # Update any sym links too
            if not is_same_gws(BASE_OUTPUT_DIR, df.directory):
                sym_link_path = os.path.join(old_sym_link_dir, df.name)
                if os.path.lexists(sym_link_path):
                    if os.path.islink(sym_link_path):
                        os.remove(sym_link_path)
                        if old_sym_link_dir not in old_directories:
                            old_directories.append(old_sym_link_dir)
                sym_link_dir = os.path.join(BASE_OUTPUT_DIR,
                                            construct_drs_path(df))
                if not os.path.exists(sym_link_dir):
                    os.makedirs(sym_link_dir)
                sym_link_path = os.path.join(sym_link_dir, df.name)
                os.symlink(os.path.join(df.directory, df.name), sym_link_path)

        for directory in old_directories:
            if not os.listdir(directory):
                delete_drs_dir(directory)
            else:
                logger.error(f'Not empty {directory}')
Example #25
def main(args):
    """
    Main entry point
    """
    logger.debug('Starting incoming_to_drs.py')

    data_sub = _get_submission_object(os.path.normpath(args.directory))

    if not args.alternative:
        drs_base_dir = BASE_OUTPUT_DIR
    else:
        drs_base_dir = args.alternative

    errors_encountered = False

    for data_file in data_sub.datafile_set.order_by('name'):
        # make full path of existing file
        existing_path = os.path.join(data_file.directory, data_file.name)

        # make full path of where it will live
        drs_sub_path = construct_drs_path(data_file)
        drs_dir = os.path.join(drs_base_dir, drs_sub_path)
        drs_path = os.path.join(drs_dir, data_file.name)

        # check the destination directory exists
        if not os.path.exists(drs_dir):
            os.makedirs(drs_dir)

        # move the file into the DRS directory structure
        this_file_error = False
        try:
            os.rename(existing_path, drs_path)
        except OSError as exc:
            logger.error('Unable to move {} to {}. {}'.format(
                existing_path, drs_path, str(exc)))
            errors_encountered = True
            this_file_error = True

        # update the file's location in the database
        if not this_file_error:
            data_file.directory = drs_dir
            if not data_file.online:
                data_file.online = True
            data_file.save()

        # if storing the files in an alternative location, create a sym link
        # from the primary DRS structure to the file
        if not is_same_gws(BASE_OUTPUT_DIR, drs_base_dir):
            primary_path = os.path.join(BASE_OUTPUT_DIR, drs_sub_path)
            try:
                if not os.path.exists(primary_path):
                    os.makedirs(primary_path)

                os.symlink(drs_path, os.path.join(primary_path,
                                                  data_file.name))
            except OSError as exc:
                logger.error('Unable to link from {} to {}. {}'.format(
                    drs_path, os.path.join(primary_path, data_file.name),
                    str(exc)))
                errors_encountered = True

    # summarise what happened and keep the DB updated
    if not errors_encountered:
        logger.debug('All files moved with no errors. Data submission '
                     'incoming directory can be deleted.')
    else:
        logger.error('Errors were encountered. Please fix these before '
                     'deleting the incoming directory.')

    logger.debug('Completed incoming_to_drs.py')
Example #26
def main(args):
    """
    Main entry point
    """
    logger.debug('Starting delete_request.py for retrieval {}'.format(
        args.retrieval_id))

    deletion_retrieval = match_one(RetrievalRequest, id=args.retrieval_id)
    if not deletion_retrieval:
        logger.error('Unable to find retrieval id {}'.format(
            args.retrieval_id))
        sys.exit(1)

    if deletion_retrieval.date_deleted:
        logger.error('Retrieval {} was already deleted, at {}.'.format(
            deletion_retrieval.id,
            deletion_retrieval.date_deleted.strftime('%Y-%m-%d %H:%M')))
        sys.exit(1)

    if not deletion_retrieval.data_finished:
        logger.error('Retrieval {} is not marked as finished.'.format(
            deletion_retrieval.id))
        sys.exit(1)

    problems_encountered = False
    directories_found = []
    base_output_dir = Settings.get_solo().base_output_dir

    # loop through all of the data requests in this retrieval
    for data_req in deletion_retrieval.data_request.all():
        online_req_files = data_req.datafile_set.filter(
            online=True, directory__isnull=False)
        files_to_delete = date_filter_files(online_req_files,
                                            deletion_retrieval.start_year,
                                            deletion_retrieval.end_year)

        if files_to_delete is None:
            continue

        if not args.force:
            # find any other retrieval requests that still need this data
            other_retrievals = RetrievalRequest.objects.filter(
                data_request=data_req, data_finished=False)
            # loop through the retrieval requests that still need this data
            # request
            for ret_req in other_retrievals:
                ret_online_files = data_req.datafile_set.filter(
                    online=True, directory__isnull=False)
                ret_filtered_files = date_filter_files(ret_online_files,
                                                       ret_req.start_year,
                                                       ret_req.end_year)
                if ret_filtered_files is None:
                    continue
                # remove from the deletion list any files that we have
                # just found are still needed
                files_to_delete = files_to_delete.difference(
                    ret_filtered_files)
                # list the parts of the data request that are still required
                logger.debug("{} {} to {} won't be deleted".format(
                    data_req, ret_req.start_year, ret_req.end_year))

        # don't (try to) delete anything that's in the CEDA archive
        files_to_delete = files_to_delete.exclude(
            directory__startswith=CEDA_ARCHIVE)

        # do the deleting
        if args.dryrun:
            logger.debug('{} {} files can be deleted.'.format(
                data_req,
                files_to_delete.distinct().count()))
        else:
            logger.debug('{} {} files will be deleted.'.format(
                data_req,
                files_to_delete.distinct().count()))
            for data_file in files_to_delete:
                old_file_dir = data_file.directory
                try:
                    os.remove(os.path.join(data_file.directory,
                                           data_file.name))
                except OSError as exc:
                    logger.error(str(exc))
                    problems_encountered = True
                else:
                    if data_file.directory not in directories_found:
                        directories_found.append(data_file.directory)
                    data_file.online = False
                    data_file.directory = None
                    data_file.save()

                # if a symbolic link exists from the base output directory
                # then delete this too
                if not old_file_dir.startswith(base_output_dir):
                    sym_link_dir = os.path.join(base_output_dir,
                                                construct_drs_path(data_file))
                    sym_link = os.path.join(sym_link_dir, data_file.name)
                    if not os.path.islink(sym_link):
                        logger.error(
                            "Expected {} to be a link but it isn't. "
                            "Leaving this file in place.".format(sym_link))
                        problems_encountered = True
                    else:
                        try:
                            os.remove(sym_link)
                        except OSError as exc:
                            logger.error(str(exc))
                            problems_encountered = True
                        else:
                            if sym_link_dir not in directories_found:
                                directories_found.append(sym_link_dir)

    if not args.dryrun:
        # delete any empty directories
        for directory in directories_found:
            if not os.listdir(directory):
                delete_drs_dir(directory)

        # set date_deleted in the db
        if not problems_encountered:
            deletion_retrieval.date_deleted = timezone.now()
            deletion_retrieval.save()
        else:
            logger.error(
                'Errors were encountered and so retrieval {} has not '
                'been marked as deleted. All possible files have been '
                'deleted.'.format(args.retrieval_id))

    logger.debug('Completed delete_request.py for retrieval {}'.format(
        args.retrieval_id))
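
date_filter_files() in Example #26 is not shown, but Example #22 performs the same year filtering inline, so a sketch can mirror it (returning None for an empty queryset, as the None check above implies):

import datetime

import cf_units

def date_filter_files(data_files, start_year, end_year):
    """Filter a DataFile queryset to [start_year, end_year] (sketch)."""
    if not data_files:
        return None
    time_units = data_files[0].time_units
    calendar = data_files[0].calendar
    start_float = cf_units.date2num(
        datetime.datetime(start_year, 1, 1), time_units, calendar)
    end_float = cf_units.date2num(
        datetime.datetime(end_year + 1, 1, 1), time_units, calendar)
    return data_files.filter(start_time__gte=start_float,
                             end_time__lt=end_float)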
Example #27
    def _construct_directory(self):
        """
        Construct the new directory path.
        """
        self.new_directory = os.path.join(get_gws(self.datafile.directory),
                                          construct_drs_path(self.datafile))
Example #28
    def test_out_name(self):
        expected = 't/HighResMIP/MOHC/t/t/r1i1p1/Amon/var/g2/v87654321'
        self.assertEqual(construct_drs_path(self.data_file2), expected)
Example #29
    def test_success(self):
        expected = 't/HighResMIP/MOHC/t/t/r1i1p1/Amon/var1/gn/v12345678'
        self.assertEqual(construct_drs_path(self.data_file1), expected)
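
The two tests above pin down the layout produced by construct_drs_path(): project/activity/institute/model/experiment/variant/table/variable/grid/version. A sketch consistent with those expected values; every attribute name below is an assumption about the models:

import os

def construct_drs_path(data_file):
    """Build the DRS sub-path for a DataFile (sketch; names assumed)."""
    return os.path.join(
        data_file.project.short_name,
        data_file.activity_id.short_name,
        data_file.institute.short_name,
        data_file.climate_model.short_name,
        data_file.experiment.short_name,
        data_file.rip_code,
        data_file.variable_request.table_name,
        data_file.variable_request.cmor_name,
        data_file.grid,
        data_file.version)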
Example #30
def main(args):
    """
    Main entry point
    """
    dreqs1 = DataRequest.objects.filter(
        climate_model__short_name='MPI-ESM1-2-XR',
        experiment__short_name='highresSST-present',
        variable_request__cmor_name__in=['hus7h', 'ta7h', 'ua7h']
    )

    dreqs2 = DataRequest.objects.filter(
        climate_model__short_name__in=['MPI-ESM1-2-HR', 'MPI-ESM1-2-XR'],
        experiment__short_name='highresSST-present',
        variable_request__table_name='Amon',
        variable_request__cmor_name='tas'
    )

    dreqs = dreqs1 | dreqs2

    logger.debug(f'Found {dreqs.count()} data requests')

    for dreq in dreqs:
        logger.debug(f'Processing {dreq}')
        old_directories = []
        for df in dreq.datafile_set.order_by('name'):
            if not df.online:
                logger.error(f'Not online {df.name}')
                continue
            if df.version == NEW_VERSION:
                logger.warning(f'Already at {NEW_VERSION} {df.name}')
                continue
            # save the sym link directory before we make any changes
            old_sym_link_dir = os.path.join(BASE_OUTPUT_DIR,
                                            construct_drs_path(df))
            # now get back to updating the version
            df.version = NEW_VERSION
            gws = get_gws(df.directory)
            new_dir = os.path.join(gws, construct_drs_path(df))
            old_directory = df.directory
            if not os.path.exists(new_dir):
                os.makedirs(new_dir)
            os.rename(os.path.join(df.directory, df.name),
                      os.path.join(new_dir, df.name))
            df.directory = new_dir
            df.save()
            if old_directory not in old_directories:
                old_directories.append(old_directory)
            # Update any sym links too
            sym_link_path = os.path.join(old_sym_link_dir, df.name)
            if os.path.lexists(sym_link_path):
                if os.path.islink(sym_link_path):
                    os.remove(sym_link_path)
                    if old_sym_link_dir not in old_directories:
                        old_directories.append(old_sym_link_dir)
            sym_link_dir = os.path.join(BASE_OUTPUT_DIR,
                                        construct_drs_path(df))
            if not os.path.exists(sym_link_dir):
                os.makedirs(sym_link_dir)
            sym_link_path = os.path.join(sym_link_dir, df.name)
            os.symlink(os.path.join(df.directory, df.name), sym_link_path)

        for directory in old_directories:
            if not os.listdir(directory):
                delete_drs_dir(directory)
            else:
                logger.error(f'Not empty {directory}')