Example 1
    def add_files(self, only_netcdf=False):
        """
        Identify all files in the incoming directory and its subdirectories
        and obtain the relevant metadata from each file found. There is
        also the option to ignore any files that do not have a .nc suffix.

        :raises ValueError: if no files were found.
        """
        if only_netcdf:
            # the default suffix is '.nc', so only netCDF files are returned
            found_files = list_files(self.incoming_directory)
        else:
            # an empty suffix matches every file
            found_files = list_files(self.incoming_directory, '')

        if not found_files:
            msg = ('No files found in directory {} or its '
                   'subdirectories'.format(self.incoming_directory))
            raise ValueError(msg)

        logger.debug('{} files identified'.format(len(found_files)))

        for found_file in found_files:
            obs_file = ObsFile(os.path.basename(found_file),
                               os.path.dirname(found_file))
            obs_file.add_metadata()
            self.obs_files.append(obs_file)

        logger.debug('{} files added'.format(len(self.obs_files)))
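All of these examples call the same list_files helper. Its implementation is not shown here, but from the calls above and the tests below its signature can be inferred: a directory, an optional suffix that defaults to '.nc' (an empty string matches every file) and an ignore_symlinks flag. A minimal sketch under those assumptions, not the real implementation:

import os


def list_files(directory, suffix='.nc', ignore_symlinks=False):
    """
    Recursively list the files in `directory` whose names end in
    `suffix`. An empty suffix matches every file. Symbolic links are
    skipped when `ignore_symlinks` is True.
    """
    # sketch reconstructed from the calling code; the real helper may differ
    found_files = []
    for dir_path, _dir_names, file_names in os.walk(directory):
        for file_name in file_names:
            file_path = os.path.join(dir_path, file_name)
            if ignore_symlinks and os.path.islink(file_path):
                continue
            if file_name.endswith(suffix):
                found_files.append(file_path)
    return found_files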
Example 2
    def test_list_files_ignore_symlinks(self):
        actual_tree_list = list_files(self.temp_dir, ignore_symlinks=True)
        expected_files = ['file1.nc', 'dir1/file3.nc']
        expected_tree_list = [
            self.temp_dir.joinpath(ef).as_posix() for ef in expected_files
        ]
        actual_tree_list.sort()
        expected_tree_list.sort()
        self.assertEqual(actual_tree_list, expected_tree_list)
Example 3
    def test_list_files_default_suffix(self):
        actual_tree_list = list_files(self.temp_dir)
        expected_files = ['file1.nc', 'dir1/file3.nc', 'dir1/file4.nc']
        expected_tree_list = [
            self.temp_dir.joinpath(ef).as_posix() for ef in expected_files
        ]
        actual_tree_list.sort()
        expected_tree_list.sort()
        self.assertEqual(actual_tree_list, expected_tree_list)
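Neither test shows its fixture. Comparing the two expected lists, dir1/file4.nc must be a symbolic link (it disappears when ignore_symlinks=True), and a file without a .nc suffix is presumably also present to exercise the default filter. A plausible setUp along those lines; the file names are taken from the tests, everything else is an assumption:

import pathlib
import shutil
import tempfile
import unittest


class TestListFiles(unittest.TestCase):
    def setUp(self):
        # hypothetical fixture matching the expected lists above
        self.temp_dir = pathlib.Path(tempfile.mkdtemp())
        (self.temp_dir / 'file1.nc').touch()
        (self.temp_dir / 'file2.txt').touch()  # excluded by the '.nc' suffix
        sub_dir = self.temp_dir / 'dir1'
        sub_dir.mkdir()
        (sub_dir / 'file3.nc').touch()
        # a symlink, so it is dropped when ignore_symlinks=True
        (sub_dir / 'file4.nc').symlink_to(self.temp_dir / 'file1.nc')

    def tearDown(self):
        shutil.rmtree(self.temp_dir)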
Example 4
def main(args):
    """
    Main entry point
    """
    new_files = list_files(NEW_SUBMISSION)

    logger.debug(f'{len(new_files)} files found in the submission')

    # find the corresponding DataFile records in the DMT by file name
    dfs = DataFile.objects.filter(name__in=map(os.path.basename, new_files))

    logger.debug(f'{dfs.count()} files found in the DMT')

    delete_files(dfs, '/gws/nopw/j04/primavera5/stream1')
    replace_files(dfs)
Example 5
def main(args):
    """
    Main entry point
    """
    bad_files = list_files(BAD_DIR)

    logger.debug(f'{len(bad_files)} files found')

    for bf in bad_files:
        df = DataFile.objects.get(name=os.path.basename(bf))
        new_dir = os.path.join(BASE_OUTPUT_DIR, construct_drs_path(df))
        new_path = os.path.join(new_dir, df.name)
        if not os.path.exists(new_dir):
            os.makedirs(new_dir)
        if os.path.exists(new_path):
            # replace an existing symlink, but never overwrite a real file
            if os.path.islink(new_path):
                os.remove(new_path)
            else:
                logger.error(f'{new_path} is not a link')
                continue
        os.rename(bf, new_path)
        df.directory = new_dir
        df.save()
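One small hardening note: the existence check before os.makedirs can race if two copies of this script run at once. On Python 3.2+ the same intent can be expressed atomically; a sketch with a stand-in path:

import os

new_dir = '/tmp/example/new_dir'  # stands in for the loop variable above
os.makedirs(new_dir, exist_ok=True)  # no error if it already exists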
Example 6
def are_files_chowned(submission):
    """
    Check whether all of the files in the submission's directory are now
    owned by the admin user (they will be owned by the submitting user
    until the cron job has chowned them). If there are no files in the
    directory then return False.

    :param pdata_app.models.DataSubmission submission:
    :returns: True if all files in the submission's directory are owned
        by the admin user, otherwise False.
    """
    file_names = list_files(submission.directory)

    if not file_names:
        return False

    for file_name in file_names:
        uid = os.stat(file_name)[stat.ST_UID]
        user_name = pwd.getpwuid(uid)[0]

        if user_name != ADMIN_USER:
            return False

    return True
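Indexing the os.stat result with stat.ST_UID and pwd.getpwuid(...)[0] works, but the named attributes read more clearly and need no stat import. A sketch of the same ownership check using them; the helper name is hypothetical:

import os
import pwd


def is_owned_by(file_name, expected_user):
    """Return True if file_name is owned by expected_user (sketch)."""
    uid = os.stat(file_name).st_uid
    return pwd.getpwuid(uid).pw_name == expected_user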
Example 7
def main(args):
    """
    Main entry point
    """
    if args.processes == 1 and not iris.__version__.startswith('1.'):
        # if not using multiprocessing then limit the number of Dask
        # threads; the thread count can't be limited when
        # multiprocessing is used
        dask.config.set(pool=ThreadPool(2))

    submission_dir = os.path.normpath(args.directory)
    logger.debug('Submission directory: %s', submission_dir)
    logger.debug('Project: %s', args.mip_era)
    logger.debug('Processes requested: %s', args.processes)

    try:
        if args.input:
            validated_metadata = read_json_file(args.input)
            data_sub = _get_submission_object(submission_dir)
            files_online = False
        else:
            files_online = True
            data_files = list_files(submission_dir)

            logger.debug('%s files identified', len(data_files))

            if not args.validate_only and not args.output:
                data_sub = _get_submission_object(submission_dir)

                if data_sub.status != 'ARRIVED':
                    msg = "The submission's status is not ARRIVED."
                    logger.error(msg)
                    raise SubmissionError(msg)

            try:
                if not args.no_prepare:
                    run_prepare(data_files, args.processes)
                validated_metadata = list(
                    identify_and_validate(data_files, args.mip_era,
                                          args.processes, args.file_format))
            except SubmissionError:
                if not args.validate_only and not args.output:
                    send_admin_rejection_email(data_sub)
                raise

            logger.debug('%s files validated successfully',
                         len(validated_metadata))

            if args.validate_only:
                logger.debug('Data submission not run (-v option specified)')
                logger.debug('Processing complete')
                sys.exit(0)

            if not args.relaxed and len(validated_metadata) != len(data_files):
                # if not args.output:
                #     rejected_dir = move_rejected_files(submission_dir)
                #     set_status_rejected(data_sub, rejected_dir)
                #     send_user_rejection_email(data_sub)
                msg = ('Not all files passed validation. Please fix these '
                       'errors and then re-run this script.')
                logger.error(msg)
                raise SubmissionError(msg)

        if args.tape_base_url:
            add_tape_url(validated_metadata, args.tape_base_url,
                         submission_dir)

        if args.output:
            write_json_file(validated_metadata, args.output)
        else:
            update_database_submission(validated_metadata, data_sub,
                                       files_online, args.version_string)
            logger.debug(
                '%s files submitted successfully',
                match_one(DataSubmission, incoming_directory=submission_dir).
                get_data_files().count())

    except SubmissionError:
        sys.exit(1)

    logger.debug('Processing complete')
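The args namespace consumed above implies a command line roughly like the following. This parser is reconstructed from the attributes that main() reads (the -v spelling is confirmed by the log message in the code); the defaults and help texts are assumptions:

import argparse


def parse_args():
    """Hypothetical parser matching the attributes that main() reads."""
    parser = argparse.ArgumentParser(description='Validate and submit files')
    parser.add_argument('directory', help='the submission directory')
    parser.add_argument('--mip-era', help='the project the files belong to')
    parser.add_argument('--processes', type=int, default=1,
                        help='the number of parallel processes to use')
    parser.add_argument('--file-format', help='the expected file format')
    parser.add_argument('-i', '--input',
                        help='read validated metadata from this JSON file '
                             'instead of validating')
    parser.add_argument('-o', '--output',
                        help='write validated metadata to this JSON file '
                             'instead of updating the database')
    parser.add_argument('-v', '--validate-only', action='store_true',
                        help='validate the files but do not submit them')
    parser.add_argument('--no-prepare', action='store_true',
                        help='skip the preparation step')
    parser.add_argument('--relaxed', action='store_true',
                        help='allow a submission in which some files fail '
                             'validation')
    parser.add_argument('--tape-base-url',
                        help='the base URL of the tape copy')
    parser.add_argument('--version-string',
                        help='the dataset version string')
    return parser.parse_args()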