def add_files(self, only_netcdf=False):
    """
    Identify any files in the incoming directory and its subdirectories.
    Obtain all relevant metadata from each of the files found. There is
    also the option to ignore any files without a .nc suffix.

    :raises ValueError: if no files were found.
    """
    if only_netcdf:
        found_files = list_files(self.incoming_directory)
    else:
        found_files = list_files(self.incoming_directory, '')

    if not found_files:
        msg = ('No netCDF files found in directory or its subdirectories '
               '{}'.format(self.incoming_directory))
        raise ValueError(msg)

    logger.debug('{} files identified'.format(len(found_files)))

    for found_file in found_files:
        obs_file = ObsFile(os.path.basename(found_file),
                           os.path.dirname(found_file))
        obs_file.add_metadata()
        self.obs_files.append(obs_file)

    logger.debug('{} files added'.format(len(self.obs_files)))
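
# A hypothetical usage sketch for add_files(). The ObsSubmission class name and
# its constructor argument are assumptions made for illustration; only the
# method above and its obs_files attribute appear in the code itself.
submission = ObsSubmission(incoming_directory='/path/to/incoming')
submission.add_files(only_netcdf=True)
print('{} observation files loaded'.format(len(submission.obs_files)))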
def test_list_files_ignore_symlinks(self):
    actual_tree_list = list_files(self.temp_dir, ignore_symlinks=True)
    expected_files = ['file1.nc', 'dir1/file3.nc']
    expected_tree_list = [
        self.temp_dir.joinpath(ef).as_posix() for ef in expected_files
    ]
    actual_tree_list.sort()
    expected_tree_list.sort()
    self.assertEqual(actual_tree_list, expected_tree_list)
def test_list_files_default_suffix(self):
    actual_tree_list = list_files(self.temp_dir)
    expected_files = ['file1.nc', 'dir1/file3.nc', 'dir1/file4.nc']
    expected_tree_list = [
        self.temp_dir.joinpath(ef).as_posix() for ef in expected_files
    ]
    actual_tree_list.sort()
    expected_tree_list.sort()
    self.assertEqual(actual_tree_list, expected_tree_list)
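
# A minimal sketch of the list_files() helper that the two tests above exercise,
# inferred from their expected results: the default suffix is '.nc', an empty
# suffix matches every file, and ignore_symlinks skips symbolic links. This is
# an assumption about the helper's behaviour, not the project's actual
# implementation.
import os


def list_files(directory, suffix='.nc', ignore_symlinks=False):
    """Return the full paths of files below `directory` ending in `suffix`."""
    found_files = []
    for root, _dirs, files in os.walk(directory):
        for name in files:
            path = os.path.join(root, name)
            if ignore_symlinks and os.path.islink(path):
                continue
            if name.endswith(suffix):
                found_files.append(path)
    return found_files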
def main(args):
    """
    Main entry point
    """
    new_files = list_files(NEW_SUBMISSION)
    logger.debug(f'{len(new_files)} files found in the submission')

    dfs = DataFile.objects.filter(name__in=map(os.path.basename, new_files))
    logger.debug(f'{dfs.count()} files found in the DMT')

    delete_files(dfs, '/gws/nopw/j04/primavera5/stream1')
    replace_files(dfs)
def main(args):
    """
    Main entry point
    """
    bad_files = list_files(BAD_DIR)
    logger.debug(f'{len(bad_files)} files found')

    for bf in bad_files:
        df = DataFile.objects.get(name=os.path.basename(bf))
        new_dir = os.path.join(BASE_OUTPUT_DIR, construct_drs_path(df))
        new_path = os.path.join(new_dir, df.name)
        if not os.path.exists(new_dir):
            os.makedirs(new_dir)
        if os.path.exists(new_path):
            if os.path.islink(new_path):
                os.remove(new_path)
            else:
                logger.error(f'{new_path} is not a link')
                continue
        os.rename(bf, new_path)
        df.directory = new_dir
        df.save()
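
# A hedged note on the os.rename() call above: a plain rename only succeeds when
# BAD_DIR and the DRS output directory are on the same filesystem, which the
# script appears to assume. If they could sit on different mounts, shutil.move()
# is the standard fall-back because it copies and then deletes the source.
import shutil


def move_across_filesystems(source_path, dest_path):
    """Move a file even when the destination is on a different filesystem."""
    shutil.move(source_path, dest_path)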
def are_files_chowned(submission):
    """
    Check whether all of the files in the submission's directory are now
    owned by the admin user (they will be owned by the submitting user
    until the cron job has chowned them). If there are no files in the
    directory then return False.

    :param pdata_app.models.DataSubmission submission:
    :returns: True if all files in the submission's directory are owned
        by the admin user.
    """
    file_names = list_files(submission.directory)

    if not file_names:
        return False
    else:
        for file_name in file_names:
            uid = os.stat(file_name)[stat.ST_UID]
            user_name = pwd.getpwuid(uid)[0]
            if user_name != ADMIN_USER:
                return False
        return True
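
# A hypothetical usage sketch for are_files_chowned(). The queryset filter, the
# status value and the process_submission() call are illustrative assumptions
# and not part of the code above.
for submission in DataSubmission.objects.filter(status='PENDING_PROCESSING'):
    if not are_files_chowned(submission):
        logger.debug('Files in %s are not yet owned by %s; skipping for now',
                     submission.directory, ADMIN_USER)
        continue
    process_submission(submission)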
def main(args):
    """
    Main entry point
    """
    if args.processes == 1 and not iris.__version__.startswith('1.'):
        # if not multiprocessing then limit the number of Dask threads
        # this can't seem to be limited when using multiprocessing
        dask.config.set(pool=ThreadPool(2))

    submission_dir = os.path.normpath(args.directory)
    logger.debug('Submission directory: %s', submission_dir)
    logger.debug('Project: %s', args.mip_era)
    logger.debug('Processes requested: %s', args.processes)

    try:
        if args.input:
            validated_metadata = read_json_file(args.input)
            data_sub = _get_submission_object(submission_dir)
            files_online = False
        else:
            files_online = True
            data_files = list_files(submission_dir)

            logger.debug('%s files identified', len(data_files))

            if not args.validate_only and not args.output:
                data_sub = _get_submission_object(submission_dir)

                if data_sub.status != 'ARRIVED':
                    msg = "The submission's status is not ARRIVED."
                    logger.error(msg)
                    raise SubmissionError(msg)

            try:
                if not args.no_prepare:
                    run_prepare(data_files, args.processes)
                validated_metadata = list(
                    identify_and_validate(data_files, args.mip_era,
                                          args.processes, args.file_format))
            except SubmissionError:
                if not args.validate_only and not args.output:
                    send_admin_rejection_email(data_sub)
                raise

            logger.debug('%s files validated successfully',
                         len(validated_metadata))

            if args.validate_only:
                logger.debug('Data submission not run (-v option specified)')
                logger.debug('Processing complete')
                sys.exit(0)

            if not args.relaxed and len(validated_metadata) != len(data_files):
                # if not args.output:
                #     rejected_dir = move_rejected_files(submission_dir)
                #     set_status_rejected(data_sub, rejected_dir)
                #     send_user_rejection_email(data_sub)
                msg = ('Not all files passed validation. Please fix these '
                       'errors and then re-run this script.')
                logger.error(msg)
                raise SubmissionError(msg)

            if args.tape_base_url:
                add_tape_url(validated_metadata, args.tape_base_url,
                             submission_dir)

        if args.output:
            write_json_file(validated_metadata, args.output)
        else:
            update_database_submission(validated_metadata, data_sub,
                                       files_online, args.version_string)
            logger.debug(
                '%s files submitted successfully',
                match_one(DataSubmission, incoming_directory=submission_dir).
                get_data_files().count())
    except SubmissionError:
        sys.exit(1)

    logger.debug('Processing complete')
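
# A hedged sketch of the command-line parser that this main() appears to expect.
# The option names are inferred from the args attributes used above (directory,
# mip_era, processes, file_format, input, output, validate_only, relaxed,
# no_prepare, tape_base_url, version_string); the real script's flags, defaults
# and help text may differ.
import argparse


def parse_args():
    """Parse the command-line arguments (illustrative only)."""
    parser = argparse.ArgumentParser(
        description='Validate and submit a directory of data files')
    parser.add_argument('directory', help='the submission directory')
    parser.add_argument('--mip-era', default='CMIP6',
                        help='the project that the files belong to')
    parser.add_argument('--processes', type=int, default=1,
                        help='the number of parallel processes to use')
    parser.add_argument('--file-format', default='CMIP6',
                        help='the format of the files being validated')
    parser.add_argument('-i', '--input',
                        help='read previously validated metadata from this '
                             'JSON file instead of validating files')
    parser.add_argument('-o', '--output',
                        help='write validated metadata to this JSON file '
                             'instead of updating the database')
    parser.add_argument('-v', '--validate-only', action='store_true',
                        help='validate the files but do not submit them')
    parser.add_argument('--relaxed', action='store_true',
                        help='continue even if some files fail validation')
    parser.add_argument('--no-prepare', action='store_true',
                        help='do not run the prepare step')
    parser.add_argument('--tape-base-url',
                        help='the base URL of the files on tape')
    parser.add_argument('--version-string',
                        help='the version string for this submission')
    return parser.parse_args()


if __name__ == '__main__':
    main(parse_args())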