def main(args):
    """
    Main entry point.

    Delete the online files belonging to the retrieval request identified
    by ``args.retrieval_id``, mark each file as offline in the database,
    remove any corresponding symbolic links under the base output
    directory, prune any directories left empty and, if no problems were
    encountered, stamp the retrieval's ``date_deleted``.
    """
    logger.debug('Starting delete_request.py for retrieval {}'.format(
        args.retrieval_id))

    deletion_retrieval = match_one(RetrievalRequest, id=args.retrieval_id)
    if not deletion_retrieval:
        logger.error('Unable to find retrieval id {}'.format(
            args.retrieval_id))
        sys.exit(1)

    if deletion_retrieval.date_deleted:
        logger.error('Retrieval {} was already deleted, at {}.'.format(
            deletion_retrieval.id,
            deletion_retrieval.date_deleted.strftime('%Y-%m-%d %H:%M')))
        sys.exit(1)

    if not deletion_retrieval.data_finished:
        logger.error('Retrieval {} is not marked as finished.'.format(
            deletion_retrieval.id))
        sys.exit(1)

    problems_encountered = False
    directories_found = []
    base_output_dir = Settings.get_solo().base_output_dir

    # loop through all of the data requests in this retrieval
    for data_req in deletion_retrieval.data_request.all():
        # Don't (try to) delete anything that's in the CEDA archive.
        # BUG FIX: the original code called
        # files_to_delete.exclude(directory__startswith=CEDA_ARCHIVE)
        # and discarded the returned queryset (QuerySets are immutable),
        # so archive files were never actually excluded. The exclude is
        # applied here, on the base queryset, because filter()/exclude()
        # are not supported after QuerySet.difference(); excluding before
        # the set difference produces the same final set.
        online_req_files = data_req.datafile_set.filter(
            online=True, directory__isnull=False).exclude(
            directory__startswith=CEDA_ARCHIVE)
        files_to_delete = date_filter_files(online_req_files,
                                            deletion_retrieval.start_year,
                                            deletion_retrieval.end_year)
        if files_to_delete is None:
            continue

        if not args.force:
            # find any other retrieval requests that still need this data
            other_retrievals = RetrievalRequest.objects.filter(
                data_request=data_req, data_finished=False)
            # loop through the retrieval requests that still need this
            # data request (online_req_files is loop-invariant, so it is
            # not re-queried for each ret_req)
            for ret_req in other_retrievals:
                ret_filtered_files = date_filter_files(online_req_files,
                                                       ret_req.start_year,
                                                       ret_req.end_year)
                if ret_filtered_files is None:
                    continue
                # remove from the list of files to delete the ones that we
                # have just found are still needed
                files_to_delete = files_to_delete.difference(
                    ret_filtered_files)
                # list the parts of the data request that are still
                # required
                logger.debug("{} {} to {} won't be deleted".format(
                    data_req, ret_req.start_year, ret_req.end_year))

        # do the deleting
        if args.dryrun:
            logger.debug('{} {} files can be deleted.'.format(
                data_req, files_to_delete.distinct().count()))
        else:
            logger.debug('{} {} files will be deleted.'.format(
                data_req, files_to_delete.distinct().count()))
            for data_file in files_to_delete:
                old_file_dir = data_file.directory
                try:
                    os.remove(os.path.join(data_file.directory,
                                           data_file.name))
                except OSError as exc:
                    logger.error(str(exc))
                    problems_encountered = True
                else:
                    if data_file.directory not in directories_found:
                        directories_found.append(data_file.directory)
                    data_file.online = False
                    data_file.directory = None
                    data_file.save()
                    # if a symbolic link exists from the base output
                    # directory then delete this too
                    if not old_file_dir.startswith(base_output_dir):
                        sym_link_dir = os.path.join(
                            base_output_dir, construct_drs_path(data_file))
                        sym_link = os.path.join(sym_link_dir,
                                                data_file.name)
                        if not os.path.islink(sym_link):
                            logger.error(
                                "Expected {} to be a link but it isn't. "
                                "Leaving this file in place.".format(
                                    sym_link))
                            problems_encountered = True
                        else:
                            try:
                                os.remove(sym_link)
                            except OSError as exc:
                                logger.error(str(exc))
                                problems_encountered = True
                            else:
                                if sym_link_dir not in directories_found:
                                    directories_found.append(sym_link_dir)

    if not args.dryrun:
        # delete any empty directories
        for directory in directories_found:
            if not os.listdir(directory):
                delete_drs_dir(directory)

        # set date_deleted in the db only if every deletion succeeded
        if not problems_encountered:
            deletion_retrieval.date_deleted = timezone.now()
            deletion_retrieval.save()
        else:
            logger.error(
                'Errors were encountered and so retrieval {} has not '
                'been marked as deleted. All possible files have been '
                'deleted.'.format(args.retrieval_id))

    logger.debug('Completed delete_request.py for retrieval {}'.format(
        args.retrieval_id))
def test_subset_entirely(self):
    """Offline files lying entirely inside 1952-1992 are returned."""
    result = date_filter_files(
        models.DataFile.objects.filter(online=False), 1952, 1992)
    self.assertEqual(['test4', 'test8'], _assertable(result))
def test_subset_spans(self):
    """Offline files whose period spans 1975-1985 are returned."""
    result = date_filter_files(
        models.DataFile.objects.filter(online=False), 1975, 1985)
    self.assertEqual(['test4', 'test8'], _assertable(result))
def test_both_span(self):
    """All files overlapping 1955-1987 are returned."""
    result = date_filter_files(
        models.DataFile.objects.all(), 1955, 1987)
    self.assertEqual(['test1', 'test2', 'test4', 'test8'],
                     _assertable(result))
def test_start_spans(self):
    """Files overlapping a range whose start falls mid-file are returned."""
    result = date_filter_files(
        models.DataFile.objects.all(), 1965, 1995)
    self.assertEqual(['test2', 'test4', 'test8'], _assertable(result))
def test_end_spans(self):
    """Files overlapping a range whose end falls mid-file are returned."""
    result = date_filter_files(
        models.DataFile.objects.all(), 1949, 1975)
    self.assertEqual(['test1', 'test2', 'test4'], _assertable(result))
def test_entirely_contains(self):
    """Only files wholly contained in 1949-1961 are returned."""
    result = date_filter_files(
        models.DataFile.objects.all(), 1949, 1961)
    self.assertEqual(['test1', 'test2'], _assertable(result))
def main(args):
    """
    Main entry point.

    Restore from tape all of the offline files belonging to the retrieval
    request identified by ``args.retrieval_id``. The files are grouped by
    tape URL and fetched in parallel. If every requested file is online
    afterwards the retrieval's ``date_complete`` is set and the user is
    emailed; otherwise the admins are emailed about the failure.
    """
    logger.debug('Starting retrieve_request.py for retrieval {}'.
                 format(args.retrieval_id))

    # check retrieval
    retrieval = match_one(RetrievalRequest, id=args.retrieval_id)
    if not retrieval:
        logger.error('Unable to find retrieval id {}'.format(
            args.retrieval_id))
        sys.exit(1)

    if retrieval.date_complete:
        logger.error('Retrieval {} was already completed, at {}.'.
                     format(retrieval.id,
                            retrieval.date_complete.strftime(
                                '%Y-%m-%d %H:%M')))
        sys.exit(1)

    # group the offline files in this retrieval by the tape they live on:
    # {tape_url: [DataFile, ...]}
    tapes = {}
    for data_req in retrieval.data_request.all():
        all_files = data_req.datafile_set.filter(online=False)
        filtered_files = date_filter_files(all_files, retrieval.start_year,
                                           retrieval.end_year)
        if filtered_files is None:
            continue
        # sorted(set(...)) on the flat values replaces the original
        # list(set([qs['tape_url'] for qs in ...values(...)])) + sort()
        tape_urls = sorted(set(
            filtered_files.values_list('tape_url', flat=True)))
        for tape_url in tape_urls:
            url_files = filtered_files.filter(tape_url=tape_url)
            # accumulate files per tape across all data requests
            tapes.setdefault(tape_url, []).extend(url_files)

    # lets get parallel to speed things up
    parallel_get_urls(tapes, args)

    # get a fresh DB connection after exiting from parallel operation
    django.db.connections.close_all()

    # check that all files were restored
    failed_files = False
    for data_req in retrieval.data_request.all():
        all_files = data_req.datafile_set.filter(online=False)
        missing_files = date_filter_files(all_files, retrieval.start_year,
                                          retrieval.end_year)
        if missing_files is None:
            continue
        # exists() is cheaper than count() when only a boolean is needed
        if missing_files.exists():
            failed_files = True
            # one remaining offline file is enough to report failure
            break

    if failed_files:
        _email_admin_failure(retrieval)
        logger.error('Failed retrieve_request.py for retrieval {}'.
                     format(args.retrieval_id))
    else:
        # set date_complete in the db
        retrieval.date_complete = timezone.now()
        retrieval.save()
        # send an email to advise the user that their data's been restored
        _email_user_success(retrieval)
        logger.debug('Completed retrieve_request.py for retrieval {}'.
                     format(args.retrieval_id))