Example #1
def main(args):
    """
    Main entry point
    """
    logger.debug('Starting delete_request.py for retrieval {}'.format(
        args.retrieval_id))

    deletion_retrieval = match_one(RetrievalRequest, id=args.retrieval_id)
    if not deletion_retrieval:
        logger.error('Unable to find retrieval id {}'.format(
            args.retrieval_id))
        sys.exit(1)

    if deletion_retrieval.date_deleted:
        logger.error('Retrieval {} was already deleted at {}.'.format(
            deletion_retrieval.id,
            deletion_retrieval.date_deleted.strftime('%Y-%m-%d %H:%M')))
        sys.exit(1)

    if not deletion_retrieval.data_finished:
        logger.error('Retrieval {} is not marked as finished.'.format(
            deletion_retrieval.id))
        sys.exit(1)

    problems_encountered = False
    directories_found = []
    base_output_dir = Settings.get_solo().base_output_dir

    # loop through all of the data requests in this retrieval
    for data_req in deletion_retrieval.data_request.all():
        online_req_files = data_req.datafile_set.filter(
            online=True, directory__isnull=False)
        files_to_delete = date_filter_files(online_req_files,
                                            deletion_retrieval.start_year,
                                            deletion_retrieval.end_year)

        if files_to_delete is None:
            continue

        if not args.force:
            # find any other retrieval requests that still need this data
            other_retrievals = RetrievalRequest.objects.filter(
                data_request=data_req, data_finished=False)
            # loop through the retrieval requests that still need this data
            # request
            for ret_req in other_retrievals:
                ret_online_files = data_req.datafile_set.filter(
                    online=True, directory__isnull=False)
                ret_filtered_files = date_filter_files(ret_online_files,
                                                       ret_req.start_year,
                                                       ret_req.end_year)
                if ret_filtered_files is None:
                    continue
                # remove the files that we have just found are still needed
                # from the list of files to delete
                files_to_delete = files_to_delete.difference(
                    ret_filtered_files)
                # list the parts of the data request that are still required
                logger.debug("{} {} to {} won't be deleted".format(
                    data_req, ret_req.start_year, ret_req.end_year))

        # don't (try to) delete anything that's in the CEDA archive
        files_to_delete = files_to_delete.exclude(
            directory__startswith=CEDA_ARCHIVE)

        # do the deleting
        if args.dryrun:
            logger.debug('{} {} files can be deleted.'.format(
                data_req,
                files_to_delete.distinct().count()))
        else:
            logger.debug('{} {} files will be deleted.'.format(
                data_req,
                files_to_delete.distinct().count()))
            for data_file in files_to_delete:
                old_file_dir = data_file.directory
                try:
                    os.remove(os.path.join(data_file.directory,
                                           data_file.name))
                except OSError as exc:
                    logger.error(str(exc))
                    problems_encountered = True
                else:
                    if data_file.directory not in directories_found:
                        directories_found.append(data_file.directory)
                    data_file.online = False
                    data_file.directory = None
                    data_file.save()

                # if a symbolic link exists from the base output directory
                # then delete this too
                if not old_file_dir.startswith(base_output_dir):
                    sym_link_dir = os.path.join(base_output_dir,
                                                construct_drs_path(data_file))
                    sym_link = os.path.join(sym_link_dir, data_file.name)
                    if not os.path.islink(sym_link):
                        logger.error(
                            "Expected {} to be a link but it isn't. "
                            "Leaving this file in place.".format(sym_link))
                        problems_encountered = True
                    else:
                        try:
                            os.remove(sym_link)
                        except OSError as exc:
                            logger.error(str(exc))
                            problems_encountered = True
                        else:
                            if sym_link_dir not in directories_found:
                                directories_found.append(sym_link_dir)

    if not args.dryrun:
        # delete any empty directories
        for directory in directories_found:
            if not os.listdir(directory):
                delete_drs_dir(directory)

        # set date_deleted in the db
        if not problems_encountered:
            deletion_retrieval.date_deleted = timezone.now()
            deletion_retrieval.save()
        else:
            logger.error(
                'Errors were encountered and so retrieval {} has not '
                'been marked as deleted. All possible files have been '
                'deleted.'.format(args.retrieval_id))

    logger.debug('Completed delete_request.py for retrieval {}'.format(
        args.retrieval_id))
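
This entry point only consumes args.retrieval_id, args.force and args.dryrun, so any wrapper that supplies those three attributes can drive it. What follows is a minimal sketch of such a command-line wrapper, assuming argparse; the option names and help strings are assumptions, not the script's real parser.

import argparse


def parse_args():
    """
    Build a hypothetical parser exposing the attributes that main() reads.
    """
    parser = argparse.ArgumentParser(
        description='Delete the restored files from a finished retrieval')
    parser.add_argument('retrieval_id', type=int,
                        help='the id of the retrieval request to delete')
    parser.add_argument('--force', action='store_true',
                        help='delete even if unfinished retrieval requests '
                             'still need the data')
    parser.add_argument('--dryrun', action='store_true',
                        help='only report how many files would be deleted')
    return parser.parse_args()


if __name__ == '__main__':
    main(parse_args())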
Example #2
def test_subset_entirely(self):
    data_files = models.DataFile.objects.filter(online=False)
    self.assertEqual(['test4', 'test8'],
                     _assertable(date_filter_files(data_files, 1952,
                                                   1992)))
Example #3
def test_subset_spans(self):
    data_files = models.DataFile.objects.filter(online=False)
    self.assertEqual(['test4', 'test8'],
                     _assertable(date_filter_files(data_files, 1975,
                                                   1985)))
Example #4
def test_both_span(self):
    data_files = models.DataFile.objects.all()
    self.assertEqual(['test1', 'test2', 'test4', 'test8'],
                     _assertable(date_filter_files(data_files, 1955,
                                                   1987)))
Example #5
def test_start_spans(self):
    data_files = models.DataFile.objects.all()
    self.assertEqual(['test2', 'test4', 'test8'],
                     _assertable(date_filter_files(data_files, 1965,
                                                   1995)))
Example #6
def test_end_spans(self):
    data_files = models.DataFile.objects.all()
    self.assertEqual(['test1', 'test2', 'test4'],
                     _assertable(date_filter_files(data_files, 1949,
                                                   1975)))
Example #7
def test_entirely_contains(self):
    data_files = models.DataFile.objects.all()
    self.assertEqual(['test1', 'test2'],
                     _assertable(date_filter_files(data_files, 1949,
                                                   1961)))
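
All six tests above exercise the same rule: a file is selected when its own period overlaps the requested start_year to end_year range. Below is a minimal plain-Python sketch of that overlap rule; FakeFile and date_filter_files_sketch are hypothetical stand-ins for the real Django DataFile queryset and helper, which appears to return None when the filter cannot be applied.

class FakeFile:
    """
    Hypothetical stand-in for the DataFile model, holding just a name
    and the years that the file's data covers.
    """
    def __init__(self, name, start_year, end_year):
        self.name = name
        self.start_year = start_year
        self.end_year = end_year


def date_filter_files_sketch(files, start_year, end_year):
    """
    Keep the files whose own period overlaps [start_year, end_year],
    mirroring the real helper by returning None when nothing matches.
    """
    matched = [f for f in files
               if f.start_year <= end_year and f.end_year >= start_year]
    return matched if matched else None


# a file covering 1970 to 1980 overlaps a 1975 to 1985 request,
# but one covering 1950 to 1960 does not
files = [FakeFile('test1', 1950, 1960), FakeFile('test4', 1970, 1980)]
print([f.name for f in date_filter_files_sketch(files, 1975, 1985)])
# prints ['test4']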
Example #8
def main(args):
    """
    Main entry point
    """
    logger.debug('Starting retrieve_request.py for retrieval {}'.
                 format(args.retrieval_id))

    # check retrieval
    retrieval = match_one(RetrievalRequest, id=args.retrieval_id)
    if not retrieval:
        logger.error('Unable to find retrieval id {}'.format(
            args.retrieval_id))
        sys.exit(1)

    if retrieval.date_complete:
        logger.error('Retrieval {} was already completed at {}.'.
                     format(retrieval.id,
                            retrieval.date_complete.strftime('%Y-%m-%d %H:%M')))
        sys.exit(1)

    tapes = {}
    for data_req in retrieval.data_request.all():
        all_files = data_req.datafile_set.filter(online=False)
        filtered_files = date_filter_files(all_files, retrieval.start_year,
                                           retrieval.end_year)
        if filtered_files is None:
            continue

        tape_urls = sorted({qs['tape_url']
                            for qs in filtered_files.values('tape_url')})

        for tape_url in tape_urls:
            url_files = filtered_files.filter(tape_url=tape_url)
            if tape_url in tapes:
                tapes[tape_url] = list(chain(tapes[tape_url], url_files))
            else:
                tapes[tape_url] = list(url_files)

    # retrieve from the tapes in parallel to speed things up
    parallel_get_urls(tapes, args)
    # get a fresh DB connection after exiting from parallel operation
    django.db.connections.close_all()

    # check that all files were restored
    failed_files = False
    for data_req in retrieval.data_request.all():
        all_files = data_req.datafile_set.filter(online=False)
        missing_files = date_filter_files(all_files, retrieval.start_year,
                                          retrieval.end_year)
        if missing_files is None:
            continue
        if missing_files.count() != 0:
            failed_files = True

    if failed_files:
        _email_admin_failure(retrieval)
        logger.error('Failed retrieve_request.py for retrieval {}'.
                     format(args.retrieval_id))
    else:
        # set date_complete in the db
        retrieval.date_complete = timezone.now()
        retrieval.save()

        # send an email to advise the user that their data's been restored
        _email_user_success(retrieval)

        logger.debug('Completed retrieve_request.py for retrieval {}'.
                     format(args.retrieval_id))
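
parallel_get_urls is not shown in this example, but the tapes dict built above maps each tape URL to the files staged on it, so a plausible shape is one worker per tape URL fetching everything on that tape together. The sketch below uses multiprocessing and is an assumption, not the script's real implementation; note that the django.db.connections.close_all() call after the parallel step fits this pattern, since database connections inherited across a fork should not be reused.

from multiprocessing import Pool


def _fetch_one_tape(item):
    """
    Hypothetical worker: restore every file staged on a single tape URL.
    The real retrieval logic is elided; this just reports the work unit.
    """
    tape_url, data_files = item
    print('would fetch {} files from {}'.format(len(data_files), tape_url))


def parallel_get_urls_sketch(tapes, args, num_procs=4):
    """
    Run one worker per tape URL so that files on the same tape are
    fetched together rather than interleaved across tapes.
    """
    with Pool(processes=num_procs) as pool:
        pool.map(_fetch_one_tape, sorted(tapes.items()))


if __name__ == '__main__':
    example_tapes = {'tape:url-1': ['file1.nc', 'file2.nc'],
                     'tape:url-2': ['file3.nc']}
    parallel_get_urls_sketch(example_tapes, args=None, num_procs=2)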