def test_callable_args_and_kwargs(self):
     """Check that stored args and kwargs are handed to the job's callable.

     One string positional argument and three keyword arguments (int,
     datetime and bool) are attached to the job. After saving, the matching
     scheduler entry is performed and its return value is compared with the
     expected rendering of the call.
     """
     job = self.JobClassFactory(
         callable='scheduler.tests.test_args_kwargs')
     date = timezone.now()
     JobArgFactory(arg_type='str_val', str_val='one', content_object=job)
     # (key, arg_type, value) -- the value is passed under the field named
     # by its arg_type.
     keyword_specs = (
         ('key1', 'int_val', 2),
         ('key2', 'datetime_val', date),
         ('key3', 'bool_val', False),
     )
     for key, arg_type, value in keyword_specs:
         JobKwargFactory(key=key,
                         arg_type=arg_type,
                         content_object=job,
                         **{arg_type: value})
     job.save()
     scheduler = django_rq.get_scheduler(job.queue)
     # Pick the scheduler entry that belongs to the job saved above.
     entry = next(
         queued for queued in scheduler.get_jobs()
         if queued.id == job.job_id)
     expected = "test_args_kwargs('one', key1=2, key2={}, key3=False)".format(
         repr(date))
     self.assertEqual(entry.perform(), expected)
 def test_save_and_schedule(self):
     """A job saved with an explicit id gets a ``job_id`` and is scheduled."""
     saved_job = self.JobClassFactory()
     saved_job.id = 1
     saved_job.save()
     # Query the scheduling state first, then check both outcomes.
     scheduled = saved_job.is_scheduled()
     self.assertIsNotNone(saved_job.job_id)
     self.assertTrue(scheduled)
 def test_save_and_schedule(self):
     """A job saved with an explicit id gets a ``job_id`` and is scheduled."""
     saved_job = self.JobClassFactory()
     saved_job.id = 1
     saved_job.save()
     # Query the scheduling state first, then check both outcomes.
     scheduled = saved_job.is_scheduled()
     self.assertIsNotNone(saved_job.job_id)
     self.assertTrue(scheduled)
 def test_delete_and_unschedule(self):
     """A saved job is scheduled; after ``delete()`` its ``job_id`` is not
     found in the scheduler."""
     job = self.JobClassFactory()
     job.save()
     scheduled_before_delete = job.is_scheduled()
     self.assertIsNotNone(job.job_id)
     self.assertTrue(scheduled_before_delete)
     # Grab the scheduler while the job still exists, then delete the job.
     scheduler = job.scheduler()
     job.delete()
     self.assertFalse(job.job_id in scheduler)
 def test_delete_and_unschedule(self):
     """Deleting a saved job removes its entry from the scheduler.

     The job is saved with an explicit id, confirmed to be scheduled, and
     then deleted. The membership check afterwards uses the ``job_id`` the
     job had while it was scheduled.
     """
     job = self.JobClassFactory()
     job.id = 1
     job.save()
     is_scheduled = job.is_scheduled()
     self.assertIsNotNone(job.job_id)
     self.assertTrue(is_scheduled)
     scheduler = job.scheduler()
     # Scheduler entries are identified by ``job.job_id``, not by the model
     # id assigned above, so remember it before the delete. Checking the
     # model id for membership would pass even if nothing was unscheduled.
     scheduled_job_id = job.job_id
     job.delete()
     self.assertFalse(scheduled_job_id in scheduler)
 def test_repeat_none_disable_then_enable(self):
     """Walk a non-repeating job through disable / enable transitions.

     Expected states, in order:
     * freshly created with a time two minutes after ``base_time``:
       enabled and scheduled;
     * disabled and moved to two minutes before ``base_time``: not scheduled;
     * re-enabled while keeping the earlier time: still not scheduled;
     * moved back to two minutes after ``base_time``: scheduled again.
     """
     base_time = timezone.now()
     later = base_time + timedelta(minutes=2)
     earlier = base_time - timedelta(minutes=2)
     job = self.JobClassFactory(scheduled_time=later, repeat=None)

     def check_state(enabled, scheduled_time, scheduled):
         # ``repeat`` must stay None through every transition.
         self.assertEqual(job.repeat, None)
         self.assertEqual(job.enabled, enabled)
         self.assertEqual(job.scheduled_time, scheduled_time)
         self.assertEqual(job.is_scheduled(), scheduled)

     check_state(True, later, True)

     job.enabled = False
     job.scheduled_time = earlier
     job.save()
     check_state(False, earlier, False)

     job.enabled = True
     job.save()
     check_state(True, earlier, False)

     job.scheduled_time = later
     job.save()
     check_state(True, later, True)
 def test_save_disabled(self):
     """Saving a job again after disabling it leaves ``job_id`` as None."""
     disabled_job = self.JobClassFactory()
     disabled_job.save()
     # Flip the flag on the already-saved job and save a second time.
     disabled_job.enabled = False
     disabled_job.save()
     self.assertIsNone(disabled_job.job_id)
 def test_save_enabled(self):
     """Saving a factory-built job leaves it with a ``job_id``."""
     enabled_job = self.JobClassFactory()
     enabled_job.save()
     self.assertIsNotNone(enabled_job.job_id)
 def test_save_disabled(self):
     """Saving a job again after disabling it leaves ``job_id`` as None."""
     disabled_job = self.JobClassFactory()
     disabled_job.save()
     # Flip the flag on the already-saved job and save a second time.
     disabled_job.enabled = False
     disabled_job.save()
     self.assertIsNone(disabled_job.job_id)
 def test_save_enabled(self):
     """Saving a factory-built job leaves it with a ``job_id``."""
     enabled_job = self.JobClassFactory()
     enabled_job.save()
     self.assertIsNotNone(enabled_job.job_id)
# How many times a single KDip upload is tried within one pass.
_MAX_KDIP_UPLOAD_TRIES = 5
# How many passes over a job's KDips are made before the job is finalised.
_MAX_JOB_UPLOAD_ATTEMPTS = 5


def _ensure_pid(job, kdip, logger):
    """Create an ARK for ``kdip`` and store its noid as the pid, unless a pid is already set."""
    if kdip.pid:
        logger.info("{} already has pid {}".format(kdip.kdip_id, kdip.pid))
        return

    try:
        pidman_client = DjangoPidmanRestClient()
        pidman_domain = settings.PIDMAN_DOMAIN
        pidman_policy = settings.PIDMAN_POLICY

        ark = pidman_client.create_ark(domain='{}'.format(pidman_domain),
                                       target_uri='http://myuri.org',
                                       policy='{}'.format(pidman_policy),
                                       name='{}'.format(kdip.kdip_id))

        kdip.pid = parse_ark(ark)['noid']
        kdip.save()

        logger.info("Ark {} was created for {}".format(ark, kdip.kdip_id))
    except Exception as e:
        # Best effort: the failure is recorded through kdip_fail and the
        # caller carries on; the upload step refuses KDips without a pid.
        trace = traceback.format_exc()
        logger.error("Failed creating an ARK for %s: %s" %
                     (kdip.kdip_id, e))
        reason = "Box uplaod failed while making an ARK line 161 " + ' ' + trace
        print('ERROR: {}'.format(reason))
        kdip_fail(job, kdip, reason)


def _build_package(kdip, logger):
    """Gather the KDip's files flat into ``kdip.process_dir``, verify checksums, zip it and remove the directory."""
    if not os.path.exists(kdip.process_dir):
        os.makedirs(kdip.process_dir)

    # Record each file's checksum via `checksumfile`, then copy the file to
    # the process directory. HT does not want sub directories in the package.
    for tiff in glob.glob('{}/{}/TIFF/*.tif'.format(kdip.path, kdip.kdip_id)):
        checksumfile(tiff, kdip.process_dir)
        shutil.copy(tiff, kdip.process_dir)

    for alto in glob.glob('{}/{}/ALTO/*.xml'.format(kdip.path, kdip.kdip_id)):
        checksumfile(alto, kdip.process_dir)
        shutil.copy(alto, kdip.process_dir)
        if 'alto' in alto:
            # Expects a three-part file name (page.<middle>.ext) and drops
            # the middle part for the packaged name.
            # NOTE(review): this moves the *source* file out of the ALTO
            # directory (a copy was already placed in process_dir above);
            # confirm that renaming the copy was not what was intended.
            page, _middle, ext = alto.split('/')[-1].split('.')
            shutil.move(alto, '{}/{}.{}'.format(kdip.process_dir, page, ext))

    for ocr in glob.glob('{}/{}/OCR/*.txt'.format(kdip.path, kdip.kdip_id)):
        checksumfile(ocr, kdip.process_dir)
        shutil.copy(ocr, kdip.process_dir)

    metadata_files = (kdip.meta_yml, kdip.marc_xml, kdip.mets_xml)
    for metadata_file in metadata_files:
        checksumfile(metadata_file, kdip.process_dir)
    for metadata_file in metadata_files:
        shutil.copy(metadata_file, kdip.process_dir)

    # After copying everything, check each entry of checksum.md5 with
    # `checksumverify`. A mismatch is logged but does not stop packaging.
    with open('{}/checksum.md5'.format(kdip.process_dir)) as f:
        content = f.readlines()
    for line in content:
        parts = line.split()
        verify = checksumverify(parts[0], kdip.process_dir, parts[1])
        if verify is not True:
            logger.error('Checksum check failes for %s.' %
                         kdip.process_dir)

    # Make the zip file. `zipdir('.')` works relative to the current
    # directory, so step into the process directory for the duration and
    # step back out afterwards: the directory is deleted below and the
    # process must not be left sitting inside a removed directory.
    try:
        previous_cwd = os.getcwd()
    except OSError:
        # The current directory no longer exists; nothing to go back to.
        previous_cwd = None

    with zipfile.ZipFile('{}.zip'.format(kdip.process_dir), 'w',
                         zipfile.ZIP_DEFLATED, allowZip64=True) as zipf:
        os.chdir(kdip.process_dir)
        try:
            zipdir('.', zipf)
        finally:
            if previous_cwd is not None:
                os.chdir(previous_cwd)

    # Delete the process directory to save space
    # but we keep the zip file
    shutil.rmtree(kdip.process_dir)


def _upload_with_retries(job, kdip, logger):
    """Upload one KDip, retrying on connection errors; any other error fails it at once."""
    attempts = 0

    while attempts < _MAX_KDIP_UPLOAD_TRIES:
        try:
            # Don't upload if no pid
            if kdip.pid:
                upload_file(job, kdip)
            else:
                kdip_fail(job, kdip, '{} has no pid.'.format(kdip.kdip_id))
            break

        except ConnectionError:
            trace = traceback.format_exc()
            attempts += 1
            sleep(5)
            reason = 'Connection Error, failed to upload {}.'.format(kdip.kdip_id)
            print('ERROR: {}'.format(reason))
            kdip.status = 'retry'
            kdip.save()
            if attempts == _MAX_KDIP_UPLOAD_TRIES:
                kdip_fail(job, kdip, reason)
            else:
                logger.error('{} failed to upload on attempt {} : {}'.format(
                    kdip.kdip_id, attempts, trace))

        except SysCallError:
            reason = "SSL Error while uploading {}: {}".format(
                kdip.kdip_id, traceback.format_exc())
            logger.error(reason)
            kdip_fail(job, kdip, reason)
            break

        except TypeError:
            reason = "TypeError in upload package for {}: {}".format(
                kdip.kdip_id, traceback.format_exc())
            logger.error(reason)
            kdip_fail(job, kdip, reason)
            break

        except MemoryError:
            reason = "MemoryError for " + kdip.kdip_id
            logger.error(reason)
            kdip_fail(job, kdip, reason)
            break

        except Exception as e:
            reason = "Unexpected error for {}: {}, {}".format(
                kdip.kdip_id, str(e), traceback.format_exc())
            logger.error(reason)
            kdip_fail(job, kdip, reason)
            break


def upload_for_ht(job, count=1):
    """
    Task to upload files to Box in the background.

    Every KDip of ``job`` whose status is neither 'uploaded' nor
    'upload_fail' is processed. On the job's first pass
    (``job.upload_attempts == 0``) the KDip is given a pid if it has none and
    its zip package is built; on every pass an upload is attempted.

    After each pass ``job.upload_attempts`` is incremented and the function
    calls itself again until the attempt limit is reached. The job is then
    saved as 'failed' if any KDip has status 'upload_fail'; otherwise it is
    saved as 'being processed' and a notification mail listing the uploaded
    KDips is sent.

    :param job: job whose KDips are to be uploaded.
    :param count: not consulted by the logic; decremented and forwarded on
        each recursive call.
    """
    logger = logging.getLogger(__name__)

    pending = models.KDip.objects.filter(job__id=job.id).exclude(
        status='uploaded').exclude(status='upload_fail')
    for kdip in pending:
        # Pids and packages are only produced on the job's first pass.
        if job.upload_attempts == 0:
            _ensure_pid(job, kdip, logger)
            _build_package(kdip, logger)

        _upload_with_retries(job, kdip, logger)

    # Check to see if all the KDips uploaded.
    job.upload_attempts = job.upload_attempts + 1

    # `<` / `>=` rather than an exact match: a job that enters with the
    # limit already reached must finish here instead of recursing forever.
    if job.upload_attempts < _MAX_JOB_UPLOAD_ATTEMPTS:
        return upload_for_ht(job, count - 1)

    statuses = job.kdip_set.values_list('status', flat=True)
    if 'upload_fail' in statuses:
        job.status = 'failed'
        job.save()
    else:
        job.status = 'being processed'
        job.save()
        kdip_list = '\n'.join(job.kdip_set.filter(
            status='uploaded').values_list('kdip_id', flat=True))
        logger.info(kdip_list)
        send_to = settings.HATHITRUST_CONTACTS + settings.EMORY_MANAGERS
        send_from = settings.EMORY_CONTACT
        send_mail('New Volumes from Emory have been uploaded', 'The following volumes have been uploaded and are ready:\n\n{}'.format(kdip_list), send_from, send_to, fail_silently=False)
Exemple #12
0
 def post(self, request, *args, **kwargs):
     """Run the parent ``post`` handler, then stamp the job with today's date.

     The job is looked up with the view's keyword arguments, its
     ``published_at`` is set to ``date.today()`` and it is saved. The parent
     handler's response is returned unchanged.
     """
     parent_response = super(JobPublishView, self).post(
         request, *args, **kwargs)
     published_job = Job.objects.get(**kwargs)
     published_job.published_at = date.today()
     published_job.save()
     return parent_response
Exemple #13
0
 def post(self, request, *args, **kwargs):
     """Run the parent ``post`` handler, then stamp the job with today's date.

     The job is looked up with the view's keyword arguments, its
     ``published_at`` is set to ``date.today()`` and it is saved. The parent
     handler's response is returned unchanged.
     """
     parent_response = super(JobPublishView, self).post(
         request, *args, **kwargs)
     published_job = Job.objects.get(**kwargs)
     published_job.published_at = date.today()
     published_job.save()
     return parent_response