def test_callable_args_and_kwargs(self):
    """Arguments stored on a job are handed to its callable when performed.

    Attaches one positional string argument and three typed keyword
    arguments (int, datetime, bool) to a job, saves it, looks up the
    matching entry in the scheduler and checks the value returned by
    performing that entry.
    """
    job = self.JobClassFactory(
        callable='scheduler.tests.test_args_kwargs')
    moment = timezone.now()
    JobArgFactory(arg_type='str_val', str_val='one', content_object=job)

    # (key, typed field name, value) for each keyword argument row.
    keyword_rows = (
        ('key1', 'int_val', 2),
        ('key2', 'datetime_val', moment),
        ('key3', 'bool_val', False),
    )
    for key, field_name, value in keyword_rows:
        typed_value = {field_name: value}
        JobKwargFactory(
            key=key, arg_type=field_name, content_object=job, **typed_value)

    job.save()
    scheduler = django_rq.get_scheduler(job.queue)
    entry = next(
        queued for queued in scheduler.get_jobs() if queued.id == job.job_id)
    expected = "test_args_kwargs('one', key1=2, key2={}, key3=False)".format(
        repr(moment))
    self.assertEqual(entry.perform(), expected)
def test_save_and_schedule(self):
    """Saving a job with an explicit primary key schedules it.

    After ``save()`` the job must report itself as scheduled and carry a
    non-``None`` ``job_id``.
    """
    job = self.JobClassFactory()
    job.id = 1
    job.save()

    # Query the scheduler state before asserting, as the checks are
    # independent of each other.
    scheduled = job.is_scheduled()

    self.assertIsNotNone(job.job_id)
    self.assertTrue(scheduled)
def test_delete_and_unschedule(self):
    """Deleting a scheduled job removes it from the scheduler.

    The job is saved and confirmed to be scheduled; after ``delete()`` its
    ``job_id`` must no longer be contained in the scheduler.
    """
    job = self.JobClassFactory()
    job.save()

    scheduled_before = job.is_scheduled()
    self.assertIsNotNone(job.job_id)
    self.assertTrue(scheduled_before)

    # Grab the scheduler first; the job object is deleted on the next line.
    scheduler = job.scheduler()
    job.delete()

    scheduled_after = job.job_id in scheduler
    self.assertFalse(scheduled_after)
def test_delete_and_unschedule(self):
    """Deleting a scheduled job removes it from the scheduler.

    The job is given an explicit primary key, saved and confirmed to be
    scheduled. After ``delete()`` the scheduler must no longer contain the
    job's scheduler id.
    """
    job = self.JobClassFactory()
    job.id = 1
    job.save()

    is_scheduled = job.is_scheduled()
    self.assertIsNotNone(job.job_id)
    self.assertTrue(is_scheduled)

    scheduler = job.scheduler()
    # The scheduler is keyed by ``job.job_id``, not by the database primary
    # key; checking the primary key would make the assertion below pass
    # unconditionally. Capture the id before deleting in case ``delete()``
    # clears it on the instance.
    scheduled_job_id = job.job_id
    job.delete()

    self.assertNotIn(scheduled_job_id, scheduler)
def test_repeat_none_disable_then_enable(self):
    """A non-repeating job is scheduled only while enabled and in the future.

    Walks one job through four states and checks ``repeat``, ``enabled``,
    ``scheduled_time`` and ``is_scheduled()`` after each step:

    1. created enabled, two minutes in the future -> scheduled
    2. disabled and moved two minutes into the past -> not scheduled
    3. re-enabled, still in the past -> not scheduled
    4. moved back into the future -> scheduled
    """
    base_time = timezone.now()
    in_future = base_time + timedelta(minutes=2)
    in_past = base_time - timedelta(minutes=2)

    def check_state(enabled, scheduled_time, scheduled):
        # Same four assertions, in the same order, for every step.
        self.assertEqual(job.repeat, None)
        self.assertEqual(job.enabled, enabled)
        self.assertEqual(job.scheduled_time, scheduled_time)
        self.assertEqual(job.is_scheduled(), scheduled)

    job = self.JobClassFactory(scheduled_time=in_future, repeat=None)
    check_state(True, in_future, True)

    job.enabled = False
    job.scheduled_time = in_past
    job.save()
    check_state(False, in_past, False)

    job.enabled = True
    job.save()
    check_state(True, in_past, False)

    job.scheduled_time = in_future
    job.save()
    check_state(True, in_future, True)
def test_save_disabled(self):
    """Saving a job after disabling it leaves it without a ``job_id``."""
    disabled_job = self.JobClassFactory()
    disabled_job.save()

    # Flip the flag on the already-saved job and persist again.
    disabled_job.enabled = False
    disabled_job.save()

    self.assertIsNone(disabled_job.job_id)
def test_save_enabled(self):
    """Saving a factory-built job assigns it a ``job_id``."""
    enabled_job = self.JobClassFactory()
    enabled_job.save()

    self.assertIsNotNone(enabled_job.job_id)
def upload_for_ht(job, count=1):
    """
    Task to upload files to Box in the background.

    For every KDip of ``job`` whose status is neither ``uploaded`` nor
    ``upload_fail``:

    * on the job's first upload attempt, create an ARK/PID if the KDip has
      none, copy its TIFF, ALTO, OCR and metadata files into
      ``kdip.process_dir`` (recording a checksum for each), verify the
      checksums, zip the directory and remove it;
    * try to upload the package, retrying connection errors up to five
      times and failing the KDip immediately on any other error.

    Afterwards ``job.upload_attempts`` is incremented and the function
    either calls itself again, marks the job ``failed``, or marks it
    ``being processed`` and e-mails the list of uploaded volumes.

    ``job`` is the job whose KDips are packaged and uploaded. ``count`` is
    only decremented and passed on to the recursive call. Every terminal
    branch returns ``None``.
    """
    logger = logging.getLogger(__name__)

    pending_kdips = models.KDip.objects.filter(job__id=job.id).exclude(
        status='uploaded').exclude(status='upload_fail')

    for kdip in pending_kdips:
        # NOTE(review): PID creation and packaging are assumed to belong to
        # the first-attempt branch (later attempts reuse the zip) -- confirm
        # against the intended nesting.
        if job.upload_attempts == 0:
            # Only create a PID if it doesn't already have one
            if not kdip.pid:
                try:
                    pidman_client = DjangoPidmanRestClient()
                    pidman_domain = settings.PIDMAN_DOMAIN
                    pidman_policy = settings.PIDMAN_POLICY
                    ark = pidman_client.create_ark(
                        domain='{}'.format(pidman_domain),
                        target_uri='http://myuri.org',
                        policy='{}'.format(pidman_policy),
                        name='{}'.format(kdip.kdip_id))
                    noid = parse_ark(ark)['noid']
                    kdip.pid = noid
                    kdip.save()
                    logger.info("Ark {} was created for {}".format(ark, kdip.kdip_id))
                except Exception as e:
                    # Best effort: record the failure on the KDip and carry
                    # on; the upload step below refuses KDips without a pid.
                    trace = traceback.format_exc()
                    logger.error("Failed creating an ARK for %s: %s" % (kdip.kdip_id, e))
                    reason = "Box uplaod failed while making an ARK line 161 " + ' ' + trace
                    print('ERROR: {}'.format(reason))
                    kdip_fail(job, kdip, reason)
            else:
                logger.info("{} already has pid {}".format(kdip.kdip_id, kdip.pid))

            if not os.path.exists(kdip.process_dir):
                os.makedirs(kdip.process_dir)

            # Gather everything and write the file's checksum to a file via
            # the `checksum` method. Then copy the file to the temp
            # directory. HT does not want sub directories in the package.
            tiffs = glob.glob('{}/{}/TIFF/*.tif'.format(kdip.path, kdip.kdip_id))
            for tiff in tiffs:
                checksumfile(tiff, kdip.process_dir)
                shutil.copy(tiff, kdip.process_dir)

            altos = glob.glob('{}/{}/ALTO/*.xml'.format(kdip.path, kdip.kdip_id))
            for alto in altos:
                checksumfile(alto, kdip.process_dir)
                shutil.copy(alto, kdip.process_dir)
                if 'alto' in alto:
                    # A three-part name (page, middle part, extension) is
                    # moved from its source location into the process
                    # directory as `<page>.<ext>`.
                    filename = alto.split('/')
                    page, _middle, ext = filename[-1].split('.')
                    shutil.move(alto, '{}/{}.{}'.format(kdip.process_dir, page, ext))

            ocrs = glob.glob('{}/{}/OCR/*.txt'.format(kdip.path, kdip.kdip_id))
            for ocr in ocrs:
                checksumfile(ocr, kdip.process_dir)
                shutil.copy(ocr, kdip.process_dir)

            checksumfile(kdip.meta_yml, kdip.process_dir)
            checksumfile(kdip.marc_xml, kdip.process_dir)
            checksumfile(kdip.mets_xml, kdip.process_dir)

            shutil.copy(kdip.meta_yml, kdip.process_dir)
            shutil.copy(kdip.marc_xml, kdip.process_dir)
            shutil.copy(kdip.mets_xml, kdip.process_dir)

            # After copying all the files to the tmp directory, verify that
            # each checksum matches the one recorded before the copy. A
            # mismatch is only logged; it does not stop the packaging.
            with open('{}/checksum.md5'.format(kdip.process_dir)) as f:
                content = f.readlines()
                for line in content:
                    parts = line.split()
                    verify = checksumverify(parts[0], kdip.process_dir, parts[1])
                    if verify is not True:
                        logger.error('Checksum check failes for %s.'
                                     % kdip.process_dir)

            # Make the zip file. The archive is opened before changing
            # directory so its path resolves exactly as before. The working
            # directory is restored afterwards because the process directory
            # is deleted below and must not remain the process's cwd.
            previous_cwd = os.getcwd()
            zipf = zipfile.ZipFile('{}.zip'.format(kdip.process_dir), 'w',
                                   zipfile.ZIP_DEFLATED, allowZip64=True)
            try:
                os.chdir(kdip.process_dir)
                zipdir('.', zipf)
            finally:
                zipf.close()
                os.chdir(previous_cwd)

            # Delete the process directory to save space
            # but we keep the zip file
            shutil.rmtree(kdip.process_dir)

        attempts = 0

        while attempts < 5:
            try:
                # Don't upload if no pid
                if kdip.pid:
                    upload_file(job, kdip)
                else:
                    kdip_fail(job, kdip, '{} has no pid.'.format(kdip.kdip_id))
                break

            except ConnectionError:
                # Connection problems are the only retried failure: wait,
                # flag the KDip for retry, and give up after the 5th try.
                trace = traceback.format_exc()
                attempts += 1
                sleep(5)
                reason = 'Connection Error, failed to upload {}.'.format(kdip.kdip_id)
                print('ERROR: {}'.format(reason))
                kdip.status = 'retry'
                kdip.save()
                if attempts == 5:
                    kdip_fail(job, kdip, reason)
                else:
                    logger.error(
                        '{} failed to upload on attempt {} : {}'.format(
                            kdip.kdip_id, attempts, trace))

            # Every other error fails the KDip at once; setting
            # `attempts = 5` ends the retry loop.
            except SysCallError:
                trace = traceback.format_exc()
                attempts = 5
                reason = "SSL Error while uploading {}: {}".format(kdip.kdip_id, trace)
                logger.error(reason)
                kdip_fail(job, kdip, reason)

            except TypeError:
                trace = traceback.format_exc()
                attempts = 5
                reason = "TypeError in upload package for {}: {}".format(kdip.kdip_id, trace)
                logger.error(reason)
                kdip_fail(job, kdip, reason)

            except MemoryError:
                trace = traceback.format_exc()
                attempts = 5
                reason = "MemoryError for " + kdip.kdip_id
                logger.error(reason)
                kdip_fail(job, kdip, reason)

            except Exception as e:
                trace = traceback.format_exc()
                attempts = 5
                reason = "Unexpected error for {}: {}, {}".format(kdip.kdip_id, str(e), trace)
                logger.error(reason)
                kdip_fail(job, kdip, reason)

    # Check to see if all the KDips uploaded.
    job.upload_attempts += 1
    statuses = job.kdip_set.values_list('status', flat=True)

    if ('retry' in statuses) and (job.upload_attempts < 5):
        return upload_for_ht(job, count - 1)

    # `>= 5` rather than `== 5`: a job that enters with five or more
    # recorded attempts must still reach a terminal branch instead of
    # recursing without end.
    elif ('upload_fail' in statuses) and (job.upload_attempts >= 5):
        job.status = 'failed'
        job.save()

    elif job.upload_attempts >= 5:
        job.status = 'being processed'
        job.save()
        kdip_list = '\n'.join(job.kdip_set.filter(
            status='uploaded').values_list('kdip_id', flat=True))
        logger.info(kdip_list)
        send_to = settings.HATHITRUST_CONTACTS + settings.EMORY_MANAGERS
        send_from = settings.EMORY_CONTACT
        send_mail('New Volumes from Emory have been uploaded',
                  'The following volumes have been uploaded and are ready:\n\n{}'.format(kdip_list),
                  send_from, send_to, fail_silently=False)

    else:
        return upload_for_ht(job, count - 1)
def post(self, request, *args, **kwargs):
    """Run the parent view's POST handling, then stamp the job as published.

    After the parent ``post`` has returned, the ``Job`` selected by the
    view's keyword arguments is loaded, its ``published_at`` is set to
    today's date, and it is saved. The parent's response is returned
    unchanged.
    """
    parent_response = super(JobPublishView, self).post(request, *args, **kwargs)

    # The keyword arguments double as the lookup for the job.
    published_job = Job.objects.get(**kwargs)
    published_job.published_at = date.today()
    published_job.save()

    return parent_response