def store_in_cdn(self, filename, image, cdn_path, download):
    """Upload an image file object to the CDN bucket at cdn_path/filename.

    When *download* is truthy, an additional copy named
    ``<name>-download.<ext>`` is created with Content-Disposition
    attachment metadata so browsers offer it as a download.
    The image file object is always closed, even on failure.
    """
    try:
        context = StorageContext(self.settings)
        bucket_prefix = (self.settings.storage_provider + "://" +
                         self.settings.publishing_buckets_prefix +
                         self.settings.ppp_cdn_bucket)
        resource = "/".join([bucket_prefix, cdn_path, filename])
        # rewind in case the file object has already been read
        image.seek(0)
        mime_type, _ = guess_type(filename)
        # adds image to bucket
        context.set_resource_from_file(
            resource, image, metadata={'Content-Type': mime_type})
        if download:
            attachment_metadata = {
                'Content-Disposition': str(
                    "Content-Disposition: attachment; filename=" + filename + ";"),
                'Content-Type': mime_type,
            }
            stem, ext = filename.rsplit('.', 1)
            download_resource = "/".join(
                [bucket_prefix, cdn_path, stem + "-download." + ext])
            # file is copied with additional metadata
            context.copy_resource(resource, download_resource,
                                  additional_dict_metadata=attachment_metadata)
    finally:
        image.close()
def do_activity(self, data=None):
    """Verify that the article's figures are served by the IIIF endpoint.

    Lists the figure files stored in the CDN bucket for the article,
    checks each one against the configured IIIF resolver, and fails
    permanently when any endpoint does not respond correctly.
    """
    try:
        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
        run = data['run']
        session = Session(self.settings)
        article_id = session.get_value(run, 'article_id')
        version = session.get_value(run, 'version')
    except Exception as e:
        self.logger.exception(str(e))
        return activity.activity.ACTIVITY_PERMANENT_FAILURE
    try:
        storage_context = StorageContext(self.settings)
        bucket = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
        images_resource = "".join((self.settings.storage_provider, "://", bucket,
                                   "/", article_id))
        files_in_bucket = storage_context.list_resources(images_resource)
        original_figures = article_structure.get_figures_for_iiif(files_in_bucket)
        iiif_path_for_article = self.settings.iiif_resolver.replace(
            '{article_id}', article_id)
        results = self.retrieve_endpoints_check(original_figures, iiif_path_for_article)
        # each result is a tuple whose first element is the success flag;
        # treat any falsy flag as a failed endpoint
        bad_images = [result for result in results if not result[0]]
        if bad_images:
            # report the endpoints that did not work
            self.emit_monitor_event(
                self.settings, article_id, version, run, self.pretty_name, "error",
                "Some images are not available through the IIIF endpoint: " +
                str(bad_images))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
        self.emit_monitor_event(
            self.settings, article_id, version, run, self.pretty_name, "end",
            "Finished Verification. All endpoints work. Article: " + article_id)
        return activity.activity.ACTIVITY_SUCCESS
    except Exception as e:
        self.logger.exception(str(e))
        self.emit_monitor_event(
            self.settings, article_id, version, run, self.pretty_name, "error",
            "An error occurred when checking IIIF endpoint. Article " + article_id +
            '; message: ' + str(e))
        return activity.activity.ACTIVITY_PERMANENT_FAILURE
def do_activity(self, data=None):
    """Convert the article's figure images to jpg and publish them to the CDN.

    Downloads each figure from the expanded folder, runs the image
    conversion ("Original" tif source -> jpg, with a download copy), and
    reports progress via monitor events.
    """
    if self.logger:
        self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
    run = data['run']
    session = Session(self.settings)
    version = session.get_value(run, 'version')
    article_id = session.get_value(run, 'article_id')
    self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name,
                            "start",
                            "Starting submission convert images to jpg for article " +
                            article_id)
    try:
        expanded_folder_name = session.get_value(run, 'expanded_folder')
        expanded_folder_bucket = (self.settings.publishing_buckets_prefix +
                                  self.settings.expanded_bucket)
        storage_provider = self.settings.storage_provider + "://"
        orig_resource = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name
        storage_context = StorageContext(self.settings)
        files_in_bucket = storage_context.list_resources(orig_resource)
        # materialise as a list: it is iterated below and then counted for the
        # end message (a lazy filter object would break the len() call on Py3)
        figures = [f for f in files_in_bucket if article_structure.article_figure(f)]
        # download is not a IIIF asset but is currently kept for compatibility
        # download may become obsolete in future formats
        formats = {"Original": {
            "sources": "tif",
            "format": "jpg",
            "download": "yes"
        }}
        for file_name in figures:
            figure_resource = orig_resource + "/" + file_name
            file_path = self.get_tmp_dir() + os.sep + file_name
            file_pointer = storage_context.get_resource_to_file_pointer(
                figure_resource, file_path)
            cdn_bucket_name = (self.settings.publishing_buckets_prefix +
                               self.settings.ppp_cdn_bucket)
            cdn_resource_path = storage_provider + cdn_bucket_name + "/" + article_id + "/"
            publish_locations = [cdn_resource_path]
            image_conversion.generate_images(self.settings, formats, file_pointer,
                                             article_structure.ArticleInfo(file_name),
                                             publish_locations, self.logger)
        self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name,
                                "end",
                                "Finished converting images for " + article_id + ": " +
                                str(len(figures)) + " images processed ")
        return activity.activity.ACTIVITY_SUCCESS
    except Exception as e:
        self.logger.exception("An error occurred during " + self.pretty_name)
        # str(e) rather than e.message: .message is not defined on all exceptions
        self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name,
                                "error",
                                "Error converting images to JPG for article" + article_id +
                                " message:" + str(e))
        return activity.activity.ACTIVITY_PERMANENT_FAILURE
def do_activity(self, data=None):
    """Verify that Glencoe video metadata is available for the article.

    Reads the article XML from the expanded bucket; when the XML contains
    media (video) tags, queries Glencoe and validates the returned
    sources.  An AssertionError from the Glencoe check is treated as
    "not ready yet": the activity sleeps and returns a temporary failure
    so the workflow retries.  Any other exception is a permanent failure.
    """
    if self.logger:
        self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
    run = data['run']
    session = Session(self.settings)
    article_id = session.get_value(run, 'article_id')
    version = session.get_value(run, 'version')
    self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name,
                            "start", "Starting Glencoe video check for " + article_id)
    try:
        expanded_folder = session.get_value(run, 'expanded_folder')
        if expanded_folder is None:
            raise RuntimeError("No session value for expanded folder")
        expanded_bucket = self.settings.publishing_buckets_prefix + self.settings.expanded_bucket
        self.logger.info("expanded_bucket: " + expanded_bucket)
        xml_filename = lax_provider.get_xml_file_name(self.settings, expanded_folder,
                                                      expanded_bucket, version)
        if xml_filename is None:
            raise RuntimeError("No xml_filename found.")
        xml_origin = "".join((self.settings.storage_provider, "://", expanded_bucket, "/",
                              expanded_folder + '/' + xml_filename))
        storage_context = StorageContext(self.settings)
        xml_content = storage_context.get_resource_as_string(xml_origin)
        if glencoe_check.has_videos(xml_content):
            # validate_sources raises AssertionError when Glencoe data is
            # missing or incomplete; that is handled below as a retry
            glencoe_check.validate_sources(
                glencoe_check.metadata(glencoe_check.check_msid(article_id),
                                       self.settings))
            self.emit_monitor_event(self.settings, article_id, version, run,
                                    self.pretty_name, "end",
                                    "Finished Verification. Glencoe is available. Article: " +
                                    article_id)
            return True
        # no media tags: nothing to verify against Glencoe
        self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name,
                                "end",
                                "Finished Verification. No Glencoe media tags found in xml. "
                                "Article: " + article_id)
        return True
    except AssertionError as err:
        self.logger.info(err)
        self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name,
                                "error",
                                "Glencoe video is not available for article " + article_id +
                                '; message: ' + str(err))
        # give Glencoe time to ingest the video before the workflow retries
        time.sleep(60)
        return activity.activity.ACTIVITY_TEMPORARY_FAILURE
    except Exception as e:
        self.logger.exception(str(e))
        self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name,
                                "error",
                                "An error occurred when checking for Glencoe video. Article " +
                                article_id + '; message: ' + str(e))
        return activity.activity.ACTIVITY_PERMANENT_FAILURE
def store_file(self, path, article_id):
    """Fetch the file at *path* and store it in the article's S3 resource.

    Returns the stored jpg file name; raises RuntimeError on any
    non-200 response from Glencoe.
    """
    storage_context = StorageContext(self.settings)
    response = requests.get(path)
    if response.status_code != 200:
        raise RuntimeError("Glencoe returned a %s status code for %s" %
                           (response.status_code, path))
    resource = self.s3_resources(path, article_id)
    self.logger.info("S3 resource: " + resource)
    storage_context.set_resource_from_string(
        resource, response.content,
        content_type=response.headers['content-type'])
    return os.path.split(resource)[-1]
def store_file(self, path, article_id):
    """Download *path* and store it under the derived S3 resource.

    Returns the stored jpg file name; raises RuntimeError when the
    remote Glencoe server does not answer with HTTP 200.
    """
    storage_context = StorageContext(self.settings)
    r = requests.get(path)
    if r.status_code == 200:
        resource = self.s3_resources(path, article_id)
        self.logger.info("S3 resource: " + resource)
        # the file name is the last path segment of the resource
        jpg_filename = os.path.split(resource)[-1]
        storage_context.set_resource_from_string(
            resource, r.content, content_type=r.headers['content-type'])
        return jpg_filename
    else:
        raise RuntimeError("Glencoe returned a %s status code for %s" %
                           (r.status_code, path))
def do_activity(self, data=None):
    """Copy the article's pre-ingest assets from the expanded folder to the CDN.

    Returns ACTIVITY_SUCCESS, or ACTIVITY_PERMANENT_FAILURE when any
    step raises.
    """
    run = data['run']
    session = Session(self.settings)
    version = session.get_value(run, 'version')
    article_id = session.get_value(run, 'article_id')
    self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name,
                            "start", "Depositing Ingest assets for " + article_id)
    try:
        expanded_folder_name = session.get_value(run, 'expanded_folder')
        expanded_folder_bucket = (self.settings.publishing_buckets_prefix +
                                  self.settings.expanded_bucket)
        cdn_bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
        storage_context = StorageContext(self.settings)
        storage_provider = self.settings.storage_provider + "://"
        orig_resource = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name
        files_in_bucket = storage_context.list_resources(orig_resource)
        pre_ingest_assets = article_structure.pre_ingest_assets(files_in_bucket)
        for file_name in pre_ingest_assets:
            orig_resource = (storage_provider + expanded_folder_bucket + "/" +
                             expanded_folder_name + "/" + file_name)
            dest_resource = (storage_provider + cdn_bucket_name + "/" +
                             article_id + "/" + file_name)
            storage_context.copy_resource(orig_resource, dest_resource)
            if self.logger:
                self.logger.info("Uploaded file %s to %s" % (file_name, cdn_bucket_name))
        self.emit_monitor_event(
            self.settings, article_id, version, run, self.pretty_name, "end",
            "Deposited Ingest assets for article " + article_id)
        return activity.activity.ACTIVITY_SUCCESS
    except Exception as e:
        self.logger.exception("Exception when Depositing Ingest assets")
        # str(e) rather than e.message: .message is not defined on all exceptions
        self.emit_monitor_event(
            self.settings, article_id, version, run, self.pretty_name, "error",
            "Error depositing Ingest assets for article " + article_id +
            " message:" + str(e))
        return activity.activity.ACTIVITY_PERMANENT_FAILURE
def do_activity(self, data=None):
    """Check every article figure against the IIIF endpoint.

    Fails permanently when any figure endpoint does not respond
    correctly, emitting the list of bad endpoints in the monitor event.
    """
    try:
        if self.logger:
            self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
        run = data['run']
        session = Session(self.settings)
        article_id = session.get_value(run, 'article_id')
        version = session.get_value(run, 'version')
    except Exception as e:
        self.logger.exception(str(e))
        return activity.activity.ACTIVITY_PERMANENT_FAILURE
    try:
        storage_context = StorageContext(self.settings)
        bucket = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
        images_resource = "".join((self.settings.storage_provider, "://", bucket, "/",
                                   article_id))
        files_in_bucket = storage_context.list_resources(images_resource)
        original_figures = article_structure.get_figures_for_iiif(files_in_bucket)
        iiif_path_for_article = self.settings.iiif_resolver.replace('{article_id}',
                                                                    article_id)
        results = self.retrieve_endpoints_check(original_figures, iiif_path_for_article)
        # first tuple element is the success flag; any falsy flag is a failure
        bad_images = [result for result in results if not result[0]]
        if bad_images:
            # report the endpoints that did not work
            self.emit_monitor_event(self.settings, article_id, version, run,
                                    self.pretty_name, "error",
                                    "Some images are not available through the IIIF endpoint: " +
                                    str(bad_images))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
        self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name,
                                "end",
                                "Finished Verification. All endpoints work. Article: " +
                                article_id)
        return activity.activity.ACTIVITY_SUCCESS
    except Exception as e:
        self.logger.exception(str(e))
        self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name,
                                "error",
                                "An error occurred when checking IIIF endpoint. Article " +
                                article_id + '; message: ' + str(e))
        return activity.activity.ACTIVITY_PERMANENT_FAILURE
def store_in_publish_locations(settings, filename, image, publish_locations, download):
    """Upload *image* as *filename* to each resource prefix in *publish_locations*.

    When *download* is truthy, each location also receives a copy named
    ``<name>-download.<ext>`` carrying Content-Disposition attachment
    metadata.  The image file object is always closed, even on failure.
    """
    try:
        storage_context = StorageContext(settings)
        # the content type and download metadata depend only on the file
        # name, so compute them once rather than once per location
        content_type, encoding = guess_type(filename)
        if download:
            dict_metadata = {'Content-Disposition':
                             str("Content-Disposition: attachment; filename=" +
                                 filename + ";"),
                             'Content-Type': content_type}
            filename_no_extension, extension = filename.rsplit('.', 1)
            file_download = filename_no_extension + "-download." + extension
        for resource in publish_locations:
            # rewind: the same file object is uploaded to multiple locations
            image.seek(0)
            storage_context.set_resource_from_file(resource + filename, image,
                                                   metadata={'Content-Type': content_type})
            if download:
                storage_context.copy_resource(resource + filename,
                                              resource + file_download,
                                              additional_dict_metadata=dict_metadata)
    finally:
        image.close()
def do_activity(self, data):
    """Mark the article's EIF file in the EIF bucket as ready to publish.

    Reads the eif_location from the session, builds the storage origin,
    and delegates the update to set_eif_to_publish.
    """
    session = Session(self.settings)
    eif_location = session.get_value(data['run'], 'eif_location')
    self.emit_monitor_event(self.settings, data['article_id'], data['version'],
                            data['run'], self.pretty_name, "start",
                            "Starting to set EIF to publish")
    try:
        if not isinstance(eif_location, basestring):
            self.logger.error(self.pretty_name + " error. eif_location must be string")
            raise Exception("eif_location not available")
        storage_context = StorageContext(self.settings)
        eif_origin = "".join((self.settings.storage_provider, "://",
                              self.settings.publishing_buckets_prefix +
                              self.settings.eif_bucket, "/", eif_location))
    except Exception as e:
        # str(e) rather than e.message: .message is not defined on all exceptions
        self.emit_monitor_event(self.settings, data['article_id'], data['version'],
                                data['run'], self.pretty_name, "error", str(e))
        return activity.activity.ACTIVITY_PERMANENT_FAILURE
    success, error = self.set_eif_to_publish(storage_context, eif_origin)
    if success:
        self.emit_monitor_event(self.settings, data['article_id'], data['version'],
                                data['run'], self.pretty_name, "end",
                                "Finished to set EIF to publish")
        return activity.activity.ACTIVITY_SUCCESS
    self.logger.error(error)
    self.emit_monitor_event(self.settings, data['article_id'], data['version'],
                            data['run'], self.pretty_name, "error", error)
    return activity.activity.ACTIVITY_PERMANENT_FAILURE
def store_in_cdn(self, filename, image, cdn_path, download):
    """Upload an image file object to the CDN bucket at cdn_path/filename.

    When *download* is truthy, a second copy named
    ``<name>-download.<ext>`` is created with Content-Disposition
    attachment metadata.  The image file object is always closed.
    """
    try:
        storage_context = StorageContext(self.settings)
        storage_provider = self.settings.storage_provider + "://"
        cdn_bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
        storage_resource = storage_provider + cdn_bucket_name + "/" + cdn_path + "/" + filename
        # adds image to bucket; rewind first in case it was already read
        image.seek(0)
        content_type, encoding = guess_type(filename)
        storage_context.set_resource_from_file(
            storage_resource, image, metadata={'Content-Type': content_type})
        if download:
            dict_metadata = {
                'Content-Disposition':
                    str("Content-Disposition: attachment; filename=" + filename + ";"),
                'Content-Type': content_type
            }
            filename_no_extension, extension = filename.rsplit('.', 1)
            file_download = filename_no_extension + "-download." + extension
            storage_resource_dest_download_cdn = storage_provider + cdn_bucket_name + "/" + cdn_path + "/" + \
                file_download
            # file is copied with additional metadata
            storage_context.copy_resource(
                storage_resource, storage_resource_dest_download_cdn,
                additional_dict_metadata=dict_metadata)
    finally:
        image.close()
def do_activity(self, data=None):
    """Deposit the article's remaining assets to the CDN bucket.

    Copies every expanded-folder asset that was not already deposited by
    the DepositIngestAssets activity; assets whose extension is not in
    the configured no-download list additionally receive a "-download"
    copy with attachment metadata.
    """
    run = data['run']
    session = Session(self.settings)
    version = session.get_value(run, 'version')
    article_id = session.get_value(run, 'article_id')
    self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name,
                            "start", "Depositing assets for " + article_id)
    try:
        expanded_folder_name = session.get_value(run, 'expanded_folder')
        expanded_folder_bucket = (self.settings.publishing_buckets_prefix +
                                  self.settings.expanded_bucket)
        storage_context = StorageContext(self.settings)
        storage_provider = self.settings.storage_provider + "://"
        orig_resource = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name
        files_in_bucket = storage_context.list_resources(orig_resource)
        # filter figures that have already been copied (see DepositIngestAssets activity)
        pre_ingest_assets = article_structure.pre_ingest_assets(files_in_bucket)
        other_assets = [asset for asset in files_in_bucket
                        if asset not in pre_ingest_assets]
        # assets bucket
        cdn_bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
        no_download_extensions = self.get_no_download_extensions(
            self.settings.no_download_extensions)
        for file_name in other_assets:
            orig_resource = (storage_provider + expanded_folder_bucket + "/" +
                             expanded_folder_name + "/")
            dest_resource = storage_provider + cdn_bucket_name + "/" + article_id + "/"
            storage_context.copy_resource(orig_resource + file_name,
                                          dest_resource + file_name)
            if self.logger:
                self.logger.info("Uploaded file %s to %s" % (file_name, cdn_bucket_name))
            file_name_no_extension, extension = file_name.rsplit('.', 1)
            if extension not in no_download_extensions:
                content_type = self.content_type_from_file_name(file_name)
                dict_metadata = {'Content-Disposition':
                                 str("Content-Disposition: attachment; filename=" +
                                     file_name + ";"),
                                 'Content-Type': content_type}
                file_download = file_name_no_extension + "-download." + extension
                # file is copied with additional metadata
                storage_context.copy_resource(orig_resource + file_name,
                                              dest_resource + file_download,
                                              additional_dict_metadata=dict_metadata)
        self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name,
                                "end", "Deposited assets for article " + article_id)
    except Exception as e:
        self.logger.exception("Exception when Depositing assets")
        # str(e) rather than e.message: .message is not defined on all exceptions
        self.emit_monitor_event(self.settings, article_id, version, run, self.pretty_name,
                                "error",
                                "Error depositing assets for article " + article_id +
                                " message:" + str(e))
        return activity.activity.ACTIVITY_PERMANENT_FAILURE
    return activity.activity.ACTIVITY_SUCCESS
def list_files_from_cdn(self, article_id):
    """Return the resources stored in the CDN bucket for *article_id*."""
    cdn_article_path = "%s://%s%s/%s" % (self.settings.storage_provider,
                                         self.settings.publishing_buckets_prefix,
                                         self.settings.ppp_cdn_bucket,
                                         article_id)
    return StorageContext(self.settings).list_resources(cdn_article_path)
def _get_bucket_files(settings, expanded_folder_name, xml_bucket):
    """List the resources under *expanded_folder_name* in *xml_bucket*."""
    folder_resource = "%s://%s/%s" % (settings.storage_provider, xml_bucket,
                                      expanded_folder_name)
    return StorageContext(settings).list_resources(folder_resource)
def do_activity(self, data=None):
    """Deposit article assets to the CDN and published buckets.

    Legacy variant: enumerates the expanded folder via a boto
    S3Connection, copies each asset to both the CDN bucket and the
    published bucket's /articles path, and creates "-download" copies
    (with attachment metadata) for extensions outside the configured
    no-download list.  Returns True on success, False on any exception.
    """
    run = data["run"]
    session = Session(self.settings)
    version = session.get_value(run, "version")
    article_id = session.get_value(run, "article_id")
    self.emit_monitor_event(
        self.settings, article_id, version, run, "Deposit assets", "start",
        "Depositing assets for " + article_id
    )
    try:
        # legacy boto connection, used only to enumerate the expanded folder keys
        conn = S3Connection(self.settings.aws_access_key_id,
                            self.settings.aws_secret_access_key)
        expanded_folder_name = session.get_value(run, "expanded_folder")
        expanded_folder_bucket = self.settings.publishing_buckets_prefix + self.settings.expanded_bucket
        expanded_bucket = conn.get_bucket(expanded_folder_bucket)
        cdn_bucket_name = self.settings.publishing_buckets_prefix + self.settings.ppp_cdn_bucket
        no_download_extensions = self.get_no_download_extensions(self.settings.no_download_extensions)
        storage_context = StorageContext(self.settings)
        storage_provider = self.settings.storage_provider + "://"
        published_bucket_path = (
            self.settings.publishing_buckets_prefix + self.settings.published_bucket + "/articles"
        )
        keys = self.get_keys(expanded_bucket, expanded_folder_name)
        for key in keys:
            (file_key, file_name) = key
            # file_key.copy(cdn_bucket_name, article_id + "/" + file_name)
            orig_resource = storage_provider + expanded_folder_bucket + "/" + expanded_folder_name + "/" + file_name
            dest_resource = storage_provider + cdn_bucket_name + "/" + article_id + "/" + file_name
            additional_dest_resource = storage_provider + published_bucket_path + "/" + article_id + "/" + file_name
            # every asset goes to both the CDN bucket and the published bucket
            storage_context.copy_resource(orig_resource, dest_resource)
            storage_context.copy_resource(orig_resource, additional_dest_resource)
            if self.logger:
                self.logger.info("Uploaded key %s to %s" % (file_name, cdn_bucket_name))
            file_name_no_extension, extension = file_name.rsplit(".", 1)
            if extension not in no_download_extensions:
                content_type = self.content_type_from_file_name(file_name)
                dict_metadata = {
                    "Content-Disposition": str("Content-Disposition: attachment; filename=" +
                                               file_name + ";"),
                    "Content-Type": content_type,
                }
                file_download = file_name_no_extension + "-download." + extension
                orig_resource_download = dest_resource
                dest_resource_download = storage_provider + cdn_bucket_name + "/" + article_id + "/" + file_download
                additional_dest_resource_download = (
                    storage_provider + published_bucket_path + "/" + article_id + "/" + file_download
                )
                # file is copied with additional metadata
                storage_context.copy_resource(
                    orig_resource_download, dest_resource_download,
                    additional_dict_metadata=dict_metadata
                )
                # additional metadata is already set in origin resource so it will be copied accross by default
                storage_context.copy_resource(dest_resource_download,
                                              additional_dest_resource_download)
        self.emit_monitor_event(
            self.settings, article_id, version, run, "Deposit assets", "end",
            "Deposited assets for article " + article_id,
        )
    except Exception as e:
        self.logger.exception("Exception when Depositing assets")
        # NOTE(review): e.message is undefined for many exception types — consider str(e)
        self.emit_monitor_event(
            self.settings, article_id, version, run, "Deposit assets", "error",
            "Error depositing assets for article " + article_id + " message:" + e.message,
        )
        return False
    return True
def do_activity(self, data=None):
    """Push the article XML from the CDN bucket to the Github repository.

    When Github settings are missing, the activity either fails
    permanently (live/prod/end2end environments) or is skipped as a
    success (test environments).  A RetryException from the Github
    update yields a temporary failure so the workflow retries.
    """
    self.emit_monitor_event(self.settings, data['article_id'], data['version'], data['run'],
                            self.pretty_name, "start",
                            "Starting Updating repository for article " + data['article_id'])
    # assert all Github settings have are not None when live
    # if Github settings are null and we are testing, skip activity
    if None in (self.settings.git_repo_path, self.settings.git_repo_name,
                self.settings.github_token):
        import settings as settingsLib
        # NOTE(review): self.settings is *called* here (self.settings()) but used as a
        # plain attribute everywhere else in this file — confirm it is actually callable
        if isinstance(self.settings(), settingsLib.live) or isinstance(self.settings(), settingsLib.prod) or \
                isinstance(self.settings(), settingsLib.end2end):
            self.emit_monitor_event(self.settings, data['article_id'], data['version'],
                                    data['run'], self.pretty_name, "error",
                                    "Error Updating repository for article. Github settings are unavailable.")
            return activity.activity.ACTIVITY_PERMANENT_FAILURE
        self.emit_monitor_event(self.settings, data['article_id'], data['version'],
                                data['run'], self.pretty_name, "end",
                                "UpdateRepository got skipped as there are no Github "
                                "settings (Test enviroment).")
        return True
    try:
        xml_file = lax_provider.get_xml_file_name(self.settings, data['article_id'],
                                                  self.settings.publishing_buckets_prefix +
                                                  self.settings.ppp_cdn_bucket,
                                                  data['version'])
        s3_file_path = data['article_id'] + "/" + xml_file
        #download xml
        with tempfile.TemporaryFile(mode='r+') as tmp:
            storage_context = StorageContext(self.settings)
            storage_provider = self.settings.storage_provider + "://"
            published_path = storage_provider + self.settings.publishing_buckets_prefix + \
                self.settings.ppp_cdn_bucket
            resource = published_path + "/" + s3_file_path
            # NOTE(review): the resource is fetched twice — once into tmp (not used
            # afterwards) and once as a string — confirm the file download is needed
            storage_context.get_resource_to_file(resource, tmp)
            file_content = storage_context.get_resource_as_string(resource)
            message = self.update_github(self.settings.git_repo_path + xml_file,
                                         file_content)
            self.logger.info(message)
            self.emit_monitor_event(self.settings, data['article_id'], data['version'],
                                    data['run'], self.pretty_name, "end",
                                    "Finished Updating repository for article. Details: " +
                                    message)
            return True
    except RetryException as e:
        # retryable condition from update_github; workflow will try again
        self.logger.info(e.message)
        return activity.activity.ACTIVITY_TEMPORARY_FAILURE
    except Exception as e:
        self.logger.exception("Exception in do_activity")
        self.emit_monitor_event(self.settings, data['article_id'], data['version'],
                                data['run'], self.pretty_name, "error",
                                "Error Updating repository for article. Details: " + str(e))
        return activity.activity.ACTIVITY_PERMANENT_FAILURE
def do_activity(self, data=None):
    """Expand the submitted article zip into the expanded bucket.

    Records identifying values in the session, downloads the zip named
    by the S3 notification, extracts it locally, and uploads every
    regular file to <article_version_id>/<run>/ in the expanded bucket.
    Returns True on success.
    """
    run = data['run']
    if self.logger:
        self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
    info = S3NotificationInfo.from_dict(data)
    storage_context = StorageContext(self.settings)
    session = Session(self.settings)
    filename_last_element = session.get_value(run, 'filename_last_element')
    # zip name contains version information for previously archived zip files
    article_structure = ArticleInfo(filename_last_element)
    article_id = article_structure.article_id
    session.store_value(run, 'article_id', article_id)
    session.store_value(run, 'file_name', info.file_name)
    if self.logger:
        self.logger.info("Expanding file %s" % info.file_name)
    version = session.get_value(run, 'version')
    status = article_structure.status
    if status is None or (status != 'vor' and status != 'poa'):
        self.logger.error("Name '%s' did not match expected pattern for status" %
                          filename_last_element)
        # status could not be determined, exit workflow.
        return activity.activity.ACTIVITY_PERMANENT_FAILURE
    article_version_id = article_id + '.' + version
    session.store_value(run, 'article_version_id', article_version_id)
    session.store_value(run, 'run', run)
    session.store_value(run, 'status', status)
    self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                            "start", "Starting expansion of article " + article_id)
    try:
        # download zip to temp folder
        tmp = self.get_tmp_dir()
        local_zip_file = self.open_file_from_tmp_dir(filename_last_element, mode='wb')
        storage_resource_origin = self.settings.storage_provider + "://" + info.bucket_name + "/" + info.file_name
        storage_context.get_resource_to_file(storage_resource_origin, local_zip_file)
        local_zip_file.close()
        # extract zip contents
        folder_name = path.join(article_version_id, run)
        content_folder = path.join(tmp, folder_name)
        makedirs(content_folder)
        with ZipFile(path.join(tmp, filename_last_element)) as zf:
            zf.extractall(content_folder)
        upload_filenames = []
        for f in listdir(content_folder):
            # skip directories and hidden/system entries (dot- or underscore-prefixed)
            if isfile(join(content_folder, f)) and f[0] != '.' and not f[0] == '_':
                upload_filenames.append(f)
        bucket_folder_name = article_version_id + '/' + run
        for filename in upload_filenames:
            source_path = path.join(content_folder, filename)
            dest_path = bucket_folder_name + '/' + filename
            storage_resource_dest = self.settings.storage_provider + "://" + self.settings.publishing_buckets_prefix + \
                self.settings.expanded_bucket + "/" + dest_path
            storage_context.set_resource_from_filename(storage_resource_dest, source_path)
        self.clean_tmp_dir()
        session.store_value(run, 'expanded_folder', bucket_folder_name)
        self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                "end",
                                "Finished expansion of article " + article_id +
                                " for version " + version + " run " + str(run) +
                                " into " + bucket_folder_name)
    except Exception as e:
        self.logger.exception("Exception when expanding article")
        self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                "error",
                                "Error expanding article " + article_id +
                                " message:" + e.message)
        return activity.activity.ACTIVITY_PERMANENT_FAILURE
    return True
def do_activity(self, data=None):
    """Expand the submitted article zip into the expanded bucket.

    Records identifying values in the session, downloads the zip named
    by the S3 notification, extracts it locally, validates the extracted
    file names via check_filenames, and uploads every regular file to
    <article_version_id>/<run>/ in the expanded bucket.  Returns True on
    success, ACTIVITY_PERMANENT_FAILURE otherwise.
    """
    run = data['run']
    if self.logger:
        self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
    info = S3NotificationInfo.from_dict(data)
    storage_context = StorageContext(self.settings)
    session = Session(self.settings)
    filename_last_element = session.get_value(run, 'filename_last_element')
    # zip name contains version information for previously archived zip files
    article_structure = ArticleInfo(filename_last_element)
    article_id = article_structure.article_id
    session.store_value(run, 'article_id', article_id)
    session.store_value(run, 'file_name', info.file_name)
    if self.logger:
        self.logger.info("Expanding file %s" % info.file_name)
    version = session.get_value(run, 'version')
    status = article_structure.status
    if status is None or (status != 'vor' and status != 'poa'):
        self.logger.error("Name '%s' did not match expected pattern for status" %
                          filename_last_element)
        # status could not be determined, exit workflow.
        return activity.activity.ACTIVITY_PERMANENT_FAILURE
    article_version_id = article_id + '.' + version
    session.store_value(run, 'article_version_id', article_version_id)
    session.store_value(run, 'run', run)
    session.store_value(run, 'status', status)
    self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                            "start", "Starting expansion of article " + article_id)
    try:
        # download zip to temp folder
        tmp = self.get_tmp_dir()
        local_zip_file = self.open_file_from_tmp_dir(filename_last_element, mode='wb')
        storage_resource_origin = (self.settings.storage_provider + "://" +
                                   info.bucket_name + "/" + info.file_name)
        storage_context.get_resource_to_file(storage_resource_origin, local_zip_file)
        local_zip_file.close()
        # extract zip contents
        folder_name = path.join(article_version_id, run)
        content_folder = path.join(tmp, folder_name)
        makedirs(content_folder)
        with ZipFile(path.join(tmp, filename_last_element)) as zf:
            zf.extractall(content_folder)
        # keep regular files only; skip hidden/system entries
        # (dot- or underscore-prefixed)
        upload_filenames = [f for f in listdir(content_folder)
                            if isfile(join(content_folder, f))
                            and f[0] != '.' and f[0] != '_']
        self.check_filenames(upload_filenames)
        bucket_folder_name = article_version_id + '/' + run
        for filename in upload_filenames:
            source_path = path.join(content_folder, filename)
            dest_path = bucket_folder_name + '/' + filename
            storage_resource_dest = (self.settings.storage_provider + "://" +
                                     self.settings.publishing_buckets_prefix +
                                     self.settings.expanded_bucket + "/" + dest_path)
            storage_context.set_resource_from_filename(storage_resource_dest, source_path)
        self.clean_tmp_dir()
        session.store_value(run, 'expanded_folder', bucket_folder_name)
        self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                "end",
                                "Finished expansion of article " + article_id +
                                " for version " + version + " run " + str(run) +
                                " into " + bucket_folder_name)
    except Exception as e:
        self.logger.exception("Exception when expanding article")
        # str(e) rather than e.message: .message is not defined on all exceptions
        self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article",
                                "error",
                                "Error expanding article " + article_id +
                                " message:" + str(e))
        return activity.activity.ACTIVITY_PERMANENT_FAILURE
    return True