def get_article_xml_key(bucket, expanded_folder_name):
    """Find the article XML key directly inside an expanded folder.

    Lists the bucket with prefix ``expanded_folder_name + "/"`` and
    delimiter ``"/"``, fetches each listed key and inspects its file name
    (the part after the last ``/``) with ``ArticleInfo``.

    Returns a ``(key, filename)`` tuple for the first entry whose
    ``file_type`` is ``'ArticleXML'``, or ``None`` when there is none.
    """
    prefix = expanded_folder_name + "/"
    for listed_entry in bucket.list(prefix, "/"):
        s3_key = bucket.get_key(listed_entry.key)
        # strip the folder part, keep only the file name
        base_name = s3_key.name.rsplit('/', 1)[1]
        article_info = ArticleInfo(base_name)
        if article_info.file_type == 'ArticleXML':
            return s3_key, base_name
    return None
def process_key(self, key, cdn_path):
    """Generate images for ``key`` when its file type has formats available.

    The file name is taken from ``key.name`` (text after the last ``/``),
    parsed with ``ArticleInfo``, and its ``file_type`` is passed to
    ``self.get_formats``. When formats are returned, the key's file pointer
    is obtained and handed to ``self.generate_images`` together with
    ``cdn_path``. Nothing is done when ``get_formats`` returns ``None``.
    """
    # determine filename (without folder); [-1] also works for a key that
    # has no folder component, where [1] would raise IndexError
    filename = key.name.rsplit('/', 1)[-1]
    info = ArticleInfo(filename)

    # see if we have any formats available for the file_type of this file
    formats = self.get_formats(info.file_type)
    if formats is None:
        return

    # generate images for relevant formats
    fp = self.get_file_pointer(key)
    self.generate_images(formats, fp, info, cdn_path)
def get_xml_file_name(self, settings, expanded_folder_name, xml_bucket, version):
    """Return the name of the article XML file in an expanded folder.

    File names come from ``self._get_bucket_files``. Only names whose
    ``ArticleInfo.file_type`` is ``'ArticleXML'`` are considered. With
    ``version`` set to ``None`` the first such name is returned; otherwise
    the first one containing ``'-v' + version + '.'``.

    Returns ``None`` when nothing matches.
    """
    candidates = self._get_bucket_files(settings, expanded_folder_name, xml_bucket)
    for candidate in candidates:
        if ArticleInfo(candidate).file_type != 'ArticleXML':
            continue
        if version is None:
            # no version requested: the first XML file wins
            return candidate
        version_marker = '-v' + version + '.'
        if version_marker in candidate:
            return candidate
    return None
def do_activity(self, data=None):
    """Look up the version date for the article of the current run.

    Reads ``version`` and ``filename_last_element`` from the session for
    ``data['run']``, builds an ``ArticleInfo`` from the file name and asks
    ``self.get_version`` for the version date. On success the date is
    stored in the session under ``update_date``.

    Returns ``activity.activity.ACTIVITY_SUCCESS`` when the date was
    stored, and ``activity.activity.ACTIVITY_PERMANENT_FAILURE`` when
    ``get_version`` reports an error or any exception is raised.
    """
    # Bound before the try block so the exception handler can always refer
    # to them, even when the failure happens on the very first statements.
    run = None
    version = None
    article_id = None
    try:
        session = Session(self.settings)
        run = data['run']
        version = session.get_value(run, 'version')
        filename = session.get_value(run, 'filename_last_element')
        article_structure = ArticleInfo(filename)
        article_id = article_structure.article_id

        version_date, error = self.get_version(
            self.settings, article_structure, article_id, version)

        if error is not None:
            self.logger.error(error)
            self.emit_monitor_event(
                self.settings, article_id, version, run,
                self.pretty_name, "error",
                " ".join(("Error Looking up version article", article_id,
                          "message:", error)))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        self.emit_monitor_event(
            self.settings, article_id, version, run,
            self.pretty_name, "end",
            " ".join(("Finished Version Lookup for article", article_id,
                      "version:", version)))
        session.store_value(run, 'update_date', version_date)
        return activity.activity.ACTIVITY_SUCCESS
    except Exception as e:
        self.logger.exception(
            "Exception when trying to Lookup next version")
        # Without an article id the message cannot be built (joining None
        # raises), so the failure is only logged in that case instead of
        # raising a second exception from inside this handler.
        if article_id is not None:
            self.emit_monitor_event(
                self.settings, article_id, version, run,
                self.pretty_name, "error",
                " ".join(("Error looking up version for article", article_id,
                          "message:", str(e))))
        return activity.activity.ACTIVITY_PERMANENT_FAILURE
def find_xml_filename_in_map(self, file_name_map):
    """Return the first new file name in the map that is an article XML.

    ``file_name_map`` maps old file names to new file names; only the new
    names (the values) are inspected with ``ArticleInfo``.

    Returns the matching new name, or ``None`` when no value has
    ``file_type == 'ArticleXML'`` (including an empty map).
    """
    # values() exists on both Python 2 and 3 dicts (iteritems() does not),
    # and the old names were never used here.
    for new_name in file_name_map.values():
        if ArticleInfo(new_name).file_type == 'ArticleXML':
            return new_name
    return None
def do_activity(self, data=None):
    """
    Do the work

    Downloads the zip file described by the S3 notification in ``data``
    into the temp dir, extracts it, and uploads every regular file whose
    name does not start with ``.`` or ``_`` to the expanded bucket under
    ``<article_id>.<version>/<run>``. Article id, file name, article
    version id, run, status and the expanded folder are stored in the
    session along the way.

    Returns ``activity.activity.ACTIVITY_PERMANENT_FAILURE`` when the
    status parsed from the file name is neither ``'vor'`` nor ``'poa'`` or
    when any step of the expansion raises; returns ``True`` on success.
    NOTE(review): success returns ``True`` rather than an ACTIVITY_*
    constant -- left unchanged for callers; confirm this is intended.
    """
    run = data['run']

    if self.logger:
        self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))

    info = S3NotificationInfo.from_dict(data)
    storage_context = StorageContext(self.settings)
    session = Session(self.settings)

    filename_last_element = session.get_value(run, 'filename_last_element')
    # zip name contains version information for previously archived zip files
    article_structure = ArticleInfo(filename_last_element)
    article_id = article_structure.article_id

    session.store_value(run, 'article_id', article_id)
    session.store_value(run, 'file_name', info.file_name)

    if self.logger:
        self.logger.info("Expanding file %s" % info.file_name)

    version = session.get_value(run, 'version')

    status = article_structure.status
    if status not in ('vor', 'poa'):
        # status could not be determined, exit workflow.
        if self.logger:
            self.logger.error("Name '%s' did not match expected pattern for status" %
                              filename_last_element)
        return activity.activity.ACTIVITY_PERMANENT_FAILURE

    article_version_id = article_id + '.' + version
    session.store_value(run, 'article_version_id', article_version_id)
    session.store_value(run, 'run', run)
    session.store_value(run, 'status', status)
    self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "start",
                            "Starting expansion of article " + article_id)

    try:
        # download zip to temp folder
        tmp = self.get_tmp_dir()
        local_zip_file = self.open_file_from_tmp_dir(filename_last_element, mode='wb')
        try:
            storage_resource_origin = self.settings.storage_provider + "://" + info.bucket_name + "/" + info.file_name
            storage_context.get_resource_to_file(storage_resource_origin, local_zip_file)
        finally:
            # always release the file handle, even when the download fails
            local_zip_file.close()

        # extract zip contents
        folder_name = path.join(article_version_id, run)
        content_folder = path.join(tmp, folder_name)
        makedirs(content_folder)
        with ZipFile(path.join(tmp, filename_last_element)) as zf:
            zf.extractall(content_folder)

        # only regular files are uploaded; names starting with '.' or '_'
        # are skipped
        upload_filenames = [
            f for f in listdir(content_folder)
            if isfile(join(content_folder, f)) and not f.startswith(('.', '_'))
        ]
        self.check_filenames(upload_filenames)

        bucket_folder_name = article_version_id + '/' + run
        for filename in upload_filenames:
            source_path = path.join(content_folder, filename)
            dest_path = bucket_folder_name + '/' + filename
            storage_resource_dest = self.settings.storage_provider + "://" + \
                self.settings.publishing_buckets_prefix + \
                self.settings.expanded_bucket + "/" + dest_path
            storage_context.set_resource_from_filename(storage_resource_dest, source_path)

        self.clean_tmp_dir()

        session.store_value(run, 'expanded_folder', bucket_folder_name)
        self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "end",
                                "Finished expansion of article " + article_id +
                                " for version " + version +
                                " run " + str(run) + " into " + bucket_folder_name)
    except Exception as e:
        if self.logger:
            self.logger.exception("Exception when expanding article")
        # str(e) rather than e.message: the attribute is deprecated and is
        # missing on Python 3, which would raise inside this handler
        self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "error",
                                "Error expanding article " + article_id + " message:" + str(e))
        return activity.activity.ACTIVITY_PERMANENT_FAILURE

    return True
def test_get_version_from_zip_filename(self, input, expected):
    """Check ``get_version_from_zip_filename()`` of ``ArticleInfo(input)``.

    The value returned for the given ``input`` name must equal
    ``expected``.
    """
    # the instance is kept on the test case, as before
    self.articleinfo = ArticleInfo(input)
    self.assertEqual(self.articleinfo.get_version_from_zip_filename(), expected)
def test_get_file_type_from_zip_filename(self, input, expected):
    """Check the ``file_type`` attribute of ``ArticleInfo(input)``.

    The attribute value for the given ``input`` name must equal
    ``expected``.
    """
    # the instance is kept on the test case, as before
    self.articleinfo = ArticleInfo(input)
    self.assertEqual(self.articleinfo.file_type, expected)