def file_of_published_article(self, filename):
    """Tell whether a zip file name carries both a version and an update date.

    Any directory part of ``filename`` is dropped before the name is handed
    to ``ArticleInfo``.

    :param filename: file name, optionally with a leading path
    :returns: True when ``ArticleInfo`` yields a version and an update date
        for the name, False when either of them is None
    """
    eif_filename_without_path = os.path.basename(filename)
    article_info = ArticleInfo(eif_filename_without_path)
    version = article_info.get_version_from_zip_filename()
    update_date = article_info.get_update_date_from_zip_filename()
    # Identity comparison is the reliable way to test for None
    return version is not None and update_date is not None
class TestArticleStructure(unittest.TestCase):
    # Data-driven checks of what ArticleInfo extracts from zip file names.
    # NOTE(review): @unpack/@data look like ddt decorators, which need the
    # class itself to be decorated with @ddt; that decorator is not visible
    # in this excerpt -- confirm it is present in the file.

    @unpack
    @data(
        {'input': 'elife-07702-vor-r4.zip', 'expected': None},
        {'input': 'elife-00013-vor-v1-20121015000000.zip',
         'expected': '2012-10-15T00:00:00Z'},
    )
    def test_get_update_date_from_zip_filename(self, input, expected):
        # the name without a timestamp is expected to give None
        self.articleinfo = ArticleInfo(input)
        self.assertEqual(
            self.articleinfo.get_update_date_from_zip_filename(), expected)

    @unpack
    @data(
        {'input': 'elife-07702-vor-r4.zip', 'expected': None},
        {'input': 'elife-00013-vor-v1-20121015000000.zip', 'expected': '1'},
    )
    def test_get_version_from_zip_filename(self, input, expected):
        # the version is compared as a string ('1'), not as a number
        self.articleinfo = ArticleInfo(input)
        self.assertEqual(
            self.articleinfo.get_version_from_zip_filename(), expected)
def get_article_xml_key(bucket, expanded_folder_name):
    """Look for the article XML file among the files of an expanded folder.

    The bucket is listed with ``expanded_folder_name + "/"`` and ``"/"``
    (presumably prefix and delimiter -- confirm against the bucket API).

    :param bucket: object providing ``list`` and ``get_key``
    :param expanded_folder_name: folder name, without trailing slash
    :returns: ``(key, filename)`` for the first listed file that
        ``ArticleInfo`` classifies as ``'ArticleXML'``, or None if there is
        no such file
    """
    listing = bucket.list(expanded_folder_name + "/", "/")
    for entry in listing:
        key = bucket.get_key(entry.key)
        # the file name is whatever follows the last slash of the key name
        filename = key.name.rsplit('/', 1)[1]
        if ArticleInfo(filename).file_type != 'ArticleXML':
            continue
        return key, filename
    return None
def process_key(self, key, cdn_path):
    """Generate images for one key when its file type has formats defined.

    :param key: object whose ``name`` contains at least one ``/``
    :param cdn_path: passed through unchanged to ``generate_images``
    """
    # file name without folder: the part after the last slash
    name_only = key.name.rsplit('/', 1)[1]
    info = ArticleInfo(name_only)

    # nothing to do when no formats exist for this file type
    formats = self.get_formats(info.file_type)
    if formats is None:
        return

    # only fetch the file content once we know images are needed
    fp = self.get_file_pointer(key)
    self.generate_images(formats, fp, info, cdn_path)
def get_xml_file_name(self, settings, expanded_folder_name, xml_bucket, version):
    """Pick the article XML file name from the files of an expanded folder.

    :param settings: passed through to ``_get_bucket_files``
    :param expanded_folder_name: passed through to ``_get_bucket_files``
    :param xml_bucket: passed through to ``_get_bucket_files``
    :param version: version as a string, or None to accept any version
    :returns: the first file name classified as ``'ArticleXML'`` that also
        contains ``-v<version>.`` (any ``'ArticleXML'`` name when
        ``version`` is None), or None when nothing matches
    """
    candidates = self._get_bucket_files(settings, expanded_folder_name, xml_bucket)
    for filename in candidates:
        if ArticleInfo(filename).file_type != 'ArticleXML':
            continue
        # the version marker is only built when a version was requested
        if version is None or ('-v' + version + '.') in filename:
            return filename
    return None
def do_activity(self, data=None):
    """Look up the version date of an article and store it in the session.

    Reads ``version`` and ``filename_last_element`` from the session of the
    run, asks ``self.get_version`` for the version date and, on success,
    stores it in the session as ``update_date``.

    :param data: mapping that must contain the key ``'run'``
    :returns: ``activity.activity.ACTIVITY_SUCCESS`` on success,
        ``activity.activity.ACTIVITY_PERMANENT_FAILURE`` when
        ``get_version`` reports an error or any exception is raised
    """
    # Bind everything the exception handler reads before entering the try
    # block. Otherwise a failure in the first statements (data is None, the
    # session is unavailable, the file name cannot be parsed) would raise a
    # NameError/TypeError inside the handler and hide the real error.
    run = None
    article_id = None
    version = None

    try:
        run = data['run']
        session = Session(self.settings)
        version = session.get_value(run, 'version')
        filename = session.get_value(run, 'filename_last_element')

        article_structure = ArticleInfo(filename)
        article_id = article_structure.article_id

        version_date, error = self.get_version(
            self.settings, article_structure, article_id, version)

        if error is not None:
            self.logger.error(error)
            self.emit_monitor_event(
                self.settings, article_id, version, run,
                self.pretty_name, "error",
                " ".join(("Error Looking up version article",
                          article_id, "message:", error)))
            return activity.activity.ACTIVITY_PERMANENT_FAILURE

        self.emit_monitor_event(
            self.settings, article_id, version, run,
            self.pretty_name, "end",
            " ".join(("Finished Version Lookup for article",
                      article_id, "version:", version)))

        session.store_value(run, 'update_date', version_date)

        return activity.activity.ACTIVITY_SUCCESS

    except Exception as e:
        self.logger.exception(
            "Exception when trying to Lookup next version")
        # article_id may still be None here, so it is passed through str()
        # to keep the join from failing
        self.emit_monitor_event(
            self.settings, article_id, version, run,
            self.pretty_name, "error",
            " ".join(("Error looking up version for article",
                      str(article_id), "message:", str(e))))
        return activity.activity.ACTIVITY_PERMANENT_FAILURE
def test_get_version_from_zip_filename(self, input, expected):
    # Build an ArticleInfo from the given name (kept on the test instance)
    # and compare the version it extracts with the expected value.
    self.articleinfo = ArticleInfo(input)
    self.assertEqual(
        self.articleinfo.get_version_from_zip_filename(), expected)
def find_xml_filename_in_map(self, file_name_map):
    """Return the first mapped-to file name that is an article XML file.

    :param file_name_map: dict whose values are the new file names
    :returns: the first value that ``ArticleInfo`` classifies as
        ``'ArticleXML'``, or None when no value qualifies
    """
    # Only the new names are inspected, so iterate the values directly;
    # values() is available on both Python 2 and Python 3 dicts.
    for new_name in file_name_map.values():
        if ArticleInfo(new_name).file_type == 'ArticleXML':
            return new_name
    return None
class TestArticleStructure(unittest.TestCase):
    # Data-driven checks for ArticleInfo and for the helper functions of the
    # article_structure module.
    # NOTE(review): @unpack/@data look like ddt decorators, which need the
    # class itself to be decorated with @ddt; that decorator is not visible
    # in this excerpt -- confirm it is present in the file.

    @unpack
    @data(
        {'input': 'elife-07702-vor-r4.zip', 'expected': None},
        {'input': 'elife-00013-vor-v1-20121015000000.zip',
         'expected': '2012-10-15T00:00:00Z'},
    )
    def test_get_update_date_from_zip_filename(self, input, expected):
        # the name without a timestamp is expected to give None
        self.articleinfo = ArticleInfo(input)
        self.assertEqual(
            self.articleinfo.get_update_date_from_zip_filename(), expected)

    @unpack
    @data(
        {'input': 'elife-07702-vor-r4.zip', 'expected': None},
        {'input': 'elife-00013-vor-v1-20121015000000.zip', 'expected': '1'},
    )
    def test_get_version_from_zip_filename(self, input, expected):
        # the version is compared as a string ('1'), not as a number
        self.articleinfo = ArticleInfo(input)
        self.assertEqual(
            self.articleinfo.get_version_from_zip_filename(), expected)

    @unpack
    @data(
        {'input': 'elife-07702-vor-r4.zip', 'expected': 'ArticleZip'},
        {'input': 'elife-00013-vor-v1-20121015000000.zip', 'expected': 'ArticleZip'},
        {'input': 'elife-00666-v1.pdf', 'expected': 'Other'},
        {'input': 'elife-00666-v1.xml', 'expected': 'ArticleXML'},
        {'input': 'elife-00666-app1-fig1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-app1-fig1-figsupp1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-app2-video1.mp4', 'expected': 'Other'},
        {'input': 'elife-00666-box2-fig1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-code1-v1.xml', 'expected': 'Other'},
        {'input': 'elife-00666-data1-v1.xlsx', 'expected': 'Other'},
        {'input': 'elife-00666-fig1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig2-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig2-figsupp1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig2-figsupp2-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig3-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig3-v10.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig3-figsupp1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig3-video1.mp4', 'expected': 'Other'},
        {'input': 'elife-00666-fig4-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig4-code1-v1.xlsx', 'expected': 'Other'},
        {'input': 'elife-00666-figures-v1.pdf', 'expected': 'FigurePDF'},
        {'input': 'elife-00666-inf001-v1.jpeg', 'expected': 'Inline'},
        {'input': 'elife-00666-repstand1-v1.pdf', 'expected': 'Other'},
        {'input': 'elife-00666-resp-fig1-v1.png', 'expected': 'Figure'},
        {'input': 'elife-00666-resp-video1.mp4', 'expected': 'Other'},
        {'input': 'elife-00666-supp1-v1.csv', 'expected': 'Other'},
        {'input': 'elife-00666-supp2-v2.tif', 'expected': 'Other'},
        {'input': 'elife-00666-supp2-v3.docx', 'expected': 'Other'},
        {'input': 'elife-00666-table3-data1-v1.xlsx', 'expected': 'Other'},
        {'input': 'elife-00666-video1.mp4', 'expected': 'Other'},
        {'input': 'elife-00666-video1-data1-v1.xlsx', 'expected': 'Other'},
    )
    def test_get_file_type_from_zip_filename(self, input, expected):
        # file_type is read as an attribute right after construction
        self.articleinfo = ArticleInfo(input)
        self.assertEqual(self.articleinfo.file_type, expected)

    @unpack
    @data(
        {'input': 'elife-07702-vor-r4.zip', 'expected': False},
        {'input': 'elife-00013-vor-v1-20121015000000.zip', 'expected': False},
        {'input': 'elife-00666-v1.pdf', 'expected': False},
        {'input': 'elife-00666-v1.xml', 'expected': False},
        {'input': 'elife-00666-app1-fig1-v1.tif', 'expected': True},
        {'input': 'elife-00666-app1-fig1-figsupp1-v1.tif', 'expected': True},
        {'input': 'elife-00666-app2-video1.mp4', 'expected': False},
        {'input': 'elife-00666-box2-fig1-v1.tif', 'expected': True},
        {'input': 'elife-00666-code1-v1.xml', 'expected': False},
        {'input': 'elife-00666-data1-v1.xlsx', 'expected': False},
        {'input': 'elife-00666-fig1-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig2-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig2-figsupp1-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig2-figsupp2-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig3-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig3-figsupp1-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig3-video1.mp4', 'expected': False},
        {'input': 'elife-00666-fig4-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig4-code1-v1.xlsx', 'expected': False},
        {'input': 'elife-00666-figures-v1.pdf', 'expected': False},
        {'input': 'elife-00666-inf001-v1.jpeg', 'expected': False},
        {'input': 'elife-00666-repstand1-v1.pdf', 'expected': False},
        {'input': 'elife-00666-resp-fig1-v1.png', 'expected': True},
        {'input': 'elife-00666-resp-video1.mp4', 'expected': False},
        {'input': 'elife-00666-supp1-v1.csv', 'expected': False},
        {'input': 'elife-00666-table3-data1-v1.xlsx', 'expected': False},
        {'input': 'elife-00666-video1.mp4', 'expected': False},
        {'input': 'elife-00666-video1-data1-v1.xlsx', 'expected': False},
        {'input': 'elife-00666-supp1-v1.tif', 'expected': False}
    )
    def test_article_figure(self, input, expected):
        # module-level helper, called with the bare file name
        is_figure = article_structure.article_figure(input)
        self.assertEqual(is_figure, expected)

    def test_get_original_files(self):
        # the '-80w' variant, the zip and the media image are expected to
        # be left out
        filenames = [
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-fig2-figsupp2-v10.tif',
            'elife-00666-inf001-v1.jpg',
            'elife-00666-inf001-v1-80w.jpg',
            'elife-00666-table3-data1-v1.xlsx',
            'elife-07702-vor-r4.zip',
            'elife-07398-media1.jpg',
        ]
        wanted = [
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-fig2-figsupp2-v10.tif',
            'elife-00666-inf001-v1.jpg',
            'elife-00666-table3-data1-v1.xlsx',
        ]
        self.assertListEqual(
            article_structure.get_original_files(filenames), wanted)

    def test_get_media_file_images(self):
        # only the video/media .jpg names are expected back, in input order
        filenames = [
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-inf001-v1.jpg',
            'elife-00666-inf001-v1-80w.jpg',
            'elife-00666-table3-data1-v1.xlsx',
            'elife-07702-vor-r4.zip',
            'elife-00666-video2.jpg',
            'elife-07398-media1.jpg',
        ]
        wanted = ['elife-00666-video2.jpg', 'elife-07398-media1.jpg']
        self.assertListEqual(
            article_structure.get_media_file_images(filenames), wanted)

    def test_get_figures_for_iiif(self):
        "Only .tif of original figures"
        # NOTE(review): the expected list also contains the video/media
        # .jpg names, so the description above is narrower than the check.
        filenames = [
            'elife-00666-app1-fig1-figsupp1-v1.tif',
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-fig2-figsupp2-v1.jpg',
            'elife-00666-inf001-v1.jpg',
            'elife-00666-inf001-v1-80w.jpg',
            'elife-00666-table3-data1-v1.xlsx',
            'elife-07702-vor-r4.zip',
            'elife-6148691793723703318-fig10-v1.gif',
            'elife-9204580859652100230-fig2-data1-v1.xls',
            'elife-00666-video2.jpg',
            'elife-07398-media1.jpg',
        ]
        wanted = [
            'elife-00666-app1-fig1-figsupp1-v1.tif',
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-video2.jpg',
            'elife-07398-media1.jpg',
        ]
        self.assertListEqual(
            article_structure.get_figures_for_iiif(filenames), wanted)

    # see https://github.com/elifesciences/elife-continuum-documentation/blob/master/file-naming/file_naming_spec.md
    def test_get_figures_pdfs(self):
        # only the '-figures-' PDFs are expected, not the article PDF
        filenames = [
            'elife-07398-media1.jpg',
            'elife-00666-figures-v1.pdf',
            'elife-00353-v1.pdf',
            'elife-00353-v1.xml',
            'elife-18425-figures-v2.pdf',
        ]
        wanted = ['elife-00666-figures-v1.pdf', 'elife-18425-figures-v2.pdf']
        self.assertListEqual(
            article_structure.get_figures_pdfs(filenames), wanted)

    @data(u'elife-15224-fig1-figsupp1.tif',
          u'elife-15224-resp-fig1.tif',
          u'elife-15224-figures.pdf',
          u'elife-15802-fig9-data3.docx',
          u'elife-11792.mp4',
          u'elife-00005-media1-code1.wrl')
    def test_is_video_file_false(self, filename):
        # none of these names should be reported as a video file
        self.assertFalse(article_structure.is_video_file(filename))

    @data(u'elife-11792-media2.mp4',
          u'elife-15224-fig1-figsupp1-media.tif',
          u'elife-11792-video1.mp4',
          u'elife-99999-resp-media1.avi',
          u'elife-00005-media1.mov')
    def test_is_video_file_true(self, filename):
        # every one of these names should be reported as a video file
        self.assertTrue(article_structure.is_video_file(filename))

    @data(u'elife-15224-fig1-figsupp1.tif')
    def test_file_parts(self, filename):
        # the extension is expected without the leading dot
        prefix, extension = article_structure.file_parts(filename)
        self.assertEqual(prefix, u'elife-15224-fig1-figsupp1')
        self.assertEqual(extension, u'tif')

    def test_get_videos(self):
        # a full article file listing containing exactly one video
        filenames = [
            u'elife-13273-fig1-v1.tif',
            u'elife-13273-fig2-figsupp1-v1.tif',
            u'elife-13273-fig2-figsupp2-v1.tif',
            u'elife-13273-fig2-figsupp3-v1.tif',
            u'elife-13273-fig2-v1.tif',
            u'elife-13273-fig3-data1-v1.xlsx',
            u'elife-13273-fig3-figsupp1-v1.tif',
            u'elife-13273-fig3-figsupp2-v1.tif',
            u'elife-13273-fig3-figsupp3-v1.tif',
            u'elife-13273-fig3-figsupp4-v1.tif',
            u'elife-13273-fig3-figsupp5-v1.tif',
            u'elife-13273-fig3-v1.tif',
            u'elife-13273-fig4-figsupp1-v1.tif',
            u'elife-13273-fig4-v1.tif',
            u'elife-13273-fig5-data1-v1.xlsx',
            u'elife-13273-fig5-figsupp1-v1.tif',
            u'elife-13273-fig5-v1.tif',
            u'elife-13273-fig6-data1-v1.xlsx',
            u'elife-13273-fig6-data2-v1.xlsx',
            u'elife-13273-fig6-figsupp1-v1.tif',
            u'elife-13273-fig6-figsupp2-v1.tif',
            u'elife-13273-fig6-v1.tif',
            u'elife-13273-fig7-v1.tif',
            u'elife-13273-fig8-v1.tif',
            u'elife-13273-fig9-v1.tif',
            u'elife-13273-figures-v1.pdf',
            u'elife-13273-media1.mp4',
            u'elife-13273-v1.pdf',
            u'elife-13273-v1.xml',
        ]
        videos = article_structure.get_videos(filenames)
        self.assertListEqual(videos, [u'elife-13273-media1.mp4'])

    def test_pre_ingest_assets(self):
        # compared with assertItemsEqual, so the order is not checked
        filenames = [
            'elife-00666-app1-fig1-figsupp1-v1.tif',
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-fig2-figsupp2-v1.jpg',
            'elife-00666-inf001-v1.jpg',
            'elife-00666-inf001-v1-80w.jpg',
            'elife-00666-table3-data1-v1.xlsx',
            'elife-07702-vor-r4.zip',
            'elife-6148691793723703318-fig10-v1.gif',
            'elife-9204580859652100230-fig2-data1-v1.xls',
            'elife-00666-video2.jpg',
            'elife-07398-media1.jpg',
            'elife-00666-figures-v1.pdf',
            'elife-18425-figures-v2.pdf',
            'elife-13273-media1.mp4',
        ]
        wanted = [
            'elife-00666-app1-fig1-figsupp1-v1.tif',
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-video2.jpg',
            'elife-07398-media1.jpg',
            'elife-13273-media1.mp4',
            'elife-00666-figures-v1.pdf',
            'elife-18425-figures-v2.pdf',
        ]
        self.assertItemsEqual(
            article_structure.pre_ingest_assets(filenames), wanted)
def test_get_file_type_from_zip_filename(self, input, expected):
    # Build an ArticleInfo from the given name (kept on the test instance)
    # and compare its file_type attribute with the expected value.
    self.articleinfo = ArticleInfo(input)
    self.assertEqual(self.articleinfo.file_type, expected)
def do_activity(self, data=None):
    """
    Do the work

    Downloads the article zip named in the notification to the temporary
    directory, extracts it and uploads every regular file whose name does
    not start with '.' or '_' to the expanded bucket under
    ``<article_id>.<version>/<run>``. The resulting folder is stored in the
    session as ``expanded_folder``.

    :param data: notification mapping; must contain ``'run'`` and whatever
        ``S3NotificationInfo.from_dict`` reads
    :returns: True on success,
        ``activity.activity.ACTIVITY_PERMANENT_FAILURE`` when the status is
        neither 'vor' nor 'poa' or when the expansion raises an exception
    """
    run = data['run']

    if self.logger:
        self.logger.info('data: %s' % json.dumps(data, sort_keys=True, indent=4))
    info = S3NotificationInfo.from_dict(data)

    storage_context = StorageContext(self.settings)

    session = Session(self.settings)

    filename_last_element = session.get_value(run, 'filename_last_element')
    # zip name contains version information for previously archived zip files
    article_structure = ArticleInfo(filename_last_element)
    article_id = article_structure.article_id
    session.store_value(run, 'article_id', article_id)
    session.store_value(run, 'file_name', info.file_name)

    if self.logger:
        self.logger.info("Expanding file %s" % info.file_name)

    version = session.get_value(run, 'version')

    status = article_structure.status
    if status not in ('vor', 'poa'):
        # status could not be determined, exit workflow.
        # The logger is guarded here like everywhere else in this method.
        if self.logger:
            self.logger.error("Name '%s' did not match expected pattern for status" %
                              filename_last_element)
        return activity.activity.ACTIVITY_PERMANENT_FAILURE

    article_version_id = article_id + '.' + version
    session.store_value(run, 'article_version_id', article_version_id)
    session.store_value(run, 'run', run)
    session.store_value(run, 'status', status)
    self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "start",
                            "Starting expansion of article " + article_id)

    try:
        # download zip to temp folder
        tmp = self.get_tmp_dir()
        local_zip_file = self.open_file_from_tmp_dir(filename_last_element, mode='wb')
        storage_resource_origin = self.settings.storage_provider + "://" + info.bucket_name + "/" + info.file_name
        try:
            storage_context.get_resource_to_file(storage_resource_origin, local_zip_file)
        finally:
            # close the handle even when the download fails, so it is not
            # leaked and the data is flushed before the zip is read back
            local_zip_file.close()

        # extract zip contents
        folder_name = path.join(article_version_id, run)
        content_folder = path.join(tmp, folder_name)
        makedirs(content_folder)
        with ZipFile(path.join(tmp, filename_last_element)) as zf:
            zf.extractall(content_folder)

        # keep regular files only, skipping names starting with '.' or '_'
        upload_filenames = [
            f for f in listdir(content_folder)
            if isfile(join(content_folder, f)) and not f.startswith(('.', '_'))
        ]
        self.check_filenames(upload_filenames)

        bucket_folder_name = article_version_id + '/' + run
        for filename in upload_filenames:
            source_path = path.join(content_folder, filename)
            dest_path = bucket_folder_name + '/' + filename
            storage_resource_dest = self.settings.storage_provider + "://" + self.settings.publishing_buckets_prefix + \
                                    self.settings.expanded_bucket + "/" + dest_path
            storage_context.set_resource_from_filename(storage_resource_dest, source_path)

        self.clean_tmp_dir()

        session.store_value(run, 'expanded_folder', bucket_folder_name)
        self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "end",
                                "Finished expansion of article " + article_id +
                                " for version " + version + " run " + str(run) +
                                " into " + bucket_folder_name)
    except Exception as e:
        if self.logger:
            self.logger.exception("Exception when expanding article")
        # str(e) instead of e.message: the latter is deprecated and is
        # empty for exceptions raised with more than one argument
        self.emit_monitor_event(self.settings, article_id, version, run, "Expand Article", "error",
                                "Error expanding article " + article_id +
                                " message:" + str(e))
        return activity.activity.ACTIVITY_PERMANENT_FAILURE

    return True
class TestArticleStructure(unittest.TestCase):
    # Data-driven checks for ArticleInfo and for the helper functions of the
    # article_structure module.
    # NOTE(review): @unpack/@data look like ddt decorators, which need the
    # class itself to be decorated with @ddt; that decorator is not visible
    # in this excerpt -- confirm it is present in the file.

    @unpack
    @data(
        {'input': 'elife-07702-vor-r4.zip', 'expected': None},
        {'input': 'elife-00013-vor-v1-20121015000000.zip',
         'expected': '2012-10-15T00:00:00Z'},
    )
    def test_get_update_date_from_zip_filename(self, input, expected):
        # the name without a timestamp is expected to give None
        self.articleinfo = ArticleInfo(input)
        self.assertEqual(
            self.articleinfo.get_update_date_from_zip_filename(), expected)

    @unpack
    @data(
        {'input': 'elife-07702-vor-r4.zip', 'expected': None},
        {'input': 'elife-00013-vor-v1-20121015000000.zip', 'expected': '1'},
    )
    def test_get_version_from_zip_filename(self, input, expected):
        # the version is compared as a string ('1'), not as a number
        self.articleinfo = ArticleInfo(input)
        self.assertEqual(
            self.articleinfo.get_version_from_zip_filename(), expected)

    @unpack
    @data(
        {'input': 'elife-07702-vor-r4.zip', 'expected': 'ArticleZip'},
        {'input': 'elife-00013-vor-v1-20121015000000.zip', 'expected': 'ArticleZip'},
        {'input': 'elife-00666-v1.pdf', 'expected': 'Other'},
        {'input': 'elife-00666-v1.xml', 'expected': 'ArticleXML'},
        {'input': 'elife-00666-app1-fig1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-app1-fig1-figsupp1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-app2-video1.mp4', 'expected': 'Other'},
        {'input': 'elife-00666-box2-fig1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-code1-v1.xml', 'expected': 'Other'},
        {'input': 'elife-00666-data1-v1.xlsx', 'expected': 'Other'},
        {'input': 'elife-00666-fig1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig2-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig2-figsupp1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig2-figsupp2-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig3-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig3-v10.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig3-figsupp1-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig3-video1.mp4', 'expected': 'Other'},
        {'input': 'elife-00666-fig4-v1.tif', 'expected': 'Figure'},
        {'input': 'elife-00666-fig4-code1-v1.xlsx', 'expected': 'Other'},
        {'input': 'elife-00666-figures-v1.pdf', 'expected': 'FigurePDF'},
        {'input': 'elife-00666-inf001-v1.jpeg', 'expected': 'Inline'},
        {'input': 'elife-00666-repstand1-v1.pdf', 'expected': 'Other'},
        {'input': 'elife-00666-resp-fig1-v1.png', 'expected': 'Figure'},
        {'input': 'elife-00666-resp-video1.mp4', 'expected': 'Other'},
        {'input': 'elife-00666-supp1-v1.csv', 'expected': 'Other'},
        {'input': 'elife-00666-supp2-v2.tif', 'expected': 'Other'},
        {'input': 'elife-00666-supp2-v3.docx', 'expected': 'Other'},
        {'input': 'elife-00666-table3-data1-v1.xlsx', 'expected': 'Other'},
        {'input': 'elife-00666-video1.mp4', 'expected': 'Other'},
        {'input': 'elife-00666-video1-data1-v1.xlsx', 'expected': 'Other'},
    )
    def test_get_file_type_from_zip_filename(self, input, expected):
        # file_type is read as an attribute right after construction
        self.articleinfo = ArticleInfo(input)
        self.assertEqual(self.articleinfo.file_type, expected)

    @unpack
    @data(
        {'input': 'elife-07702-vor-r4.zip', 'expected': False},
        {'input': 'elife-00013-vor-v1-20121015000000.zip', 'expected': False},
        {'input': 'elife-00666-v1.pdf', 'expected': False},
        {'input': 'elife-00666-v1.xml', 'expected': False},
        {'input': 'elife-00666-app1-fig1-v1.tif', 'expected': True},
        {'input': 'elife-00666-app1-fig1-figsupp1-v1.tif', 'expected': True},
        {'input': 'elife-00666-app2-video1.mp4', 'expected': False},
        {'input': 'elife-00666-box2-fig1-v1.tif', 'expected': True},
        {'input': 'elife-00666-code1-v1.xml', 'expected': False},
        {'input': 'elife-00666-data1-v1.xlsx', 'expected': False},
        {'input': 'elife-00666-fig1-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig2-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig2-figsupp1-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig2-figsupp2-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig3-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig3-figsupp1-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig3-video1.mp4', 'expected': False},
        {'input': 'elife-00666-fig4-v1.tif', 'expected': True},
        {'input': 'elife-00666-fig4-code1-v1.xlsx', 'expected': False},
        {'input': 'elife-00666-figures-v1.pdf', 'expected': False},
        {'input': 'elife-00666-inf001-v1.jpeg', 'expected': False},
        {'input': 'elife-00666-repstand1-v1.pdf', 'expected': False},
        {'input': 'elife-00666-resp-fig1-v1.png', 'expected': True},
        {'input': 'elife-00666-resp-video1.mp4', 'expected': False},
        {'input': 'elife-00666-supp1-v1.csv', 'expected': False},
        {'input': 'elife-00666-table3-data1-v1.xlsx', 'expected': False},
        {'input': 'elife-00666-video1.mp4', 'expected': False},
        {'input': 'elife-00666-video1-data1-v1.xlsx', 'expected': False},
        {'input': 'elife-00666-supp1-v1.tif', 'expected': False}
    )
    def test_article_figure(self, input, expected):
        # module-level helper, called with the bare file name
        is_figure = article_structure.article_figure(input)
        self.assertEqual(is_figure, expected)

    def test_get_original_files(self):
        # the '-80w' variant, the zip and the media image are expected to
        # be left out
        source_names = [
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-fig2-figsupp2-v10.tif',
            'elife-00666-inf001-v1.jpg',
            'elife-00666-inf001-v1-80w.jpg',
            'elife-00666-table3-data1-v1.xlsx',
            'elife-07702-vor-r4.zip',
            'elife-07398-media1.jpg',
        ]
        originals = [
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-fig2-figsupp2-v10.tif',
            'elife-00666-inf001-v1.jpg',
            'elife-00666-table3-data1-v1.xlsx',
        ]
        self.assertListEqual(
            article_structure.get_original_files(source_names), originals)

    def test_get_media_file_images(self):
        # only the video/media .jpg names are expected back, in input order
        source_names = [
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-inf001-v1.jpg',
            'elife-00666-inf001-v1-80w.jpg',
            'elife-00666-table3-data1-v1.xlsx',
            'elife-07702-vor-r4.zip',
            'elife-00666-video2.jpg',
            'elife-07398-media1.jpg',
        ]
        media_images = ['elife-00666-video2.jpg', 'elife-07398-media1.jpg']
        self.assertListEqual(
            article_structure.get_media_file_images(source_names), media_images)

    def test_get_figures_for_iiif(self):
        "Only .tif of original figures"
        # NOTE(review): the expected list also contains the video/media
        # .jpg names, so the description above is narrower than the check.
        source_names = [
            'elife-00666-app1-fig1-figsupp1-v1.tif',
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-fig2-figsupp2-v1.jpg',
            'elife-00666-inf001-v1.jpg',
            'elife-00666-inf001-v1-80w.jpg',
            'elife-00666-table3-data1-v1.xlsx',
            'elife-07702-vor-r4.zip',
            'elife-6148691793723703318-fig10-v1.gif',
            'elife-9204580859652100230-fig2-data1-v1.xls',
            'elife-00666-video2.jpg',
            'elife-07398-media1.jpg',
        ]
        iiif_names = [
            'elife-00666-app1-fig1-figsupp1-v1.tif',
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-video2.jpg',
            'elife-07398-media1.jpg',
        ]
        self.assertListEqual(
            article_structure.get_figures_for_iiif(source_names), iiif_names)

    # see https://github.com/elifesciences/elife-continuum-documentation/blob/master/file-naming/file_naming_spec.md
    def test_get_figures_pdfs(self):
        # only the '-figures-' PDFs are expected, not the article PDF
        source_names = [
            'elife-07398-media1.jpg',
            'elife-00666-figures-v1.pdf',
            'elife-00353-v1.pdf',
            'elife-00353-v1.xml',
            'elife-18425-figures-v2.pdf',
        ]
        figure_pdfs = ['elife-00666-figures-v1.pdf', 'elife-18425-figures-v2.pdf']
        self.assertListEqual(
            article_structure.get_figures_pdfs(source_names), figure_pdfs)

    @data(u'elife-15224-fig1-figsupp1.tif',
          u'elife-15224-resp-fig1.tif',
          u'elife-15224-figures.pdf',
          u'elife-15802-fig9-data3.docx',
          u'elife-11792.mp4',
          u'elife-00005-media1-code1.wrl')
    def test_is_video_file_false(self, filename):
        # none of these names should be reported as a video file
        self.assertFalse(article_structure.is_video_file(filename))

    @data(u'elife-11792-media2.mp4',
          u'elife-15224-fig1-figsupp1-media.tif',
          u'elife-11792-video1.mp4',
          u'elife-99999-resp-media1.avi',
          u'elife-00005-media1.mov')
    def test_is_video_file_true(self, filename):
        # every one of these names should be reported as a video file
        self.assertTrue(article_structure.is_video_file(filename))

    @data(u'elife-15224-fig1-figsupp1.tif')
    def test_file_parts(self, filename):
        # the extension is expected without the leading dot
        prefix, extension = article_structure.file_parts(filename)
        self.assertEqual(prefix, u'elife-15224-fig1-figsupp1')
        self.assertEqual(extension, u'tif')

    def test_get_videos(self):
        # a full article file listing containing exactly one video
        source_names = [
            u'elife-13273-fig1-v1.tif',
            u'elife-13273-fig2-figsupp1-v1.tif',
            u'elife-13273-fig2-figsupp2-v1.tif',
            u'elife-13273-fig2-figsupp3-v1.tif',
            u'elife-13273-fig2-v1.tif',
            u'elife-13273-fig3-data1-v1.xlsx',
            u'elife-13273-fig3-figsupp1-v1.tif',
            u'elife-13273-fig3-figsupp2-v1.tif',
            u'elife-13273-fig3-figsupp3-v1.tif',
            u'elife-13273-fig3-figsupp4-v1.tif',
            u'elife-13273-fig3-figsupp5-v1.tif',
            u'elife-13273-fig3-v1.tif',
            u'elife-13273-fig4-figsupp1-v1.tif',
            u'elife-13273-fig4-v1.tif',
            u'elife-13273-fig5-data1-v1.xlsx',
            u'elife-13273-fig5-figsupp1-v1.tif',
            u'elife-13273-fig5-v1.tif',
            u'elife-13273-fig6-data1-v1.xlsx',
            u'elife-13273-fig6-data2-v1.xlsx',
            u'elife-13273-fig6-figsupp1-v1.tif',
            u'elife-13273-fig6-figsupp2-v1.tif',
            u'elife-13273-fig6-v1.tif',
            u'elife-13273-fig7-v1.tif',
            u'elife-13273-fig8-v1.tif',
            u'elife-13273-fig9-v1.tif',
            u'elife-13273-figures-v1.pdf',
            u'elife-13273-media1.mp4',
            u'elife-13273-v1.pdf',
            u'elife-13273-v1.xml',
        ]
        found = article_structure.get_videos(source_names)
        self.assertListEqual(found, [u'elife-13273-media1.mp4'])

    def test_pre_ingest_assets(self):
        # compared with assertItemsEqual, so the order is not checked
        source_names = [
            'elife-00666-app1-fig1-figsupp1-v1.tif',
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-fig2-figsupp2-v1.jpg',
            'elife-00666-inf001-v1.jpg',
            'elife-00666-inf001-v1-80w.jpg',
            'elife-00666-table3-data1-v1.xlsx',
            'elife-07702-vor-r4.zip',
            'elife-6148691793723703318-fig10-v1.gif',
            'elife-9204580859652100230-fig2-data1-v1.xls',
            'elife-00666-video2.jpg',
            'elife-07398-media1.jpg',
            'elife-00666-figures-v1.pdf',
            'elife-18425-figures-v2.pdf',
            'elife-13273-media1.mp4',
        ]
        assets = [
            'elife-00666-app1-fig1-figsupp1-v1.tif',
            'elife-00666-fig2-figsupp2-v1.tif',
            'elife-00666-video2.jpg',
            'elife-07398-media1.jpg',
            'elife-13273-media1.mp4',
            'elife-00666-figures-v1.pdf',
            'elife-18425-figures-v2.pdf',
        ]
        self.assertItemsEqual(
            article_structure.pre_ingest_assets(source_names), assets)