def _bibdoc_file_list(recid):
    """Return metadata dicts for the latest file of each bibdoc of ``recid``.

    Each entry holds the bibdoc id, the file name without extension, the
    extension without its leading dot, and the file size in bytes.  Returns
    an empty list when the record's documents cannot be loaded.
    """
    import os
    import os.path
    from invenio.legacy.bibdocfile.api import BibRecDocs
    try:
        recdocs = BibRecDocs(recid)
    except Exception:
        # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
        # are not swallowed; any real failure is still logged and masked.
        current_app.logger.error(
            "REST API: Error while building BibRecDocs for record %d"
            % (recid, ))
        return []
    files = []
    for d in recdocs.list_bibdocs():
        df = d.list_latest_files()
        if not df:
            # Bibdoc without any current file: nothing to report.
            continue
        filename = df[0].get_full_name().decode('utf-8')
        docname, doctype = os.path.splitext(filename)
        if doctype.startswith('.'):
            doctype = doctype[1:]
        files.append({
            'id': d.get_id(),
            'name': docname,
            'type': doctype,
            'size': df[0].get_size(),
        })
    return files
def _documents_has_been_updated(self, recid):
    """Return True when any bibdoc of ``recid`` is as recent as the record.

    Compares each bibdoc's modification date (with a 2-second tolerance for
    rounding) against the record's modification date taken from ``bibrec``.
    """
    from invenio.legacy.bibdocfile.api import BibRecDocs
    import datetime
    bibdocs = BibRecDocs(recid)
    # TODO: replace legacy code (JsonAlchemy lookup once its date-parsing
    # bug is resolved).
    from invenio.legacy.dbquery import run_sql
    # BUGFIX: pass recid as a bound parameter instead of interpolating it
    # into the SQL string (SQL-injection / quoting hazard).
    (record_creation_date, record_modification_date) = run_sql(
        "SELECT creation_date, modification_date from bibrec where id=%s",
        (recid, ))[0]
    if not bibdocs.list_bibdocs():
        self.app.logger.debug("No docs for: %s" % recid)
    for b in bibdocs.list_bibdocs():
        # Add a few seconds to absorb timestamp rounding differences.
        if b.md + datetime.timedelta(seconds=2) >= record_modification_date:
            return True
    return False
def tearDown(self):
    """Remove every bibdoc this test attached to record 2."""
    from invenio.legacy.bibdocfile.api import BibRecDocs
    record_docs = BibRecDocs(2)
    # Delete all documents created during the test run.
    for docname in ('img_test', 'file', 'test'):
        record_docs.delete_bibdoc(docname)
def solr_add_range(lower_recid, upper_recid, tags_to_index, next_commit_counter):
    """
    Adds the regarding field values of all records from the lower recid
    to the upper one to Solr.  It preserves the fulltext information.

    Returns the updated commit counter.
    """
    for recid in range(lower_recid, upper_recid + 1):
        if record_exists(recid):
            abstract = get_field_content_in_utf8(recid, 'abstract', tags_to_index)
            author = get_field_content_in_utf8(recid, 'author', tags_to_index)
            keyword = get_field_content_in_utf8(recid, 'keyword', tags_to_index)
            title = get_field_content_in_utf8(recid, 'title', tags_to_index)
            try:
                bibrecdocs = BibRecDocs(recid)
                fulltext = unicode(bibrecdocs.get_text(), 'utf-8')
            except Exception:
                # Narrowed from a bare ``except:``; missing or unextractable
                # fulltext must not abort indexing of the record.
                fulltext = ''
            solr_add(recid, abstract, author, fulltext, keyword, title)
            next_commit_counter = solr_commit_if_necessary(next_commit_counter,
                                                           recid=recid)
    return next_commit_counter
def file(recid=None, filename=None):
    """Serve restricted file for record using provided token.

    This is a simple reimplementation of legacy bibdocfile file serving.
    Only the latest version of a file can be served.

    Note a generated link is independent of the file version: it always
    resolves to the latest one.
    """
    # The secret token must validate against this record, else pretend the
    # resource does not exist.
    if not SecretLink.validate_token(request.args.get('token'),
                                     dict(recid=recid)):
        return abort(404)
    try:
        bibarchive = BibRecDocs(recid)
    except InvenioBibDocFileError:
        current_app.logger.warning("File not found.", exc_info=True)
        abort(404)
    # 410 Gone when the record's documents have been deleted.
    if bibarchive.deleted_p():
        abort(410)
    f = get_bibdocfile(bibarchive, filename)
    if f is None:
        abort(404)
    return send_file(f.get_path())
def get_media_from_recid(recid):
    '''
    This method get the file in the given url

    @param recid: id of the file to get
    '''
    medias = []
    record_docs = BibRecDocs(recid)
    for docfile in record_docs.list_latest_files():
        mime_suffix = docfile.get_superformat().split(".")[-1]
        media = {'name': docfile.get_full_name(),
                 'file': '',
                 'type': 'application/%s' % mime_suffix,
                 'path': docfile.get_full_path(),
                 'collection': docfile.get_type(),
                 'size': docfile.get_size(),
                 'loaded': False,
                 'selected': ''}
        # Files of the "Main" collection come pre-selected in the UI.
        if media['collection'] == "Main":
            media['selected'] = 'checked=yes'
        medias.append(media)
    return medias
def format_element(bfo, subformat="480p"):
    """ Creates HTML5 source elements for the given subformat.
    MP4, WebM and OGV are currently supported as video sources.
    The function will scan the bibdocfiles attached to the record for
    videos with these formats and the fiven subformat.

    @param subformat: BibDocFile subformat to create the sources from (e.g. 480p)
    """
    # Codec strings per container type, used in the type attribute.
    codecs_by_type = {
        'mp4': 'avc1.42E01E, mp4a.40.2',
        'webm': 'vp8, vorbis',
        'ogv': 'theora, vorbis',
    }
    video_sources = []
    for bibdoc in BibRecDocs(bfo.recID).list_bibdocs():
        for docfile in bibdoc.list_all_files():
            is_video = docfile.get_superformat() in ('.mp4', '.webm', '.ogv')
            if not (is_video and docfile.get_subformat() == subformat):
                continue
            src = docfile.get_url()
            ftype = docfile.get_superformat()[1:]
            codecs = codecs_by_type[ftype]
            video_sources.append(
                '<source src=\"%s\" type=\'video/%s; codecs=\"%s\"\' />'
                % (src, ftype, codecs))
    return "\n".join(video_sources)
def create_download_popup(bfo):
    """Create the complete download popup"""
    # Superformats recognised as downloadable video containers.
    video_superformats = ('.mp4', '.webm', '.ogv', '.mov', '.wmv', '.avi',
                          '.mpeg', '.flv', '.mkv')
    elements = []
    ## Go through all the BibDocs and search for video related signatures
    for bibdoc in BibRecDocs(bfo.recID).list_bibdocs():
        for docfile in bibdoc.list_all_files():
            ## When a video signature is found, add it as an element
            if docfile.get_superformat() not in video_superformats:
                continue
            elements.append(create_download_element(
                docfile.get_url(),
                docfile.get_superformat()[1:],
                docfile.get_size(),
                docfile.get_subformat()))
    if not elements:
        return ""
    return html_skeleton_popup % {'elements': "\n".join(elements)}
def goto(cc=CFG_SITE_NAME, p='', f='', sf='date', so='d', docname='', format=''):
    """
    Redirect the user to the latest record in the given collection.

    Redirect the user to the latest record in the given collection,
    optionally within the specified pattern and field. If docname and
    format are specified, redirect the user to the corresponding docname
    and format. If docname it is not specified, but there is only a single
    bibdoc attached to the record will redirect to that one.
    """
    recids = perform_request_search(cc=cc, p=p, f=f, sf=sf, so=so)
    if recids:
        # The first is the most recent because they are sorted by date
        # descending.
        recid = recids[0]
        url = '/%s/%s' % (CFG_SITE_RECORD, recid)
        if format:
            bibrecdocs = BibRecDocs(recid)
            if not docname:
                docnames = bibrecdocs.get_bibdoc_names()
                if len(docnames) == 1:
                    docname = docnames[0]
                else:
                    # Ambiguous: more than one document, no docname given.
                    return url
            try:
                # FIX: reuse the BibRecDocs instance built above instead of
                # constructing a second one for the same record.
                bibdoc = bibrecdocs.get_bibdoc(docname)
            except InvenioBibDocFileError:
                return url
            try:
                bibdocfile = bibdoc.get_file(format=format)
                return bibdocfile.get_url()
            except InvenioBibDocFileError:
                # Requested format missing: fall back to the record page.
                return url
        return url
def format_element(bfo, template='record_hb.html', subformat_re='icon.*',
                   as_url=False, **kwargs):
    """Return the record's icon, either as a bare URL or as rendered HTML."""
    documents = BibRecDocs(bfo.recID).list_bibdocs()
    if len(documents) == 0:
        return None
    first_doc = documents[0]
    # Prefer an icon matching the requested subformat; fall back to any icon.
    icon = first_doc.get_icon(subformat_re=re.compile(subformat_re))
    if not icon:
        icon = first_doc.get_icon()
    if not icon:
        return ""
    if as_url:
        return icon.get_url()
    ctx = {
        'icon': icon,
        'bfo': bfo,
        'CFG_SITE_URL': current_app.config['CFG_SITE_URL'],
    }
    return template_icon.render(**ctx)
class BibDocFsInfoTest(InvenioTestCase):
    """Regression tests about the table bibdocfsinfo"""

    def setUp(self):
        # Attach a fresh test image to record 2 under a unique docname so
        # repeated runs never collide.
        from invenio.legacy.bibdocfile.api import BibRecDocs
        self.my_bibrecdoc = BibRecDocs(2)
        self.unique_name = self.my_bibrecdoc.propose_unique_docname('file')
        self.my_bibdoc = self.my_bibrecdoc.add_new_file(
            pkg_resources.resource_filename(
                'invenio_demosite.testsuite.regression',
                'data/test.jpg'),
            docname=self.unique_name)
        self.my_bibdoc_id = self.my_bibdoc.id

    def tearDown(self):
        # Hard-remove the bibdoc created in setUp (bypasses soft delete).
        self.my_bibdoc.expunge()

    def test_hard_delete(self):
        """bibdocfile - test correct update of bibdocfsinfo when hard-deleting"""
        from invenio.legacy.dbquery import run_sql
        # After setUp only version 1 (.jpg) exists and is the last version.
        self.assertEqual(run_sql("SELECT MAX(version) FROM bibdocfsinfo WHERE id_bibdoc=%s",
                                 (self.my_bibdoc_id, ))[0][0], 1)
        self.assertEqual(run_sql("SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=1 AND format='.jpg'",
                                 (self.my_bibdoc_id, ))[0][0], True)
        # Adding a new version must bump MAX(version) and move the
        # last_version flag from v1 to v2.
        self.my_bibdoc.add_file_new_version(
            pkg_resources.resource_filename(
                'invenio_demosite.testsuite.regression',
                'data/test.gif'))
        self.assertEqual(run_sql("SELECT MAX(version) FROM bibdocfsinfo WHERE id_bibdoc=%s",
                                 (self.my_bibdoc_id, ))[0][0], 2)
        self.assertEqual(run_sql("SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=2 AND format='.gif'",
                                 (self.my_bibdoc_id, ))[0][0], True)
        self.assertEqual(run_sql("SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=1 AND format='.jpg'",
                                 (self.my_bibdoc_id, ))[0][0], False)
        # Hard-deleting v2 must restore v1 as the last version.
        self.my_bibdoc.delete_file('.gif', 2)
        self.assertEqual(run_sql("SELECT MAX(version) FROM bibdocfsinfo WHERE id_bibdoc=%s",
                                 (self.my_bibdoc_id, ))[0][0], 1)
        self.assertEqual(run_sql("SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=1 AND format='.jpg'",
                                 (self.my_bibdoc_id, ))[0][0], True)
def format_element(bfo): """ Display image of the thumbnail plot if we are in selected plots collections """ ## To achieve this, we take the Thumb file associated with this document bibarchive = BibRecDocs(bfo.recID) img_files = [] for doc in bibarchive.list_bibdocs(): for _file in doc.list_latest_files(): if _file.get_type() == "Plot": caption_text = _file.get_description()[5:] index = int(_file.get_description()[:5]) img_location = _file.get_url() if img_location == "": continue img = '<img src="%s" width="100px"/>' % (img_location) img_files.append((index, img_location)) # FIXME: was link here if _file.get_type() == "Thumb": img_location = _file.get_url() img = '<img src="%s" width="100px"/>' % (img_location) return '<div align="left">' + img + "</div>" # then we use the default: the last plot with an image img_files = sorted(img_files, key=lambda x: x[0]) if img_files: return '<div align="left">' + img_files[-1][1] + "</div>" else: return ""
def _documents_has_been_updated(self, recid):
    """Return True when any bibdoc of ``recid`` is as recent as the record.

    Each bibdoc's modification date (plus a 2-second rounding tolerance) is
    compared to the record's modification date read from ``bibrec``.
    """
    from invenio.legacy.bibdocfile.api import BibRecDocs
    import datetime
    bibdocs = BibRecDocs(recid)
    # TODO: replace legacy code (switch to JsonAlchemy record dates once
    # its date-parsing bug is resolved).
    from invenio.legacy.dbquery import run_sql
    # BUGFIX: bind recid as a query parameter instead of %-interpolating it
    # into the SQL string (SQL-injection / quoting hazard).
    (record_creation_date, record_modification_date) = run_sql(
        "SELECT creation_date, modification_date from bibrec where id=%s",
        (recid, )
    )[0]
    if not bibdocs.list_bibdocs():
        self.app.logger.debug("No docs for: %s" % recid)
    for b in bibdocs.list_bibdocs():
        # Allow a few seconds of slack for timestamp rounding.
        if b.md + datetime.timedelta(seconds=2) >= record_modification_date:
            return True
    return False
def test_BibDocFiles(self): """bibdocfile - BibDocFile functions """ #add bibdoc from invenio.legacy.bibdocfile.api import BibRecDocs my_bibrecdoc = BibRecDocs(2) timestamp = datetime(*(time.strptime("2010-09-08 07:06:05", "%Y-%m-%d %H:%M:%S")[:6])) my_bibrecdoc.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg', 'Main', 'img_test', False, 'test add new file', 'test', '.jpg', modification_date=timestamp) my_new_bibdoc = my_bibrecdoc.get_bibdoc("img_test") my_new_bibdocfile = my_new_bibdoc.list_all_files()[0] #get url self.assertEqual(my_new_bibdocfile.get_url(), CFG_SITE_URL + '/%s/2/files/img_test.jpg' % CFG_SITE_RECORD) #get type self.assertEqual(my_new_bibdocfile.get_type(), 'Main') #get path # we should not test for particular path ! this is in the gestion of the underlying implementation, # not the interface which should ne tested # self.assert_(my_new_bibdocfile.get_path().startswith(CFG_BIBDOCFILE_FILEDIR)) # self.assert_(my_new_bibdocfile.get_path().endswith('/img_test.jpg;1')) #get bibdocid self.assertEqual(my_new_bibdocfile.get_bibdocid(), my_new_bibdoc.get_id()) #get name self.assertEqual(my_new_bibdocfile.get_name() , 'img_test') #get full name self.assertEqual(my_new_bibdocfile.get_full_name() , 'img_test.jpg') #get full path #self.assert_(my_new_bibdocfile.get_full_path().startswith(CFG_BIBDOCFILE_FILEDIR)) #self.assert_(my_new_bibdocfile.get_full_path().endswith('/img_test.jpg;1')) #get format self.assertEqual(my_new_bibdocfile.get_format(), '.jpg') #get version self.assertEqual(my_new_bibdocfile.get_version(), 1) #get description self.assertEqual(my_new_bibdocfile.get_description(), my_new_bibdoc.get_description('.jpg', version=1)) #get comment self.assertEqual(my_new_bibdocfile.get_comment(), my_new_bibdoc.get_comment('.jpg', version=1)) #get recid self.assertEqual(my_new_bibdocfile.get_recid(), 2) #get status self.assertEqual(my_new_bibdocfile.get_status(), '') #get size self.assertEqual(my_new_bibdocfile.get_size(), 91750) #get checksum 
self.assertEqual(my_new_bibdocfile.get_checksum(), '28ec893f9da735ad65de544f71d4ad76') #check self.assertEqual(my_new_bibdocfile.check(), True) #display import invenio.legacy.template tmpl = invenio.legacy.template.load("bibdocfile") value = tmpl.tmpl_display_bibdocfile(my_new_bibdocfile, ln='en') assert 'files/img_test.jpg?version=1">' in value #hidden? self.assertEqual(my_new_bibdocfile.hidden_p(), False) #check modification date self.assertEqual(my_new_bibdocfile.md, timestamp) #delete my_new_bibdoc.delete() self.assertEqual(my_new_bibdoc.deleted_p(), True)
def setUp(self):
    """Attach a fresh test image to record 2 and remember its identifiers."""
    from invenio.legacy.bibdocfile.api import BibRecDocs
    self.my_bibrecdoc = BibRecDocs(2)
    # A unique docname keeps repeated test runs from colliding.
    self.unique_name = self.my_bibrecdoc.propose_unique_docname('file')
    test_image = pkg_resources.resource_filename(
        'invenio_demosite.testsuite.regression', 'data/test.jpg')
    self.my_bibdoc = self.my_bibrecdoc.add_new_file(
        test_image, docname=self.unique_name)
    self.my_bibdoc_id = self.my_bibdoc.id
def format_element(bfo, width="", caption="yes", max="-1"): """ Display image of the plot if we are in selected plots collections @param width: the width of the returned image (Eg: '100px') @param caption: display the captions or not? @param max: the maximum number of plots to display (-1 is all plots) """ ## To achieve this, we take the pngs associated with this document img_files = [] max = int(max) bibarchive = BibRecDocs(bfo.recID) if width != "": width = 'width="%s"' % width for doc in bibarchive.list_bibdocs(): for _file in doc.list_latest_files(): if _file.get_type() == "Plot": try: caption_text = _file.get_description()[5:] index = int(_file.get_description()[:5]) img_location = _file.get_url() except: # FIXME: we have hit probably a plot context file, # so ignore this document; but it would be safer # to check subformat type, so that we don't mask # other eventual errors here. continue img = '<img src="%s" title="%s" %s/>' % \ (img_location, caption_text, width) link = create_html_link(urlbase='%s/%s/%s/plots#%d' % (CFG_SITE_URL, CFG_SITE_RECORD, bfo.recID,\ index), urlargd={}, link_label=img) img_files.append((index, link)) img_files = sorted(img_files, key=lambda x: x[0]) if max > 0: img_files = img_files[:max] for index in range(len(img_files)): img_files[index] = img_files[index][1] if len(img_files) == 0: return '' return '<div style="overflow-x:scroll;width=100%;white-space:nowrap">' +\ " ".join(img_files) + '</div>'
def get_filetypes(recid):
    """
    Returns filetypes extensions associated with given record.

    Takes as a parameter the recid of a record.
    @param url_field: recid of a record
    """
    from invenio.legacy.bibdocfile.api import BibRecDocs
    latest_files = BibRecDocs(recid).list_latest_files()
    return [_get_filetype(one_file.format) for one_file in latest_files]
def list_pdfs(recid):
    """Yield every PDF-flavoured file attached to the given record."""
    for doc in BibRecDocs(recid).list_bibdocs():
        for extension in ('pdf', 'pdfa', 'PDF'):
            try:
                yield doc.get_file(extension)
            except InvenioBibDocFileError:
                # The document simply has no file in this format.
                pass
def format_element(bfo):
    """ Format element function to create the select and option elements
    with HTML5 data attributes that store all the necesarry metadata to
    construct video sources with JavaScript."""
    videos = {
        "360p": {"width": 640, "height": 360, "poster": None, "mp4": None, "webm": None, "ogv": None},
        "480p": {"width": 854, "height": 480, "poster": None, "mp4": None, "webm": None, "ogv": None},
        "720p": {"width": 1280, "height": 720, "poster": None, "mp4": None, "webm": None, "ogv": None},
        "1080p": {"width": 1920, "height": 1080, "poster": None, "mp4": None, "webm": None, "ogv": None},
    }
    recdoc = BibRecDocs(bfo.recID)
    ## Go through all the BibDocs and search for video related signatures
    for bibdoc in recdoc.list_bibdocs():
        for bibdocfile in bibdoc.list_all_files():
            subformat = bibdocfile.get_subformat()
            if subformat not in ("360p", "480p", "720p", "1080p"):
                continue
            ## When a video signature is found, record its URL per codec
            if bibdocfile.get_superformat() in (".mp4", ".webm", ".ogv"):
                codec = bibdocfile.get_superformat()[1:]
                videos[subformat][codec] = bibdocfile.get_url()
            ## When a poster signature is found, record the poster URL.
            # BUGFIX: the original tested ``get_comment() in ("POSTER")``;
            # ("POSTER") is just the string "POSTER", so that was a substring
            # test (and raised TypeError for a None comment). Equality is the
            # intended check.
            elif bibdocfile.get_comment() == "POSTER":
                videos[subformat]["poster"] = bibdocfile.get_url()
    ## Build video select options for every video size format that was found
    select_options = []
    for key, options in iteritems(videos):
        ## If we have at least one url, the format is available
        if options["mp4"] or options["webm"] or options["ogv"]:
            ## create an option element
            option_element = create_option_element(
                url_webm=options["webm"], url_ogv=options["ogv"],
                url_mp4=options["mp4"], url_poster=options["poster"],
                width=options["width"], height=options["height"],
                subformat=key)
            select_options.append(option_element)
    select_element = create_select_element(select_options)
    return select_element
def Add_Files(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead."""
    files_dir = "%s/files" % curdir
    if not os.path.exists(files_dir):
        return ""
    bibrecdocs = BibRecDocs(sysno)
    for current_file in os.listdir(files_dir):
        fullpath = "%s/%s" % (files_dir, current_file)
        dummy, filename, extension = decompose_file(current_file)
        # Normalise the extension so it always carries a leading dot.
        if extension and extension[0] != ".":
            extension = '.' + extension
        # Only attach files that are not already part of the record.
        if not bibrecdocs.check_file_exists(fullpath, extension):
            bibrecdocs.add_new_file(fullpath, "Main", never_fail=True)
    return ""
def Add_Files(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead."""
    if os.path.exists("%s/files" % curdir):
        record_docs = BibRecDocs(sysno)
        for entry in os.listdir("%s/files" % curdir):
            fullpath = "%s/files/%s" % (curdir, entry)
            dummy, filename, extension = decompose_file(entry)
            # Extensions are stored with a leading dot; add it if missing.
            if extension and not extension.startswith("."):
                extension = '.' + extension
            # Skip files whose content is already attached to the record.
            if record_docs.check_file_exists(fullpath, extension):
                continue
            record_docs.add_new_file(fullpath, "Main", never_fail=True)
    return ""
def _bibdoc_modify_files(recid, form):
    """Apply rename/delete actions encoded in a submitted form to the
    bibdocs of ``recid``.

    Form keys ``__file__name__<docid>`` request a rename to the value;
    ``__file__delete__<docid>`` with value ``Delete`` requests deletion.
    A delete always overrides a rename for the same docid.
    """
    from invenio.legacy.bibdocfile.api import BibRecDocs
    try:
        recdocs = BibRecDocs(recid)
    except Exception:
        # Narrowed from a bare ``except:`` so SystemExit/KeyboardInterrupt
        # are not swallowed.
        current_app.logger.error(
            "REST API: Error while building BibRecDocs for record %d"
            % (recid, ))
        return []
    actions = {}
    for (k, v) in form.items():
        if k.startswith('__file__name__'):
            docid = int(k[len('__file__name__'):])
            docname = recdocs.get_docname(docid)
            if docname != v:
                actions[docid] = ('rename', docname, v)
        if k.startswith('__file__delete__') and v == 'Delete':
            docid = int(k[len('__file__delete__'):])
            docname = recdocs.get_docname(docid)
            # overwrite rename: a delete for the same doc always wins
            actions[docid] = ('delete', docname, None)
    for (act, docname, newname) in actions.values():
        if act == 'delete':
            current_app.logger.info("deleting bibdoc/file: {}/'{}'".format(
                recid, docname))
            recdocs.delete_bibdoc(docname)
        elif act == 'rename':
            current_app.logger.info(
                "renaming bibdoc/file: {}/'{}' -> '{}'".format(
                    recid, docname, newname))
            recdocs.change_name(newname=newname, oldname=docname)
def upload_fulltext(recid, path):
    '''
    This method save the uploaded file to associated record

    @param recid: id of the record
    @param path: uploaded document to store
    '''
    # Derive the document name and format from the uploaded file's path.
    basename = path.split('/')[-1]
    docname = basename.split('.')[0]
    doctype = path.split('.')[-1].split(';')[0]
    # Attach the file to the record under the upload collection doctype.
    archive = BibRecDocs(recid)
    archive.add_new_file(path, CFG_DOCTYPE_UPLOAD_COLLECTION, docname,
                         format=doctype)
    return ''
def get(self, recid):
    """List non-icon files of the record and of its related records."""
    from invenio.legacy.bibdocfile.api import BibRecDocs
    from invenio.legacy.search_engine import check_user_can_view_record
    record = get_record(recid)
    if not record:
        abort(404)
    auth_code, _ = check_user_can_view_record(current_user, recid)
    if auth_code:
        abort(401)
    # Collect this record plus any related dataset/software record ids.
    ids = [recid]
    for k in ['rel_dataset', 'rel_software']:
        ids.extend([int(r) for r in record.get(k, [])])
    files = []
    for rec_id in ids:
        latest = BibRecDocs(rec_id).list_latest_files(list_hidden=False)
        for f in latest:
            if f.is_icon():
                continue
            files.append({
                'id': f.docid,
                'name': '%s%s' % (f.name, f.format),
                'url': url_for('recordfileresource',
                               recid=rec_id, fileid=f.docid),
            })
    return files
def _add_bibdoc_files():
    """Expose the record's visible, unrestricted latest files as
    ``zenodo_files`` for the template context."""
    from invenio.legacy.bibdocfile.api import BibRecDocs
    latest = BibRecDocs(
        kwargs['recid'], human_readable=True).list_latest_files(
            list_hidden=False)
    visible = []
    for f in latest:
        # Skip icons and any file the current user is not allowed to read.
        if f.is_icon():
            continue
        if f.is_restricted(current_user)[0] != 0:
            continue
        visible.append(f)
    return dict(zenodo_files=visible)
def check_record(record):
    """
    Validates the checksum of all the BibDocFile's in the record
    """
    record_id = record["001"][0][3]
    for doc in BibRecDocs(record_id).list_bibdocs():
        for bibfile in doc.list_latest_files():
            # A missing file on disk makes further checks meaningless.
            if not os.path.exists(bibfile.fullpath):
                record.set_invalid("File doesn't exists %s"
                                   % bibfile.fullpath)
                continue
            if not bibfile.check():
                record.set_invalid("Invalid checksum for file %s"
                                   % bibfile.fullpath)
            if not HAS_MAGIC:
                continue
            # Cross-check the extension-derived mime type against what the
            # magic library reads from the file headers.
            if HAS_MAGIC == 1:
                magic_mime = magic.from_file(bibfile.fullpath, mime=True)
            else:
                magic_mime = magic_object.file(bibfile.fullpath)
            if bibfile.mime != magic_mime:
                record.set_invalid(
                    ("Guessed mime type from extension (%s) is different" +
                     "from guessed mime type from headers (%s)") %
                    (bibfile.mime, magic_mime))
class BibDocFsInfoTest(InvenioTestCase):
    """Regression tests about the table bibdocfsinfo"""

    def setUp(self):
        # Create a uniquely-named bibdoc on record 2 from the bundled
        # test image.
        from invenio.legacy.bibdocfile.api import BibRecDocs
        self.my_bibrecdoc = BibRecDocs(2)
        self.unique_name = self.my_bibrecdoc.propose_unique_docname('file')
        self.my_bibdoc = self.my_bibrecdoc.add_new_file(
            pkg_resources.resource_filename(
                'invenio_demosite.testsuite.regression',
                'data/test.jpg'),
            docname=self.unique_name)
        self.my_bibdoc_id = self.my_bibdoc.id

    def tearDown(self):
        # Permanently remove the bibdoc created in setUp.
        self.my_bibdoc.expunge()

    def test_hard_delete(self):
        """bibdocfile - test correct update of bibdocfsinfo when hard-deleting"""
        from invenio.legacy.dbquery import run_sql
        # Fresh bibdoc: one version, .jpg flagged as last_version.
        self.assertEqual(
            run_sql("SELECT MAX(version) FROM bibdocfsinfo WHERE id_bibdoc=%s",
                    (self.my_bibdoc_id, ))[0][0], 1)
        self.assertEqual(
            run_sql(
                "SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=1 AND format='.jpg'",
                (self.my_bibdoc_id, ))[0][0], True)
        # Adding version 2 (.gif) must shift the last_version flag.
        self.my_bibdoc.add_file_new_version(
            pkg_resources.resource_filename(
                'invenio_demosite.testsuite.regression',
                'data/test.gif'))
        self.assertEqual(
            run_sql("SELECT MAX(version) FROM bibdocfsinfo WHERE id_bibdoc=%s",
                    (self.my_bibdoc_id, ))[0][0], 2)
        self.assertEqual(
            run_sql(
                "SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=2 AND format='.gif'",
                (self.my_bibdoc_id, ))[0][0], True)
        self.assertEqual(
            run_sql(
                "SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=1 AND format='.jpg'",
                (self.my_bibdoc_id, ))[0][0], False)
        # Hard-deleting version 2 must make version 1 current again.
        self.my_bibdoc.delete_file('.gif', 2)
        self.assertEqual(
            run_sql("SELECT MAX(version) FROM bibdocfsinfo WHERE id_bibdoc=%s",
                    (self.my_bibdoc_id, ))[0][0], 1)
        self.assertEqual(
            run_sql(
                "SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=1 AND format='.jpg'",
                (self.my_bibdoc_id, ))[0][0], True)
def get_pdf_snippets(recID, patterns, user_info):
    """
    Extract text snippets around 'patterns' from the newest PDF file of 'recID'
    The search is case-insensitive.
    The snippets are meant to look like in the results of the popular search
    engine: using " ... " between snippets.
    For empty patterns it returns ""

    @param recID: record ID to consider
    @param patterns: list of patterns to retrieve
    @param user_info: the user_info object from collect_user_info
    @return: snippet
    """
    from invenio.legacy.bibdocfile.api import BibRecDocs, check_bibdoc_authorization
    text_path = ""
    text_path_courtesy = ""
    for bd in BibRecDocs(recID).list_bibdocs():
        # Show excluded fulltext in snippets on Inspire, otherwise depending on authorization
        if hasattr(bd, 'get_text') and \
                (CFG_INSPIRE_SITE or
                 not check_bibdoc_authorization(user_info, bd.get_status())[0]):
            text_path = bd.get_text_path()
            text_path_courtesy = bd.get_status()
            if CFG_INSPIRE_SITE and not text_path_courtesy:
                # get courtesy from doctype, since docstatus was empty:
                text_path_courtesy = bd.get_type()
                if text_path_courtesy == 'INSPIRE-PUBLIC':
                    # but ignore 'INSPIRE-PUBLIC' doctype
                    text_path_courtesy = ''
            break  # stop at the first good PDF textable file
    # Defaults come from the '' key of the config maps; a courtesy-specific
    # entry overrides them below.
    nb_chars = CFG_WEBSEARCH_FULLTEXT_SNIPPETS_CHARS.get('', 0)
    max_snippets = CFG_WEBSEARCH_FULLTEXT_SNIPPETS.get('', 0)
    if text_path_courtesy in CFG_WEBSEARCH_FULLTEXT_SNIPPETS_CHARS:
        nb_chars = CFG_WEBSEARCH_FULLTEXT_SNIPPETS_CHARS[text_path_courtesy]
    if text_path_courtesy in CFG_WEBSEARCH_FULLTEXT_SNIPPETS:
        max_snippets = CFG_WEBSEARCH_FULLTEXT_SNIPPETS[text_path_courtesy]
    if text_path and nb_chars and max_snippets:
        out = ''
        if CFG_WEBSEARCH_FULLTEXT_SNIPPETS_GENERATOR == 'native':
            out = get_text_snippets(text_path, patterns, nb_chars,
                                    max_snippets)
            if not out:
                # no hit, so check stemmed versions:
                from invenio.legacy.bibindex.engine_stemmer import stem
                stemmed_patterns = [stem(p, 'en') for p in patterns]
                out = get_text_snippets(text_path, stemmed_patterns,
                                        nb_chars, max_snippets)
        elif CFG_WEBSEARCH_FULLTEXT_SNIPPETS_GENERATOR == 'SOLR':
            from invenio.legacy.miscutil.solrutils_bibindex_searcher import solr_get_snippet
            out = solr_get_snippet(patterns, recID, nb_chars, max_snippets)
        if out:
            out_courtesy = ""
            if CFG_INSPIRE_SITE and text_path_courtesy:
                # Credit the fulltext provider when known.
                out_courtesy = '<strong>Snippets courtesy of ' + \
                               text_path_courtesy + '</strong><br>'
            return '%s%s' % (out_courtesy, out)
        else:
            return ""
    else:
        return ""
def fix_recid(recid, logfile):
    """Fix a given recid.

    Runs ``BibRecDocs.fix`` on each docname of the record, reporting
    progress on stdout and detailed state to ``logfile``.  Returns True on
    success, False when a BibDocFile error interrupted the fix.
    """
    print("Upgrading record %s ->" % recid, end=' ')
    print("Upgrading record %s:" % recid, file=logfile)
    bibrec = BibRecDocs(recid)
    print(bibrec, file=logfile)
    docnames = bibrec.get_bibdoc_names()
    try:
        for docname in docnames:
            print(docname, end=' ')
            new_bibdocs = bibrec.fix(docname)
            new_bibdocnames = [
                bibrec.get_docname(bibdoc.id) for bibdoc in new_bibdocs
            ]
            if new_bibdocnames:
                print("(created bibdocs: '%s')" % "', '".join(new_bibdocnames),
                      end=' ')
                print("(created bibdocs: '%s')" % "', '".join(new_bibdocnames),
                      file=logfile)
    except InvenioBibDocFileError as e:
        print(BibRecDocs(recid), file=logfile)
        # BUGFIX: the original called print("%s -> ERROR", e), passing the
        # exception as a second positional argument instead of formatting it.
        print("%s -> ERROR" % e)
        return False
    else:
        print(BibRecDocs(recid), file=logfile)
        print("-> OK")
        return True
def _bibdoc_modify_files(recid, form):
    """Rename or delete bibdocs of ``recid`` according to form fields.

    ``__file__name__<docid>`` keys request a rename to the value and
    ``__file__delete__<docid>`` (value ``Delete``) requests deletion; a
    delete recorded for a docid overrides an earlier rename.
    """
    from invenio.legacy.bibdocfile.api import BibRecDocs
    try:
        recdocs = BibRecDocs(recid)
    except Exception:
        # Narrowed from a bare ``except:`` — never trap SystemExit or
        # KeyboardInterrupt here.
        current_app.logger.error("REST API: Error while building BibRecDocs for record %d" % (recid,))
        return []
    actions = {}
    for (k, v) in form.items():
        if k.startswith('__file__name__'):
            docid = int(k[len('__file__name__'):])
            docname = recdocs.get_docname(docid)
            if docname != v:
                actions[docid] = ('rename', docname, v)
        if k.startswith('__file__delete__') and v == 'Delete':
            docid = int(k[len('__file__delete__'):])
            docname = recdocs.get_docname(docid)
            # overwrite rename
            actions[docid] = ('delete', docname, None)
    for (act, docname, newname) in actions.values():
        if act == 'delete':
            current_app.logger.info("deleting bibdoc/file: {}/'{}'".format(recid, docname))
            recdocs.delete_bibdoc(docname)
        elif act == 'rename':
            current_app.logger.info("renaming bibdoc/file: {}/'{}' -> '{}'".format(recid, docname, newname))
            recdocs.change_name(newname=newname, oldname=docname)
def _bibdocfile_update(obj, eng):
    """Run the configured ``process`` hook on the deposition's documents."""
    if not process:
        return
    deposition = Deposition(obj)
    sip = deposition.get_latest_sip(sealed=False)
    recid = sip.metadata.get('recid')
    # Only sealed-record depositions with a recid have documents to touch.
    if recid:
        process(deposition, BibRecDocs(int(recid)))
        deposition.update()
def xapian_add_all(lower_recid, upper_recid):
    """
    Adds the regarding field values of all records from the lower recid
    to the upper one to Xapian.  It preserves the fulltext information.
    """
    xapian_init_databases()
    for recid in range(lower_recid, upper_recid + 1):
        # Each field is indexed best-effort: a missing or broken value is
        # indexed as the empty string.  The handlers are narrowed from bare
        # ``except:`` so SystemExit/KeyboardInterrupt still propagate.
        try:
            abstract = unicode(
                get_fieldvalues(recid, CFG_MARC_ABSTRACT)[0], 'utf-8')
        except Exception:
            abstract = ""
        xapian_add(recid, "abstract", abstract)
        try:
            first_author = get_fieldvalues(recid, CFG_MARC_AUTHOR_NAME)[0]
            additional_authors = reduce(
                lambda x, y: x + " " + y,
                get_fieldvalues(recid, CFG_MARC_ADDITIONAL_AUTHOR_NAME), '')
            author = unicode(first_author + " " + additional_authors, 'utf-8')
        except Exception:
            author = ""
        xapian_add(recid, "author", author)
        try:
            bibrecdocs = BibRecDocs(recid)
            fulltext = unicode(bibrecdocs.get_text(), 'utf-8')
        except Exception:
            fulltext = ""
        xapian_add(recid, "fulltext", fulltext)
        try:
            keyword = unicode(
                reduce(lambda x, y: x + " " + y,
                       get_fieldvalues(recid, CFG_MARC_KEYWORD), ''), 'utf-8')
        except Exception:
            keyword = ""
        xapian_add(recid, "keyword", keyword)
        try:
            title = unicode(get_fieldvalues(recid, CFG_MARC_TITLE)[0], 'utf-8')
        except Exception:
            title = ""
        xapian_add(recid, "title", title)
def bst_openaire_check_rights():
    """ Tasklet to verify access rights consistency. """
    # Expected document status string per access-rights value.
    restrictions = {
        'cc0': '',
        'openAccess': '',
        'closedAccess': 'status: closedAccess',
        'restrictedAccess': 'status: restrictedAccess',
        'embargoedAccess': 'firerole: deny until "%(date)s"\nallow any',
    }
    errors = []
    keys = dict(current_app.config['CFG_ACCESS_RIGHTS_KEYS']).keys()
    for access_rights in keys:
        write_message(
            "Checking records with access rights '%s'" % access_rights)
        for r in search_pattern(p=access_rights, f="542__l"):
            date = ''
            if access_rights == 'embargoedAccess':
                # Embargoed records must carry an embargo date in 942__a.
                try:
                    date = get_fieldvalues(r, "942__a")[0]
                except IndexError:
                    raise Exception(
                        "Embargoed record %s is missing embargo date in 942__a"
                        % r
                    )
            expected_status = restrictions[access_rights] % {'date': date}
            for d in BibRecDocs(r).list_bibdocs():
                real_status = d.get_status()
                if real_status != expected_status:
                    d.set_status(expected_status)
                    write_message(
                        "Fixed record %s with wrong status. From: %s To: %s" %
                        (r, real_status, expected_status))
    for e in errors:
        write_message(e)
def download_one(recid, version):
    """Download given version of the PDF from arxiv"""
    write_message('fetching %s' % recid)
    for count, arxiv_id in enumerate(extract_arxiv_ids_from_recid(recid)):
        # Only the first arXiv id of the record is processed; extra ids are
        # reported and skipped.
        if count != 0:
            write_message("Warning: %s has multiple arxiv #" % recid)
            continue

        url_for_pdf = build_arxiv_url(arxiv_id, version)
        filename_arxiv_id = arxiv_id.replace('/', '_')
        # NOTE: temp_file must stay referenced until the download completes;
        # NamedTemporaryFile removes the file when the object is collected.
        temp_file = NamedTemporaryFile(prefix="arxiv-pdf-checker",
                                       dir=CFG_TMPSHAREDDIR,
                                       suffix="%s.pdf" % filename_arxiv_id)
        write_message('downloading pdf from %s' % url_for_pdf)
        path = download_external_url(url_for_pdf, temp_file.name,
                                     content_type='pdf')

        # Check if it is not an html not found page: small responses are
        # scanned for the arXiv "PDF unavailable" marker.
        filesize = os.path.getsize(path)
        if filesize < 25000:
            f = open(path)
            try:
                for line in f:
                    if 'PDF unavailable' in line:
                        raise PdfNotAvailable()
            finally:
                f.close()

        docs = BibRecDocs(recid)
        bibdocfiles = docs.list_latest_files(doctype="arXiv")

        # Decide whether the downloaded PDF differs from what is stored.
        needs_update = False
        try:
            bibdocfile = bibdocfiles[0]
        except IndexError:
            # No arXiv file attached yet: always add.
            bibdocfile = None
            needs_update = True
        else:
            existing_md5 = calculate_md5(bibdocfile.fullpath)
            new_md5 = calculate_md5(path.encode('utf-8'))
            if new_md5 != existing_md5:
                write_message('md5 differs updating')
                needs_update = True
            else:
                write_message('md5 matches existing pdf, skipping')

        if needs_update:
            if bibdocfiles:
                write_message('adding as new version')
                docs.add_new_version(path, docname=bibdocfile.name)
            else:
                write_message('adding as new file')
                docs.add_new_file(path,
                                  doctype="arXiv",
                                  docname="arXiv:%s" % filename_arxiv_id)
        else:
            raise FoundExistingPdf()
def bst_openaire_check_rights():
    """ Tasklet to verify access rights consistency. """
    # Maps an access-rights value to the document status it implies.
    restrictions = {
        'cc0': '',
        'openAccess': '',
        'closedAccess': 'status: closedAccess',
        'restrictedAccess': 'status: restrictedAccess',
        'embargoedAccess': 'firerole: deny until "%(date)s"\nallow any',
    }
    errors = []
    rights_keys = dict(current_app.config['CFG_ACCESS_RIGHTS_KEYS']).keys()
    for access_rights in rights_keys:
        write_message("Checking records with access rights '%s'"
                      % access_rights)
        matching = search_pattern(p=access_rights, f="542__l")
        for r in matching:
            date = ''
            if access_rights == 'embargoedAccess':
                values = get_fieldvalues(r, "942__a")
                # Embargoed records without a date are a hard error.
                if not values:
                    raise Exception(
                        "Embargoed record %s is missing embargo date in 942__a"
                        % r)
                date = values[0]
            expected_status = restrictions[access_rights] % {'date': date}
            for d in BibRecDocs(r).list_bibdocs():
                real_status = d.get_status()
                if real_status == expected_status:
                    continue
                d.set_status(expected_status)
                write_message(
                    "Fixed record %s with wrong status. From: %s To: %s"
                    % (r, real_status, expected_status))
    for e in errors:
        write_message(e)
def _get_text(self, recid):
    """Build the fulltext indexing payload for *recid*.

    Returns a dict with the extracted text plus id/parent fields, or
    None (after a debug log) when the record has no text.
    """
    from invenio.legacy.bibdocfile.api import BibRecDocs

    fulltext = BibRecDocs(recid).get_text(True)
    if fulltext:
        return {
            "fulltext": fulltext,
            "recid": recid,
            "_id": recid,
            "_parent": recid,
        }
    self.app.logger.debug("No text for:%s" % recid)
    return None
def solr_add_range(lower_recid, upper_recid, tags_to_index, next_commit_counter):
    """
    Adds the regarding field values of all records from the lower recid
    to the upper one to Solr. It preserves the fulltext information.

    :param lower_recid: first record id (inclusive)
    :param upper_recid: last record id (inclusive)
    :param tags_to_index: MARC tag configuration for field extraction
    :param next_commit_counter: running counter for periodic Solr commits
    :return: the updated commit counter
    """
    for recid in range(lower_recid, upper_recid + 1):
        if record_exists(recid):
            abstract = get_field_content_in_utf8(recid, 'abstract', tags_to_index)
            author = get_field_content_in_utf8(recid, 'author', tags_to_index)
            keyword = get_field_content_in_utf8(recid, 'keyword', tags_to_index)
            title = get_field_content_in_utf8(recid, 'title', tags_to_index)
            try:
                bibrecdocs = BibRecDocs(recid)
                fulltext = unicode(bibrecdocs.get_text(), 'utf-8')
            except Exception:
                # Missing or unextractable fulltext is expected: index the
                # record without it. (Was a bare "except:", which also
                # swallowed KeyboardInterrupt/SystemExit and made the
                # indexing task impossible to abort.)
                fulltext = ''
            solr_add(recid, abstract, author, fulltext, keyword, title)
            next_commit_counter = solr_commit_if_necessary(next_commit_counter,
                                                           recid=recid)
    return next_commit_counter
def _add_record_variables():
    """Assemble template context: the record's visible latest files and
    the record object itself (closure over the surrounding `kwargs`)."""
    from invenio.legacy.bibdocfile.api import BibRecDocs
    from invenio.modules.records.api import get_record

    record_docs = BibRecDocs(kwargs['recid'], human_readable=True)
    visible_files = [docfile
                     for docfile in record_docs.list_latest_files(list_hidden=False)
                     if not docfile.is_icon()]
    # this updates the DB, but avoids ugly caching
    record = get_record(kwargs['recid'], True)
    return dict(daap_files=visible_files, daap_record=record)
def Move_Files_Archive(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    Attach every file found under the submission's MainFiles /
    AdditionalFiles directories to the record (doctype 'Main' /
    'Additional'), deleting previously attached documents whose files
    are no longer present on disk.
    """
    MainDir = "%s/files/MainFiles" % curdir
    IncludeDir = "%s/files/AdditionalFiles" % curdir
    watcheddirs = {'Main': MainDir, 'Additional': IncludeDir}
    for doctype, directory in iteritems(watcheddirs):
        if not os.path.exists(directory):
            continue
        # Map each base filename to the sorted list of its extensions.
        formats = {}
        entries = os.listdir(directory)
        entries.sort()
        for entry in entries:
            dummy, filename, extension = decompose_file(entry)
            formats.setdefault(filename, []).append(normalize_format(extension))
        # first delete all missing files
        bibarchive = BibRecDocs(sysno)
        for existingBibdoc in bibarchive.list_bibdocs(doctype):
            if bibarchive.get_docname(existingBibdoc.id) not in formats:
                existingBibdoc.delete()
        # then create/update the new ones
        for docname, extensions in formats.items():
            for extension in extensions:
                # BUGFIX: the original interpolated the whole extension
                # *list* into the path ('%s/%s%s' % (dir, key, formats[key])),
                # producing e.g. "dir/name['.pdf']" — a nonexistent file.
                # Add each (name, extension) pair individually instead.
                bibarchive.add_new_file('%s/%s%s' % (directory, docname, extension),
                                        doctype=doctype, never_fail=True)
    return ""
def get_files_from_bibdoc(recid):
    """
    Retrieves using BibDoc all the files related with a given record

    @param recid
    @return List of dictionaries containing all the information stored
            inside BibDoc if the current record has files attached, the
            empty list otherwise
    """
    # Falsy or negative record ids can never have attachments.
    if not recid or recid < 0:
        return []

    from invenio.legacy.bibdocfile.api import BibRecDocs, InvenioBibDocFileError

    try:
        bibrecdocs = BibRecDocs(int(recid))
    except InvenioBibDocFileError:
        return []

    # One metadata dict per latest-version file attached to the record.
    return [{
        'comment': afile.get_comment(),
        'description': afile.get_description(),
        'eformat': afile.get_format(),
        'full_name': afile.get_full_name(),
        'full_path': afile.get_full_path(),
        'magic': afile.get_magic(),
        'name': afile.get_name(),
        'path': afile.get_path(),
        'size': afile.get_size(),
        'status': afile.get_status(),
        'subformat': afile.get_subformat(),
        'superformat': afile.get_superformat(),
        'type': afile.get_type(),
        'url': afile.get_url(),
        'version': afile.get_version(),
    } for afile in bibrecdocs.list_latest_files()]
def format_element(bfo):
    """Render an HTML <select> whose <option> elements carry HTML5 data
    attributes describing every available video source (codec URLs,
    poster, dimensions) so JavaScript can build the player."""
    videos = {
        '360p': {'width': 640, 'height': 360, 'poster': None, 'mp4': None, 'webm': None, 'ogv': None},
        '480p': {'width': 854, 'height': 480, 'poster': None, 'mp4': None, 'webm': None, 'ogv': None},
        '720p': {'width': 1280, 'height': 720, 'poster': None, 'mp4': None, 'webm': None, 'ogv': None},
        '1080p': {'width': 1920, 'height': 1080, 'poster': None, 'mp4': None, 'webm': None, 'ogv': None},
    }

    # Scan every file of every BibDoc for video streams and poster frames.
    for bibdoc in BibRecDocs(bfo.recID).list_bibdocs():
        for docfile in bibdoc.list_all_files():
            size = docfile.get_subformat()
            if size not in ('360p', '480p', '720p', '1080p'):
                continue
            superformat = docfile.get_superformat()
            if superformat in ('.mp4', '.webm', '.ogv'):
                # A video stream: store its URL under the codec key.
                videos[size][superformat[1:]] = docfile.get_url()
            elif docfile.get_comment() in ('SUGGESTIONTUMB', 'BIGTHUMB', 'POSTER', 'SMALLTHUMB'):
                # A poster/thumbnail image for this resolution.
                videos[size]['poster'] = docfile.get_url()

    # One <option> per resolution that has at least one playable source.
    select_options = []
    for size_key, info in iteritems(videos):
        if info['mp4'] or info['webm'] or info['ogv']:
            select_options.append(create_option_element(url_webm=info['webm'],
                                                        url_ogv=info['ogv'],
                                                        url_mp4=info['mp4'],
                                                        url_poster=info['poster'],
                                                        width=info['width'],
                                                        height=info['height'],
                                                        subformat=size_key))
    return create_select_element(select_options)
def format_element(bfo):
    """ Format element function to create the select and option elements
    with HTML5 data attributes that store all the necesarry metadata to
    construct video sources with JavaScript."""
    videos = {
        '360p': {'width': 640, 'height': 360, 'poster': None, 'mp4': None, 'webm': None, 'ogv': None},
        '480p': {'width': 854, 'height': 480, 'poster': None, 'mp4': None, 'webm': None, 'ogv': None},
        '720p': {'width': 1280, 'height': 720, 'poster': None, 'mp4': None, 'webm': None, 'ogv': None},
        '1080p': {'width': 1920, 'height': 1080, 'poster': None, 'mp4': None, 'webm': None, 'ogv': None},
    }
    recdoc = BibRecDocs(bfo.recID)
    bibdocs = recdoc.list_bibdocs()
    ## Go through all the BibDocs and search for video related signatures
    for bibdoc in bibdocs:
        bibdocfiles = bibdoc.list_all_files()
        for bibdocfile in bibdocfiles:
            ## When a video signature is found, add the url to the videos dictionary
            if bibdocfile.get_superformat() in ('.mp4', '.webm', '.ogv') and bibdocfile.get_subformat() in ('360p', '480p', '720p', '1080p'):
                src = bibdocfile.get_url()
                codec = bibdocfile.get_superformat()[1:]
                size = bibdocfile.get_subformat()
                videos[size][codec] = src
            ## When a poster signature is found, add the url to the videos dictionary
            # BUGFIX: was `get_comment() in ('POSTER')` — ('POSTER') is just
            # the string 'POSTER' (no tuple), so the test was *substring*
            # membership and any comment like 'POST' or 'ER' wrongly matched.
            elif bibdocfile.get_comment() == 'POSTER' and bibdocfile.get_subformat() in ('360p', '480p', '720p', '1080p'):
                src = bibdocfile.get_url()
                size = bibdocfile.get_subformat()
                videos[size]['poster'] = src
    ## Build video select options for every video size format that was found
    select_options = []
    for key, options in iteritems(videos):
        ## If we have at least one url, the format is available
        if options['mp4'] or options['webm'] or options['ogv']:
            ## create an option element
            option_element = create_option_element(url_webm=options['webm'],
                                                   url_ogv=options['ogv'],
                                                   url_mp4=options['mp4'],
                                                   url_poster=options['poster'],
                                                   width=options['width'],
                                                   height=options['height'],
                                                   subformat=key)
            select_options.append(option_element)
    select_element = create_select_element(select_options)
    return select_element
def xapian_add_all(lower_recid, upper_recid):
    """
    Adds the regarding field values of all records from the lower recid
    to the upper one to Xapian. It preserves the fulltext information.

    Each field is indexed best-effort: any extraction failure (missing
    field, encoding problem, unreadable fulltext) falls back to "".

    :param lower_recid: first record id (inclusive)
    :param upper_recid: last record id (inclusive)
    """
    xapian_init_databases()
    # NOTE: the handlers below were bare "except:" clauses, which also
    # swallowed KeyboardInterrupt/SystemExit and made the long-running
    # indexing loop impossible to abort; narrowed to Exception.
    for recid in range(lower_recid, upper_recid + 1):
        try:
            abstract = unicode(get_fieldvalues(recid, CFG_MARC_ABSTRACT)[0], 'utf-8')
        except Exception:
            abstract = ""
        xapian_add(recid, "abstract", abstract)
        try:
            first_author = get_fieldvalues(recid, CFG_MARC_AUTHOR_NAME)[0]
            additional_authors = reduce(lambda x, y: x + " " + y,
                                        get_fieldvalues(recid, CFG_MARC_ADDITIONAL_AUTHOR_NAME),
                                        '')
            author = unicode(first_author + " " + additional_authors, 'utf-8')
        except Exception:
            author = ""
        xapian_add(recid, "author", author)
        try:
            bibrecdocs = BibRecDocs(recid)
            fulltext = unicode(bibrecdocs.get_text(), 'utf-8')
        except Exception:
            fulltext = ""
        xapian_add(recid, "fulltext", fulltext)
        try:
            keyword = unicode(reduce(lambda x, y: x + " " + y,
                                     get_fieldvalues(recid, CFG_MARC_KEYWORD),
                                     ''), 'utf-8')
        except Exception:
            keyword = ""
        xapian_add(recid, "keyword", keyword)
        try:
            title = unicode(get_fieldvalues(recid, CFG_MARC_TITLE)[0], 'utf-8')
        except Exception:
            title = ""
        xapian_add(recid, "title", title)
def _getfile_py(req, recid=0, docid=0, version="", name="", docformat="", ln=CFG_SITE_LANG):
    """Legacy getfile handler: resolve the requested document and issue a
    permanent redirect to the modern /record/<recid>/files/ URL."""
    if not recid:
        ## Let's obtain the recid from the docid
        if not docid:
            return warning_page(
                _('Not enough information to retrieve the document'), req, ln)
        try:
            recid = BibDoc(docid=docid).bibrec_links[0]["recid"]
        except InvenioBibDocFileError:
            return warning_page(
                _("An error has happened in trying to retrieve the requested file."
                  ), req, ln)
    else:
        brd = BibRecDocs(recid)
        if not name and docid:
            ## Let's obtain the name from the docid
            try:
                name = brd.get_docname(docid)
            except InvenioBibDocFileError:
                return warning_page(
                    _("An error has happened in trying to retrieving the requested file."
                      ), req, ln)

    docformat = normalize_format(docformat)
    version_arg = version and 'version=%s' % version or ''
    target = '%s/%s/%s/files/%s%s?ln=%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD,
                                              recid, name, docformat, ln,
                                              version_arg)
    redirect_to_url(req, target, apache.HTTP_MOVED_PERMANENTLY)
def _getfile_py(req, recid=0, docid=0, version="", name="", docformat="", ln=CFG_SITE_LANG):
    # Legacy getfile handler: resolves the requested document (from recid
    # and/or docid) and permanently redirects to the modern
    # /record/<recid>/files/<name><format> URL.
    if not recid:
        ## Let's obtain the recid from the docid
        if docid:
            try:
                bibdoc = BibDoc(docid=docid)
                # A BibDoc may link several records; the first link is used.
                recid = bibdoc.bibrec_links[0]["recid"]
            except InvenioBibDocFileError:
                return warning_page(_("An error has happened in trying to retrieve the requested file."), req, ln)
        else:
            # Neither recid nor docid given: nothing to resolve.
            return warning_page(_('Not enough information to retrieve the document'), req, ln)
    else:
        brd = BibRecDocs(recid)
        if not name and docid:
            ## Let's obtain the name from the docid
            try:
                name = brd.get_docname(docid)
            except InvenioBibDocFileError:
                return warning_page(_("An error has happened in trying to retrieving the requested file."), req, ln)

    docformat = normalize_format(docformat)
    # 'version=%s' is appended only when a version was explicitly requested.
    redirect_to_url(req, '%s/%s/%s/files/%s%s?ln=%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, recid, name, docformat, ln, version and 'version=%s' % version or ''), apache.HTTP_MOVED_PERMANENTLY)
def _bibdoc_file_list(recid):
    """Return a list of dicts describing the latest file of each BibDoc
    attached to *recid*.

    Each dict has keys: 'id', 'name' (filename without extension),
    'type' (extension without the leading dot) and 'size'.
    Returns [] when the record's documents cannot be loaded.
    """
    import os, os.path
    from invenio.legacy.bibdocfile.api import BibRecDocs
    try:
        recdocs = BibRecDocs(recid)
    except Exception:
        # Narrowed from a bare "except:" so SystemExit/KeyboardInterrupt
        # still propagate; any BibRecDocs failure degrades to "no files".
        current_app.logger.error("REST API: Error while building BibRecDocs for record %d" % (recid,))
        return []
    files = []
    for d in recdocs.list_bibdocs():
        df = d.list_latest_files()
        if not df:
            # BibDoc without any file: skip it.
            continue
        filename = df[0].get_full_name().decode('utf-8')
        docname, doctype = os.path.splitext(filename)
        if doctype.startswith('.'):
            doctype = doctype[1:]
        files.append({
            'id': d.get_id(),
            'name': docname,
            'type': doctype,
            'size': df[0].get_size(),
        })
    return files
def fix_recid(recid, logfile):
    """Fix a given recid.

    Runs BibRecDocs.fix() over every docname of the record, reporting
    progress on stdout and details to *logfile*.

    :param recid: id of the record to fix
    :param logfile: open file object receiving the verbose log
    :return: True on success, False when an InvenioBibDocFileError occurred
    """
    print("Upgrading record %s ->" % recid, end=' ')
    print("Upgrading record %s:" % recid, file=logfile)
    bibrec = BibRecDocs(recid)
    print(bibrec, file=logfile)
    docnames = bibrec.get_bibdoc_names()
    try:
        for docname in docnames:
            print(docname, end=' ')
            new_bibdocs = bibrec.fix(docname)
            new_bibdocnames = [bibrec.get_docname(bibdoc.id) for bibdoc in new_bibdocs]
            if new_bibdocnames:
                print("(created bibdocs: '%s')" % "', '".join(new_bibdocnames), end=' ')
                print("(created bibdocs: '%s')" % "', '".join(new_bibdocnames), file=logfile)
    except InvenioBibDocFileError as e:
        # Log the (possibly partially fixed) state before reporting failure.
        print(BibRecDocs(recid), file=logfile)
        # BUGFIX: was print("%s -> ERROR", e), which passed the format
        # string and the exception as two separate print arguments and
        # emitted the literal "%s -> ERROR" instead of interpolating.
        print("%s -> ERROR" % e)
        return False
    else:
        print(BibRecDocs(recid), file=logfile)
        print("-> OK")
        return True
def download_one(recid, version):
    """Download given version of the PDF from arxiv

    Fetches the PDF for the first arXiv identifier attached to *recid*,
    compares it (by MD5) against the latest already-attached "arXiv" file,
    and attaches it as a new file/version only when it differs.

    :param recid: record id whose arXiv PDF should be (re)fetched
    :param version: arXiv PDF version number passed to build_arxiv_url
    :raises PdfNotAvailable: when arXiv serves a "PDF unavailable" page
    :raises FoundExistingPdf: when the downloaded PDF matches the stored one
    """
    write_message("fetching %s" % recid)
    for count, arxiv_id in enumerate(extract_arxiv_ids_from_recid(recid)):
        # Only the first arXiv id is processed; the rest just trigger a warning.
        if count != 0:
            write_message("Warning: %s has multiple arxiv #" % recid)
            continue

        url_for_pdf = build_arxiv_url(arxiv_id, version)
        # "/" appears in old-style arXiv ids (e.g. hep-th/9901001); make it
        # filesystem-safe for the temp file suffix and the docname.
        filename_arxiv_id = arxiv_id.replace("/", "_")
        # NOTE(review): temp_file must stay referenced until the download and
        # attach are done — NamedTemporaryFile removes the file when the
        # object is garbage-collected; verify download_external_url writes
        # in place to temp_file.name.
        temp_file = NamedTemporaryFile(
            prefix="arxiv-pdf-checker", dir=CFG_TMPSHAREDDIR,
            suffix="%s.pdf" % filename_arxiv_id
        )
        write_message("downloading pdf from %s" % url_for_pdf)
        path = download_external_url(url_for_pdf, temp_file.name, content_type="pdf")

        # Check if it is not an html not found page
        # (heuristic: real PDFs are assumed to be at least 25000 bytes).
        filesize = os.path.getsize(path)
        if filesize < 25000:
            f = open(path)
            try:
                for line in f:
                    if "PDF unavailable" in line:
                        raise PdfNotAvailable()
            finally:
                f.close()

        docs = BibRecDocs(recid)
        bibdocfiles = docs.list_latest_files(doctype="arXiv")

        # Decide whether the downloaded PDF differs from what is attached.
        needs_update = False
        try:
            bibdocfile = bibdocfiles[0]
        except IndexError:
            # No arXiv file attached yet: always attach.
            bibdocfile = None
            needs_update = True
        else:
            existing_md5 = calculate_md5(bibdocfile.fullpath)
            # NOTE(review): encoding the path before calculate_md5 suggests
            # the helper expects bytes on this code path — confirm.
            new_md5 = calculate_md5(path.encode("utf-8"))
            if new_md5 != existing_md5:
                write_message("md5 differs updating")
                needs_update = True
            else:
                write_message("md5 matches existing pdf, skipping")

        if needs_update:
            if bibdocfiles:
                # An older version exists: keep its docname, bump the version.
                write_message("adding as new version")
                docs.add_new_version(path, docname=bibdocfile.name)
            else:
                write_message("adding as new file")
                docs.add_new_file(path, doctype="arXiv",
                                  docname="arXiv:%s" % filename_arxiv_id)
        else:
            raise FoundExistingPdf()
def format_element(bfo, separator=" ", style='', img_style='', text_style='font-size:small', print_links='yes', max_photos='', show_comment='yes', img_max_width='250px', display_all_version_links='yes'):
    """
    Lists the photos of a record. Display the icon version, linked to
    its original version.

    This element works for photos appended to a record as BibDoc files,
    for which a preview icon has been generated. If there are several
    formats for one photo, use the first one found.

    @param separator: separator between each photo
    @param print_links: if 'yes', print links to the original photo
    @param style: style attributes of the whole image block. Eg: "padding:2px;border:1px"
    @param img_style: style attributes of the images. Eg: "width:50px;border:none"
    @param text_style: style attributes of the text. Eg: "font-size:small"
    @param max_photos: the maximum number of photos to display
    @param show_comment: if 'yes', display the comment of each photo
    @param display_all_version_links: if 'yes', print links to additional (sub)formats
    """
    photos = []
    bibarchive = BibRecDocs(bfo.recID)
    bibdocs = bibarchive.list_bibdocs()
    # Non-numeric/empty max_photos means "show everything".
    if max_photos.isdigit():
        max_photos = int(max_photos)
    else:
        max_photos = len(bibdocs)

    for doc in bibdocs[:max_photos]:
        # Collect (size, url) for every icon of this photo, and remember the
        # last non-icon url as the link target fallback.
        found_icons = []
        found_url = ''
        for docfile in doc.list_latest_files():
            if docfile.is_icon():
                found_icons.append((docfile.get_size(), get_relative_url(docfile.get_url())))
            else:
                found_url = get_relative_url(docfile.get_url())
        # Sort by size so found_icons[0] is the smallest icon (displayed)
        # and found_icons[1], if any, a larger one (used as preview link).
        found_icons.sort()

        if found_icons:
            additional_links = ''
            name = bibarchive.get_docname(doc.id)
            comment = doc.list_latest_files()[0].get_comment()
            preview_url = None
            if len(found_icons) > 1:
                preview_url = get_relative_url(found_icons[1][1])

            # Links to all non-icon (sub)formats, sorted by size.
            additional_urls = [(docfile.get_size(),
                                get_relative_url(docfile.get_url()),
                                docfile.get_superformat(),
                                docfile.get_subformat())
                               for docfile in doc.list_latest_files()
                               if not docfile.is_icon()]
            additional_urls.sort()
            additional_links = [create_html_link(url, urlargd={},
                                                 linkattrd={'style': 'font-size:x-small'},
                                                 link_label="%s %s (%s)" % (format.strip('.').upper(), subformat, format_size(size)))
                                for (size, url, format, subformat) in additional_urls]

            # The <img> tag showing the smallest icon.
            img = '<img src="%(icon_url)s" alt="%(name)s" style="max-width:%(img_max_width)s;_width:%(img_max_width)s;%(img_style)s" />' % \
                {'icon_url': cgi.escape(get_relative_url(found_icons[0][1]), True),
                 'name': cgi.escape(name, True),
                 'img_style': img_style,
                 'img_max_width': img_max_width}

            # Optionally wrap in a link to the preview (or original) file.
            if print_links.lower() == 'yes':
                img = '<a href="%s">%s</a>' % (cgi.escape(preview_url or found_url, True), img)
            if display_all_version_links.lower() == 'yes' and additional_links:
                img += '<br />' + ' '.join(additional_links) + '<br />'
            if show_comment.lower() == 'yes' and comment:
                img += '<div style="margin-auto;text-align:center;%(text_style)s">%(comment)s</div>' % \
                    {'comment': comment.replace('\n', '<br/>'),
                     'text_style': text_style}
            # Outer per-photo container (inline-block with IE6/7 fallbacks).
            img = '<div style="vertical-align: middle;text-align:center;display:inline-block;display: -moz-inline-stack;zoom: 1;*display: inline;max-width:%(img_max_width)s;_width:%(img_max_width)s;text-align:center;%(style)s">%(img)s</div>' % \
                {'img_max_width': img_max_width,
                 'style': style,
                 'img': img}
            photos.append(img)

    return '<div>' + separator.join(photos) + '</div>'