def get_media_from_recid(recid):
    '''
    Describe the latest files attached to a record.

    @param recid: id of the record whose latest files are listed
    @return: list with one dictionary per file, holding the keys 'name',
        'file', 'type', 'path', 'collection', 'size', 'loaded' and
        'selected'
    '''
    def describe(bibdocfile):
        # The type is "application/" followed by whatever comes after the
        # last dot of the superformat.
        entry = {
            'name': bibdocfile.get_full_name(),
            'file': '',
            'type': 'application/%s' % bibdocfile.get_superformat().split(".")[-1],
            'path': bibdocfile.get_full_path(),
            'collection': bibdocfile.get_type(),
            'size': bibdocfile.get_size(),
            'loaded': False,
            'selected': '',
        }
        # Files whose type is "Main" are marked as selected.
        if entry['collection'] == "Main":
            entry['selected'] = 'checked=yes'
        return entry

    latest_files = BibRecDocs(recid).list_latest_files()
    return [describe(bibdocfile) for bibdocfile in latest_files]
def format_element(bfo):
    """
    Display the thumbnail image of the plots attached to the record.

    The latest files of every document of the record are scanned.  The
    first file of type "Thumb" is returned immediately.  When the record
    has no "Thumb" file, the "Plot" file with the highest index is shown
    instead; the index is read from the first five characters of the
    file description.

    @param bfo: BibFormat object; only its recID attribute is read here
    @return: an <img> tag wrapped in '<div align="left">', or "" when
        neither a thumbnail nor a usable plot is found
    """
    img_template = '<img src="%s" width="100px"/>'
    bibarchive = BibRecDocs(bfo.recID)
    img_files = []
    for doc in bibarchive.list_bibdocs():
        for _file in doc.list_latest_files():
            file_type = _file.get_type()
            if file_type == "Thumb":
                # A dedicated thumbnail always wins over the plots.
                return ('<div align="left">' +
                        img_template % _file.get_url() + "</div>")
            if file_type != "Plot":
                continue
            try:
                index = int(_file.get_description()[:5])
            except (TypeError, ValueError):
                # No description, or a description without a numeric
                # prefix: such a file cannot be ranked, so skip it.
                continue
            img_location = _file.get_url()
            if img_location == "":
                continue
            # Store the <img> tag itself so that the fallback below
            # renders an image rather than the bare URL text.
            img_files.append((index, img_template % img_location))

    # No thumbnail: use the default, i.e. the last plot with an image.
    img_files = sorted(img_files, key=lambda x: x[0])
    if img_files:
        return '<div align="left">' + img_files[-1][1] + "</div>"
    return ""
def test_BibDocFiles(self):
    """bibdocfile - BibDocFile functions """
    check = self.assertEqual
    holds = self.assert_

    # Attach a new document to record 2 and take its first file.
    recdocs = BibRecDocs(2)
    recdocs.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg',
                         'Main', 'img_test', False,
                         'test add new file', 'test', '.jpg')
    bibdoc = recdocs.get_bibdoc("img_test")
    docfile = bibdoc.list_all_files()[0]

    # URL and document type
    check(docfile.get_url(),
          CFG_SITE_URL + '/%s/2/files/img_test.jpg' % CFG_SITE_RECORD)
    check(docfile.get_type(), 'Main')

    # path on the file system
    holds(docfile.get_path().startswith(CFG_WEBSUBMIT_FILEDIR))
    holds(docfile.get_path().endswith('/img_test.jpg;1'))

    # identifiers and names
    check(docfile.get_bibdocid(), bibdoc.get_id())
    check(docfile.get_name(), 'img_test')
    check(docfile.get_full_name(), 'img_test.jpg')

    # full path on the file system
    holds(docfile.get_full_path().startswith(CFG_WEBSUBMIT_FILEDIR))
    holds(docfile.get_full_path().endswith('/img_test.jpg;1'))

    # format and version
    check(docfile.get_format(), '.jpg')
    check(docfile.get_version(), 1)

    # description and comment must agree with the owning document
    check(docfile.get_description(),
          bibdoc.get_description('.jpg', version=1))
    check(docfile.get_comment(),
          bibdoc.get_comment('.jpg', version=1))

    # record id, status, size and checksum
    check(docfile.get_recid(), 2)
    check(docfile.get_status(), '')
    check(docfile.get_size(), 91750)
    check(docfile.get_checksum(), '28ec893f9da735ad65de544f71d4ad76')

    # integrity check
    check(docfile.check(), True)

    # HTML rendering
    value = docfile.display(ln='en')
    assert 'files/img_test.jpg?version=1">' in value

    # visibility
    check(docfile.hidden_p(), False)

    # finally delete the document again
    bibdoc.delete()
    check(bibdoc.deleted_p(), True)
def format_element(bfo, width="", caption="yes", max="3"):
    """
    Display the plots attached to the record as linked images.

    Every latest file of type "Plot" becomes an <img> linking to the
    plots page of the record.  The first five characters of the file
    description are the plot index (used for ordering and as URL
    anchor); the rest of the description is used as image title.

    @param width: the width of the returned image (Eg: '100px')
    @param caption: display the captions or not? (currently not used by
        this element)
    @param max: the maximum number of plots to display (-1 is all plots)
    @return: HTML for the plots, followed by a "Show more plots" link
        when some plots were left out; '' when there is no plot
    """
    limit = int(max)
    if width != "":
        width = 'width="%s"' % width

    img_files = []
    for doc in BibRecDocs(bfo.recID).list_bibdocs():
        for _file in doc.list_latest_files():
            if _file.get_type() != "Plot":
                continue
            try:
                description = _file.get_description()
                caption_text = description[5:]
                index = int(description[:5])
                img_location = _file.get_url()
            except Exception:
                # FIXME: we have hit probably a plot context file,
                # so ignore this document; but it would be safer
                # to check subformat type, so that we don't mask
                # other eventual errors here.
                continue
            img = '<img style="vertical-align:middle;" src="%s" title="%s" %s/>' % \
                  (img_location, caption_text, width)
            plotlink = create_html_link(
                urlbase='%s/%s/%s/plots#%d' % (CFG_BASE_URL, CFG_SITE_RECORD,
                                               bfo.recID, index),
                urlargd={},
                link_label=img)
            img_files.append((index, plotlink))

    img_files = sorted(img_files, key=lambda x: x[0])

    link = ""
    if limit > 0:
        # Decide on the "more" link BEFORE truncating: once the list is
        # cut its length can no longer tell whether plots were dropped.
        if len(img_files) > limit:
            link = "<a href='/record/" + bfo.control_field('001') + \
                   "/plots'>Show more plots</a>"
        img_files = img_files[:limit]

    if not img_files:
        return ''

    plot_links = [plotlink for dummy_index, plotlink in img_files]
    return '<div style="overflow-x:auto;display:inline;width:100%;">' + \
           " ".join(plot_links) + ' ' + link + '</div>'
def goto(cc=CFG_SITE_NAME, p='', f='', sf='', so='d', docname='', format=''):
    """
    Redirect the user to the latest record in the given collection,
    optionally within the specified pattern and field. If docname
    and format are specified, redirect the user to the corresponding
    docname and format. If docname it is not specified, but there is
    only a single bibdoc attached to the record will redirect to that
    one.

    @param cc: collection passed to the search
    @param p: search pattern
    @param f: search field
    @param sf: sort field
    @param so: sort order
    @param docname: name of the document to redirect to (optional)
    @param format: format of the file to redirect to; without it the
        record URL is returned and docname is ignored
    @return: the URL of the file when it can be resolved, the URL of the
        record otherwise, or None when the search returns nothing
    """
    recids = perform_request_search(cc=cc, p=p, f=f, sf=sf, so=so)
    if not recids:
        return None

    ## We shall take the last recid. This is the last one
    recid = recids[-1]
    url = '/%s/%s' % (CFG_SITE_RECORD, recid)
    if not format:
        return url

    # Build the record's document container once and reuse it.
    bibrecdocs = BibRecDocs(recid)
    if not docname:
        docnames = bibrecdocs.get_bibdoc_names()
        if len(docnames) != 1:
            # Zero or several documents: no unambiguous choice.
            return url
        docname = docnames[0]

    try:
        bibdoc = bibrecdocs.get_bibdoc(docname)
        return bibdoc.get_file(format=format).get_url()
    except InvenioBibDocFileError:
        # Unknown docname or format: fall back to the record page.
        return url
def migrate_bibdoc_status(recid, is_public, access_right):
    """
    Set the status of every document of a record from its access right.

    For a public record a "firerole: ..." status is built from
    access_right ("open", "embargoed", "closed" or "restricted"), using
    the first value of field 8560_f as e-mail and, for embargoed
    records, the first value of field 942__a as date.

    NOTE(review): the original indentation of this function was lost;
    the pairing of the `else` with `if is_public` and the placement of
    the final loop outside `if fft_status` are reconstructed - confirm
    against the upstream source.

    @param recid: id of the record whose documents are updated
    @param is_public: whether the record is public
    @param access_right: access right of the record
    """
    from invenio.search_engine import get_fieldvalues
    from invenio.bibdocfile import BibRecDocs
    # Generate firerole
    fft_status = []
    if is_public:
        # Raises IndexError when the record has no 8560_f value.
        email = get_fieldvalues(recid, "8560_f")[0]
        if access_right == "open":
            # Access to everyone
            fft_status = ["allow any"]
        elif access_right == "embargoed":
            # Access to submitted, Deny everyone else until embargo date,
            # then allow all
            date = get_fieldvalues(recid, "942__a")[0]
            fft_status = ['allow email "%s"' % email, 'deny until "%s"' % date, "allow any"]
        elif access_right in ("closed", "restricted"):
            # Access to submitter, deny everyone else
            fft_status = ['allow email "%s"' % email, "deny all"]
        # Any other access_right leaves fft_status as the empty list.
    else:
        # Access to submitter, deny everyone else
        fft_status = None
    if fft_status:
        # One rule per line, prefixed with "firerole: ".
        fft_status = "firerole: %s" % "\n".join(fft_status)
    brd = BibRecDocs(recid)
    for d in brd.list_bibdocs():
        # NOTE(review): receives None or [] when no rule was generated -
        # confirm that set_status() is meant to be called in that case.
        d.set_status(fft_status)
def Move_Files_Archive(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    Synchronise the documents of the record with the files found in
    "<curdir>/files/MainFiles" (doctype 'Main') and
    "<curdir>/files/AdditionalFiles" (doctype 'Additional'): documents
    of a doctype that have no file left in the matching directory are
    deleted, then every file of the directory is added to the record.

    The record id is read from the global name `sysno`, which is not
    defined in this function (presumably injected by the caller).

    @param parameters: not used
    @param curdir: submission directory containing the "files" folder
    @param form: not used
    @param user_info: not used
    @return: always ""
    """
    watcheddirs = {'Main': "%s/files/MainFiles" % curdir,
                   'Additional': "%s/files/AdditionalFiles" % curdir}
    for doctype, dirpath in watcheddirs.items():
        if not os.path.exists(dirpath):
            continue

        # docname -> names of the files on disk belonging to that docname
        files_by_docname = {}
        for filename in sorted(os.listdir(dirpath)):
            dummy, docname, dummy_ext = decompose_file(filename)
            files_by_docname.setdefault(docname, []).append(filename)

        # first delete all missing files
        bibarchive = BibRecDocs(sysno)
        for existing_bibdoc in bibarchive.list_bibdocs(doctype):
            if existing_bibdoc.get_docname() not in files_by_docname:
                existing_bibdoc.delete()

        # then create/update the new ones.  Each file is added through
        # its real path; formatting the whole list of formats into the
        # path (as done previously) never pointed to an existing file.
        for docname in sorted(files_by_docname):
            for filename in files_by_docname[docname]:
                bibarchive.add_new_file('%s/%s' % (dirpath, filename),
                                        doctype=doctype, never_fail=True)
    return ""
def create_download_popup(bfo):
    """Create the complete download popup

    One download element is generated for every file of the record
    whose superformat is one of the known video formats.

    @param bfo: BibFormat object; only its recID attribute is read here
    @return: the popup HTML, or "" when the record has no video file
    """
    video_formats = ('.mp4', '.webm', '.ogv', '.mov', '.wmv',
                     '.avi', '.mpeg', '.flv', '.mkv')
    elements = []
    for bibdoc in BibRecDocs(bfo.recID).list_bibdocs():
        for candidate in bibdoc.list_all_files():
            if candidate.get_superformat() not in video_formats:
                continue
            url = candidate.get_url()
            # the codec is the superformat without its leading dot
            codec = candidate.get_superformat()[1:]
            resolution = candidate.get_subformat()
            size = candidate.get_size()
            elements.append(
                create_download_element(url, codec, size, resolution))

    if not elements:
        return ""
    return html_skeleton_popup % {'elements': "\n".join(elements)}
def get_media_from_recid(recid):
    '''
    List the latest files of a record as plain dictionaries.

    @param(recid) : id of the record whose files are described
    @return (medias) : list of dictionaries with the keys 'name', 'file',
        'type', 'path', 'collection', 'size', 'loaded' and 'selected'
    '''
    medias = []
    archive = BibRecDocs(recid)
    for latest in archive.list_latest_files():
        media = dict(
            name=latest.get_full_name(),
            file='',
            # "application/" + the part after the last dot of the superformat
            type='application/%s' % latest.get_superformat().split(".")[-1],
            path=latest.get_full_path(),
            collection=latest.get_type(),
            size=latest.get_size(),
            loaded=False,
            selected='',
        )
        # only files of the "Main" collection are selected
        if media['collection'] == "Main":
            media['selected'] = 'checked=yes'
        medias.append(media)
    return medias
def solr_add_range(lower_recid, upper_recid):
    """
    Adds the field values (abstract, authors, fulltext, keywords and
    title) of all existing records from the lower recid to the upper
    one (both included) to Solr.  The fulltext is read from the
    documents attached to each record.
    """
    def spaced(values):
        # Fold the values into one string; each value is preceded by a
        # single space.
        return reduce(lambda left, right: left + " " + right, values, '')

    for recid in range(lower_recid, upper_recid + 1):
        if not record_exists(recid):
            continue

        # Each value is computed on a best-effort basis: whatever goes
        # wrong (missing field, decoding problem, ...) yields "".
        try:
            raw_abstract = get_fieldvalues(recid, CFG_MARC_ABSTRACT)[0]
            abstract = unicode(remove_control_characters(raw_abstract), 'utf-8')
        except:
            abstract = ""

        try:
            first_author = remove_control_characters(
                get_fieldvalues(recid, CFG_MARC_AUTHOR_NAME)[0])
            additional_authors = remove_control_characters(
                spaced(get_fieldvalues(recid, CFG_MARC_ADDITIONAL_AUTHOR_NAME)))
            author = unicode(first_author + " " + additional_authors, 'utf-8')
        except:
            author = ""

        try:
            attached = BibRecDocs(recid)
            fulltext = unicode(remove_control_characters(attached.get_text()), 'utf-8')
        except:
            fulltext = ""

        try:
            keywords = spaced(get_fieldvalues(recid, CFG_MARC_KEYWORD))
            keyword = unicode(remove_control_characters(keywords), 'utf-8')
        except:
            keyword = ""

        try:
            raw_title = get_fieldvalues(recid, CFG_MARC_TITLE)[0]
            title = unicode(remove_control_characters(raw_title), 'utf-8')
        except:
            title = ""

        solr_add(recid, abstract, author, fulltext, keyword, title)

    # Commit once for the whole range, then give the task scheduler a
    # chance to pause or stop the task.
    SOLR_CONNECTION.commit()
    task_sleep_now_if_required(can_stop_too=True)
def format_element(bfo, subformat="480p"):
    """
    Creates HTML5 source elements for the given subformat.

    MP4, WebM and OGV are currently supported as video sources.
    All files attached to the record are scanned; a <source> element
    is produced for each file with one of these formats and the
    given subformat.

    @param subformat: BibDocFile subformat to create the sources from (e.g. 480p)
    @return: the <source> elements, one per line
    """
    codecs_by_type = {
        'mp4': 'avc1.42E01E, mp4a.40.2',
        'webm': 'vp8, vorbis',
        'ogv': 'theora, vorbis',
    }
    source_template = '<source src="%s" type=\'video/%s; codecs="%s"\' />'

    video_sources = []
    for bibdoc in BibRecDocs(bfo.recID).list_bibdocs():
        for bibdocfile in bibdoc.list_all_files():
            if bibdocfile.get_superformat() in ('.mp4', '.webm', '.ogv') \
                    and bibdocfile.get_subformat() == subformat:
                src = bibdocfile.get_url()
                # file type = superformat without the leading dot
                ftype = bibdocfile.get_superformat()[1:]
                video_sources.append(
                    source_template % (src, ftype, codecs_by_type[ftype]))
    return "\n".join(video_sources)
def Move_Files_Archive(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    Mirror the content of "<curdir>/files/MainFiles" (doctype 'Main')
    and "<curdir>/files/AdditionalFiles" (doctype 'Additional') into the
    record: documents of the doctype without any file left in the
    directory are deleted, then all files of the directory are added.

    The record id comes from the global name `sysno`, which is not
    defined in this function (presumably injected by the caller).

    @param parameters: not used
    @param curdir: submission directory containing the "files" folder
    @param form: not used
    @param user_info: not used
    @return: always ""
    """
    MainDir = "%s/files/MainFiles" % curdir
    IncludeDir = "%s/files/AdditionalFiles" % curdir
    watcheddirs = {'Main': MainDir, 'Additional': IncludeDir}
    for doctype, watched in watcheddirs.items():
        if not os.path.exists(watched):
            continue

        # Group the directory entries by document name.
        entries = {}
        listing = os.listdir(watched)
        listing.sort()
        for entry in listing:
            dummy, docname, dummy_extension = decompose_file(entry)
            if docname not in entries:
                entries[docname] = []
            entries[docname].append(entry)

        # first delete all missing files
        bibarchive = BibRecDocs(sysno)
        for existing in bibarchive.list_bibdocs(doctype):
            if existing.get_docname() not in entries:
                existing.delete()

        # then create/update the new ones, one call per file on disk.
        # The previous code formatted the list of formats into the path,
        # which produced names such as "doc['.pdf']" that do not exist.
        for docname in sorted(entries):
            for entry in entries[docname]:
                bibarchive.add_new_file('%s/%s' % (watched, entry),
                                        doctype=doctype, never_fail=True)
    return ""
def format_element(bfo, subformat="480p"):
    """
    Creates HTML5 source elements for the given subformat.

    MP4, WebM and OGV are currently supported as video sources.
    The files attached to the record are scanned for videos with
    these formats and the given subformat.

    @param subformat: BibDocFile subformat to create the sources from (e.g. 480p)
    @return: the generated <source> elements joined by newlines
    """
    # superformat -> codecs advertised in the "type" attribute
    codecs_for = {
        '.mp4': 'avc1.42E01E, mp4a.40.2',
        '.webm': 'vp8, vorbis',
        '.ogv': 'theora, vorbis',
    }

    def wanted(bibdocfile):
        # The subformat is only looked at for supported superformats.
        return (bibdocfile.get_superformat() in codecs_for
                and bibdocfile.get_subformat() == subformat)

    video_sources = []
    recdoc = BibRecDocs(bfo.recID)
    for bibdoc in recdoc.list_bibdocs():
        for bibdocfile in bibdoc.list_all_files():
            if not wanted(bibdocfile):
                continue
            src = bibdocfile.get_url()
            superformat = bibdocfile.get_superformat()
            ftype = superformat[1:]
            codecs = codecs_for[superformat]
            video_sources.append(
                '<source src="%s" type=\'video/%s; codecs="%s"\' />'
                % (src, ftype, codecs))
    return "\n".join(video_sources)
def format_element(bfo, separator='<br/>', width="800px", height="480px"):
    """
    Display Flash (swf) panorama attached to this record. Consider
    files attached as .swf file with doctype 'panorama'.

    @param separator: printed between each panorama
    @param width: width of each panorama
    @param height: height of each panorama
    @return: one placeholder <div> per panorama followed by the scripts
        embedding them, or "" when the record has no panorama
    """
    # One embedpano(...) Javascript call per panorama; the position of
    # the call in the list is also the number of its target <div>.
    embed_calls = []
    for bibdocfile in BibRecDocs(bfo.recID).list_latest_files(doctype='panorama'):
        if bibdocfile.get_format() != '.swf':
            continue
        pano_index = len(embed_calls)
        embed_calls.append(
            'embedpano({swf:"%(swf_file)s", target:"panoramabox%(pano_index)s", width:"%(width)s", height:"%(height)s"});'
            % {'swf_file': bibdocfile.get_url(),
               'pano_index': pano_index,
               'width': width,
               'height': height})

    if not embed_calls:
        return ""

    boxes = []
    for number in range(len(embed_calls)):
        boxes.append('<div id="panoramabox%i" style="margin:auto"></div>' % number)

    return (separator.join(boxes)
            + '<script type="text/javascript" src="/js/swfkrpano.js"></script>'
            + '<script type="text/javascript">'
            + ''.join(embed_calls)
            + '</script>')
def setUp(self):
    """Attach the test image to record 2 under a unique document name."""
    self.my_bibrecdoc = BibRecDocs(2)
    self.unique_name = self.my_bibrecdoc.propose_unique_docname('file')
    test_image = CFG_PREFIX + '/lib/webtest/invenio/test.jpg'
    self.my_bibdoc = self.my_bibrecdoc.add_new_file(
        test_image, docname=self.unique_name)
    # remembered separately for the SQL queries of the tests
    self.my_bibdoc_id = self.my_bibdoc.id
def insert_docfiles_in_modify_form(recid):
    """
    Return the HTML listing of the files attached to a record.

    The listing is the output of BibRecDocs.display(), without the
    "hgf_file file(s):" heading.

    @param recid: id of the record
    @return: HTML string
    """
    heading = "<small><b>hgf_file</b> file(s):</small>"
    # display() already returns formatted HTML; only the heading is removed
    listing = BibRecDocs(recid).display()
    return listing.replace(heading, "")
def _getfile_py(req, recid=0, docid=0, version="", name="", docformat="", ln=CFG_SITE_LANG):
    """
    Redirect a legacy file request to the "/<record>/<recid>/files/..." URL.

    The record is identified by recid or, when recid is missing, through
    the first record linked to the document docid.  The document name is
    taken from name or, when name is missing, looked up from docid.

    @param req: request object, passed on to the redirection / warning page
    @param recid: id of the record owning the file
    @param docid: id of the document
    @param version: version of the file (optional)
    @param name: name of the document
    @param docformat: format of the file; it is normalised before use
    @param ln: language, used for messages and as "ln" URL argument
    @return: a warning page when the file cannot be identified;
        otherwise the request is redirected permanently
    """
    if not recid:
        if not docid:
            return warning_page(_("Not enough information to retrieve the document"), req, ln)
        ## Let's obtain the recid from the docid
        try:
            bibdoc = BibDoc(docid=docid)
            recid = bibdoc.bibrec_links[0]["recid"]
        except (InvenioBibDocFileError, IndexError):
            # IndexError: the document is not linked to any record.
            return warning_page(_("An error has happened in trying to retrieve the requested file."), req, ln)

    if not name and docid:
        ## Let's obtain the name from the docid.  This is done whichever
        ## way the recid was obtained, so that the URL never lacks the
        ## document name.
        try:
            name = BibRecDocs(recid).get_docname(docid)
        except InvenioBibDocFileError:
            return warning_page(_("An error has happened in trying to retrieving the requested file."), req, ln)

    docformat = normalize_format(docformat)
    url = "%s/%s/%s/files/%s%s?ln=%s" % (
        CFG_SITE_URL, CFG_SITE_RECORD, recid, name, docformat, ln)
    if version:
        # Arguments of a query string are separated by "&".
        url += "&version=%s" % version
    redirect_to_url(req, url, apache.HTTP_MOVED_PERMANENTLY)
def check_records(records):
    """
    Set the article type (980 $c) of Hindawi records from their XML file.

    For every record whose 980 $b is "Hindawi", the <subject> of the
    "xml" file of its first document is read.  For the subjects
    Editorial, Erratum, Corrigendum, Addendum and Letter to the Editor
    the upper-cased subject is written to 980 $c: an existing $c holding
    one of these types is amended, otherwise a $c subfield is added.

    @param records: iterable of the records to check
    @raise Exception: when the subject is neither one of the types above
        nor Review Article, Research Article or Retraction
    """
    special_subjects = ["Editorial", "Erratum", "Corrigendum", "Addendum",
                        "Letter to the Editor"]
    special_types = ['ERRATUM', 'ADDENDUM', 'EDITORIAL', 'CORRIGENDUM',
                     'LETTER TO THE EDITOR']
    regular_subjects = ["Review Article", "Research Article", "Retraction"]

    for record in records:
        ## Stupid hack because bibcheck filters does not work as expected
        if record_get_field_value(record, '980', code='b') != "Hindawi":
            continue
        record.warn("Working on this record")

        try:
            recdoc = BibRecDocs(int(record.record_id))
            doc = recdoc.get_bibdoc(recdoc.get_bibdoc_names()[0])
            xml_file = open(doc.get_file("xml").get_full_path())
        except Exception:
            # No document, no "xml" format or unreadable file.
            record.warn("No document can be found")
            continue
        try:
            xml2 = xml.dom.minidom.parseString(xml_file.read())
        finally:
            # Always release the file handle, even if parsing fails.
            xml_file.close()

        subject = get_value_in_tag(xml2, "subject")
        if subject in special_subjects:
            field = record_get_field_value(record, '980', code='c')
            if field and field in special_types:
                # Replace the first existing article type.
                for position, value in record.iterfield('980__c'):
                    record.amend_field(position, subject.upper())
                    break
            else:
                # No (special) article type yet: add one to the first 980.
                for position, value in record.iterfield('980__%'):
                    record.add_subfield(position, 'c', subject.upper())
                    break
        elif subject not in regular_subjects:
            raise Exception("This subject: %s does not exit in SCOAP3 system" % (subject,))
def format_element(bfo, width="", caption="yes", max="3"):
    """
    Display the plots attached to the record as linked images.

    Every latest file of type "Plot" becomes an <img> linking to the
    plots page of the record.  The first five characters of the file
    description are the plot index (used for ordering and as URL
    anchor); the rest of the description is used as image title.

    @param width: the width of the returned image (Eg: '100px')
    @param caption: display the captions or not? (currently not used by
        this element)
    @param max: the maximum number of plots to display (-1 is all plots)
    @return: HTML for the plots, followed by a "Show more plots" link
        when the number of plots is limited; '' when there is no plot
    """
    ## To achieve this, we take the pngs associated with this document
    limit = int(max)
    if width != "":
        width = 'width="%s"' % width

    img_files = []
    bibarchive = BibRecDocs(bfo.recID)
    for doc in bibarchive.list_bibdocs():
        for _file in doc.list_latest_files():
            if _file.get_type() != "Plot":
                continue
            try:
                caption_text = _file.get_description()[5:]
                index = int(_file.get_description()[:5])
                img_location = _file.get_url()
            except Exception:
                # FIXME: we have hit probably a plot context file,
                # so ignore this document; but it would be safer
                # to check subformat type, so that we don't mask
                # other eventual errors here.
                continue
            img = '<img style="vertical-align:middle;" src="%s" title="%s" %s/>' % \
                  (img_location, caption_text, width)
            # Kept in its own variable: reusing the name of the trailing
            # "more" link made the last plot appear twice whenever the
            # number of plots was not limited.
            plot_link = create_html_link(
                urlbase='%s/%s/%s/plots#%d' % (CFG_SITE_URL, CFG_SITE_RECORD,
                                               bfo.recID, index),
                urlargd={},
                link_label=img)
            img_files.append((index, plot_link))

    img_files = sorted(img_files, key=lambda x: x[0])

    more_link = ""
    if limit > 0:
        img_files = img_files[:limit]
        more_link = "<a href='/record/" + bfo.control_field('001') + \
                    "/plots'>Show more plots</a>"

    if len(img_files) == 0:
        return ''

    plot_links = [plot_link for dummy_index, plot_link in img_files]
    return '<div style="overflow-x:auto;display:inline;width:100%;">' + \
           " ".join(plot_links) + ' ' + more_link + '</div>'
def get_rawtext_from_record(record):
    """
    Return the text of the latest PDF attached to a record.

    @param record: object whose record_id attribute identifies the record
    @return: the text returned by get_text() on the document of the file
        selected by get_latest_pdf(), or '' when it cannot be obtained
    """
    latest_files = BibRecDocs(record.record_id).list_latest_files()
    latest_pdf = get_latest_pdf(latest_files)
    try:
        return latest_pdf.bibdoc.get_text()
    except:
        # Any failure (e.g. nothing was selected) means "no text".
        return ''
def record_has_arxiv_pdf(recid=None):
    """
    Tell whether a record has a PDF among its latest "arXiv" files.

    @param recid: id of the record; None means "no record"
    @return: True when a latest file of doctype "arXiv" has the format
        '.pdf' or '.pdfa' (case-insensitive), False otherwise
    """
    if recid is None:
        return False
    arxiv_files = BibRecDocs(recid).list_latest_files(doctype="arXiv")
    return any(candidate.format.lower() in ('.pdf', '.pdfa')
               for candidate in arxiv_files)
def check_records(records):
    """
    Rebuild the author fields (100 / 700) of Springer records from XML.

    For every latest '.xml' file of a Springer record the authors are
    extracted with the JATS parser (when the content mentions
    "-//NLM//DTD JATS") or with the NLM parser.  The existing 100 and
    700 fields are then removed and one field per author is added: the
    first author as 100, the others as 700.  Subfields: $a name, $j
    ORCID, $v affiliations, $m e-mail.

    The existing fields are only removed once the XML has been parsed
    successfully, so a broken file does not strip the record of its
    authors.

    @param records: iterable of the records to check
    """
    for record in records:
        if not is_springer(record):
            continue
        rec_doc = BibRecDocs(int(record.record_id))
        for doc in rec_doc.list_latest_files():
            if doc.get_format() != '.xml':
                continue

            xml_file = open(doc.get_full_path())
            try:
                content = xml_file.read()
            finally:
                xml_file.close()

            try:
                if "-//NLM//DTD JATS" in content:
                    parser = JATSParser()
                else:
                    parser = NLMParser()
                authors = parser.get_authors(parseString(content))
            except Exception:
                record.warn('Problem with parsing XML.')
                continue

            # Each tag is removed on its own, so that a record without
            # a 100 field still gets its 700 fields replaced.
            for tag in ('100', '700'):
                try:
                    del record[tag]
                    record.amended = True
                except KeyError:
                    pass

            first_author = True
            for author in authors:
                if author.get('surname'):
                    subfields = [('a', '%s, %s' % (
                        author.get('surname'),
                        author.get('given_name') or author.get('initials', '')))]
                else:
                    subfields = [('a', '%s' % (author.get('name', '')))]
                if 'orcid' in author:
                    subfields.append(('j', author['orcid']))
                if 'affiliation' in author:
                    for aff in author["affiliation"]:
                        subfields.append(('v', aff))
                    add_nations_field(subfields)
                if author.get('email'):
                    subfields.append(('m', author['email']))
                if first_author:
                    record.add_field('100__', value='', subfields=subfields)
                    first_author = False
                else:
                    record.add_field('700__', value='', subfields=subfields)
def test_BibDocFiles(self):
    """bibdocfile - BibDocFile functions """
    from invenio.bibdocfile import BibRecDocs
    check = self.assertEqual

    # Attach a new document, with a fixed modification date, to record 2.
    recdocs = BibRecDocs(2)
    timestamp = datetime(*(time.strptime("2010-09-08 07:06:05", "%Y-%m-%d %H:%M:%S")[:6]))
    recdocs.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg',
                         'Main', 'img_test', False,
                         'test add new file', 'test', '.jpg',
                         modification_date=timestamp)
    bibdoc = recdocs.get_bibdoc("img_test")
    docfile = bibdoc.list_all_files()[0]

    # URL and document type
    check(docfile.get_url(),
          CFG_SITE_URL + '/%s/2/files/img_test.jpg' % CFG_SITE_RECORD)
    check(docfile.get_type(), 'Main')

    # The paths (get_path / get_full_path) are deliberately not checked:
    # they belong to the underlying implementation, not to the interface
    # under test.

    # identifiers and names
    check(docfile.get_bibdocid(), bibdoc.get_id())
    check(docfile.get_name(), 'img_test')
    check(docfile.get_full_name(), 'img_test.jpg')

    # format and version
    check(docfile.get_format(), '.jpg')
    check(docfile.get_version(), 1)

    # description and comment must agree with the owning document
    check(docfile.get_description(),
          bibdoc.get_description('.jpg', version=1))
    check(docfile.get_comment(),
          bibdoc.get_comment('.jpg', version=1))

    # record id, status, size and checksum
    check(docfile.get_recid(), 2)
    check(docfile.get_status(), '')
    check(docfile.get_size(), 91750)
    check(docfile.get_checksum(), '28ec893f9da735ad65de544f71d4ad76')

    # integrity check
    check(docfile.check(), True)

    # HTML rendering through the bibdocfile template
    import invenio.template
    tmpl = invenio.template.load("bibdocfile")
    value = tmpl.tmpl_display_bibdocfile(docfile, ln='en')
    assert 'files/img_test.jpg?version=1">' in value

    # visibility and modification date
    check(docfile.hidden_p(), False)
    check(docfile.md, timestamp)

    # finally delete the document again
    bibdoc.delete()
    check(bibdoc.deleted_p(), True)
def format(bfo, width="", caption="yes", max="-1"):
    """
    Display image of the plot if we are in selected plots collections

    Every latest file of type "Plot" becomes an <img> linking to the
    plots page of the record.  The first five characters of the file
    description are the plot index (used for ordering and as URL
    anchor); the rest of the description is used as image title.

    @param width: the width of the returned image (Eg: '100px')
    @param caption: display the captions or not? (currently not used by
        this element)
    @param max: the maximum number of plots to display (-1 is all plots)
    @return: the linked images in a horizontally scrollable <div>, or ''
        when there is no plot
    """
    ## To achieve this, we take the pngs associated with this document
    limit = int(max)
    if width != "":
        width = 'width="%s"' % width

    img_files = []
    bibarchive = BibRecDocs(bfo.recID)
    for doc in bibarchive.list_bibdocs():
        for _file in doc.list_latest_files():
            if _file.get_type() != "Plot":
                continue
            try:
                caption_text = _file.get_description()[5:]
                index = int(_file.get_description()[:5])
                img_location = _file.get_url()
            except Exception:
                # FIXME: we have hit probably a plot context file,
                # so ignore this document; but it would be safer
                # to check subformat type, so that we don't mask
                # other eventual errors here.
                continue
            img = '<img src="%s" title="%s" %s/>' % \
                  (img_location, caption_text, width)
            link = create_html_link(
                urlbase='%s/record/%s/plots#%d' % (CFG_SITE_URL, bfo.recID,
                                                   index),
                urlargd={},
                link_label=img)
            img_files.append((index, link))

    img_files = sorted(img_files, key=lambda x: x[0])
    if limit > 0:
        img_files = img_files[:limit]

    if not img_files:
        return ''

    links = [link for dummy_index, link in img_files]
    # "width:100%" - a CSS declaration separates property and value with
    # a colon; the former "width=100%" was ignored by browsers.
    return '<div style="overflow-x:scroll;width:100%;white-space:nowrap">' + \
           " ".join(links) + '</div>'
def Add_Files(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    Attach every entry of "<curdir>/files" that the record does not
    contain yet as a new "Main" file.  The record id is read from the
    global name `sysno`.

    @return: always ""
    """
    files_dir = "%s/files" % curdir
    if os.path.exists(files_dir):
        bibrecdocs = BibRecDocs(sysno)
        for entry in os.listdir(files_dir):
            fullpath = "%s/%s" % (files_dir, entry)
            already_attached = bibrecdocs.check_file_exists(fullpath)
            if not already_attached:
                bibrecdocs.add_new_file(fullpath, "Main", never_fail=True)
    return ""
def get_filetypes(recid):
    """
    Returns the filetypes associated with the given record: one entry
    per latest file, computed from the format of the file.

    @param recid: recid of a record
    @return: list of filetypes
    """
    filetypes = []
    for latest in BibRecDocs(recid).list_latest_files():
        filetypes.append(_get_filetype(latest.format))
    return filetypes
def Add_Files(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    Add the content of the "files" folder of the submission directory
    to the record identified by the global name `sysno`; entries the
    record already contains are left alone.

    @return: always ""
    """
    if not os.path.exists("%s/files" % curdir):
        return ""

    bibrecdocs = BibRecDocs(sysno)
    for name in os.listdir("%s/files" % curdir):
        fullpath = "%s/files/%s" % (curdir, name)
        if bibrecdocs.check_file_exists(fullpath):
            # already part of the record
            continue
        bibrecdocs.add_new_file(fullpath, "Main", never_fail=True)
    return ""
def get_rawtext_from_record_id(record_id):
    """
    Return the text of the latest PDF attached to a record.

    @param record_id: id of the record
    @return: the text returned by get_text() on the document of the file
        selected by get_latest_pdf(), or '' when it cannot be obtained
    """
    record_docs = BibRecDocs(record_id)
    selected = get_latest_pdf(record_docs.list_latest_files())
    try:
        text = selected.bibdoc.get_text()
    except:
        # Whatever goes wrong, the caller simply gets no text.
        text = ''
    return text
def test_BibDocFiles(self):
    """bibdocfile - BibDocFile functions """
    # Attach a new document, with a known modification date, to record 2.
    timestamp = datetime(*(time.strptime("2010-09-08 07:06:05", "%Y-%m-%d %H:%M:%S")[:6]))
    record_docs = BibRecDocs(2)
    record_docs.add_new_file(
        CFG_PREFIX + '/lib/webtest/invenio/test.jpg',
        'Main', 'img_test', False, 'test add new file', 'test', '.jpg',
        modification_date=timestamp)
    new_doc = record_docs.get_bibdoc("img_test")
    new_file = new_doc.list_all_files()[0]

    # URL
    expected_url = CFG_SITE_URL + '/%s/2/files/img_test.jpg' % CFG_SITE_RECORD
    self.assertEqual(new_file.get_url(), expected_url)
    # document type
    self.assertEqual(new_file.get_type(), 'Main')
    # NB: get_path() and get_full_path() are not tested on purpose; the
    # location on disk is an implementation detail, not part of the
    # interface.
    # id of the owning document
    self.assertEqual(new_file.get_bibdocid(), new_doc.get_id())
    # name, with and without extension
    self.assertEqual(new_file.get_name(), 'img_test')
    self.assertEqual(new_file.get_full_name(), 'img_test.jpg')
    # format
    self.assertEqual(new_file.get_format(), '.jpg')
    # version
    self.assertEqual(new_file.get_version(), 1)
    # description
    self.assertEqual(new_file.get_description(),
                     new_doc.get_description('.jpg', version=1))
    # comment
    self.assertEqual(new_file.get_comment(),
                     new_doc.get_comment('.jpg', version=1))
    # record id
    self.assertEqual(new_file.get_recid(), 2)
    # status
    self.assertEqual(new_file.get_status(), '')
    # size
    self.assertEqual(new_file.get_size(), 91750)
    # checksum
    self.assertEqual(new_file.get_checksum(),
                     '28ec893f9da735ad65de544f71d4ad76')
    # integrity check
    self.assertEqual(new_file.check(), True)
    # HTML rendering through the bibdocfile template
    tmpl = invenio.template.load("bibdocfile")
    value = tmpl.tmpl_display_bibdocfile(new_file, ln='en')
    assert 'files/img_test.jpg?version=1">' in value
    # visibility
    self.assertEqual(new_file.hidden_p(), False)
    # modification date
    self.assertEqual(new_file.md, timestamp)
    # clean up: delete the document
    new_doc.delete()
    self.assertEqual(new_doc.deleted_p(), True)
def get_pdf(recid):
    """
    Find the first PDF among the documents of a record.

    For each document the plain '.pdf' format is tried first, then
    '.pdf;pdfa'.

    @param recid: id of the record
    @return: tuple (checksum, url, format) of the first match, or
        (None, None, None) when no document has a PDF
    """
    for bibdoc in BibRecDocs(recid).list_bibdocs():
        for docformat in (".pdf", ".pdf;pdfa"):
            if bibdoc.format_already_exists_p(docformat):
                found = bibdoc.get_file(docformat)
                return found.checksum, found.url, docformat
    return None, None, None
def list_pdfs(recid):
    """
    Generate the PDF files of all documents attached to a record.

    For every document the formats 'pdf', 'pdfa' and 'PDF' are requested
    in that order; formats the document does not provide are skipped.

    @param recid: id of the record
    """
    extensions = ('pdf', 'pdfa', 'PDF')
    for bibdoc in BibRecDocs(recid).list_bibdocs():
        for extension in extensions:
            try:
                yield bibdoc.get_file(extension)
            except InvenioBibDocFileError:
                # this document has no file in that format
                continue
def list_pdfs(recid):
    """
    Iterate over the PDF files attached to the record `recid`.

    Each document is asked for its "pdf", "pdfa" and "PDF" file, in
    this order; a format that cannot be retrieved is silently skipped.
    """
    record_docs = BibRecDocs(recid)
    for document in record_docs.list_bibdocs():
        for wanted_format in ("pdf", "pdfa", "PDF"):
            try:
                yield document.get_file(wanted_format)
            except InvenioBibDocFileError:
                # no such format for this document: try the next one
                pass
class BibDocFsInfoTest(unittest.TestCase):
    """Regression tests about the table bibdocfsinfo"""

    def setUp(self):
        """Attach the test image to record 2 under a unique name."""
        self.my_bibrecdoc = BibRecDocs(2)
        self.unique_name = self.my_bibrecdoc.propose_unique_docname("file")
        test_image = CFG_PREFIX + "/lib/webtest/invenio/test.jpg"
        self.my_bibdoc = self.my_bibrecdoc.add_new_file(test_image, docname=self.unique_name)
        self.my_bibdoc_id = self.my_bibdoc.id

    def tearDown(self):
        """Remove the document created by setUp."""
        self.my_bibdoc.expunge()

    def _first_cell(self, query):
        """Run query for the test document; return row 0, column 0."""
        return run_sql(query, (self.my_bibdoc_id,))[0][0]

    def test_hard_delete(self):
        """bibdocfile - test correct update of bibdocfsinfo when hard-deleting"""
        max_version = "SELECT MAX(version) FROM bibdocfsinfo WHERE id_bibdoc=%s"
        jpg_v1_is_last = (
            "SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=1 AND format='.jpg'"
        )
        gif_v2_is_last = (
            "SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=2 AND format='.gif'"
        )

        # Freshly created document: a single version, flagged as last.
        self.assertEqual(self._first_cell(max_version), 1)
        self.assertEqual(self._first_cell(jpg_v1_is_last), True)

        # A new version takes over the "last version" flag.
        self.my_bibdoc.add_file_new_version(CFG_PREFIX + "/lib/webtest/invenio/test.gif")
        self.assertEqual(self._first_cell(max_version), 2)
        self.assertEqual(self._first_cell(gif_v2_is_last), True)
        self.assertEqual(self._first_cell(jpg_v1_is_last), False)

        # Deleting the new version hands the flag back to version 1.
        self.my_bibdoc.delete_file(".gif", 2)
        self.assertEqual(self._first_cell(max_version), 1)
        self.assertEqual(self._first_cell(jpg_v1_is_last), True)
def look_for_fulltext(recid):
    """
    Generate (document, file) pairs for the PDF files of a record.

    All files (every version) of all documents are inspected; a file
    qualifies when its format, stripped of dots at both ends, is 'pdf',
    'pdfa' or 'PDF'.

    @param recid: id of the record
    """
    pdf_formats = ['pdf', 'pdfa', 'PDF']
    for bibdoc in BibRecDocs(recid).list_bibdocs():
        for bibdocfile in bibdoc.list_all_files():
            if bibdocfile.get_format().strip('.') not in pdf_formats:
                continue
            try:
                yield bibdoc, bibdocfile
            except InvenioWebSubmitFileError:
                pass
def look_for_fulltext(recid):
    """
    Return a generator over the documents of a record that have a PDF.

    A document qualifies when any of its files (all versions included)
    has the format 'pdf', 'pdfa' or 'PDF' once the dots at both ends of
    the format are stripped.

    @param recid: id of the record
    """
    # The documents are listed right away; only the filtering is lazy.
    documents = BibRecDocs(recid).list_bibdocs()

    def has_pdf(document):
        return any(docfile.get_format().strip('.') in ['pdf', 'pdfa', 'PDF']
                   for docfile in document.list_all_files())

    return (document for document in documents if has_pdf(document))
def look_for_fulltext(recid):
    """Look for fulltext pdf (bibdocfile) for a given recid

    Generates one (document, file) pair per file whose format, stripped
    of dots at both ends, is 'pdf', 'pdfa' or 'PDF'.
    """
    def is_pdf(docfile):
        return docfile.get_format().strip('.') in ['pdf', 'pdfa', 'PDF']

    record_docs = BibRecDocs(recid)
    for document in record_docs.list_bibdocs():
        for docfile in document.list_all_files():
            if is_pdf(docfile):
                try:
                    yield document, docfile
                except InvenioBibDocFileError:
                    pass
def Add_Files(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    Attach every entry of "<curdir>/files" that the record does not
    contain yet (for the given extension) as a new "Main" file.  The
    record id is read from the global name `sysno`.

    @return: always ""
    """
    files_dir = "%s/files" % curdir
    if os.path.exists(files_dir):
        bibrecdocs = BibRecDocs(sysno)
        for current_file in os.listdir(files_dir):
            fullpath = "%s/%s" % (files_dir, current_file)
            extension = decompose_file(current_file)[2]
            # make sure a non-empty extension starts with a dot
            if extension and not extension.startswith("."):
                extension = '.' + extension
            if not bibrecdocs.check_file_exists(fullpath, extension):
                bibrecdocs.add_new_file(fullpath, "Main", never_fail=True)
    return ""
def Add_Files(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    Walk the "files" folder of the submission directory and add each entry
    to the record as a "Main" document unless check_file_exists() says it
    is already attached. The record id comes from `sysno`, a name resolved
    from the enclosing scope rather than from the arguments.

    @param curdir: submission directory; nothing happens when it has no
        "files" folder
    @return: always the empty string
    """
    upload_dir = "%s/files" % curdir
    if os.path.exists(upload_dir):
        record_docs = BibRecDocs(sysno)
        for current_file in os.listdir(upload_dir):
            fullpath = "%s/files/%s" % (curdir, current_file)
            parts = decompose_file(current_file)
            dummy, filename, extension = parts
            # Make sure a non-empty extension starts with a dot.
            needs_dot = extension and extension[0] != "."
            if needs_dot:
                extension = '.' + extension
            already_there = record_docs.check_file_exists(fullpath, extension)
            if not already_there:
                record_docs.add_new_file(fullpath, "Main", never_fail=True)
    return ""
def get_pdfa_record(self, path=None):
    """Build the MARCXML that attaches the package PDF/A to its record.

    The DOI is read from the article XML found under `path`. When a
    non-deleted record with that DOI exists, its id goes into field 001;
    otherwise a 024 DOI field is added and an error is logged. An FFT field
    is then added for 'main_a-2b.pdf' (as format '.pdf;pdfa') or, failing
    that, for 'main.pdf'. If neither exists the problem is registered and
    logged, not raised. For an existing record, the latest files that are
    not '.pdf;pdfa' are re-listed as FFT fields as well.

    @param path: directory of the package
    @return: output of record_xml_output() for the assembled record
    """
    from invenio.search_engine import perform_request_search

    xml_doc = self.get_article(path)
    rec = create_record()
    # Only the last of the nine returned values (the DOI) is used.
    (dummy, dummy, dummy, dummy, dummy, dummy, dummy,
     dummy, doi) = self.get_publication_information(xml_doc)

    recid = perform_request_search(
        p='0247_a:"%s" AND NOT 980:"DELETED"' % (doi, ))
    if recid:
        record_add_field(rec, '001', controlfield_value=recid[0])
    else:
        record_add_field(rec, '024', ind1='7',
                         subfields=[('a', doi), ('2', 'DOI')])
        self.logger.error(
            'Adding PDF/A. No paper with this DOI: '
            '%s. Trying to add it anyway.' % (doi, ))

    try:
        pdfa_path = join(path, 'main_a-2b.pdf')
        if exists(pdfa_path):
            record_add_field(rec, 'FFT',
                             subfields=[('a', pdfa_path),
                                        ('n', 'main'),
                                        ('f', '.pdf;pdfa')])
            self.logger.debug('Adding PDF/A to record: %s' % (doi, ))
        else:
            pdf_path = join(path, 'main.pdf')
            if not exists(pdf_path):
                raise MissingFFTError(
                    "Record %s doesn't contain PDF file." % (doi, ))
            record_add_field(rec, 'FFT', subfields=[('a', pdf_path)])
            self.logger.debug(
                'No PDF/A in VTEX package for record: ' + doi)
    except MissingFFTError:
        # Raised just above so that register_exception() has an active
        # exception to report.
        message = "Elsevier paper: %s is missing PDF." % (doi, )
        register_exception(alert_admin=True, prefix=message)
        self.logger.warning(message)

    ## copy other formats to bibupload file
    if recid:
        from invenio.bibdocfile import BibRecDocs
        for bibfile in BibRecDocs(recid[0]).list_latest_files():
            if bibfile.get_format() == '.pdf;pdfa':
                continue
            record_add_field(rec, 'FFT',
                             subfields=[('a', bibfile.get_full_path()),
                                        ('n', bibfile.get_name()),
                                        ('f', bibfile.get_format())])
    return record_xml_output(rec)
def test_BibDocFiles(self):
    """bibdocfile - BibDocFile functions """
    # Add a new JPEG document to record 2 and take its first file.
    my_bibrecdoc = BibRecDocs(2)
    my_bibrecdoc.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg',
                              'Main', 'img_test', False,
                              'test add new file', 'test', '.jpg')
    my_new_bibdoc = my_bibrecdoc.get_bibdoc("img_test")
    my_new_bibdocfile = my_new_bibdoc.list_all_files()[0]
    #get url
    self.assertEqual(my_new_bibdocfile.get_url(),
                     CFG_SITE_URL + '/record/2/files/img_test.jpg')
    #get type
    self.assertEqual(my_new_bibdocfile.get_type(), 'Main')
    #get path
    # assertTrue replaces the deprecated assert_ alias.
    self.assertTrue(
        my_new_bibdocfile.get_path().startswith(CFG_WEBSUBMIT_FILEDIR))
    self.assertTrue(
        my_new_bibdocfile.get_path().endswith('/img_test.jpg;1'))
    #get bibdocid
    self.assertEqual(my_new_bibdocfile.get_bibdocid(),
                     my_new_bibdoc.get_id())
    #get name
    self.assertEqual(my_new_bibdocfile.get_name(), 'img_test')
    #get full name
    self.assertEqual(my_new_bibdocfile.get_full_name(), 'img_test.jpg')
    #get full path
    self.assertTrue(
        my_new_bibdocfile.get_full_path().startswith(CFG_WEBSUBMIT_FILEDIR))
    self.assertTrue(
        my_new_bibdocfile.get_full_path().endswith('/img_test.jpg;1'))
    #get format
    self.assertEqual(my_new_bibdocfile.get_format(), '.jpg')
    #get version
    self.assertEqual(my_new_bibdocfile.get_version(), 1)
    #get description
    self.assertEqual(my_new_bibdocfile.get_description(),
                     my_new_bibdoc.get_description('.jpg', version=1))
    #get comment
    self.assertEqual(my_new_bibdocfile.get_comment(),
                     my_new_bibdoc.get_comment('.jpg', version=1))
    #get recid
    self.assertEqual(my_new_bibdocfile.get_recid(), 2)
    #get status
    self.assertEqual(my_new_bibdocfile.get_status(), '')
    #get size
    self.assertEqual(my_new_bibdocfile.get_size(), 91750)
    #get checksum
    self.assertEqual(my_new_bibdocfile.get_checksum(),
                     '28ec893f9da735ad65de544f71d4ad76')
    #check
    self.assertEqual(my_new_bibdocfile.check(), True)
    #display
    value = my_new_bibdocfile.display(ln='en')
    # A unittest assertion instead of a bare assert, which would be
    # compiled away when Python runs with -O.
    self.assertTrue('files/img_test.jpg?version=1">' in value)
    #hidden?
    self.assertEqual(my_new_bibdocfile.hidden_p(), False)
    #delete
    my_new_bibdoc.delete()
    self.assertEqual(my_new_bibdoc.deleted_p(), True)
def get_pdfa_record(self, path=None):
    """Return the MARCXML that uploads the PDF/A found in a package.

    Steps, in order:
      1. extract the DOI from the article XML under `path`;
      2. search for a non-deleted record carrying that DOI; use its id as
         field 001, or else add a 024 DOI field and log an error;
      3. add an FFT field for 'main_a-2b.pdf' (format '.pdf;pdfa') or,
         if absent, for 'main.pdf'; when both are missing the situation
         is registered via register_exception() and logged as a warning;
      4. for an existing record, add FFT fields for its latest files whose
         format is not '.pdf;pdfa'.

    @param path: directory of the package
    @return: result of record_xml_output() on the assembled record
    """
    from invenio.search_engine import perform_request_search

    rec = None
    xml_doc = self.get_article(path)
    rec = create_record()
    publication_info = self.get_publication_information(xml_doc)
    # Nine values are expected; only the last one (the DOI) is needed.
    dummy, dummy, dummy, dummy, dummy, dummy, dummy, dummy, doi = \
        publication_info
    query = '0247_a:"%s" AND NOT 980:"DELETED"' % (doi,)
    recid = perform_request_search(p=query)

    if not recid:
        doi_subfields = [('a', doi), ('2', 'DOI')]
        record_add_field(rec, '024', ind1='7', subfields=doi_subfields)
        message = ('Adding PDF/A. No paper with this DOI: '
                   '%s. Trying to add it anyway.') % (doi,)
        self.logger.error(message)
    else:
        record_add_field(rec, '001', controlfield_value=recid[0])

    try:
        if exists(join(path, 'main_a-2b.pdf')):
            pdfa_subfields = [('a', join(path, 'main_a-2b.pdf')),
                              ('n', 'main'),
                              ('f', '.pdf;pdfa')]
            record_add_field(rec, 'FFT', subfields=pdfa_subfields)
            self.logger.debug('Adding PDF/A to record: %s' % (doi,))
        elif exists(join(path, 'main.pdf')):
            pdf_subfields = [('a', join(path, 'main.pdf'))]
            record_add_field(rec, 'FFT', subfields=pdf_subfields)
            message = 'No PDF/A in VTEX package for record: ' + doi
            self.logger.debug(message)
        else:
            message = "Record %s doesn't contain PDF file." % (doi,)
            raise MissingFFTError(message)
    except MissingFFTError:
        # The error is raised and caught locally so that
        # register_exception() can report an active exception.
        message = "Elsevier paper: %s is missing PDF." % (doi,)
        register_exception(alert_admin=True, prefix=message)
        self.logger.warning(message)

    ## copy other formats to bibupload file
    if recid:
        from invenio.bibdocfile import BibRecDocs
        existing_docs = BibRecDocs(recid[0])
        for bibfile in existing_docs.list_latest_files():
            if bibfile.get_format() != '.pdf;pdfa':
                other_subfields = [('a', bibfile.get_full_path()),
                                   ('n', bibfile.get_name()),
                                   ('f', bibfile.get_format())]
                record_add_field(rec, 'FFT', subfields=other_subfields)
    return record_xml_output(rec)
def check_records(records):
    """Rebuild the author fields of Springer records from their XML files.

    For every record accepted by is_springer(), each latest file in '.xml'
    format is parsed (JATSParser when the content mentions
    "-//NLM//DTD JATS", NLMParser otherwise). Fields 100 and 700 are then
    removed and re-created from the parsed authors: the first author goes
    to 100__, the others to 700__.

    If an XML file cannot be read as authors, a warning is put on the
    record and its existing author fields are left untouched.

    @param records: iterable of records to check
    """
    for record in records:
        if not is_springer(record):
            continue
        latest_files = BibRecDocs(int(record.record_id)).list_latest_files()
        for doc in latest_files:
            if doc.get_format() != '.xml':
                continue

            # The context manager guarantees the file handle is released.
            with open(doc.get_full_path()) as xml_file:
                content = xml_file.read()

            # Parse before touching the record, so that a failure cannot
            # leave the record stripped of its author fields.
            try:
                if "-//NLM//DTD JATS" in content:
                    parser = JATSParser()
                else:
                    parser = NLMParser()
                authors = parser.get_authors(parseString(content))
            except Exception:
                record.warn('Problem with parsing XML.')
                continue

            # Drop the old author fields. Each tag is removed on its own
            # so that a missing 100 does not leave stale 700 fields behind.
            removed = False
            for tag in ('100', '700'):
                try:
                    del record[tag]
                except Exception:
                    # Best effort: the tag is simply not there.
                    continue
                removed = True
            if removed:
                record.amended = True

            first_author = True
            for author in authors:
                if author.get('surname'):
                    given = (author.get('given_name')
                             or author.get('initials', ''))
                    subfields = [
                        ('a', '%s, %s' % (author.get('surname'), given))]
                else:
                    subfields = [('a', '%s' % (author.get('name', '')))]

                if 'orcid' in author:
                    subfields.append(('j', author['orcid']))

                if 'affiliation' in author:
                    for aff in author["affiliation"]:
                        subfields.append(('v', aff))
                    add_nations_field(subfields)

                if author.get('email'):
                    subfields.append(('m', author['email']))

                if first_author:
                    record.add_field('100__', value='', subfields=subfields)
                    first_author = False
                else:
                    record.add_field('700__', value='', subfields=subfields)
def upload_fulltext(recid, path):
    '''
    Attach the file found at the given path to a record.

    The document name is the last path component up to its first dot; the
    format is whatever follows the last dot of the whole path, cut at the
    first ';'. The file is added under CFG_DOCTYPE_UPLOAD_COLLECTION.

    @param recid: id of the record
    @param path: path of the uploaded document to store
    @return: always the empty string
    '''
    basename = path.rsplit('/', 1)[-1]
    docname = basename.split('.', 1)[0]
    suffix = path.rsplit('.', 1)[-1]
    doctype = suffix.split(';', 1)[0]

    # upload the file to the record
    BibRecDocs(recid).add_new_file(path, CFG_DOCTYPE_UPLOAD_COLLECTION,
                                   docname, format=doctype)
    return ''
def format_element(bfo, file_format='pdf'):
    """Return the files attached to this record, in order to be
    embedded as a Google Scholar tag.

    URLs are looked up in this order, stopping at the first non-empty
    result: non-hidden files of the 'Main' doctype, non-hidden files of
    any doctype, then external URLs ending with '.<file_format>'.
    Returns "" when CFG_WEBSEARCH_ENABLE_GOOGLESCHOLAR is false.

    @param file_format: the format to include in this output
    """
    if not CFG_WEBSEARCH_ENABLE_GOOGLESCHOLAR:
        return ""

    bibarchive = BibRecDocs(bfo.recID)
    files, old_versions_p, additionals_p = get_files(bfo)

    def visible_urls(entries):
        # Entries are indexed as: [0] the URL, [2] the file format.
        return [entry[0] for entry in entries
                if entry[2] == file_format
                and not url_is_hidden(entry[0], bibarchive)]

    filtered_files = []
    if 'main_urls' in files and 'Main' in files['main_urls']:
        filtered_files = visible_urls(files['main_urls']['Main'])

    if not filtered_files and 'main_urls' in files:
        # Fall back to other doctypes
        for list_of_files in files['main_urls'].values():
            filtered_files.extend(visible_urls(list_of_files))

    if not filtered_files and 'others_urls' in files:
        # Fall back to external urls
        wanted_suffix = '.' + file_format
        filtered_files.extend(
            [file_url for file_url, file_name in files['others_urls']
             if file_url.endswith(wanted_suffix)])

    return "\n".join(
        ['<meta name="citation_pdf_url" content="%s" />' % url
         for url in filtered_files])
def setUp(self):
    """Attach test.jpg to record 2 under a docname proposed as unique.

    Stores the BibRecDocs object, the docname, the new bibdoc and its id
    on the test instance.
    """
    record_docs = BibRecDocs(2)
    self.my_bibrecdoc = record_docs
    self.unique_name = record_docs.propose_unique_docname("file")
    source_file = CFG_PREFIX + "/lib/webtest/invenio/test.jpg"
    self.my_bibdoc = record_docs.add_new_file(
        source_file, docname=self.unique_name)
    self.my_bibdoc_id = self.my_bibdoc.id
def add_link_to_fulltext(bfo, text):
    """
    Creates a link to fulltext on given text.

    Only the first latest file of each bibdoc is considered, and only when
    its status is '' or 'PUBLIC'. With no such file the text is returned
    unchanged; with exactly one the link points to that file; with several
    it points to the files page of the record.

    @param bfo: format object of the record (its recID is used)
    @param text: text to wrap in the link
    @return: `text`, or an HTML anchor around it
    """
    documents = BibRecDocs(bfo.recID)

    # assert we have some files
    if documents and len(documents.bibdocs) == 0:
        return text

    # check visibility
    visible_list = [
        files[0]
        for files in (doc.list_latest_files() for doc in documents.bibdocs)
        if len(files) and files[0].status in ('', 'PUBLIC')
    ]

    # build url
    if len(visible_list) == 0:
        return text
    if len(visible_list) == 1:
        # only one, return a direct url to the last version
        return '<a href ="%s">%s</a>' % (visible_list[0].fullurl, text)
    return '<a href ="%s/record/%s/files">%s</a>' % (
        CFG_SITE_URL, bfo.recID, text)
def uncook_files(webdeposit_json, recid=None, json_reader=None):
    """Append file descriptions to webdeposit_json['files'].

    The 'files' list is created when missing. Without a recid, one
    {'name': ...} entry is added per item of json_reader['url'], the name
    being the part of the item's 'url' after its last '/'. With a recid,
    one {'name', 'file', 'size'} entry is added per latest file of that
    record.

    @param webdeposit_json: dictionary updated in place
    @param recid: id of the record to read files from, or None
    @param json_reader: mapping with a 'url' list; used when recid is None
    @return: the same webdeposit_json object
    """
    file_entries = webdeposit_json.setdefault('files', [])

    if recid is not None:
        latest = BibRecDocs(recid, human_readable=True).list_latest_files()
        for bibdocfile in latest:
            file_entries.append({
                'name': bibdocfile.get_full_name(),
                'file': bibdocfile.get_path(),
                'size': bibdocfile.get_size(),
            })
        return webdeposit_json

    for item in json_reader['url']:
        file_entries.append({'name': item['url'].rsplit('/', 1)[-1]})
    return webdeposit_json
def get_filenames(recid):
    """
    Returns names of the files associated with specific record
    and their derivatives. Takes as a parameter the recid of a record.

    Example:
    input: recID 999 (record with files ['thesis.ps.gz', 'random.pdf'])
    output: ['thesis.ps.gz', 'thesis.ps', 'thesis', 'random.pdf', 'random']

    A record without any file yields an empty list.

    @param recid: recid of a record
    @return: flat list of names, in the order of the record's latest files
    """
    filenames = []
    for bibdocfile in BibRecDocs(recid).list_latest_files():
        # extend() flattens as we go: no failure on an empty record (as
        # reduce() without an initial value has) and no repeated copying.
        filenames.extend(_get_filenames(bibdocfile.name + bibdocfile.format))
    return filenames
def generate_keywords(req, recid, argd):
    """Extracts keywords from the fulltexts (if found) for the given recid.

    It first checks whether the keywords are not already stored in the
    temp file (maybe from the previous run). Otherwise, provided the user
    is authorized for 'runbibclassify' and the record has files, it either
    shows the "generate" form or, when argd['generate'] is 'yes', schedules
    the extraction. Messages for the user are written to `req`.

    @var req: req object
    @var recid: record id
    @var argd: arguments passed from web; 'ln' and 'generate' are read
    @return: the result of record_get_keywords() when the temp file could
        be loaded; in every other case the tuple (0, {}, None)
    """
    ln = argd['ln']
    _ = gettext_set_language(ln)
    keywords = {}

    def warning_page(text):
        # All notices of this page share the same warning box markup.
        return template.tmpl_page_msg(
            msg='<div class="warningbox">%s</div>' % text)

    # check the files were not already generated
    abs_path = bibclassify_engine.get_tmp_file(recid)
    if os.path.exists(abs_path):
        try:
            # Try to load the data from the tmp file
            recs = bibupload.xml_marc_to_records(
                bibupload.open_marc_file(abs_path))
            return record_get_keywords(recs[0])
        except Exception:
            # Best effort: an unusable temp file just means that we go on
            # as if nothing had been generated yet.
            pass

    # check it is allowed (for this user) to generate pages
    (exit_stat, msg) = acce.acc_authorize_action(req, 'runbibclassify')
    if exit_stat != 0:
        log.info('Access denied: ' + msg)
        msg = _("The site settings do not allow automatic keyword extraction")
        req.write(template.tmpl_page_msg(msg=msg))
        return 0, keywords, None

    # register generation
    bibdocfiles = BibRecDocs(recid).list_latest_files()
    if not bibdocfiles:
        # The message is written as adjacent literals; a line-continuation
        # backslash inside the literal would end up in the shown text.
        req.write(warning_page(
            _("Unfortunately, we don't have a PDF fulltext for this "
              "record in the storage, keywords cannot be generated "
              "using an automated process.")))
        return 0, keywords, None

    # User arrived at a page, but no keywords are available
    inprogress, msg = _doc_already_submitted(recid)
    if argd['generate'] != 'yes':
        # Display a form and give them possibility to generate keywords
        if inprogress:
            req.write(warning_page(_(msg)))
        else:
            req.write(template.tmpl_page_generate_keywords(req=req, **argd))
    else:
        # after user clicked on "generate" button
        if inprogress:
            req.write(warning_page(_(msg)))
        else:
            schedule_extraction(recid,
                                taxonomy=bconfig.CFG_EXTRACTION_TAXONOMY)
            # Note the space after "automated": without it the two
            # literals are glued into "automatedkeyword".
            req.write(warning_page(
                _('We have registered your request, the automated '
                  'keyword extraction will run after some time. '
                  'Please return back in a while.')))
    return 0, keywords, None
def check_record(record):
    """
    Validates the checksum of all the BibDocFile's in the record

    For each latest file of each bibdoc of the record, the record is
    marked invalid when the file is missing on disk, when its check()
    fails, or - if HAS_MAGIC is set - when the mime type stored on the
    file differs from the one detected by the magic library.

    @param record: record to validate; its id is read from
        record["001"][0][3]
    """
    record_id = record["001"][0][3]
    for doc in BibRecDocs(record_id).list_bibdocs():
        for bibfile in doc.list_latest_files():
            path = bibfile.fullpath
            if not os.path.exists(path):
                record.set_invalid("File doesn't exists %s" % path)
                # Nothing else can be verified on a missing file.
                continue

            if not bibfile.check():
                record.set_invalid("Invalid checksum for file %s" % path)

            if not HAS_MAGIC:
                continue

            # HAS_MAGIC == 1 selects the from_file() API; any other true
            # value selects the module-level magic_object.
            if HAS_MAGIC == 1:
                magic_mime = magic.from_file(path, mime=True)
            else:
                magic_mime = magic_object.file(path)

            if bibfile.mime != magic_mime:
                # The trailing space keeps the two halves of the message
                # from being glued into "differentfrom".
                record.set_invalid(
                    ("Guessed mime type from extension (%s) is different " +
                     "from guessed mime type from headers (%s)") %
                    (bibfile.mime, magic_mime))
def format_element(bfo):
    """
    Prints buttons to download all photos for each size

    One button is produced per entry of
    CFG_BIBDOCFILE_SUBFORMATS_TRANSLATIONS whose total size, for this user,
    is not zero. Returns None (no output) when the record has fewer than
    two bibdocs or when no button is left to show.
    """
    current_bibrecdoc = BibRecDocs(bfo.recID)
    if len(current_bibrecdoc.bibdocs) < 2:
        # If we have less than 2 photos, there is no point in displaying the
        # "Download all" buttons
        return

    # NOTE(review): the opening <div> below is never closed by this
    # element - confirm whether the caller closes it.
    wrapper = (
        '<style> '
        '#downloadallphotos { clear: both; font-size: small; '
        'color: #555444; margin-left: 10px; } '
        '#downloadallphotos a { border-radius: 5px; '
        'box-shadow: 1px 1px 1px 1px #CCCCCC; color: #222222; '
        'display: inline-block; margin: 2px 5px; padding: 3px; '
        'text-decoration: none; background-color: #E6E6FA; } '
        '#downloadallphotos a:hover { '
        'background: -moz-linear-gradient(center top , '
        '#3A3A3A 0%, #7D7E7D 100%) repeat scroll 0 0 rgba(0, 0, 0, 0); '
        'color: #fff; } '
        '</style>'
        '<div id="downloadallphotos">Download all pictures:'
    )
    button_template = (
        '<a %(original)s href="%(site)s/record/%(recID)s/files/'
        'allfiles-%(size)s">%(size)s (%(total)s)</a>'
    )

    buttons = []
    for (size, internal_size) in CFG_BIBDOCFILE_SUBFORMATS_TRANSLATIONS:
        total = current_bibrecdoc.get_total_size_latest_version(
            bfo.user_info, internal_size)
        # don't display the button if the size will be 0
        if not total:
            continue
        if size == 'original':
            original_attr = 'data-size="Original"'
        else:
            original_attr = ''
        buttons.append(button_template % {'original': original_attr,
                                          'site': CFG_SITE_URL,
                                          'recID': bfo.recID,
                                          'size': size,
                                          'total': nice_size(total)})

    # If there are no buttons to display, don't display the rest of the HTML
    if buttons:
        return wrapper + ''.join(buttons)
def get_pdf_snippets(recID, patterns, user_info):
    """
    Extract text snippets around 'patterns' from the newest PDF file of 'recID'
    The search is case-insensitive.
    The snippets are meant to look like in the results of the popular search
    engine: using " ... " between snippets.
    For empty patterns it returns ""

    The first bibdoc that has extracted text and may be shown (always on
    an INSPIRE site, otherwise subject to check_bibdoc_authorization) is
    used. The number of characters around a match and the maximum number
    of snippets come from CFG_WEBSEARCH_FULLTEXT_SNIPPETS_CHARS and
    CFG_WEBSEARCH_FULLTEXT_SNIPPETS: the entry keyed by the document
    status (or, on INSPIRE, its doctype) when present, else the '' entry.

    @param recID: record ID to consider
    @param patterns: list of patterns to retrieve
    @param user_info: user information used for the authorization check
    @return: snippet, wrapped in a "snippetbox" div, or "" when there is
        nothing to show
    """
    from invenio.bibdocfile import BibRecDocs, check_bibdoc_authorization

    text_path = ""
    text_path_courtesy = ""
    for bd in BibRecDocs(recID).list_bibdocs():
        # Show excluded fulltext in snippets on Inspire, otherwise depending
        # on authorization
        if bd.get_text() and (
                CFG_INSPIRE_SITE or
                not check_bibdoc_authorization(user_info,
                                               bd.get_status())[0]):
            text_path = bd.get_text_path()
            text_path_courtesy = bd.get_status()
            if CFG_INSPIRE_SITE and not text_path_courtesy:
                # get courtesy from doctype, since docstatus was empty:
                text_path_courtesy = bd.get_type()
                if text_path_courtesy == 'INSPIRE-PUBLIC':
                    # but ignore 'INSPIRE-PUBLIC' doctype
                    text_path_courtesy = ''
            break  # stop at the first good PDF textable file

    # Each limit is looked up in its own configuration dictionary; the
    # default number of snippets must not come from the *_CHARS one.
    nb_chars = CFG_WEBSEARCH_FULLTEXT_SNIPPETS_CHARS.get(
        text_path_courtesy,
        CFG_WEBSEARCH_FULLTEXT_SNIPPETS_CHARS.get('', 0))
    max_snippets = CFG_WEBSEARCH_FULLTEXT_SNIPPETS.get(
        text_path_courtesy,
        CFG_WEBSEARCH_FULLTEXT_SNIPPETS.get('', 0))

    if not (text_path and nb_chars and max_snippets):
        return ""

    out = ''
    if CFG_WEBSEARCH_FULLTEXT_SNIPPETS_GENERATOR == 'native':
        out = get_text_snippets(text_path, patterns, nb_chars, max_snippets)
        if not out:
            # no hit, so check stemmed versions:
            from invenio.bibindex_engine_stemmer import stem
            stemmed_patterns = [stem(p, 'en') for p in patterns]
            out = get_text_snippets(text_path, stemmed_patterns,
                                    nb_chars, max_snippets)
    elif CFG_WEBSEARCH_FULLTEXT_SNIPPETS_GENERATOR == 'SOLR':
        out = solr_get_snippet(patterns, recID, nb_chars, max_snippets)

    if not out:
        return ""

    out_courtesy = ""
    if CFG_INSPIRE_SITE and text_path_courtesy:
        out_courtesy = ('<strong>Snippets courtesy of ' +
                        text_path_courtesy + '</strong><br>')
    return """<div class="snippetbox">%s%s</div>""" % (out_courtesy, out)
def get_filenames(recid):
    """
    Returns names of the files associated with specific record
    and their derivatives. Takes as a parameter the recid of a record.

    Example:
    input: recID 999 (record with files ['thesis.ps.gz', 'random.pdf'])
    output: ['thesis.ps.gz', 'thesis.ps', 'thesis', 'random.pdf', 'random']

    An empty list is returned for a record that has no files.

    @param recid: recid of a record
    @return: flat list of names, following the order of the latest files
    """
    docs = BibRecDocs(recid)
    names = []
    for d in docs.list_latest_files():
        # Accumulating with extend() copes with a record without files,
        # where reduce() without an initial value raises TypeError, and
        # avoids rebuilding the whole list for every file.
        names.extend(_get_filenames(d.name + d.format))
    return names
class BibDocFsInfoTest(InvenioTestCase):
    """Regression tests about the table bibdocfsinfo"""

    def setUp(self):
        """Attach test.jpg to record 2 under a docname proposed as unique."""
        from invenio.bibdocfile import BibRecDocs
        record_docs = BibRecDocs(2)
        self.my_bibrecdoc = record_docs
        self.unique_name = record_docs.propose_unique_docname('file')
        self.my_bibdoc = record_docs.add_new_file(
            CFG_PREFIX + '/lib/webtest/invenio/test.jpg',
            docname=self.unique_name)
        self.my_bibdoc_id = self.my_bibdoc.id

    def tearDown(self):
        """Expunge the document created by setUp."""
        self.my_bibdoc.expunge()

    def test_hard_delete(self):
        """bibdocfile - test correct update of bibdocfsinfo when hard-deleting"""
        from invenio.dbquery import run_sql

        bibdoc_id = self.my_bibdoc_id

        def first_cell(query):
            # First column of the first row returned for this bibdoc.
            return run_sql(query, (bibdoc_id, ))[0][0]

        def max_version():
            return first_cell(
                "SELECT MAX(version) FROM bibdocfsinfo WHERE id_bibdoc=%s")

        def jpg_v1_is_last():
            return first_cell(
                "SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s "
                "AND version=1 AND format='.jpg'")

        def gif_v2_is_last():
            return first_cell(
                "SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s "
                "AND version=2 AND format='.gif'")

        # Initial state: a single version, flagged as the last one.
        self.assertEqual(max_version(), 1)
        self.assertEqual(jpg_v1_is_last(), True)

        # Adding a version moves the flag to the new file.
        self.my_bibdoc.add_file_new_version(
            CFG_PREFIX + '/lib/webtest/invenio/test.gif')
        self.assertEqual(max_version(), 2)
        self.assertEqual(gif_v2_is_last(), True)
        self.assertEqual(jpg_v1_is_last(), False)

        # Hard-deleting it must give the flag back to version 1.
        self.my_bibdoc.delete_file('.gif', 2)
        self.assertEqual(max_version(), 1)
        self.assertEqual(jpg_v1_is_last(), True)
def upload_fulltext(recid, path):
    '''
    Save the uploaded file to the associated record.

    The docname is derived from the last path component (text before its
    first dot) and the format from the text after the last dot of the
    path, up to the first ';'.

    @param recid: id of the record
    @param path: uploaded document to store
    @return: always the empty string
    '''
    last_component = path.split('/').pop()
    docname = last_component.partition('.')[0]
    doctype = path.split('.').pop().partition(';')[0]

    # upload the file to the record
    bibarchiv = BibRecDocs(recid)
    bibarchiv.add_new_file(
        path, CFG_DOCTYPE_UPLOAD_COLLECTION, docname, format=doctype)
    return ''