def get_media_from_recid(recid):
    '''
        List the latest files attached to a record as media dictionaries.

        @param recid: id of the record whose attached files are listed
        @return: list with one dictionary per latest file, carrying the
            keys name, file, type, path, collection, size, loaded and
            selected
    '''

    def describe(docfile):
        # The 'type' value is "application/" followed by whatever comes
        # after the last dot of the file's superformat.
        media = dict(name=docfile.get_full_name(),
                     file='',
                     type='application/%s' %
                     docfile.get_superformat().split(".")[-1],
                     path=docfile.get_full_path(),
                     collection=docfile.get_type(),
                     size=docfile.get_size(),
                     loaded=False,
                     selected='')
        # Only files whose type is "Main" are marked as selected.
        if media['collection'] == "Main":
            media['selected'] = 'checked=yes'
        return media

    return [describe(docfile)
            for docfile in BibRecDocs(recid).list_latest_files()]
예제 #2
0
def format_element(bfo):
    """
    Display image of the thumbnail plot if we are in selected plots collections

    The first latest file of type "Thumb" is returned immediately. If there
    is none, the "Plot" file with the highest index is used instead; the
    index is read from the first five characters of the file description.

    @param bfo: object whose recID attribute identifies the record
    @return: an HTML <div> wrapping an <img> tag, or "" when the record has
        no usable image
    """
    bibarchive = BibRecDocs(bfo.recID)

    plot_images = []

    for doc in bibarchive.list_bibdocs():
        for _file in doc.list_latest_files():
            file_type = _file.get_type()

            if file_type == "Plot":
                try:
                    index = int(_file.get_description()[:5])
                except (TypeError, ValueError):
                    # No description, or one that does not start with a
                    # numeric index: this plot cannot be ranked, skip it.
                    continue

                img_location = _file.get_url()
                if img_location == "":
                    continue

                # Store the rendered <img> tag (not the bare URL) so that
                # the fallback below produces the same kind of markup as
                # the "Thumb" branch.
                plot_images.append(
                    (index, '<img src="%s" width="100px"/>' % img_location))

            elif file_type == "Thumb":
                img = '<img src="%s" width="100px"/>' % _file.get_url()
                return '<div align="left">' + img + "</div>"

    # then we use the default: the last plot with an image
    plot_images = sorted(plot_images, key=lambda plot: plot[0])
    if plot_images:
        return '<div align="left">' + plot_images[-1][1] + "</div>"
    return ""
예제 #3
0
 def test_BibDocFiles(self):
     """bibdocfile - BibDocFile functions """
     # Attach test.jpg to record 2 as a 'Main' document named 'img_test'.
     my_bibrecdoc = BibRecDocs(2)
     my_bibrecdoc.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg',
                               'Main', 'img_test', False,
                               'test add new file', 'test', '.jpg')
     my_new_bibdoc = my_bibrecdoc.get_bibdoc("img_test")
     try:
         my_new_bibdocfile = my_new_bibdoc.list_all_files()[0]
         #get url
         self.assertEqual(
             my_new_bibdocfile.get_url(),
             CFG_SITE_URL + '/%s/2/files/img_test.jpg' % CFG_SITE_RECORD)
         #get type
         self.assertEqual(my_new_bibdocfile.get_type(), 'Main')
         #get path
         self.assertTrue(
             my_new_bibdocfile.get_path().startswith(CFG_WEBSUBMIT_FILEDIR))
         self.assertTrue(
             my_new_bibdocfile.get_path().endswith('/img_test.jpg;1'))
         #get bibdocid
         self.assertEqual(my_new_bibdocfile.get_bibdocid(),
                          my_new_bibdoc.get_id())
         #get name
         self.assertEqual(my_new_bibdocfile.get_name(), 'img_test')
         #get full name
         self.assertEqual(my_new_bibdocfile.get_full_name(), 'img_test.jpg')
         #get full path
         self.assertTrue(my_new_bibdocfile.get_full_path().startswith(
             CFG_WEBSUBMIT_FILEDIR))
         self.assertTrue(
             my_new_bibdocfile.get_full_path().endswith('/img_test.jpg;1'))
         #get format
         self.assertEqual(my_new_bibdocfile.get_format(), '.jpg')
         #get version
         self.assertEqual(my_new_bibdocfile.get_version(), 1)
         #get description
         self.assertEqual(my_new_bibdocfile.get_description(),
                          my_new_bibdoc.get_description('.jpg', version=1))
         #get comment
         self.assertEqual(my_new_bibdocfile.get_comment(),
                          my_new_bibdoc.get_comment('.jpg', version=1))
         #get recid
         self.assertEqual(my_new_bibdocfile.get_recid(), 2)
         #get status
         self.assertEqual(my_new_bibdocfile.get_status(), '')
         #get size
         self.assertEqual(my_new_bibdocfile.get_size(), 91750)
         #get checksum
         self.assertEqual(my_new_bibdocfile.get_checksum(),
                          '28ec893f9da735ad65de544f71d4ad76')
         #check
         self.assertEqual(my_new_bibdocfile.check(), True)
         #display
         value = my_new_bibdocfile.display(ln='en')
         # assertTrue instead of a bare assert, which is stripped under -O.
         self.assertTrue('files/img_test.jpg?version=1">' in value)
         #hidden?
         self.assertEqual(my_new_bibdocfile.hidden_p(), False)
     finally:
         # Delete the document even when an assertion above fails, so a
         # failing run does not leave 'img_test' attached to record 2.
         my_new_bibdoc.delete()
     #delete
     self.assertEqual(my_new_bibdoc.deleted_p(), True)
예제 #4
0
def format_element(bfo, width="", caption="yes", max="3"):
    """
    Display image of the plot if we are in selected plots collections

    Every latest file of type "Plot" attached to the record is rendered as
    an image linking to the record's plots page. The first five characters
    of the file description give the plot index (used for ordering and as
    the link anchor); the rest of the description becomes the image title.

    @param width: the width of the returned image (Eg: '100px')
    @param caption: display the captions or not? (not used by this code)
    @param max: the maximum number of plots to display (-1 is all plots)
    @return: HTML string with the plots, or '' when there is no plot
    """
    ## To achieve this, we take the pngs associated with this document

    max_plots = int(max)

    if width != "":
        width = 'width="%s"' % width

    plots = []
    for doc in BibRecDocs(bfo.recID).list_bibdocs():
        for _file in doc.list_latest_files():
            if _file.get_type() != "Plot":
                continue

            try:
                description = _file.get_description()
                caption_text = description[5:]
                index = int(description[:5])
                img_location = _file.get_url()
            except Exception:
                # FIXME: we have hit probably a plot context file,
                # so ignore this document; but it would be safer
                # to check subformat type, so that we don't mask
                # other eventual errors here.
                continue

            img = '<img style="vertical-align:middle;" src="%s" title="%s" %s/>' % \
                  (img_location, caption_text, width)

            plotlink = create_html_link(urlbase='%s/%s/%s/plots#%d' %
                                        (CFG_BASE_URL, CFG_SITE_RECORD,
                                         bfo.recID, index),
                                        urlargd={},
                                        link_label=img)

            plots.append((index, plotlink))

    plots.sort(key=lambda plot: plot[0])

    more_link = ""
    if max_plots > 0:
        plots = plots[:max_plots]
        # Only offer "Show more plots" when a limit is in force and has
        # been reached; with no limit every plot is already displayed.
        if len(plots) >= max_plots:
            more_link = "<a href='/record/" + bfo.control_field('001') + "/plots'>Show more plots</a>"

    if not plots:
        return ''

    plot_links = [plotlink for _index, plotlink in plots]

    return '<div style="overflow-x:auto;display:inline;width:100%;">' +\
           " ".join(plot_links) + ' ' + more_link + '</div>'
def goto(cc=CFG_SITE_NAME, p='', f='', sf='', so='d', docname='', format=''):
    """
    Redirect the user to the latest record in the given collection,
    optionally within the specified pattern and field. If docname
    and format are specified, redirect the user to the corresponding
    docname and format. If docname it is not specified, but there is
    only a single bibdoc attached to the record will redirect to that
    one.

    @param cc: collection passed to perform_request_search
    @param p: search pattern
    @param f: search field
    @param sf: sort field
    @param so: sort order
    @param docname: name of the bibdoc to link to (optional)
    @param format: file format to link to; when empty the record URL is
        returned
    @return: the URL of the file or of the record, or None when the search
        returns no record
    """
    recids = perform_request_search(cc=cc, p=p, f=f, sf=sf, so=so)
    if not recids:
        return None

    ## We shall take the last recid. This is the last one
    recid = recids[-1]
    url = '/%s/%s' % (CFG_SITE_RECORD, recid)
    if not format:
        return url

    # One BibRecDocs instance serves both the docname lookup and the
    # bibdoc retrieval.
    bibrecdocs = BibRecDocs(recid)
    if not docname:
        docnames = bibrecdocs.get_bibdoc_names()
        if len(docnames) != 1:
            # No docname given and no single obvious candidate.
            return url
        docname = docnames[0]

    try:
        bibdoc = bibrecdocs.get_bibdoc(docname)
        return bibdoc.get_file(format=format).get_url()
    except InvenioBibDocFileError:
        # Unknown docname or format: fall back to the record page.
        return url
def migrate_bibdoc_status(recid, is_public, access_right):
    """
    Set a firerole status on every bibdoc of a public record.

    @param recid: id of the record whose bibdocs are updated
    @param is_public: when false, nothing is changed
    @param access_right: "open", "embargoed", "closed" or "restricted";
        any other value leaves the bibdocs untouched
    """
    from invenio.search_engine import get_fieldvalues
    from invenio.bibdocfile import BibRecDocs

    if not is_public:
        # No status is computed for non-public records.
        return

    submitter = get_fieldvalues(recid, "8560_f")[0]

    if access_right == "open":
        # Access to everyone
        rules = ["allow any"]
    elif access_right == "embargoed":
        # Submitter first, everyone else denied until the date read
        # from 942__a, then everybody.
        embargo_date = get_fieldvalues(recid, "942__a")[0]
        rules = ['allow email "%s"' % submitter,
                 'deny until "%s"' % embargo_date,
                 "allow any"]
    elif access_right in ("closed", "restricted"):
        # Access to submitter, deny everyone else
        rules = ['allow email "%s"' % submitter, "deny all"]
    else:
        rules = []

    if not rules:
        return

    status = "firerole: %s" % "\n".join(rules)
    for bibdoc in BibRecDocs(recid).list_bibdocs():
        bibdoc.set_status(status)
예제 #7
0
def Move_Files_Archive(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    Synchronise the documents attached to the record with the files found
    in the "files/MainFiles" and "files/AdditionalFiles" directories below
    curdir: attached documents of the matching doctype whose docname has
    no file in the directory are deleted, then every file found is added
    to the record.

    @param parameters: not used by this function
    @param curdir: directory containing the "files" sub-directory
    @param form: not used by this function
    @param user_info: not used by this function
    @return: always the empty string
    """
    watcheddirs = {'Main': "%s/files/MainFiles" % curdir,
                   'Additional': "%s/files/AdditionalFiles" % curdir}
    for doctype, directory in watcheddirs.items():
        if not os.path.exists(directory):
            continue

        # Group the file names found on disk by docname.
        files_by_docname = {}
        for entry in sorted(os.listdir(directory)):
            dummy, docname, dummy_extension = decompose_file(entry)
            files_by_docname.setdefault(docname, []).append(entry)

        # first delete all missing files
        # NOTE(review): sysno is neither a parameter nor a local here;
        # presumably it is supplied by the environment this function
        # runs in -- confirm.
        bibarchive = BibRecDocs(sysno)
        for existing_bibdoc in bibarchive.list_bibdocs(doctype):
            if existing_bibdoc.get_docname() not in files_by_docname:
                existing_bibdoc.delete()

        # then create/update the new ones
        # Each file is added using its real on-disk name: interpolating
        # the whole per-docname list into the path would yield a path
        # that does not exist.
        # NOTE(review): files sharing a docname are each handed to
        # add_new_file; check whether extra formats should be attached
        # to the same document differently -- TODO confirm.
        for docname in sorted(files_by_docname):
            for entry in files_by_docname[docname]:
                bibarchive.add_new_file('%s/%s' % (directory, entry),
                                        doctype=doctype,
                                        never_fail=True)
    return ""
def create_download_popup(bfo):
    """
    Build the HTML of the download popup for the videos of a record.

    @param bfo: object whose recID attribute identifies the record
    @return: the filled popup skeleton, or "" when no video file is found
    """
    video_formats = ('.mp4', '.webm', '.ogv',
                     '.mov', '.wmv', '.avi',
                     '.mpeg', '.flv', '.mkv')
    entries = []
    ## Look at every file of every BibDoc and keep the video ones
    for bibdoc in BibRecDocs(bfo.recID).list_bibdocs():
        for candidate in bibdoc.list_all_files():
            if candidate.get_superformat() not in video_formats:
                continue
            url = candidate.get_url()
            # The codec label is the superformat without its leading dot.
            codec = candidate.get_superformat()[1:]
            resolution = candidate.get_subformat()
            size = candidate.get_size()
            entries.append(
                create_download_element(url, codec, size, resolution))

    if not entries:
        return ""
    return html_skeleton_popup % {'elements': "\n".join(entries)}
def get_media_from_recid(recid):
    '''
        Collect a description of each latest file attached to a record.

        @param(recid) : id of the record whose files are described
        @return (medias) : list of dictionaries with the keys name, file,
            type, path, collection, size, loaded and selected
    '''

    medias = []

    for attached in BibRecDocs(recid).list_latest_files():
        full_name = attached.get_full_name()
        # Text after the last dot of the superformat, e.g. "pdf".
        extension = attached.get_superformat().split(".")[-1]
        full_path = attached.get_full_path()
        collection = attached.get_type()
        size = attached.get_size()

        # Files of type "Main" are marked as selected.
        if collection == "Main":
            selected = 'checked=yes'
        else:
            selected = ''

        medias.append({'name': full_name,
                       'file': '',
                       'type': 'application/%s' % extension,
                       'path': full_path,
                       'collection': collection,
                       'size': size,
                       'loaded': False,
                       'selected': selected})

    return medias
예제 #10
0
def solr_add_range(lower_recid, upper_recid):
    """
    Adds the regarding field values of all records from the lower recid to the upper one to Solr.

    For each existing record the abstract, authors, fulltext, keywords and
    title are gathered and passed to solr_add. Each value is extracted on
    a best-effort basis: if its extraction fails, an empty string is
    indexed for it instead. The Solr connection is committed once at the
    end.

    @param lower_recid: first record id of the range (inclusive)
    @param upper_recid: last record id of the range (inclusive)
    """
    for recid in range(lower_recid, upper_recid + 1):
        if not record_exists(recid):
            continue

        # "except Exception" rather than a bare except, so that
        # KeyboardInterrupt / SystemExit can still stop this loop.
        try:
            abstract = unicode(remove_control_characters(get_fieldvalues(recid, CFG_MARC_ABSTRACT)[0]), 'utf-8')
        except Exception:
            abstract = ""
        try:
            first_author = remove_control_characters(get_fieldvalues(recid, CFG_MARC_AUTHOR_NAME)[0])
            additional_authors = remove_control_characters(reduce(lambda x, y: x + " " + y, get_fieldvalues(recid, CFG_MARC_ADDITIONAL_AUTHOR_NAME), ''))
            author = unicode(first_author + " " + additional_authors, 'utf-8')
        except Exception:
            author = ""
        try:
            bibrecdocs = BibRecDocs(recid)
            fulltext = unicode(remove_control_characters(bibrecdocs.get_text()), 'utf-8')
        except Exception:
            fulltext = ""
        try:
            keyword = unicode(remove_control_characters(reduce(lambda x, y: x + " " + y, get_fieldvalues(recid, CFG_MARC_KEYWORD), '')), 'utf-8')
        except Exception:
            keyword = ""
        try:
            title = unicode(remove_control_characters(get_fieldvalues(recid, CFG_MARC_TITLE)[0]), 'utf-8')
        except Exception:
            title = ""
        solr_add(recid, abstract, author, fulltext, keyword, title)

    SOLR_CONNECTION.commit()
    task_sleep_now_if_required(can_stop_too=True)
예제 #11
0
def format_element(bfo, subformat="480p"):
    """ Build HTML5 <source> elements for the videos of a record.

    All files attached to the record are inspected; those whose
    superformat is MP4, WebM or OGV and whose subformat equals the
    requested one each yield one <source> element.

    @param subformat: BibDocFile subformat to create the sources from (e.g. 480p)
    @return: the <source> elements joined by newlines ("" if none)
    """
    codecs_by_type = {
        'mp4': 'avc1.42E01E, mp4a.40.2',
        'webm': 'vp8, vorbis',
        'ogv': 'theora, vorbis',
    }
    source_template = '<source src=\"%s\" type=\'video/%s; codecs=\"%s\"\' />'

    video_sources = []
    for bibdoc in BibRecDocs(bfo.recID).list_bibdocs():
        for bibdocfile in bibdoc.list_all_files():
            if bibdocfile.get_superformat() not in ('.mp4', '.webm', '.ogv'):
                continue
            if bibdocfile.get_subformat() != subformat:
                continue
            src = bibdocfile.get_url()
            # Superformat without its leading dot, e.g. "mp4".
            ftype = bibdocfile.get_superformat()[1:]
            video_sources.append(
                source_template % (src, ftype, codecs_by_type[ftype]))
    return "\n".join(video_sources)
예제 #12
0
def Move_Files_Archive(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    For each of the directories "<curdir>/files/MainFiles" (doctype
    'Main') and "<curdir>/files/AdditionalFiles" (doctype 'Additional')
    that exists: delete the attached documents of that doctype whose
    docname has no file in the directory, then add every file of the
    directory to the record.

    @param parameters: not used by this function
    @param curdir: directory containing the "files" sub-directory
    @param form: not used by this function
    @param user_info: not used by this function
    @return: always the empty string
    """
    MainDir = "%s/files/MainFiles" % curdir
    IncludeDir = "%s/files/AdditionalFiles" % curdir
    watcheddirs = {'Main': MainDir, 'Additional': IncludeDir}
    for doctype, watched_dir in watcheddirs.items():
        if not os.path.exists(watched_dir):
            continue

        # docname -> names of the files on disk that belong to it
        found = {}
        for file_name in sorted(os.listdir(watched_dir)):
            dummy, docname, dummy_extension = decompose_file(file_name)
            if docname not in found:
                found[docname] = []
            found[docname].append(file_name)

        # first delete all missing files
        # NOTE(review): sysno is not defined in this function; presumably
        # the caller's environment provides it -- confirm.
        bibarchive = BibRecDocs(sysno)
        for existing_bibdoc in bibarchive.list_bibdocs(doctype):
            if existing_bibdoc.get_docname() not in found:
                existing_bibdoc.delete()

        # then create/update the new ones
        # The path is built from the real file name; formatting the list
        # of formats into the path would produce a non-existent path.
        # NOTE(review): several files with one docname are added one by
        # one with add_new_file; verify that this is the wanted way to
        # register additional formats -- TODO confirm.
        for docname in sorted(found):
            for file_name in found[docname]:
                bibarchive.add_new_file('%s/%s' % (watched_dir, file_name), doctype=doctype, never_fail=True)
    return ""
예제 #13
0
def format_element(bfo, subformat="480p"):
    """ Return HTML5 <source> tags for the record's videos.

    Only files whose superformat is MP4, WebM or OGV and whose subformat
    matches the given one are used.

    @param subformat: BibDocFile subformat to create the sources from (e.g. 480p)
    @return: one <source> tag per matching file, newline separated
    """
    supported = ('.mp4', '.webm', '.ogv')

    def matching_files():
        # Yield every attached file with a supported superformat and the
        # requested subformat.
        for bibdoc in BibRecDocs(bfo.recID).list_bibdocs():
            for candidate in bibdoc.list_all_files():
                if (candidate.get_superformat() in supported
                        and candidate.get_subformat() == subformat):
                    yield candidate

    tags = []
    for video in matching_files():
        src = video.get_url()
        ftype = video.get_superformat()[1:]
        if ftype == 'mp4':
            codecs = 'avc1.42E01E, mp4a.40.2'
        elif ftype == 'webm':
            codecs = 'vp8, vorbis'
        elif ftype == 'ogv':
            codecs = 'theora, vorbis'
        tags.append(
            '<source src=\"%s\" type=\'video/%s; codecs=\"%s\"\' />' % (
                src, ftype, codecs))
    return "\n".join(tags)
def format_element(bfo, separator='<br/>', width="800px", height="480px"):
    """
    Display the Flash (swf) panoramas attached to this record, i.e. the
    latest files of doctype 'panorama' whose format is '.swf'.

    @param separator: printed between each panorama
    @param width: width of each panorama
    @param height: height of each panorama
    @return: placeholder <div>s followed by the scripts that embed the
        panoramas, or "" when the record has no panorama
    """
    swf_urls = [bibdocfile.get_url()
                for bibdocfile in
                BibRecDocs(bfo.recID).list_latest_files(doctype='panorama')
                if bibdocfile.get_format() == '.swf']
    if not swf_urls:
        return ""

    boxes = []
    embed_calls = []
    # One placeholder box and one embedpano() call per panorama, both
    # numbered from 0 in the order the files were found.
    for pano_index, swf_file in enumerate(swf_urls):
        boxes.append(
            '<div id="panoramabox%i" style="margin:auto"></div>' % pano_index)
        embed_calls.append(
            'embedpano({swf:"%(swf_file)s", target:"panoramabox%(pano_index)s", width:"%(width)s", height:"%(height)s"});'
            % {'swf_file': swf_file,
               'pano_index': pano_index,
               'width': width,
               'height': height})

    return (separator.join(boxes)
            + '<script type="text/javascript" src="/js/swfkrpano.js"></script>'
            + '<script type="text/javascript">'
            + ''.join(embed_calls)
            + '</script>')
예제 #15
0
 def setUp(self):
     """Attach the test JPEG to record 2 and keep the created objects on self."""
     self.my_bibrecdoc = BibRecDocs(2)
     # Ask the record for a docname based on 'file' for the new document.
     self.unique_name = self.my_bibrecdoc.propose_unique_docname('file')
     self.my_bibdoc = self.my_bibrecdoc.add_new_file(
         CFG_PREFIX + '/lib/webtest/invenio/test.jpg',
         docname=self.unique_name)
     # Remember the id of the document that was just added.
     self.my_bibdoc_id = self.my_bibdoc.id
def insert_docfiles_in_modify_form(recid):
    """
    Return the HTML display of the files attached to a record, with the
    "hgf_file file(s):" heading removed.

    @param recid: id of the record whose files are displayed
    @return: the HTML produced by BibRecDocs.display() without the heading
    """
    heading = "<small><b>hgf_file</b> file(s):</small>"
    # display() already returns formatted HTML; only the heading is
    # stripped from it.
    return BibRecDocs(recid).display().replace(heading, "")
예제 #17
0
    def _getfile_py(req, recid=0, docid=0, version="", name="", docformat="", ln=CFG_SITE_LANG):
        """
        Redirect a request to the "/files/" URL of a record's document.

        When recid is missing it is looked up from docid; when recid is
        given but name is not, the name is looked up from docid.

        @param req: request object handed to warning_page / redirect_to_url
        @param recid: record id (a false value means "derive it from docid")
        @param docid: document id, used to find a missing recid or name
        @param version: optional version, added to the query string
        @param name: document name used in the URL
        @param docformat: format, normalized with normalize_format
        @param ln: language code, passed as the "ln" query argument
        @return: a warning page when a lookup fails or when neither recid
            nor docid is given; otherwise nothing is returned after the
            redirect has been issued
        """
        if not recid:
            ## Let's obtain the recid from the docid
            if docid:
                try:
                    bibdoc = BibDoc(docid=docid)
                    recid = bibdoc.bibrec_links[0]["recid"]
                except InvenioBibDocFileError:
                    return warning_page(_("An error has happened in trying to retrieve the requested file."), req, ln)
            else:
                return warning_page(_("Not enough information to retrieve the document"), req, ln)
        else:
            brd = BibRecDocs(recid)
            if not name and docid:
                ## Let's obtain the name from the docid
                try:
                    name = brd.get_docname(docid)
                except InvenioBibDocFileError:
                    return warning_page(_("An error has happened in trying to retrieving the requested file."), req, ln)

        docformat = normalize_format(docformat)

        # The version is a second query argument, so it must be separated
        # from "ln=..." with "&".
        if version:
            version_arg = "&version=%s" % version
        else:
            version_arg = ""

        redirect_to_url(
            req,
            "%s/%s/%s/files/%s%s?ln=%s%s"
            % (CFG_SITE_URL, CFG_SITE_RECORD, recid, name, docformat, ln, version_arg),
            apache.HTTP_MOVED_PERMANENTLY,
        )
예제 #18
0
def check_records(records):
    """
    Set the 980 $c subfield of Hindawi records from the subject found in
    their XML file.

    Records whose 980 $b is "Hindawi" are processed. The subject is read
    from the "xml" file of the record's first bibdoc. For editorial-type
    subjects the upper-cased subject is written to 980 $c: an existing
    value from the known list is amended, otherwise a $c subfield is
    added to the first 980 field.

    @param records: iterable of records to check
    @raise Exception: when the subject is neither an editorial-type one
        nor "Review Article", "Research Article" or "Retraction"
    """
    editorial_subjects = ["Editorial", "Erratum", "Corrigendum", "Addendum",
                          "Letter to the Editor"]
    known_collections = ['ERRATUM', 'ADDENDUM', 'EDITORIAL', 'CORRIGENDUM',
                         'LETTER TO THE EDITOR']
    regular_subjects = ["Review Article", "Research Article", "Retraction"]

    for record in records:
        ## Stupid hack because bibcheck filters does not work as expected
        if record_get_field_value(record, '980', code='b') != "Hindawi":
            continue

        record.warn("Working on this record")
        recdoc = BibRecDocs(int(record.record_id))
        doc = recdoc.get_bibdoc(recdoc.get_bibdoc_names()[0])
        try:
            xml_file = open(doc.get_file("xml").get_full_path())
        except Exception:
            record.warn("No document can be found")
            continue
        try:
            xml2 = xml.dom.minidom.parseString(xml_file.read())
        finally:
            # Always release the file handle, even if parsing fails.
            xml_file.close()

        subject = get_value_in_tag(xml2, "subject")
        if subject in editorial_subjects:
            field = record_get_field_value(record, '980', code='c')
            if field in known_collections:
                # Replace the first existing 980 $c value.
                for position, value in record.iterfield('980__c'):
                    record.amend_field(position, subject.upper())
                    break
            else:
                # No $c yet, or one outside the known list: add a $c to
                # the first 980 field.
                for position, value in record.iterfield('980__%'):
                    record.add_subfield(position, 'c', subject.upper())
                    break
        elif subject not in regular_subjects:
            raise Exception("This subject: %s does not exit in SCOAP3 system" % (subject,))
예제 #19
0
def format_element(bfo, width="", caption="yes", max="3"):
    """
    Display image of the plot if we are in selected plots collections

    Every latest file of type "Plot" attached to the record is rendered as
    an image linking to the record's plots page. The first five characters
    of the file description give the plot index (used for ordering and as
    the link anchor); the rest of the description becomes the image title.

    @param width: the width of the returned image (Eg: '100px')
    @param caption: display the captions or not? (not used by this code)
    @param max: the maximum number of plots to display (-1 is all plots)
    @return: HTML string with the plots, or '' when there is no plot
    """
    ## To achieve this, we take the pngs associated with this document

    max_plots = int(max)

    if width != "":
        width = 'width="%s"' % width

    plots = []
    for doc in BibRecDocs(bfo.recID).list_bibdocs():
        for _file in doc.list_latest_files():
            if _file.get_type() != "Plot":
                continue

            try:
                description = _file.get_description()
                caption_text = description[5:]
                index = int(description[:5])
                img_location = _file.get_url()
            except Exception:
                # FIXME: we have hit probably a plot context file,
                # so ignore this document; but it would be safer
                # to check subformat type, so that we don't mask
                # other eventual errors here.
                continue

            img = '<img style="vertical-align:middle;" src="%s" title="%s" %s/>' % \
                  (img_location, caption_text, width)

            # Kept in its own variable: sharing one name with the
            # "Show more plots" link would append the last plot a second
            # time whenever no limit is set.
            plot_link = create_html_link(urlbase='%s/%s/%s/plots#%d' %
                                         (CFG_SITE_URL, CFG_SITE_RECORD,
                                          bfo.recID, index),
                                         urlargd={},
                                         link_label=img)

            plots.append((index, plot_link))

    plots.sort(key=lambda plot: plot[0])

    more_link = ""
    if max_plots > 0:
        plots = plots[:max_plots]
        more_link = "<a href='/record/" + bfo.control_field(
            '001') + "/plots'>Show more plots</a>"

    if not plots:
        return ''

    plot_links = [plot_link for _index, plot_link in plots]

    return '<div style="overflow-x:auto;display:inline;width:100%;">' +\
           " ".join(plot_links) + ' ' + more_link + '</div>'
예제 #20
0
def get_rawtext_from_record(record):
    """
    Return the text of the file that get_latest_pdf picks among the
    latest files of the record.

    @param record: object whose record_id attribute identifies the record
    @return: the extracted text, or '' when it cannot be obtained
    """
    latest_files = BibRecDocs(record.record_id).list_latest_files()
    chosen = get_latest_pdf(latest_files)
    try:
        return chosen.bibdoc.get_text()
    except:
        # Any failure while reading the text yields an empty result.
        return ''
예제 #21
0
def record_has_arxiv_pdf(recid=None):
    """
    Tell whether a record has a latest file of doctype "arXiv" whose
    format is '.pdf' or '.pdfa' (compared case-insensitively).

    @param recid: id of the record; None always gives False
    @return: True if such a file exists, False otherwise
    """
    if recid is None:
        return False
    arxiv_files = BibRecDocs(recid).list_latest_files(doctype="arXiv")
    return any(arxiv_file.format.lower() in ('.pdf', '.pdfa')
               for arxiv_file in arxiv_files)
def check_records(records):
    """
    Rebuild the author fields (100/700) of Springer records from the
    authors found in their attached XML files.

    For every latest '.xml' file of a record accepted by is_springer, the
    authors are parsed (JATSParser when the content mentions the JATS
    DTD, NLMParser otherwise). The existing 100 and 700 fields are then
    removed and one field per author is added: 100 for the first author,
    700 for the others.

    @param records: iterable of records to check
    """
    for record in records:
        if not is_springer(record):
            continue

        rec_doc = BibRecDocs(int(record.record_id))
        for doc in rec_doc.list_latest_files():
            if doc.get_format() != '.xml':
                continue

            xml_file = open(doc.get_full_path())
            try:
                content = xml_file.read()
            finally:
                # Do not leak the file handle.
                xml_file.close()

            # Parse before touching the record, so that an unparsable
            # file does not leave the record stripped of its authors.
            try:
                if "-//NLM//DTD JATS" in content:
                    parser = JATSParser()
                else:
                    parser = NLMParser()
                authors = parser.get_authors(parseString(content))
            except Exception:
                record.warn('Problem with parsing XML.')
                continue

            # Remove each tag independently: a missing 100 must not
            # prevent the removal of 700.
            # Assumes the record raises KeyError for an absent tag, as a
            # mapping does -- TODO confirm.
            for tag in ('100', '700'):
                try:
                    del record[tag]
                except KeyError:
                    continue
                record.amended = True

            first_author = True
            for author in authors:
                if author.get('surname'):
                    subfields = [
                        ('a',
                         '%s, %s' % (author.get('surname'),
                                     author.get('given_name')
                                     or author.get('initials', '')))
                    ]
                else:
                    subfields = [('a', '%s' % (author.get('name', '')))]
                if 'orcid' in author:
                    subfields.append(('j', author['orcid']))
                if 'affiliation' in author:
                    for aff in author["affiliation"]:
                        subfields.append(('v', aff))

                add_nations_field(subfields)

                if author.get('email'):
                    subfields.append(('m', author['email']))
                if first_author:
                    record.add_field('100__',
                                     value='',
                                     subfields=subfields)
                    first_author = False
                else:
                    record.add_field('700__',
                                     value='',
                                     subfields=subfields)
예제 #23
0
    def test_BibDocFiles(self):
        """bibdocfile - BibDocFile functions """
        # Attach test.jpg to record 2 as a 'Main' document named
        # 'img_test', with a fixed modification date.
        from invenio.bibdocfile import BibRecDocs
        my_bibrecdoc = BibRecDocs(2)
        timestamp = datetime(*(time.strptime("2010-09-08 07:06:05", "%Y-%m-%d %H:%M:%S")[:6]))
        my_bibrecdoc.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg', 'Main', 'img_test', False, 'test add new file', 'test', '.jpg', modification_date=timestamp)

        my_new_bibdoc = my_bibrecdoc.get_bibdoc("img_test")
        try:
            my_new_bibdocfile = my_new_bibdoc.list_all_files()[0]
            #get url
            self.assertEqual(my_new_bibdocfile.get_url(), CFG_SITE_URL + '/%s/2/files/img_test.jpg' % CFG_SITE_RECORD)
            #get type
            self.assertEqual(my_new_bibdocfile.get_type(), 'Main')
            # The concrete path (get_path / get_full_path) is deliberately
            # not checked: it belongs to the underlying implementation,
            # not to the interface under test.
            #get bibdocid
            self.assertEqual(my_new_bibdocfile.get_bibdocid(), my_new_bibdoc.get_id())
            #get name
            self.assertEqual(my_new_bibdocfile.get_name(), 'img_test')
            #get full name
            self.assertEqual(my_new_bibdocfile.get_full_name(), 'img_test.jpg')
            #get format
            self.assertEqual(my_new_bibdocfile.get_format(), '.jpg')
            #get version
            self.assertEqual(my_new_bibdocfile.get_version(), 1)
            #get description
            self.assertEqual(my_new_bibdocfile.get_description(), my_new_bibdoc.get_description('.jpg', version=1))
            #get comment
            self.assertEqual(my_new_bibdocfile.get_comment(), my_new_bibdoc.get_comment('.jpg', version=1))
            #get recid
            self.assertEqual(my_new_bibdocfile.get_recid(), 2)
            #get status
            self.assertEqual(my_new_bibdocfile.get_status(), '')
            #get size
            self.assertEqual(my_new_bibdocfile.get_size(), 91750)
            #get checksum
            self.assertEqual(my_new_bibdocfile.get_checksum(), '28ec893f9da735ad65de544f71d4ad76')
            #check
            self.assertEqual(my_new_bibdocfile.check(), True)
            #display
            import invenio.template
            tmpl = invenio.template.load("bibdocfile")
            value = tmpl.tmpl_display_bibdocfile(my_new_bibdocfile, ln='en')
            # assertTrue instead of a bare assert, which is stripped under -O.
            self.assertTrue('files/img_test.jpg?version=1">' in value)
            #hidden?
            self.assertEqual(my_new_bibdocfile.hidden_p(), False)
            #check modification date
            self.assertEqual(my_new_bibdocfile.md, timestamp)
        finally:
            # Delete the document even when an assertion above fails, so a
            # failing run does not leave 'img_test' attached to record 2.
            my_new_bibdoc.delete()
        #delete
        self.assertEqual(my_new_bibdoc.deleted_p(), True)
예제 #24
0
def format(bfo, width="", caption="yes", max="-1"):
    """
    Display image of the plot if we are in selected plots collections

    Every latest file of type "Plot" attached to the record is rendered as
    an <img> wrapped in a link to the record's plots page. The first five
    characters of the file description are parsed as the plot index (used
    for ordering and as the link anchor); the rest of the description is
    used as the image title.

    @param width: the width of the returned image (Eg: '100px')
    @param caption: display the captions or not? (accepted for
        compatibility; not read by this function)
    @param max: the maximum number of plots to display (-1 is all plots)
    @return: the HTML snippet, or '' when no plot could be rendered
    """
    ## To achieve this, we take the pngs associated with this document

    img_files = []
    max = int(max)

    bibarchive = BibRecDocs(bfo.recID)

    if width != "":
        width = 'width="%s"' % width

    for doc in bibarchive.list_bibdocs():
        for _file in doc.list_latest_files():
            if _file.get_type() != "Plot":
                continue

            try:
                description = _file.get_description()
                caption_text = description[5:]
                index = int(description[:5])
                img_location = _file.get_url()
            except Exception:
                # FIXME: we have hit probably a plot context file,
                # so ignore this document; but it would be safer
                # to check subformat type, so that we don't mask
                # other eventual errors here.
                continue

            img = '<img src="%s" title="%s" %s/>' % \
                  (img_location, caption_text, width)

            link = create_html_link(urlbase='%s/record/%s/plots#%d' %
                                            (CFG_SITE_URL, bfo.recID,
                                             index),
                                    urlargd={},
                                    link_label=img)

            img_files.append((index, link))

    # order the plots by their numeric index before truncating
    img_files = sorted(img_files, key=lambda x: x[0])
    if max > 0:
        img_files = img_files[:max]

    if not img_files:
        return ''

    links = [link for (dummy_index, link) in img_files]

    # "width:100%" (the original had "width=100%", which is not a valid
    # CSS declaration and was therefore ignored by browsers)
    return '<div style="overflow-x:scroll;width:100%;white-space:nowrap">' +\
           " ".join(links) + '</div>'
예제 #25
0
def Add_Files(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    Attach every entry of "<curdir>/files" to the record as a "Main"
    document, skipping entries for which check_file_exists() is true.
    The record id is read from the name ``sysno``, which is not defined in
    this function and must be supplied by the surrounding scope.

    @return: always the empty string
    """
    files_dir = "%s/files" % curdir
    if not os.path.exists(files_dir):
        return ""

    bibrecdocs = BibRecDocs(sysno)
    for entry in os.listdir(files_dir):
        fullpath = "%s/files/%s" % (curdir, entry)
        if bibrecdocs.check_file_exists(fullpath):
            continue
        bibrecdocs.add_new_file(fullpath, "Main", never_fail=True)
    return ""
예제 #26
0
def get_filetypes(recid):
    """
        Returns filetypes extensions associated with given record.

        @param recid: recid of a record
        @return: list holding the _get_filetype() result for the format
            of each latest file of the record
    """
    filetypes = []
    for docfile in BibRecDocs(recid).list_latest_files():
        filetypes.append(_get_filetype(docfile.format))
    return filetypes
예제 #27
0
def Add_Files(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    When "<curdir>/files" exists, add each of its entries to the record
    (id taken from the externally provided name ``sysno``) as a "Main"
    document unless check_file_exists() reports it is already there.

    @return: always the empty string
    """
    upload_dir = "%s/files" % curdir
    if os.path.exists(upload_dir):
        record_docs = BibRecDocs(sysno)
        for name in os.listdir(upload_dir):
            candidate = "%s/files/%s" % (curdir, name)
            already_attached = record_docs.check_file_exists(candidate)
            if not already_attached:
                record_docs.add_new_file(candidate, "Main", never_fail=True)
    return ""
예제 #28
0
def get_rawtext_from_record_id(record_id):
    """Return the text of the file picked by get_latest_pdf() for a record.

    @param record_id: id of the record whose latest files are inspected
    @return: the result of get_text() on the selected file's bibdoc, or
        '' when obtaining it raises anything
    """
    latest_files = BibRecDocs(record_id).list_latest_files()
    selected = get_latest_pdf(latest_files)
    try:
        return selected.bibdoc.get_text()
    except:
        # best effort: any failure while reaching the text yields ''
        return ''
예제 #29
0
def get_filetypes(recid):
    """
        Returns filetypes extensions associated with given record.

        @param recid: recid of a record
        @return: list of _get_filetype() results, one per latest file
    """
    latest_files = BibRecDocs(recid).list_latest_files()
    formats = [docfile.format for docfile in latest_files]
    return [_get_filetype(fmt) for fmt in formats]
    def test_BibDocFiles(self):
        """bibdocfile - BibDocFile functions """
        # Imported locally so that the "display" check below does not rely
        # on a module-level import of invenio.template.
        import invenio.template
        #add bibdoc
        my_bibrecdoc = BibRecDocs(2)
        # fixed modification date, checked against the "md" attribute below
        timestamp = datetime(*(time.strptime("2010-09-08 07:06:05", "%Y-%m-%d %H:%M:%S")[:6]))
        my_bibrecdoc.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg', 'Main', 'img_test', False, 'test add new file', 'test', '.jpg', modification_date=timestamp)

        my_new_bibdoc = my_bibrecdoc.get_bibdoc("img_test")
        my_new_bibdocfile = my_new_bibdoc.list_all_files()[0]
        #get url
        self.assertEqual(my_new_bibdocfile.get_url(), CFG_SITE_URL + '/%s/2/files/img_test.jpg' % CFG_SITE_RECORD)
        #get type
        self.assertEqual(my_new_bibdocfile.get_type(), 'Main')
        #get path
        # we should not test for a particular path: that is a matter of the
        # underlying implementation, not of the interface which should be tested
        #        self.assert_(my_new_bibdocfile.get_path().startswith(CFG_BIBDOCFILE_FILEDIR))
        #        self.assert_(my_new_bibdocfile.get_path().endswith('/img_test.jpg;1'))
        #get bibdocid
        self.assertEqual(my_new_bibdocfile.get_bibdocid(), my_new_bibdoc.get_id())
        #get name
        self.assertEqual(my_new_bibdocfile.get_name() , 'img_test')
        #get full name
        self.assertEqual(my_new_bibdocfile.get_full_name() , 'img_test.jpg')
        #get full path
        #self.assert_(my_new_bibdocfile.get_full_path().startswith(CFG_BIBDOCFILE_FILEDIR))
        #self.assert_(my_new_bibdocfile.get_full_path().endswith('/img_test.jpg;1'))
        #get format
        self.assertEqual(my_new_bibdocfile.get_format(), '.jpg')
        #get version
        self.assertEqual(my_new_bibdocfile.get_version(), 1)
        #get description
        self.assertEqual(my_new_bibdocfile.get_description(), my_new_bibdoc.get_description('.jpg', version=1))
        #get comment
        self.assertEqual(my_new_bibdocfile.get_comment(), my_new_bibdoc.get_comment('.jpg', version=1))
        #get recid
        self.assertEqual(my_new_bibdocfile.get_recid(), 2)
        #get status
        self.assertEqual(my_new_bibdocfile.get_status(), '')
        #get size
        self.assertEqual(my_new_bibdocfile.get_size(), 91750)
        #get checksum
        self.assertEqual(my_new_bibdocfile.get_checksum(), '28ec893f9da735ad65de544f71d4ad76')
        #check
        self.assertEqual(my_new_bibdocfile.check(), True)
        #display
        tmpl = invenio.template.load("bibdocfile")
        value = tmpl.tmpl_display_bibdocfile(my_new_bibdocfile, ln='en')
        # assertTrue instead of a bare assert: the check survives "python -O"
        # and a failure is reported through the unittest machinery
        self.assertTrue('files/img_test.jpg?version=1">' in value)
        #hidden?
        self.assertEqual(my_new_bibdocfile.hidden_p(), False)
        #check modification date
        self.assertEqual(my_new_bibdocfile.md, timestamp)
        #delete
        my_new_bibdoc.delete()
        self.assertEqual(my_new_bibdoc.deleted_p(), True)
예제 #31
0
def get_pdf(recid):
    """Locate the first PDF attached to a record.

    The bibdocs of the record are scanned in order; for each one the
    ".pdf" format is tried before ".pdf;pdfa".

    @param recid: id of the record
    @return: tuple (checksum, url, format) of the first match, or
        (None, None, None) when no bibdoc has either format
    """
    for bibdoc in BibRecDocs(recid).list_bibdocs():
        for docformat in (".pdf", ".pdf;pdfa"):
            if bibdoc.format_already_exists_p(docformat):
                docfile = bibdoc.get_file(docformat)
                return docfile.checksum, docfile.url, docformat
    return None, None, None
예제 #32
0
def list_pdfs(recid):
    """Generate the PDF files of a record.

    For every bibdoc of the record, get_file() is tried with each of the
    extensions 'pdf', 'pdfa' and 'PDF'; an extension for which
    InvenioBibDocFileError is raised is silently skipped.

    @param recid: id of the record
    """
    extensions = ('pdf', 'pdfa', 'PDF')
    for bibdoc in BibRecDocs(recid).list_bibdocs():
        for extension in extensions:
            try:
                yield bibdoc.get_file(extension)
            except InvenioBibDocFileError:
                continue
예제 #33
0
def list_pdfs(recid):
    """Yield whatever get_file() returns for the PDF extensions of a record.

    Each bibdoc of the record is queried for "pdf", "pdfa" and "PDF" in
    that order; queries raising InvenioBibDocFileError produce nothing.

    @param recid: id of the record
    """
    bibdocs = BibRecDocs(recid).list_bibdocs()
    wanted = ("pdf", "pdfa", "PDF")

    for bibdoc in bibdocs:
        for wanted_ext in wanted:
            try:
                yield bibdoc.get_file(wanted_ext)
            except InvenioBibDocFileError:
                # this bibdoc has no such file: try the next extension
                pass
class BibDocFsInfoTest(unittest.TestCase):
    """Regression tests about the table bibdocfsinfo"""

    def setUp(self):
        self.my_bibrecdoc = BibRecDocs(2)
        self.unique_name = self.my_bibrecdoc.propose_unique_docname("file")
        self.my_bibdoc = self.my_bibrecdoc.add_new_file(
            CFG_PREFIX + "/lib/webtest/invenio/test.jpg", docname=self.unique_name
        )
        self.my_bibdoc_id = self.my_bibdoc.id

    def tearDown(self):
        self.my_bibdoc.expunge()

    def test_hard_delete(self):
        """bibdocfile - test correct update of bibdocfsinfo when hard-deleting"""
        self.assertEqual(
            run_sql("SELECT MAX(version) FROM bibdocfsinfo WHERE id_bibdoc=%s", (self.my_bibdoc_id,))[0][0], 1
        )
        self.assertEqual(
            run_sql(
                "SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=1 AND format='.jpg'",
                (self.my_bibdoc_id,),
            )[0][0],
            True,
        )
        self.my_bibdoc.add_file_new_version(CFG_PREFIX + "/lib/webtest/invenio/test.gif")
        self.assertEqual(
            run_sql("SELECT MAX(version) FROM bibdocfsinfo WHERE id_bibdoc=%s", (self.my_bibdoc_id,))[0][0], 2
        )
        self.assertEqual(
            run_sql(
                "SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=2 AND format='.gif'",
                (self.my_bibdoc_id,),
            )[0][0],
            True,
        )
        self.assertEqual(
            run_sql(
                "SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=1 AND format='.jpg'",
                (self.my_bibdoc_id,),
            )[0][0],
            False,
        )
        self.my_bibdoc.delete_file(".gif", 2)
        self.assertEqual(
            run_sql("SELECT MAX(version) FROM bibdocfsinfo WHERE id_bibdoc=%s", (self.my_bibdoc_id,))[0][0], 1
        )
        self.assertEqual(
            run_sql(
                "SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=1 AND format='.jpg'",
                (self.my_bibdoc_id,),
            )[0][0],
            True,
        )
예제 #35
0
def look_for_fulltext(recid):
    """Yield (bibdoc, bibdocfile) pairs for the PDF files of a record.

    All files (every version) of every bibdoc are inspected; a file is
    kept when its format, with leading/trailing dots stripped, is one of
    'pdf', 'pdfa' or 'PDF'.

    @param recid: id of the record
    """
    pdf_formats = ('pdf', 'pdfa', 'PDF')
    for bibdoc in BibRecDocs(recid).list_bibdocs():
        for docfile in bibdoc.list_all_files():
            if docfile.get_format().strip('.') not in pdf_formats:
                continue
            try:
                yield bibdoc, docfile
            except InvenioWebSubmitFileError:
                pass
예제 #36
0
def look_for_fulltext(recid):
    """Return a lazy iterable of the record's bibdocs that hold a PDF.

    A bibdoc qualifies when at least one of its files (any version) has a
    format which, stripped of surrounding dots, is 'pdf', 'pdfa' or 'PDF'.

    @param recid: id of the record
    @return: generator over the qualifying bibdocs
    """
    def has_pdf(bibdoc):
        return any(docfile.get_format().strip('.') in ('pdf', 'pdfa', 'PDF')
                   for docfile in bibdoc.list_all_files())

    candidates = BibRecDocs(recid).list_bibdocs()
    return (bibdoc for bibdoc in candidates if has_pdf(bibdoc))
        def look_for_fulltext(recid):
            """Look for fulltext pdf (bibdocfile) for a given recid

            Yields (bibdoc, bibdocfile) for each file, of any version,
            whose dot-stripped format is 'pdf', 'pdfa' or 'PDF'.
            """
            accepted = ('pdf', 'pdfa', 'PDF')
            for bibdoc in BibRecDocs(recid).list_bibdocs():
                for docfile in bibdoc.list_all_files():
                    if docfile.get_format().strip('.') not in accepted:
                        continue
                    try:
                        yield bibdoc, docfile
                    except InvenioBibDocFileError:
                        pass
        def look_for_fulltext(recid):
            """Look for fulltext pdf (bibdocfile) for a given recid

            Generator of (bibdoc, bibdocfile) tuples, one per file whose
            format (surrounding dots removed) is 'pdf', 'pdfa' or 'PDF'.
            """
            bibdocs = BibRecDocs(recid).list_bibdocs()

            for bibdoc in bibdocs:
                all_files = bibdoc.list_all_files()
                for docfile in all_files:
                    bare_format = docfile.get_format().strip('.')
                    if bare_format in ('pdf', 'pdfa', 'PDF'):
                        try:
                            yield bibdoc, docfile
                        except InvenioBibDocFileError:
                            pass
예제 #39
0
def Add_Files(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    Attach each entry of "<curdir>/files" to the record (id read from the
    externally provided name ``sysno``) as a "Main" document. An entry is
    skipped when check_file_exists() is true for its path and its
    dot-prefixed extension as computed by decompose_file().

    @return: always the empty string
    """
    files_dir = "%s/files" % curdir
    if not os.path.exists(files_dir):
        return ""

    bibrecdocs = BibRecDocs(sysno)
    for entry in os.listdir(files_dir):
        fullpath = "%s/files/%s" % (curdir, entry)
        dummy_dir, dummy_name, extension = decompose_file(entry)
        # make sure a non-empty extension starts with a dot
        if extension and not extension.startswith("."):
            extension = '.' + extension
        if bibrecdocs.check_file_exists(fullpath, extension):
            continue
        bibrecdocs.add_new_file(fullpath, "Main", never_fail=True)
    return ""
예제 #40
0
def Add_Files(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    When "<curdir>/files" exists, every entry in it that
    check_file_exists() does not already know (looked up by full path and
    dot-prefixed extension) is added to the record as a "Main" document.
    The record id comes from the name ``sysno``, which this function does
    not define itself.

    @return: always the empty string
    """
    upload_dir = "%s/files" % curdir
    if os.path.exists(upload_dir):
        record_docs = BibRecDocs(sysno)
        for name in os.listdir(upload_dir):
            candidate = "%s/files/%s" % (curdir, name)
            dummy_path, dummy_base, ext = decompose_file(name)
            if ext and ext[0] != ".":
                # normalise "pdf" into ".pdf"
                ext = '.' + ext
            known = record_docs.check_file_exists(candidate, ext)
            if not known:
                record_docs.add_new_file(candidate, "Main", never_fail=True)
    return ""
예제 #41
0
    def get_pdfa_record(self, path=None):
        """Build the record XML that attaches the package's PDF to a paper.

        The DOI is read from the article found at `path`; the target
        record is looked up by that DOI among the non-deleted records.
        'main_a-2b.pdf' is preferred over 'main.pdf'; when neither file
        exists the problem is registered and logged, and the record is
        produced without a new FFT field. For an existing record, all its
        latest files except the '.pdf;pdfa' ones are re-listed as FFT
        fields.

        @param path: directory of the package holding the article and PDFs
        @return: output of record_xml_output() for the built record
        """
        from invenio.search_engine import perform_request_search

        article = self.get_article(path)
        rec = create_record()
        # only the ninth returned value (the DOI) is of interest here
        (dummy, dummy, dummy, dummy, dummy, dummy, dummy,
         dummy, doi) = self.get_publication_information(article)

        query = '0247_a:"%s" AND NOT 980:"DELETED"' % (doi, )
        recid = perform_request_search(p=query)
        if not recid:
            # unknown DOI: identify the paper by its DOI instead of its id
            record_add_field(rec,
                             '024',
                             ind1='7',
                             subfields=[('a', doi), ('2', 'DOI')])
            self.logger.error(('Adding PDF/A. No paper with this DOI: '
                               '%s. Trying to add it anyway.') % (doi, ))
        else:
            record_add_field(rec, '001', controlfield_value=recid[0])

        pdfa_path = join(path, 'main_a-2b.pdf')
        pdf_path = join(path, 'main.pdf')
        try:
            if exists(pdfa_path):
                pdfa_subfields = [('a', pdfa_path),
                                  ('n', 'main'),
                                  ('f', '.pdf;pdfa')]
                record_add_field(rec, 'FFT', subfields=pdfa_subfields)
                self.logger.debug('Adding PDF/A to record: %s' % (doi, ))
            elif exists(pdf_path):
                record_add_field(rec, 'FFT', subfields=[('a', pdf_path)])
                self.logger.debug('No PDF/A in VTEX package for record: ' + doi)
            else:
                raise MissingFFTError(
                    "Record %s doesn't contain PDF file." % (doi, ))
        except MissingFFTError:
            # NOTE(review): raised just above and caught right here,
            # presumably so register_exception() has an exception to report
            warning = "Elsevier paper: %s is missing PDF." % (doi, )
            register_exception(alert_admin=True, prefix=warning)
            self.logger.warning(warning)

        ## copy other formats to bibupload file
        if recid:
            from invenio.bibdocfile import BibRecDocs
            for bibfile in BibRecDocs(recid[0]).list_latest_files():
                if bibfile.get_format() == '.pdf;pdfa':
                    continue
                kept_subfields = [('a', bibfile.get_full_path()),
                                  ('n', bibfile.get_name()),
                                  ('f', bibfile.get_format())]
                record_add_field(rec, 'FFT', subfields=kept_subfields)
        return record_xml_output(rec)
 def test_BibDocFiles(self):
     """bibdocfile - BibDocFile functions """
     #add bibdoc
     my_bibrecdoc = BibRecDocs(2)
     my_bibrecdoc.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg', 'Main', 'img_test', False, 'test add new file', 'test', '.jpg')
     my_new_bibdoc = my_bibrecdoc.get_bibdoc("img_test")
     my_new_bibdocfile = my_new_bibdoc.list_all_files()[0]
     #get url
     self.assertEqual(my_new_bibdocfile.get_url(), CFG_SITE_URL + '/record/2/files/img_test.jpg')
     #get type
     self.assertEqual(my_new_bibdocfile.get_type(), 'Main')
     #get path
     self.assert_(my_new_bibdocfile.get_path().startswith(CFG_WEBSUBMIT_FILEDIR))
     self.assert_(my_new_bibdocfile.get_path().endswith('/img_test.jpg;1'))
     #get bibdocid
     self.assertEqual(my_new_bibdocfile.get_bibdocid(), my_new_bibdoc.get_id())
     #get name
     self.assertEqual(my_new_bibdocfile.get_name() , 'img_test')
     #get full name
     self.assertEqual(my_new_bibdocfile.get_full_name() , 'img_test.jpg')
     #get full path
     self.assert_(my_new_bibdocfile.get_full_path().startswith(CFG_WEBSUBMIT_FILEDIR))
     self.assert_(my_new_bibdocfile.get_full_path().endswith('/img_test.jpg;1'))
     #get format
     self.assertEqual(my_new_bibdocfile.get_format(), '.jpg')
     #get version
     self.assertEqual(my_new_bibdocfile.get_version(), 1)
     #get description
     self.assertEqual(my_new_bibdocfile.get_description(), my_new_bibdoc.get_description('.jpg', version=1))
     #get comment
     self.assertEqual(my_new_bibdocfile.get_comment(), my_new_bibdoc.get_comment('.jpg', version=1))
     #get recid
     self.assertEqual(my_new_bibdocfile.get_recid(), 2)
     #get status
     self.assertEqual(my_new_bibdocfile.get_status(), '')
     #get size
     self.assertEqual(my_new_bibdocfile.get_size(), 91750)
     #get checksum
     self.assertEqual(my_new_bibdocfile.get_checksum(), '28ec893f9da735ad65de544f71d4ad76')
     #check
     self.assertEqual(my_new_bibdocfile.check(), True)
     #display
     value = my_new_bibdocfile.display(ln='en')
     # use the TestCase assertion rather than a bare assert, so the check
     # is not stripped under "python -O" and fails through unittest
     self.assert_('files/img_test.jpg?version=1">' in value)
     #hidden?
     self.assertEqual(my_new_bibdocfile.hidden_p(), False)
     #delete
     my_new_bibdoc.delete()
     self.assertEqual(my_new_bibdoc.deleted_p(), True)
예제 #43
0
    def get_pdfa_record(self, path=None):
        """Return the record XML attaching the PDF found under `path`.

        Steps, in order: read the DOI of the article at `path`; search the
        non-deleted records for it (adding field 001 on a hit, or a 024 DOI
        field plus an error log entry otherwise); add an FFT field for
        'main_a-2b.pdf' or, failing that, 'main.pdf' (a missing PDF is
        registered and logged as a warning); finally, for a matched
        record, re-add FFT fields for its latest files that are not in
        '.pdf;pdfa' format.

        @param path: directory of the package
        @return: result of record_xml_output() on the assembled record
        """
        from invenio.search_engine import perform_request_search
        xml_doc = self.get_article(path)
        rec = create_record()
        publication_info = self.get_publication_information(xml_doc)
        # the call is expected to return exactly nine values; the DOI is last
        dummy, dummy, dummy, dummy, dummy, dummy, dummy,\
            dummy, doi = publication_info
        search_pattern = '0247_a:"%s" AND NOT 980:"DELETED"' % (doi,)
        recid = perform_request_search(p=search_pattern)
        if recid:
            record_add_field(rec, '001', controlfield_value=recid[0])
        else:
            doi_subfields = [('a', doi), ('2', 'DOI')]
            record_add_field(rec, '024', ind1='7', subfields=doi_subfields)
            message = ('Adding PDF/A. No paper with this DOI: '
                       '%s. Trying to add it anyway.') % (doi,)
            self.logger.error(message)
        try:
            if exists(join(path, 'main_a-2b.pdf')):
                fft_subfields = [('a', join(path, 'main_a-2b.pdf')),
                                 ('n', 'main'),
                                 ('f', '.pdf;pdfa')]
                record_add_field(rec, 'FFT', subfields=fft_subfields)
                self.logger.debug('Adding PDF/A to record: %s' % (doi,))
            elif exists(join(path, 'main.pdf')):
                fft_subfields = [('a', join(path, 'main.pdf'))]
                record_add_field(rec, 'FFT', subfields=fft_subfields)
                message = 'No PDF/A in VTEX package for record: ' + doi
                self.logger.debug(message)
            else:
                message = "Record %s doesn't contain PDF file." % (doi,)
                raise MissingFFTError(message)
        except MissingFFTError:
            message = "Elsevier paper: %s is missing PDF." % (doi,)
            register_exception(alert_admin=True, prefix=message)
            self.logger.warning(message)

        ## copy other formats to bibupload file
        if not recid:
            return record_xml_output(rec)

        from invenio.bibdocfile import BibRecDocs
        record = BibRecDocs(recid[0])
        for bibfile in record.list_latest_files():
            if bibfile.get_format() != '.pdf;pdfa':
                other_subfields = [('a', bibfile.get_full_path()),
                                   ('n', bibfile.get_name()),
                                   ('f', bibfile.get_format())]
                record_add_field(rec, 'FFT', subfields=other_subfields)
        return record_xml_output(rec)
예제 #44
0
def check_records(records):
    """Rebuild the author fields of Springer records from their XML file.

    For every record accepted by is_springer(), each latest '.xml' file is
    read, the existing 100/700 fields are dropped and new ones are created
    from the authors returned by the JATS or NLM parser: the first author
    goes to 100__, the others to 700__. A file that cannot be parsed only
    produces a warning on the record.

    @param records: iterable of record objects to amend in place
    """
    for record in records:
        if not is_springer(record):
            continue

        rec_doc = BibRecDocs(int(record.record_id))
        for doc in rec_doc.list_latest_files():
            if doc.get_format() != '.xml':
                continue

            # Read the file and release the handle immediately (it was
            # previously left open until garbage collection).
            f = open(doc.get_full_path())
            try:
                content = f.read()
            finally:
                f.close()

            # NOTE(review): if '100' is missing, '700' is not deleted and
            # record.amended is not set -- confirm this is intended.
            try:
                del record['100']
                del record['700']
                record.amended = True
            except Exception:
                pass

            first_author = True
            try:
                if "-//NLM//DTD JATS" in content:
                    jats = JATSParser()
                    authors = jats.get_authors(parseString(content))
                else:
                    app = NLMParser()
                    authors = app.get_authors(parseString(content))
            except Exception:
                record.warn('Problem with parsing XML.')
                continue

            for author in authors:
                if author.get('surname'):
                    subfields = [('a', '%s, %s' % (author.get('surname'), author.get('given_name') or author.get('initials', '')))]
                else:
                    subfields = [('a', '%s' % (author.get('name', '')))]
                if 'orcid' in author:
                    subfields.append(('j', author['orcid']))
                if 'affiliation' in author:
                    for aff in author["affiliation"]:
                        subfields.append(('v', aff))

                add_nations_field(subfields)

                if author.get('email'):
                    subfields.append(('m', author['email']))
                if first_author:
                    record.add_field('100__', value='', subfields=subfields)
                    first_author = False
                else:
                    record.add_field('700__', value='', subfields=subfields)
def upload_fulltext(recid, path):
    '''
        Attach the file at `path` to a record, in the collection given by
        CFG_DOCTYPE_UPLOAD_COLLECTION.

        The document name is the last '/'-separated component of `path`
        up to its first '.'; the format is the text after the last '.' of
        `path`, cut at the first ';'.

        @param recid: id of the record
        @param path: path of the uploaded document to store
        @return: always the empty string
    '''
    basename = path.rsplit('/', 1)[-1]
    docname = basename.split('.', 1)[0]
    doctype = path.rsplit('.', 1)[-1].split(';', 1)[0]

    # upload the file to the record
    BibRecDocs(recid).add_new_file(path, CFG_DOCTYPE_UPLOAD_COLLECTION,
                                   docname, format=doctype)

    return ''
예제 #46
0
def format_element(bfo, file_format='pdf'):
    """Return the files attached to this record, in order to be
    embedded as a Google Scholar tag.

    URLs are looked for in this order, stopping at the first source that
    yields something: the 'Main' doctype of the record's main URLs, then
    all doctypes of the main URLs, then the external ('others') URLs
    ending in '.<file_format>'. Main URLs reported hidden by
    url_is_hidden() are left out.

    @param file_format: the format to include in this output
    @return: one <meta name="citation_pdf_url"> tag per URL, joined by
        newlines; "" when CFG_WEBSEARCH_ENABLE_GOOGLESCHOLAR is off
    """
    if not CFG_WEBSEARCH_ENABLE_GOOGLESCHOLAR:
        return ""

    bibarchive = BibRecDocs(bfo.recID)

    (files, old_versions_p, additionals_p) = get_files(bfo)

    def visible_urls(list_of_files):
        # entries are indexed as f[0] = url and f[2] = format
        return [f[0] for f in list_of_files
                if f[2] == file_format and
                not url_is_hidden(f[0], bibarchive)]

    # "in" / values() instead of has_key() / iteritems(): the former work
    # on both Python 2 and Python 3 dictionaries
    filtered_files = []
    if 'main_urls' in files and 'Main' in files['main_urls']:
        filtered_files = visible_urls(files['main_urls']['Main'])
    if not filtered_files and 'main_urls' in files:
        # Fall back to other doctypes
        for list_of_files in files['main_urls'].values():
            filtered_files.extend(visible_urls(list_of_files))
    if not filtered_files and 'others_urls' in files:
        # Fall back to external urls
        filtered_files.extend([file_url
                               for file_url, file_name in files['others_urls']
                               if file_url.endswith('.' + file_format)])

    tags = ['<meta name="citation_pdf_url" content="%s" />' % url
            for url in filtered_files]

    return "\n".join(tags)
 def setUp(self):
     """Attach test.jpg to record 2 under a docname proposed as unique."""
     recdocs = BibRecDocs(2)
     self.my_bibrecdoc = recdocs
     self.unique_name = recdocs.propose_unique_docname("file")
     source_file = CFG_PREFIX + "/lib/webtest/invenio/test.jpg"
     self.my_bibdoc = recdocs.add_new_file(source_file, docname=self.unique_name)
     # remembered separately: the tests query the database by this id
     self.my_bibdoc_id = self.my_bibdoc.id
예제 #48
0
def add_link_to_fulltext(bfo, text):
    """
    Creates a link to fulltext on given text.

    For each document of the record, the first of its latest files is
    considered visible when its status is '' or 'PUBLIC'. With exactly one
    visible file the link points to that file; with several it points to
    the record's files page; with none the text is returned unchanged.

    @param bfo: object giving the record id through its recID attribute
    @param text: the text to wrap in the link
    @return: the text, wrapped in an <a> element when a fulltext is visible
    """
    documents = BibRecDocs(bfo.recID)

    # nothing attached at all: leave the text untouched
    if documents and len(documents.bibdocs) == 0:
        return text

    # check visibility
    visible_list = []
    for doc in documents.bibdocs:
        latest = doc.list_latest_files()
        if not len(latest):
            continue
        candidate = latest[0]
        if candidate.status in ('', 'PUBLIC'):
            visible_list.append(candidate)

    # build url
    if not visible_list:
        return text
    if len(visible_list) == 1:
        # only one, return a direct url to the last version
        return '<a href ="%s">%s</a>' % (visible_list[0].fullurl, text)
    return '<a href ="%s/record/%s/files">%s</a>' % (CFG_SITE_URL, bfo.recID, text)
def uncook_files(webdeposit_json, recid=None, json_reader=None):
    """Append file descriptions to webdeposit_json['files'].

    Without a recid, one {'name': ...} entry is added per item of
    json_reader['url'], the name being the last '/'-separated part of the
    item's 'url'. With a recid, one entry with 'name', 'file' (path) and
    'size' is added per latest file of that record.

    @param webdeposit_json: dictionary updated in place (a 'files' list
        is created when missing)
    @param recid: id of the record, or None to read from json_reader
    @param json_reader: mapping with a 'url' list; only read when recid
        is None
    @return: the same webdeposit_json object
    """
    if 'files' not in webdeposit_json:
        webdeposit_json['files'] = []
    collected = webdeposit_json['files']

    if recid is not None:
        latest_files = BibRecDocs(recid, human_readable=True).list_latest_files()
        for docfile in latest_files:
            filename = docfile.get_full_name()
            path = docfile.get_path()
            size = docfile.get_size()
            collected.append({'name': filename, 'file': path, 'size': size})
        return webdeposit_json

    for item in json_reader['url']:
        collected.append({'name': item['url'].split('/')[-1]})
    return webdeposit_json
예제 #50
0
def get_filenames(recid):
    """
        Returns names of the files associated with specific record
        and their derivatives. Takes as a parameter the recid of a
        record.

        Example:
        input: recID 999 (record with files ['thesis.ps.gz', 'random.pdf'])
        output: ['thesis.ps.gz', 'thesis.ps', 'thesis',
                 'random.pdf', 'random']
        @param recid: recid of a record
        @return: flat list of names; empty when the record has no files
    """
    filenames = []
    # Extending one list replaces reduce(lambda x, y: x + y, ...), which
    # raised TypeError for a record without files and is not a builtin
    # on Python 3.
    for docfile in BibRecDocs(recid).list_latest_files():
        filenames.extend(_get_filenames(docfile.name + docfile.format))
    return filenames
예제 #51
0
def generate_keywords(req, recid, argd):
    """Return stored keywords of a record, or offer/schedule their extraction.

    When the temporary file of the record exists and can be loaded, the
    keywords read from it are returned directly. In every other case a
    message or form is written to `req` and the tuple (0, {}, None) is
    returned: access denied, no file attached to the record, extraction
    already in progress, extraction form shown (argd['generate'] is not
    'yes'), or extraction freshly scheduled.

    NOTE(review): the two kinds of return value (keywords vs. 3-tuple)
    differ in shape -- callers must cope with both.

    @param req: req object
    @param recid: record id
    @param argd: arguments passed from web; 'ln' and 'generate' are read
    @return: see above
    """

    ln = argd['ln']
    _ = gettext_set_language(ln)
    keywords = {}
    nothing_generated = (0, keywords, None)
    warning_box = '<div class="warningbox">%s</div>'

    # check the files were not already generated
    abs_path = bibclassify_engine.get_tmp_file(recid)
    if os.path.exists(abs_path):
        try:
            # Try to load the data from the tmp file
            marc_file = bibupload.open_marc_file(abs_path)
            recs = bibupload.xml_marc_to_records(marc_file)
            return record_get_keywords(recs[0])
        except:
            pass

    # check it is allowed (for this user) to generate pages
    (exit_stat, msg) = acce.acc_authorize_action(req, 'runbibclassify')
    if exit_stat != 0:
        log.info('Access denied: ' + msg)
        msg = _("The site settings do not allow automatic keyword extraction")
        req.write(template.tmpl_page_msg(msg=msg))
        return nothing_generated

    # without any attached file there is nothing to extract from
    if not BibRecDocs(recid).list_latest_files():
        req.write(template.tmpl_page_msg(msg=warning_box %
                    _("Unfortunately, we don't have a PDF fulltext for this record in the storage, \
                    keywords cannot be generated using an automated process.")))
        return nothing_generated

    # User arrived at a page, but no keywords are available
    inprogress, msg = _doc_already_submitted(recid)
    generate_requested = argd['generate'] == 'yes'

    if inprogress:
        # same warning whether or not the "generate" button was clicked
        req.write(template.tmpl_page_msg(msg=warning_box % _(msg)))
    elif not generate_requested:
        # Display a form and give them possibility to generate keywords
        req.write(template.tmpl_page_generate_keywords(req=req, **argd))
    else:
        # after user clicked on "generate" button
        schedule_extraction(recid, taxonomy=bconfig.CFG_EXTRACTION_TAXONOMY)
        req.write(template.tmpl_page_msg(msg=warning_box %
                                         _('We have registered your request, the automated'
        'keyword extraction will run after some time. Please return back in a while.')))

    return nothing_generated
예제 #52
0
def check_record(record):
    """
    Validates the checksum of all the BibDocFile's in the record

    Each latest file of each document of the record is checked for
    existence on disk, for its checksum and, when a magic library is
    available (HAS_MAGIC), for agreement between the mime type stored on
    the file and the one detected from its content. Every failure is
    reported through record.set_invalid().

    @param record: the record to check; its id is read from field 001
    """
    record_id = record["001"][0][3]
    docs = BibRecDocs(record_id).list_bibdocs()
    for doc in docs:
        for bibfile in doc.list_latest_files():
            if not os.path.exists(bibfile.fullpath):
                record.set_invalid("File doesn't exists %s" % bibfile.fullpath)
                # the remaining checks need the file on disk
                continue

            if not bibfile.check():
                record.set_invalid("Invalid checksum for file %s" %
                                   bibfile.fullpath)

            if HAS_MAGIC:
                # HAS_MAGIC == 1 selects the magic.from_file() API, any
                # other true value the magic_object.file() one
                if HAS_MAGIC == 1:
                    magic_mime = magic.from_file(bibfile.fullpath, mime=True)
                else:
                    magic_mime = magic_object.file(bibfile.fullpath)

                if bibfile.mime != magic_mime:
                    # trailing space added: the two halves used to be
                    # glued together as "differentfrom"
                    record.set_invalid(
                        ("Guessed mime type from extension (%s) is different " +
                         "from guessed mime type from headers (%s)") %
                        (bibfile.mime, magic_mime))
예제 #53
0
def format_element(bfo):
    """
    Prints buttons to download all photos for each size

    One link is produced per entry of CFG_BIBDOCFILE_SUBFORMATS_TRANSLATIONS
    whose total size, as reported by get_total_size_latest_version(), is
    not zero. Nothing (None) is returned when the record has fewer than
    two documents or when no link is produced.

    NOTE(review): the returned HTML opens <div id="downloadallphotos">
    but never closes it -- confirm the closing tag is emitted elsewhere.
    """
    current_bibrecdoc = BibRecDocs(bfo.recID)
    if len(current_bibrecdoc.bibdocs) < 2:
        # If we have less than 2 photos, there is no point in displaying the
        # "Download all" buttons
        return None

    button_template = '<a %(original)s href="%(site)s/record/%(recID)s/files/allfiles-%(size)s">%(size)s (%(total)s)</a>'
    buttons = []
    for (size, internal_size) in CFG_BIBDOCFILE_SUBFORMATS_TRANSLATIONS:
        total = current_bibrecdoc.get_total_size_latest_version(
            bfo.user_info, internal_size)
        # don't display the button if the size will be 0
        if not total:
            continue
        # only the "original" size carries the extra data attribute
        if size == 'original':
            original_attr = 'data-size="Original"'
        else:
            original_attr = ''
        buttons.append(button_template % {'original': original_attr,
                                          'site': CFG_SITE_URL,
                                          'recID': bfo.recID,
                                          'size': size,
                                          'total': nice_size(total)})

    # If there are no buttons to display, don't display the rest of the HTML
    if not buttons:
        return None

    stylesheet = '''<style>
                #downloadallphotos {
                    clear: both;
                    font-size: small;
                    color: #555444;
                    margin-left: 10px;
                }
                #downloadallphotos a {
                    border-radius: 5px;
                    box-shadow: 1px 1px 1px 1px #CCCCCC;
                    color: #222222;
                    display: inline-block;
                    margin: 2px 5px;
                    padding: 3px;
                    text-decoration: none;
                    background-color: #E6E6FA;
                }
                #downloadallphotos a:hover {
                    background: -moz-linear-gradient(center top , #3A3A3A 0%, #7D7E7D 100%) repeat scroll 0 0 rgba(0, 0, 0, 0);
                    color: #fff;
                }
                </style>'''
    heading = '''<div id="downloadallphotos">Download all pictures:'''
    return stylesheet + heading + ''.join(buttons)
예제 #54
0
def get_pdf_snippets(recID, patterns, user_info):
    """
    Extract text snippets around 'patterns' from the first document of
    'recID' that has extracted text and that the user may see.
    The snippets are meant to look like in the results of the popular search
    engine: using " ... " between snippets.
    When nothing can be shown (no suitable document, a zero character or
    snippet budget, or no hit) it returns "".

    The character budget and the maximum number of snippets are looked up in
    CFG_WEBSEARCH_FULLTEXT_SNIPPETS_CHARS and CFG_WEBSEARCH_FULLTEXT_SNIPPETS
    respectively, keyed by the document status (the "courtesy"), falling
    back to the entry stored under the empty key '' and finally to 0.

    @param recID: record ID to consider
    @param patterns: list of patterns to retrieve
    @param user_info: user information handed to check_bibdoc_authorization
    @return: HTML snippet box, or "" when there is nothing to display
    """
    from invenio.bibdocfile import BibRecDocs, check_bibdoc_authorization

    text_path = ""
    text_path_courtesy = ""
    for bd in BibRecDocs(recID).list_bibdocs():
        # Show excluded fulltext in snippets on Inspire, otherwise depending
        # on authorization (a falsy first element of the authorization result
        # is treated as "allowed").
        if bd.get_text() and (CFG_INSPIRE_SITE or not check_bibdoc_authorization(user_info, bd.get_status())[0]):
            text_path = bd.get_text_path()
            text_path_courtesy = bd.get_status()
            if CFG_INSPIRE_SITE and not text_path_courtesy:
                # get courtesy from doctype, since docstatus was empty:
                text_path_courtesy = bd.get_type()
                if text_path_courtesy == 'INSPIRE-PUBLIC':
                    # but ignore 'INSPIRE-PUBLIC' doctype
                    text_path_courtesy = ''
            break # stop at the first good PDF textable file

    # Each limit comes from its OWN configuration dictionary; the default
    # number of snippets must not be taken from the characters dictionary.
    default_nb_chars = CFG_WEBSEARCH_FULLTEXT_SNIPPETS_CHARS.get('', 0)
    default_max_snippets = CFG_WEBSEARCH_FULLTEXT_SNIPPETS.get('', 0)
    nb_chars = CFG_WEBSEARCH_FULLTEXT_SNIPPETS_CHARS.get(
        text_path_courtesy, default_nb_chars)
    max_snippets = CFG_WEBSEARCH_FULLTEXT_SNIPPETS.get(
        text_path_courtesy, default_max_snippets)

    if not (text_path and nb_chars and max_snippets):
        return ""

    out = ''
    if CFG_WEBSEARCH_FULLTEXT_SNIPPETS_GENERATOR == 'native':
        out = get_text_snippets(text_path, patterns, nb_chars, max_snippets)
        if not out:
            # no hit, so check stemmed versions:
            from invenio.bibindex_engine_stemmer import stem
            stemmed_patterns = [stem(p, 'en') for p in patterns]
            out = get_text_snippets(text_path, stemmed_patterns, nb_chars, max_snippets)
    elif CFG_WEBSEARCH_FULLTEXT_SNIPPETS_GENERATOR == 'SOLR':
        out = solr_get_snippet(patterns, recID, nb_chars, max_snippets)

    if not out:
        return ""

    out_courtesy = ""
    if CFG_INSPIRE_SITE and text_path_courtesy:
        out_courtesy = '<strong>Snippets courtesy of ' + text_path_courtesy + '</strong><br>'
    return """<div class="snippetbox">%s%s</div>""" % (out_courtesy, out)
예제 #55
0
def get_filenames(recid):
    """
        Returns names of the files associated with specific record
        and their derivatives. Takes as a parameter the recid of a
        record.

        Example:
        input: recID 999 (record with files ['thesis.ps.gz', 'random.pdf'])
        output: ['thesis.ps.gz', 'thesis.ps', 'thesis',
                 'random.pdf', 'random']

        When the record has no latest files an empty list is returned.

        @param recid: recid of a record
        @return: flat list with the names produced by _get_filenames for
            every latest file, in the order the files are listed
    """
    names = []
    for docfile in BibRecDocs(recid).list_latest_files():
        # Extending one accumulator avoids reduce(), which raises TypeError
        # on an empty sequence when no initial value is supplied.
        names.extend(_get_filenames(docfile.name + docfile.format))
    return names
예제 #56
0
def goto(cc=CFG_SITE_NAME, p='', f='', sf='', so='d', docname='', format=''):
    """
    Redirect the user to the latest record in the given collection,
    optionally within the specified pattern and field. If format is
    specified, redirect the user to the file of the corresponding
    docname and format. If docname is not specified, but there is
    only a single bibdoc attached to the record, redirect to that
    one.

    @param cc: collection passed to perform_request_search
    @param p: search pattern
    @param f: search field
    @param sf: sort field
    @param so: sort order
    @param docname: name of the document to redirect to (only used
        together with format)
    @param format: format of the file to redirect to
    @return: the file URL when docname/format can be resolved, otherwise
        the URL of the record; None when the search returns no record
    """
    recids = perform_request_search(cc=cc, p=p, f=f, sf=sf, so=so)
    if not recids:
        return None

    ## We shall take the last recid. This is the last one
    recid = recids[-1]
    url = '/%s/%s' % (CFG_SITE_RECORD, recid)
    if not format:
        return url

    # One BibRecDocs instance serves both the docname lookup and the
    # document retrieval.
    bibrecdocs = BibRecDocs(recid)
    if not docname:
        docnames = bibrecdocs.get_bibdoc_names()
        if len(docnames) != 1:
            # no docname given and no unambiguous choice: stay on the record
            return url
        docname = docnames[0]

    try:
        return bibrecdocs.get_bibdoc(docname).get_file(format=format).get_url()
    except InvenioBibDocFileError:
        # unknown docname or format: fall back to the record page
        return url
class BibDocFsInfoTest(InvenioTestCase):
    """Regression tests about the table bibdocfsinfo"""

    def setUp(self):
        """Attach test.jpg to record 2 as a new document with a unique name."""
        from invenio.bibdocfile import BibRecDocs
        recdocs = BibRecDocs(2)
        self.my_bibrecdoc = recdocs
        self.unique_name = recdocs.propose_unique_docname('file')
        jpg_path = CFG_PREFIX + '/lib/webtest/invenio/test.jpg'
        self.my_bibdoc = recdocs.add_new_file(jpg_path,
                                              docname=self.unique_name)
        self.my_bibdoc_id = self.my_bibdoc.id

    def tearDown(self):
        """Expunge the document created in setUp."""
        self.my_bibdoc.expunge()

    def _first_cell(self, query):
        """Run query for the test document id; return row 0, column 0."""
        from invenio.dbquery import run_sql
        return run_sql(query, (self.my_bibdoc_id, ))[0][0]

    def test_hard_delete(self):
        """bibdocfile - test correct update of bibdocfsinfo when hard-deleting"""
        max_version = "SELECT MAX(version) FROM bibdocfsinfo WHERE id_bibdoc=%s"
        jpg_v1_is_last = "SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=1 AND format='.jpg'"
        gif_v2_is_last = "SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=2 AND format='.gif'"

        # freshly added document: a single version, flagged as the last one
        self.assertEqual(self._first_cell(max_version), 1)
        self.assertEqual(self._first_cell(jpg_v1_is_last), True)

        # a second version takes over the "last version" flag
        gif_path = CFG_PREFIX + '/lib/webtest/invenio/test.gif'
        self.my_bibdoc.add_file_new_version(gif_path)
        self.assertEqual(self._first_cell(max_version), 2)
        self.assertEqual(self._first_cell(gif_v2_is_last), True)
        self.assertEqual(self._first_cell(jpg_v1_is_last), False)

        # deleting version 2 hands the flag back to version 1
        self.my_bibdoc.delete_file('.gif', 2)
        self.assertEqual(self._first_cell(max_version), 1)
        self.assertEqual(self._first_cell(jpg_v1_is_last), True)
def upload_fulltext(recid, path):
    '''
        Attach the uploaded file to the given record as a new document
        in the CFG_DOCTYPE_UPLOAD_COLLECTION doctype.
        @param recid: id of the record
        @param path: uploaded document to store
        @return: always the empty string
    '''

    archive = BibRecDocs(recid)

    # document name: last '/'-separated component, cut at its first dot
    base_name = path.rpartition('/')[2].partition('.')[0]
    # format: whatever follows the last dot of the path, cut at the first ';'
    extension = path.rpartition('.')[2].partition(';')[0]

    archive.add_new_file(path, CFG_DOCTYPE_UPLOAD_COLLECTION, base_name,
                         format=extension)
    return ''