Exemple #1
0
def _bibdoc_file_list(recid):
    """Describe the first latest file of every bibdoc attached to a record.

    :param recid: record identifier handed to ``BibRecDocs``.
    :returns: list of dicts with the keys ``id`` (bibdoc id), ``name``
        (file name without extension), ``type`` (extension without its
        leading dot) and ``size``. Bibdocs without any latest file are
        skipped. An empty list is returned when ``BibRecDocs`` cannot be
        built for the record.
    """
    import os.path
    from invenio.legacy.bibdocfile.api import BibRecDocs
    try:
        recdocs = BibRecDocs(recid)
    except Exception:
        # Best effort: report and return nothing, but let KeyboardInterrupt
        # and SystemExit propagate. Lazy %s formatting cannot itself fail
        # when recid is not an integer.
        current_app.logger.error(
            "REST API: Error while building BibRecDocs for record %s",
            recid, exc_info=True)
        return []
    files = []
    for bibdoc in recdocs.list_bibdocs():
        latest_files = bibdoc.list_latest_files()
        if not latest_files:
            continue
        first_file = latest_files[0]
        filename = first_file.get_full_name().decode('utf-8')
        docname, doctype = os.path.splitext(filename)
        if doctype.startswith('.'):
            doctype = doctype[1:]
        files.append({
            'id': bibdoc.get_id(),
            'name': docname,
            'type': doctype,
            'size': first_file.get_size(),
        })
    return files
Exemple #2
0
    def _documents_has_been_updated(self, recid):
        """Report whether any document of the record is as recent as the record.

        The record's modification date is read from the ``bibrec`` table and
        compared with the ``md`` attribute of every bibdoc of the record. Two
        seconds are added to each document date to absorb rounding problems.

        :param recid: identifier of the record to inspect.
        :returns: ``True`` as soon as one document satisfies
            ``md + 2 seconds >= record modification date``; ``False``
            otherwise, including when the record has no documents.
        """
        from invenio.legacy.bibdocfile.api import BibRecDocs
        import datetime

        bibdocs = BibRecDocs(recid)
        # TODO: replace legacy code. Once the JsonAlchemy bug is resolved the
        # dates should come from self._get_record(recid) instead of SQL.
        from invenio.legacy.dbquery import run_sql
        # Bind recid as a query parameter rather than interpolating it into
        # the SQL text, so the value is never parsed as SQL.
        (_, record_modification_date) = run_sql(
            "SELECT creation_date, modification_date from bibrec where id=%s",
            (recid, ))[0]

        docs = bibdocs.list_bibdocs()
        if not docs:
            self.app.logger.debug("No docs for: %s" % recid)

        tolerance = datetime.timedelta(seconds=2)
        for doc in docs:
            if doc.md + tolerance >= record_modification_date:
                return True
        return False
 def tearDown(self):
     """Delete the bibdocs 'img_test', 'file' and 'test' from record 2."""
     from invenio.legacy.bibdocfile.api import BibRecDocs
     record_docs = BibRecDocs(2)
     # Same order as before; a failure on one name still stops the rest.
     for docname in ('img_test', 'file', 'test'):
         record_docs.delete_bibdoc(docname)
def solr_add_range(lower_recid, upper_recid, tags_to_index,
                   next_commit_counter):
    """
    Adds the regarding field values of all records from the lower recid to the upper one to Solr.
    It preserves the fulltext information.

    @param lower_recid: first record id of the range (inclusive)
    @param upper_recid: last record id of the range (inclusive)
    @param tags_to_index: passed through to get_field_content_in_utf8
    @param next_commit_counter: counter threaded through
        solr_commit_if_necessary after every added record
    @return: the commit counter as returned by the last
        solr_commit_if_necessary call (unchanged if no record was added)
    """
    for recid in range(lower_recid, upper_recid + 1):
        if not record_exists(recid):
            continue

        abstract = get_field_content_in_utf8(recid, 'abstract',
                                             tags_to_index)
        author = get_field_content_in_utf8(recid, 'author', tags_to_index)
        keyword = get_field_content_in_utf8(recid, 'keyword',
                                            tags_to_index)
        title = get_field_content_in_utf8(recid, 'title', tags_to_index)
        try:
            bibrecdocs = BibRecDocs(recid)
            fulltext = unicode(bibrecdocs.get_text(), 'utf-8')
        except Exception:
            # Best effort: a record whose fulltext cannot be read is still
            # indexed, with an empty fulltext. KeyboardInterrupt and
            # SystemExit are no longer swallowed.
            fulltext = ''

        solr_add(recid, abstract, author, fulltext, keyword, title)
        next_commit_counter = solr_commit_if_necessary(next_commit_counter,
                                                       recid=recid)

    return next_commit_counter
Exemple #5
0
def file(recid=None, filename=None):
    """Serve a restricted file of a record to the holder of a valid token.

    Simple reimplementation of the legacy bibdocfile file serving; only the
    latest version of a file can be served.

    The ``token`` request argument is validated against the record id.
    Responds with 404 when the token is rejected, when ``BibRecDocs`` raises
    ``InvenioBibDocFileError`` or when ``get_bibdocfile`` finds nothing, and
    with 410 when the record's document archive reports itself deleted.
    """
    token = request.args.get('token')
    token_is_valid = SecretLink.validate_token(token, dict(recid=recid))
    if not token_is_valid:
        return abort(404)

    try:
        bibarchive = BibRecDocs(recid)
    except InvenioBibDocFileError:
        current_app.logger.warning("File not found.", exc_info=True)
        abort(404)

    if bibarchive.deleted_p():
        abort(410)

    bibdocfile = get_bibdocfile(bibarchive, filename)
    if bibdocfile is None:
        abort(404)

    path = bibdocfile.get_path()
    return send_file(path)
Exemple #6
0
def get_media_from_recid(recid):
    '''
        Describe the latest files attached to the given record.

        Every file yields a dict with the keys 'name', 'file', 'type',
        'path', 'collection', 'size', 'loaded' and 'selected'; files whose
        type is "Main" are marked as selected.

        @param recid: id of the record whose files are listed
        @return: list of the file description dicts
    '''
    medias = []

    for latest in BibRecDocs(recid).list_latest_files():
        full_name = latest.get_full_name()
        extension = latest.get_superformat().split(".")[-1]
        full_path = latest.get_full_path()
        collection = latest.get_type()
        size = latest.get_size()

        medias.append({
            'name': full_name,
            'file': '',
            'type': 'application/%s' % extension,
            'path': full_path,
            'collection': collection,
            'size': size,
            'loaded': False,
            'selected': 'checked=yes' if collection == "Main" else '',
        })

    return medias
def format_element(bfo, subformat="480p"):
    """ Build the HTML5 source elements of a record for one subformat.

    All files of all bibdocs of the record are scanned; a source element is
    emitted for every file whose superformat is .mp4, .webm or .ogv and
    whose subformat equals the requested one.

    @param subformat: BibDocFile subformat to create the sources from (e.g. 480p)
    @return: the source elements joined by newlines ("" when none matched)
    """
    codecs_by_type = {
        'mp4': 'avc1.42E01E, mp4a.40.2',
        'webm': 'vp8, vorbis',
        'ogv': 'theora, vorbis',
    }
    video_sources = []
    for bibdoc in BibRecDocs(bfo.recID).list_bibdocs():
        for bibdocfile in bibdoc.list_all_files():
            if bibdocfile.get_superformat() not in ('.mp4', '.webm', '.ogv'):
                continue
            if bibdocfile.get_subformat() != subformat:
                continue
            src = bibdocfile.get_url()
            # Drop the leading dot of the superformat to get the video type.
            ftype = bibdocfile.get_superformat()[1:]
            video_sources.append(
                '<source src=\"%s\" type=\'video/%s; codecs=\"%s\"\' />' % (src, ftype, codecs_by_type[ftype]))
    return "\n".join(video_sources)
def create_download_popup(bfo):
    """Create the complete download popup

    Every file of every bibdoc of the record whose superformat is one of the
    known video formats becomes one download element. The elements are
    joined by newlines and substituted into ``html_skeleton_popup``.
    Returns "" when the record has no such file.
    """
    video_formats = ('.mp4', '.webm', '.ogv',
                     '.mov', '.wmv', '.avi',
                     '.mpeg', '.flv', '.mkv')
    elements = []
    for bibdoc in BibRecDocs(bfo.recID).list_bibdocs():
        for bibdocfile in bibdoc.list_all_files():
            if bibdocfile.get_superformat() not in video_formats:
                continue
            url = bibdocfile.get_url()
            # The codec label is the superformat without its leading dot.
            codec = bibdocfile.get_superformat()[1:]
            resolution = bibdocfile.get_subformat()
            size = bibdocfile.get_size()
            elements.append(
                create_download_element(url, codec, size, resolution))

    if not elements:
        return ""
    return html_skeleton_popup % {'elements': "\n".join(elements)}
def goto(cc=CFG_SITE_NAME, p='', f='', sf='date', so='d',
         docname='', format=''):
    """
    Redirect the user to the latest record in the given collection.

    Redirect the user to the latest record in the given collection,
    optionally within the specified pattern and field. If docname
    and format are specified, redirect the user to the corresponding
    docname and format. If docname is not specified, but there is
    only a single bibdoc attached to the record, redirect to that
    one.

    Returns the URL of the matching file when it can be resolved, the
    URL of the record otherwise, and None when the search finds no record.
    """
    recids = perform_request_search(cc=cc, p=p, f=f, sf=sf, so=so)
    if not recids:
        return None

    # The first is the most recent because they are sorted by date
    # descending.
    recid = recids[0]
    url = '/%s/%s' % (CFG_SITE_RECORD, recid)
    if not format:
        return url

    # Built once and reused for both the name lookup and the bibdoc lookup.
    bibrecdocs = BibRecDocs(recid)
    if not docname:
        docnames = bibrecdocs.get_bibdoc_names()
        if len(docnames) != 1:
            # Zero or several bibdocs: no unambiguous target file.
            return url
        docname = docnames[0]

    try:
        bibdoc = bibrecdocs.get_bibdoc(docname)
        bibdocfile = bibdoc.get_file(format=format)
        return bibdocfile.get_url()
    except InvenioBibDocFileError:
        # Unknown docname or format: fall back to the record page.
        return url
Exemple #10
0
def format_element(bfo,
                   template='record_hb.html',
                   subformat_re='icon.*',
                   as_url=False,
                   **kwargs):
    """Render the icon of the first bibdoc attached to the record.

    An icon whose subformat matches ``subformat_re`` is preferred; when
    there is none, the bibdoc's default icon is used instead. Previously the
    default icon was looked up but never rendered.

    :param bfo: object exposing the record id as ``recID``.
    :param template: accepted but not read here; rendering goes through the
        module-level ``template_icon``.
    :param subformat_re: regular expression selecting the icon subformat.
    :param as_url: when true, return the icon URL instead of rendered HTML.
    :returns: the icon URL or the rendered template; "" when the record has
        no bibdoc or the first bibdoc has no icon.
    """
    docs = BibRecDocs(bfo.recID).list_bibdocs()
    if not docs:
        return ""

    doc = docs[0]
    icon = doc.get_icon(subformat_re=re.compile(subformat_re))
    if not icon:
        # No icon for the requested subformat: fall back to the default one.
        icon = doc.get_icon()
    if not icon:
        return ""

    if as_url:
        return icon.get_url()

    ctx = {
        'icon': icon,
        'bfo': bfo,
        'CFG_SITE_URL': current_app.config['CFG_SITE_URL'],
    }
    return template_icon.render(**ctx)
def get_media_from_recid(recid):
    '''
        List the latest files of a record as media description dicts.

        @param recid: id of the record whose latest files are described
        @return: list of dicts, one per file, holding its name, type,
            path, collection and size plus form-state fields
    '''

    def describe(bibdocfile):
        # One description per file; "Main" files are pre-selected.
        description = {
            'name': bibdocfile.get_full_name(),
            'file': '',
            'type': 'application/%s' %
                    bibdocfile.get_superformat().split(".")[-1],
            'path': bibdocfile.get_full_path(),
            'collection': bibdocfile.get_type(),
            'size': bibdocfile.get_size(),
            'loaded': False,
            'selected': '',
        }
        if description['collection'] == "Main":
            description['selected'] = 'checked=yes'
        return description

    bibarchiv = BibRecDocs(recid)
    return [describe(latest) for latest in bibarchiv.list_latest_files()]
class BibDocFsInfoTest(InvenioTestCase):
    """Regression tests about the table bibdocfsinfo"""

    def setUp(self):
        """Attach data/test.jpg to record 2 under a freshly proposed docname."""
        from invenio.legacy.bibdocfile.api import BibRecDocs
        self.my_bibrecdoc = BibRecDocs(2)
        self.unique_name = self.my_bibrecdoc.propose_unique_docname('file')
        jpg_path = pkg_resources.resource_filename(
            'invenio_demosite.testsuite.regression',
            'data/test.jpg')
        self.my_bibdoc = self.my_bibrecdoc.add_new_file(
            jpg_path, docname=self.unique_name)
        self.my_bibdoc_id = self.my_bibdoc.id

    def tearDown(self):
        """Expunge the bibdoc created by setUp."""
        self.my_bibdoc.expunge()

    def _first_cell(self, query):
        """Run ``query`` bound to the test bibdoc id; return row 0, column 0."""
        from invenio.legacy.dbquery import run_sql
        return run_sql(query, (self.my_bibdoc_id, ))[0][0]

    def test_hard_delete(self):
        """bibdocfile - test correct update of bibdocfsinfo when hard-deleting"""
        max_version = "SELECT MAX(version) FROM bibdocfsinfo WHERE id_bibdoc=%s"
        jpg_v1_is_last = "SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=1 AND format='.jpg'"
        gif_v2_is_last = "SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=2 AND format='.gif'"

        # Freshly created: only version 1 exists and it is the last one.
        self.assertEqual(self._first_cell(max_version), 1)
        self.assertEqual(self._first_cell(jpg_v1_is_last), True)

        # A new version makes the .gif the last version.
        gif_path = pkg_resources.resource_filename(
            'invenio_demosite.testsuite.regression',
            'data/test.gif')
        self.my_bibdoc.add_file_new_version(gif_path)
        self.assertEqual(self._first_cell(max_version), 2)
        self.assertEqual(self._first_cell(gif_v2_is_last), True)
        self.assertEqual(self._first_cell(jpg_v1_is_last), False)

        # Hard-deleting version 2 restores version 1 as the last version.
        self.my_bibdoc.delete_file('.gif', 2)
        self.assertEqual(self._first_cell(max_version), 1)
        self.assertEqual(self._first_cell(jpg_v1_is_last), True)
Exemple #13
0
def format_element(bfo):
    """
    Display image of the thumbnail plot if we are in selected plots collections

    The first latest file of type "Thumb" wins and is returned immediately.
    Otherwise the "Plot" file with the highest index is used; the index is
    read from the first five characters of the file description.

    @return: a left-aligned div wrapping an img tag, or "" when the record
        has neither a thumbnail nor a usable plot
    """
    ## To achieve this, we take the Thumb file associated with this document

    bibarchive = BibRecDocs(bfo.recID)

    img_files = []

    for doc in bibarchive.list_bibdocs():
        for _file in doc.list_latest_files():
            file_type = _file.get_type()

            if file_type == "Plot":
                try:
                    index = int(_file.get_description()[:5])
                except (TypeError, ValueError):
                    # Description missing or without a numeric prefix: skip
                    # this file, as the sibling plots element does, instead
                    # of failing the whole record.
                    continue

                img_location = _file.get_url()
                if img_location == "":
                    continue

                # Store the img tag, not the bare URL, so the fallback below
                # renders an image just like the "Thumb" branch does.
                img = '<img src="%s" width="100px"/>' % (img_location)
                img_files.append((index, img))

            elif file_type == "Thumb":
                img_location = _file.get_url()
                img = '<img src="%s" width="100px"/>' % (img_location)
                return '<div align="left">' + img + "</div>"

    # then we use the default: the last plot with an image
    img_files = sorted(img_files, key=lambda x: x[0])
    if img_files:
        return '<div align="left">' + img_files[-1][1] + "</div>"
    else:
        return ""
def file(recid=None, filename=None):
    """Send a record's restricted file when the request carries a valid token.

    This is a simple reimplementation of legacy bibdocfile file serving. Only
    the latest version of a file can be served.

    Outcomes: 404 for a rejected token, for an ``InvenioBibDocFileError``
    raised while building ``BibRecDocs`` and for a file that
    ``get_bibdocfile`` cannot find; 410 when the document archive is flagged
    as deleted; otherwise the file found is sent.
    """
    expected_data = dict(recid=recid)
    if not SecretLink.validate_token(request.args.get('token'),
                                     expected_data):
        return abort(404)

    try:
        bibarchive = BibRecDocs(recid)
    except InvenioBibDocFileError:
        current_app.logger.warning("File not found.", exc_info=True)
        abort(404)

    # A deleted archive is "gone" rather than "not found".
    if bibarchive.deleted_p():
        abort(410)

    found = get_bibdocfile(bibarchive, filename)
    if found is None:
        abort(404)

    return send_file(found.get_path())
Exemple #15
0
    def _documents_has_been_updated(self, recid):
        """Check whether a document of the record was modified as late as the record.

        Reads the record's modification date from the ``bibrec`` table and
        compares it with the ``md`` attribute of each of the record's
        bibdocs, adding two seconds to the document date because of rounding.

        :param recid: identifier of the record to check.
        :returns: ``True`` if at least one document has
            ``md + 2 seconds >= record modification date``, else ``False``
            (also when there is no document at all).
        """
        from invenio.legacy.bibdocfile.api import BibRecDocs
        import datetime

        bibdocs = BibRecDocs(recid)
        # TODO: replace legacy code. The dates are meant to be read from
        # self._get_record(recid) once a JsonAlchemy bug is resolved.
        from invenio.legacy.dbquery import run_sql

        # recid is passed as a bound parameter; it must not be formatted
        # into the SQL string.
        (_, record_modification_date) = run_sql(
            "SELECT creation_date, modification_date from bibrec where id=%s",
            (recid,),
        )[0]

        docs = bibdocs.list_bibdocs()
        if not docs:
            self.app.logger.debug("No docs for: %s" % recid)

        # should add fews seconds for rounding problem
        rounding_margin = datetime.timedelta(seconds=2)
        return any(
            doc.md + rounding_margin >= record_modification_date
            for doc in docs
        )
Exemple #16
0
    def test_BibDocFiles(self):
        """bibdocfile - BibDocFile functions """
        from invenio.legacy.bibdocfile.api import BibRecDocs

        # Attach a test image to record 2 with a known modification date.
        recdocs = BibRecDocs(2)
        parsed_time = time.strptime("2010-09-08 07:06:05", "%Y-%m-%d %H:%M:%S")
        timestamp = datetime(*(parsed_time[:6]))
        source_path = CFG_PREFIX + '/lib/webtest/invenio/test.jpg'
        recdocs.add_new_file(source_path, 'Main', 'img_test', False, 'test add new file', 'test', '.jpg', modification_date=timestamp)

        bibdoc = recdocs.get_bibdoc("img_test")
        docfile = bibdoc.list_all_files()[0]

        # URL and type.
        expected_url = CFG_SITE_URL + '/%s/2/files/img_test.jpg' % CFG_SITE_RECORD
        self.assertEqual(docfile.get_url(), expected_url)
        self.assertEqual(docfile.get_type(), 'Main')
        # The storage path and full path are deliberately not asserted: they
        # belong to the underlying implementation, not to the interface
        # under test.

        # Identity and naming.
        self.assertEqual(docfile.get_bibdocid(), bibdoc.get_id())
        self.assertEqual(docfile.get_name() , 'img_test')
        self.assertEqual(docfile.get_full_name() , 'img_test.jpg')

        # Format and version.
        self.assertEqual(docfile.get_format(), '.jpg')
        self.assertEqual(docfile.get_version(), 1)

        # Description and comment agree with the owning bibdoc.
        self.assertEqual(docfile.get_description(), bibdoc.get_description('.jpg', version=1))
        self.assertEqual(docfile.get_comment(), bibdoc.get_comment('.jpg', version=1))

        # Record id, status, size and checksum.
        self.assertEqual(docfile.get_recid(), 2)
        self.assertEqual(docfile.get_status(), '')
        self.assertEqual(docfile.get_size(), 91750)
        self.assertEqual(docfile.get_checksum(), '28ec893f9da735ad65de544f71d4ad76')
        self.assertEqual(docfile.check(), True)

        # Rendering through the bibdocfile template.
        import invenio.legacy.template
        tmpl = invenio.legacy.template.load("bibdocfile")
        rendered = tmpl.tmpl_display_bibdocfile(docfile, ln='en')
        assert 'files/img_test.jpg?version=1">' in rendered

        # Visibility and modification date.
        self.assertEqual(docfile.hidden_p(), False)
        self.assertEqual(docfile.md, timestamp)

        # Deleting the bibdoc flags it as deleted.
        bibdoc.delete()
        self.assertEqual(bibdoc.deleted_p(), True)
Exemple #17
0
 def setUp(self):
     """Attach data/test.jpg to record 2 under a freshly proposed docname."""
     from invenio.legacy.bibdocfile.api import BibRecDocs
     self.my_bibrecdoc = BibRecDocs(2)
     self.unique_name = self.my_bibrecdoc.propose_unique_docname('file')
     jpg_path = pkg_resources.resource_filename(
         'invenio_demosite.testsuite.regression', 'data/test.jpg')
     new_bibdoc = self.my_bibrecdoc.add_new_file(
         jpg_path, docname=self.unique_name)
     self.my_bibdoc = new_bibdoc
     self.my_bibdoc_id = new_bibdoc.id
Exemple #18
0
def format_element(bfo, width="", caption="yes", max="-1"):
    """
    Display image of the plot if we are in selected plots collections

    Every latest file of type "Plot" becomes an image linked to the record's
    plots page. The images are ordered by the index read from the first
    five characters of the file description.

    @param width: the width of the returned image (Eg: '100px')
    @param caption: display the captions or not? (accepted but not read by
        this function)
    @param max: the maximum number of plots to display (-1 is all plots)
    @return: a horizontally scrollable div holding the linked images, or ''
        when there is no plot to display
    """
    ## To achieve this, we take the pngs associated with this document

    img_files = []
    # Keep the parsed limit in its own local so the builtin max() is not
    # rebound inside this function.
    limit = int(max)

    bibarchive = BibRecDocs(bfo.recID)

    if width != "":
        width = 'width="%s"' % width

    for doc in bibarchive.list_bibdocs():
        for _file in doc.list_latest_files():
            if _file.get_type() != "Plot":
                continue

            try:
                description = _file.get_description()
                caption_text = description[5:]
                index = int(description[:5])
                img_location = _file.get_url()
            except Exception:
                # FIXME: we have hit probably a plot context file,
                # so ignore this document; but it would be safer
                # to check subformat type, so that we don't mask
                # other eventual errors here.
                continue

            img = '<img src="%s" title="%s" %s/>' % \
                  (img_location, caption_text, width)

            link = create_html_link(urlbase='%s/%s/%s/plots#%d' %
                                            (CFG_SITE_URL, CFG_SITE_RECORD,
                                             bfo.recID, index),
                                    urlargd={},
                                    link_label=img)

            img_files.append((index, link))

    img_files = sorted(img_files, key=lambda x: x[0])
    if limit > 0:
        img_files = img_files[:limit]

    if not img_files:
        return ''

    links = [link for (_, link) in img_files]
    # "width:100%" replaces the invalid CSS declaration "width=100%".
    return '<div style="overflow-x:scroll;width:100%;white-space:nowrap">' +\
           " ".join(links) + '</div>'
def get_filetypes(recid):
    """
        Return the filetypes of the latest files attached to a record.

        The ``format`` attribute of every latest file is mapped through
        ``_get_filetype``.

        @param recid: recid of a record
        @return: list with one entry per latest file
    """
    from invenio.legacy.bibdocfile.api import BibRecDocs
    filetypes = []
    for latest_file in BibRecDocs(recid).list_latest_files():
        filetypes.append(_get_filetype(latest_file.format))
    return filetypes
Exemple #20
0
def list_pdfs(recid):
    """Yield the PDF files of every bibdoc attached to a record.

    For each bibdoc the formats 'pdf', 'pdfa' and 'PDF' are requested in
    turn; a format for which ``get_file`` raises ``InvenioBibDocFileError``
    is silently skipped.
    """
    pdf_formats = ('pdf', 'pdfa', 'PDF')
    for bibdoc in BibRecDocs(recid).list_bibdocs():
        for fmt in pdf_formats:
            try:
                yield bibdoc.get_file(fmt)
            except InvenioBibDocFileError:
                continue
def format_element(bfo):
    """ Format element function to create the select and option elements
    with HTML5 data attributes that store all the necessary metadata to
    construct video sources with JavaScript.

    All files of all bibdocs of the record are scanned. For each of the
    subformats 360p, 480p, 720p and 1080p the URLs of the .mp4, .webm and
    .ogv files are collected, plus the URL of a poster (a file whose comment
    is exactly "POSTER"). One option element is created per subformat that
    has at least one video URL.

    @return: whatever create_select_element builds from the option elements
    """
    resolutions = ("360p", "480p", "720p", "1080p")
    videos = {
        "360p": {"width": 640, "height": 360, "poster": None, "mp4": None, "webm": None, "ogv": None},
        "480p": {"width": 854, "height": 480, "poster": None, "mp4": None, "webm": None, "ogv": None},
        "720p": {"width": 1280, "height": 720, "poster": None, "mp4": None, "webm": None, "ogv": None},
        "1080p": {"width": 1920, "height": 1080, "poster": None, "mp4": None, "webm": None, "ogv": None},
    }
    recdoc = BibRecDocs(bfo.recID)
    ## Go through all the BibDocs and search for video related signatures
    for bibdoc in recdoc.list_bibdocs():
        for bibdocfile in bibdoc.list_all_files():
            size = bibdocfile.get_subformat()
            if size not in resolutions:
                continue
            superformat = bibdocfile.get_superformat()
            ## When a video signature is found, add the url to the videos dictionary
            if superformat in (".mp4", ".webm", ".ogv"):
                videos[size][superformat[1:]] = bibdocfile.get_url()
            ## When a poster signature is found, add the url to the videos dictionary
            ## Compared with ==: `in ("POSTER")` was a substring test on a
            ## plain string, so an empty comment also counted as a poster and
            ## a None comment raised TypeError.
            elif bibdocfile.get_comment() == "POSTER":
                videos[size]["poster"] = bibdocfile.get_url()
    ## Build video select options for every video size format that was found
    select_options = []
    for key, options in iteritems(videos):
        ## If we have at least one url, the format is available
        if options["mp4"] or options["webm"] or options["ogv"]:
            ## create an option element
            option_element = create_option_element(
                url_webm=options["webm"],
                url_ogv=options["ogv"],
                url_mp4=options["mp4"],
                url_poster=options["poster"],
                width=options["width"],
                height=options["height"],
                subformat=key,
            )
            select_options.append(option_element)
    select_element = create_select_element(select_options)
    return select_element
Exemple #22
0
def Add_Files(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    When ``<curdir>/files`` exists, every entry of that directory is added
    to the record as a "Main" document, unless ``check_file_exists`` reports
    that the record already has it. Always returns "".

    NOTE(review): the record id comes from the name ``sysno``, which is not
    defined in this function - presumably a module-level global; confirm.
    """
    files_dir = "%s/files" % curdir
    if not os.path.exists(files_dir):
        return ""

    bibrecdocs = BibRecDocs(sysno)
    for entry in os.listdir(files_dir):
        fullpath = "%s/files/%s" % (curdir, entry)
        _dirname, _basename, extension = decompose_file(entry)
        # Make sure a non-empty extension carries its leading dot.
        if extension and not extension.startswith("."):
            extension = '.' + extension
        if bibrecdocs.check_file_exists(fullpath, extension):
            continue
        bibrecdocs.add_new_file(fullpath, "Main", never_fail=True)
    return ""
Exemple #23
0
def Add_Files(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    Adds each entry of ``<curdir>/files`` (when that directory exists) to
    the record as a "Main" document, skipping entries for which
    ``check_file_exists`` returns a true value. The return value is always
    the empty string.

    NOTE(review): ``sysno`` is read but not defined here - presumably a
    global provided by the surrounding module; confirm.
    """
    upload_dir = "%s/files" % curdir
    if os.path.exists(upload_dir):
        bibrecdocs = BibRecDocs(sysno)
        for current_file in os.listdir(upload_dir):
            fullpath = "%s/files/%s" % (curdir, current_file)
            parts = decompose_file(current_file)
            _unused_dir, _unused_name, extension = parts
            needs_dot = extension and extension[0] != "."
            if needs_dot:
                extension = '.' + extension
            already_there = bibrecdocs.check_file_exists(fullpath, extension)
            if not already_there:
                bibrecdocs.add_new_file(fullpath, "Main", never_fail=True)
    return ""
Exemple #24
0
def _bibdoc_modify_files(recid, form):
    """Rename or delete the bibdocs of a record as requested by a form.

    Form keys of the shape ``__file__name__<docid>`` request that the bibdoc
    be renamed to the submitted value when it differs from the current name.
    Keys of the shape ``__file__delete__<docid>`` with the value ``'Delete'``
    request deletion. A deletion always takes precedence over a rename of
    the same bibdoc, whatever the iteration order of ``form``.

    :param recid: record identifier handed to ``BibRecDocs``.
    :param form: mapping of submitted field names to values.
    :returns: ``[]`` when ``BibRecDocs`` cannot be built for the record,
        ``None`` otherwise.
    """
    from invenio.legacy.bibdocfile.api import BibRecDocs
    try:
        recdocs = BibRecDocs(recid)
    except Exception:
        # Best effort: report and give up, but let KeyboardInterrupt and
        # SystemExit propagate. Lazy %s formatting cannot itself fail when
        # recid is not an integer.
        current_app.logger.error(
            "REST API: Error while building BibRecDocs for record %s",
            recid, exc_info=True)
        return []

    rename_prefix = '__file__name__'
    delete_prefix = '__file__delete__'
    actions = {}

    for (k, v) in form.items():
        if k.startswith(rename_prefix):
            docid = int(k[len(rename_prefix):])
            docname = recdocs.get_docname(docid)
            already_deleted = (docid in actions
                               and actions[docid][0] == 'delete')
            # Never let a rename replace a deletion recorded earlier.
            if docname != v and not already_deleted:
                actions[docid] = ('rename', docname, v)
        if k.startswith(delete_prefix) and v == 'Delete':
            docid = int(k[len(delete_prefix):])
            docname = recdocs.get_docname(docid)
            actions[docid] = ('delete', docname, None)  # overwrite rename

    for (act, docname, newname) in actions.values():
        if act == 'delete':
            current_app.logger.info("deleting bibdoc/file: {}/'{}'".format(
                recid, docname))
            recdocs.delete_bibdoc(docname)
        elif act == 'rename':
            current_app.logger.info(
                "renaming bibdoc/file: {}/'{}' -> '{}'".format(
                    recid, docname, newname))
            recdocs.change_name(newname=newname, oldname=docname)
Exemple #25
0
def upload_fulltext(recid, path):
    '''
        Attach an uploaded file to the given record.

        The document name is the last path component up to its first dot;
        the format is the text after the last dot of the path, cut at the
        first ';'. The file is added with the doctype
        CFG_DOCTYPE_UPLOAD_COLLECTION.

        @param recid: id of the record
        @param path: uploaded document to store
        @return: always the empty string
    '''
    basename = path.rpartition('/')[2]
    docname = basename.partition('.')[0]

    last_suffix = path.rpartition('.')[2]
    doctype = last_suffix.partition(';')[0]

    BibRecDocs(recid).add_new_file(path, CFG_DOCTYPE_UPLOAD_COLLECTION,
                                   docname, format=doctype)
    return ''
Exemple #26
0
    def get(self, recid):
        """List the visible, non-icon latest files of a record and its relations.

        Aborts with 404 when ``get_record`` returns nothing for ``recid`` and
        with 401 when ``check_user_can_view_record`` returns a non-zero code
        for the current user. Besides the record itself, the records listed
        under its ``rel_dataset`` and ``rel_software`` keys are included.

        Returns a list of dicts with the keys ``id``, ``name`` and ``url``.
        """
        from invenio.legacy.bibdocfile.api import BibRecDocs
        from invenio.legacy.search_engine import check_user_can_view_record

        record = get_record(recid)
        if not record:
            abort(404)
        auth_code, _ = check_user_can_view_record(current_user, recid)
        if auth_code:
            abort(401)

        record_ids = [recid]
        for relation in ['rel_dataset', 'rel_software']:
            record_ids.extend([int(r) for r in record.get(relation, [])])

        files = []
        for record_id in record_ids:
            latest_files = BibRecDocs(record_id).list_latest_files(
                list_hidden=False)
            for bibdocfile in latest_files:
                if bibdocfile.is_icon():
                    continue
                files.append({
                    'id': bibdocfile.docid,
                    'name': '%s%s' % (bibdocfile.name, bibdocfile.format),
                    'url': url_for(
                        'recordfileresource',
                        recid=record_id,
                        fileid=bibdocfile.docid),
                })
        return files
Exemple #27
0
 def _add_bibdoc_files():
     """Return ``zenodo_files``: the latest files the current user may see.

     Hidden files are not listed; icons are dropped, and so is every file
     whose ``is_restricted(current_user)`` code is not 0. The record id is
     read from ``kwargs['recid']`` of the enclosing scope.
     """
     from invenio.legacy.bibdocfile.api import BibRecDocs
     record_docs = BibRecDocs(kwargs['recid'], human_readable=True)
     visible_files = []
     for bibdocfile in record_docs.list_latest_files(list_hidden=False):
         if bibdocfile.is_icon():
             continue
         if bibdocfile.is_restricted(current_user)[0] != 0:
             continue
         visible_files.append(bibdocfile)
     return dict(zenodo_files=visible_files)
Exemple #28
0
def check_record(record):
    """
    Validates the checksum of all the BibDocFile's in the record

    For every latest file of every bibdoc of the record:
    - a file missing on disk invalidates the record and is not checked
      any further;
    - a failing checksum invalidates the record;
    - when HAS_MAGIC is set, a mime type that differs from the one detected
      by libmagic invalidates the record.

    @param record: record whose id is read from record["001"][0][3]; problems
        are reported through record.set_invalid
    """
    record_id = record["001"][0][3]
    for doc in BibRecDocs(record_id).list_bibdocs():
        for bibfile in doc.list_latest_files():
            path = bibfile.fullpath
            if not os.path.exists(path):
                record.set_invalid("File doesn't exists %s" % path)
                continue

            if not bibfile.check():
                record.set_invalid("Invalid checksum for file %s" % path)

            if not HAS_MAGIC:
                continue

            # HAS_MAGIC == 1 goes through magic.from_file; any other true
            # value goes through the module-level magic_object.
            if HAS_MAGIC == 1:
                magic_mime = magic.from_file(path, mime=True)
            else:
                magic_mime = magic_object.file(path)

            if bibfile.mime != magic_mime:
                # The two message halves used to be joined without a space
                # ("...is differentfrom guessed...").
                record.set_invalid(
                    ("Guessed mime type from extension (%s) is different "
                     "from guessed mime type from headers (%s)") %
                    (bibfile.mime, magic_mime))
Exemple #29
0
class BibDocFsInfoTest(InvenioTestCase):
    """Regression tests about the table bibdocfsinfo"""

    def setUp(self):
        """Create a bibdoc from data/test.jpg on record 2 with a unique docname."""
        from invenio.legacy.bibdocfile.api import BibRecDocs
        record_docs = BibRecDocs(2)
        self.my_bibrecdoc = record_docs
        self.unique_name = record_docs.propose_unique_docname('file')
        self.my_bibdoc = record_docs.add_new_file(
            pkg_resources.resource_filename(
                'invenio_demosite.testsuite.regression', 'data/test.jpg'),
            docname=self.unique_name)
        self.my_bibdoc_id = self.my_bibdoc.id

    def tearDown(self):
        """Expunge the bibdoc that setUp created."""
        self.my_bibdoc.expunge()

    def test_hard_delete(self):
        """bibdocfile - test correct update of bibdocfsinfo when hard-deleting"""
        from invenio.legacy.dbquery import run_sql

        def cell(sql):
            # First column of the first row, for the bibdoc under test.
            return run_sql(sql, (self.my_bibdoc_id, ))[0][0]

        def check(sql, expected):
            self.assertEqual(cell(sql), expected)

        # State right after creation.
        check("SELECT MAX(version) FROM bibdocfsinfo WHERE id_bibdoc=%s", 1)
        check(
            "SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=1 AND format='.jpg'",
            True)

        # State after adding a second version.
        self.my_bibdoc.add_file_new_version(
            pkg_resources.resource_filename(
                'invenio_demosite.testsuite.regression', 'data/test.gif'))
        check("SELECT MAX(version) FROM bibdocfsinfo WHERE id_bibdoc=%s", 2)
        check(
            "SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=2 AND format='.gif'",
            True)
        check(
            "SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=1 AND format='.jpg'",
            False)

        # State after hard-deleting the second version.
        self.my_bibdoc.delete_file('.gif', 2)
        check("SELECT MAX(version) FROM bibdocfsinfo WHERE id_bibdoc=%s", 1)
        check(
            "SELECT last_version FROM bibdocfsinfo WHERE id_bibdoc=%s AND version=1 AND format='.jpg'",
            True)
Exemple #30
0
def get_pdf_snippets(recID, patterns, user_info):
    """
    Extract text snippets around 'patterns' from a fulltext file of 'recID'.

    The text of the first bibdoc that offers 'get_text' and passes the
    site/authorization check is used. Snippet length and count are read from
    CFG_WEBSEARCH_FULLTEXT_SNIPPETS_CHARS and CFG_WEBSEARCH_FULLTEXT_SNIPPETS,
    keyed by the "courtesy" value (document status or, on Inspire with an
    empty status, the doctype), with the '' entry as fallback.
    The snippets are meant to look like in the results of the popular search
    engine: using " ... " between snippets.

    @param recID: record ID to consider
    @param patterns: list of patterns to retrieve
    @param user_info: the user_info object from collect_user_info
    @return: snippet, or "" when no snippet could be produced
    """
    from invenio.legacy.bibdocfile.api import BibRecDocs, check_bibdoc_authorization

    text_path = ""
    text_path_courtesy = ""
    for bibdoc in BibRecDocs(recID).list_bibdocs():
        if not hasattr(bibdoc, 'get_text'):
            continue
        # Show excluded fulltext in snippets on Inspire, otherwise depending on authorization
        if not CFG_INSPIRE_SITE and check_bibdoc_authorization(user_info, bibdoc.get_status())[0]:
            continue
        text_path = bibdoc.get_text_path()
        text_path_courtesy = bibdoc.get_status()
        if CFG_INSPIRE_SITE and not text_path_courtesy:
            # docstatus was empty, so take the courtesy from the doctype ...
            text_path_courtesy = bibdoc.get_type()
            if text_path_courtesy == 'INSPIRE-PUBLIC':
                # ... except for the 'INSPIRE-PUBLIC' doctype, which is ignored
                text_path_courtesy = ''
        break  # stop at the first good PDF textable file

    # Courtesy-specific limits win over the generic '' entry.
    nb_chars = CFG_WEBSEARCH_FULLTEXT_SNIPPETS_CHARS.get(
        text_path_courtesy, CFG_WEBSEARCH_FULLTEXT_SNIPPETS_CHARS.get('', 0))
    max_snippets = CFG_WEBSEARCH_FULLTEXT_SNIPPETS.get(
        text_path_courtesy, CFG_WEBSEARCH_FULLTEXT_SNIPPETS.get('', 0))

    if not (text_path and nb_chars and max_snippets):
        return ""

    out = ''
    if CFG_WEBSEARCH_FULLTEXT_SNIPPETS_GENERATOR == 'native':
        out = get_text_snippets(text_path, patterns, nb_chars, max_snippets)
        if not out:
            # no hit, so check stemmed versions:
            from invenio.legacy.bibindex.engine_stemmer import stem
            stemmed_patterns = [stem(pattern, 'en') for pattern in patterns]
            out = get_text_snippets(text_path, stemmed_patterns, nb_chars, max_snippets)
    elif CFG_WEBSEARCH_FULLTEXT_SNIPPETS_GENERATOR == 'SOLR':
        from invenio.legacy.miscutil.solrutils_bibindex_searcher import solr_get_snippet
        out = solr_get_snippet(patterns, recID, nb_chars, max_snippets)

    if not out:
        return ""

    out_courtesy = ""
    if CFG_INSPIRE_SITE and text_path_courtesy:
        out_courtesy = '<strong>Snippets courtesy of ' + text_path_courtesy + '</strong><br>'
    return '%s%s' % (out_courtesy, out)
def fix_recid(recid, logfile):
    """Fix a given recid.

    Calls ``fix`` on the record's BibRecDocs for every docname it reports,
    printing short progress to stdout and a detailed trace (including the
    BibRecDocs representation before and after) to ``logfile``.

    @param recid: ID of the record to fix
    @param logfile: writable file-like object receiving the detailed trace
    @return: True when all docnames were processed, False when an
        InvenioBibDocFileError interrupted the run
    """
    print("Upgrading record %s ->" % recid, end=' ')
    print("Upgrading record %s:" % recid, file=logfile)

    bibrec = BibRecDocs(recid)
    print(bibrec, file=logfile)
    docnames = bibrec.get_bibdoc_names()
    try:
        for docname in docnames:
            print(docname, end=' ')
            new_bibdocs = bibrec.fix(docname)
            new_bibdocnames = [
                bibrec.get_docname(bibdoc.id) for bibdoc in new_bibdocs
            ]
            if new_bibdocnames:
                created = "(created bibdocs: '%s')" % "', '".join(new_bibdocnames)
                print(created, end=' ')
                print(created, file=logfile)
    except InvenioBibDocFileError as e:
        print(BibRecDocs(recid), file=logfile)
        # Interpolate the error; passing it as a second print() argument
        # would output the literal "%s" placeholder.
        print("%s -> ERROR" % (e, ))
        return False
    else:
        print(BibRecDocs(recid), file=logfile)
        print("-> OK")
        return True
Exemple #32
0
def _bibdoc_modify_files(recid, form):
    """Apply the rename/delete requests found in a submitted form.

    Recognised form entries:
      * ``__file__name__<docid>``: value is the wanted document name; a
        rename is scheduled when it differs from the current one.
      * ``__file__delete__<docid>`` with value ``'Delete'``: the document is
        scheduled for deletion.

    A deletion always takes precedence over a rename of the same document,
    whatever order the form yields its keys in.

    @param recid: ID of the record whose documents are modified
    @param form: mapping of form field names to values
    @return: [] when the BibRecDocs object cannot be built, None otherwise
    """
    from invenio.legacy.bibdocfile.api import BibRecDocs
    try:
        recdocs = BibRecDocs(recid)
    except Exception:
        # %s rather than %d: a non-integer recid must not make the error
        # report itself raise TypeError.
        current_app.logger.error("REST API: Error while building BibRecDocs for record %s" % (recid,))
        return []

    name_prefix = '__file__name__'
    delete_prefix = '__file__delete__'
    actions = {}

    for (k, v) in form.items():
        if k.startswith(name_prefix):
            docid = int(k[len(name_prefix):])
            docname = recdocs.get_docname(docid)
            already_deleted = docid in actions and actions[docid][0] == 'delete'
            # Never let a rename replace a delete that was seen earlier.
            if docname != v and not already_deleted:
                actions[docid] = ('rename', docname, v)
        if k.startswith(delete_prefix) and v == 'Delete':
            docid = int(k[len(delete_prefix):])
            docname = recdocs.get_docname(docid)
            actions[docid] = ('delete', docname, None)  # overwrite rename

    for (act, docname, newname) in actions.values():
        if act == 'delete':
            current_app.logger.info("deleting bibdoc/file: {}/'{}'".format(recid, docname))
            recdocs.delete_bibdoc(docname)
        elif act == 'rename':
            current_app.logger.info("renaming bibdoc/file: {}/'{}' -> '{}'".format(recid, docname, newname))
            recdocs.change_name(newname=newname, oldname=docname)
def upload_fulltext(recid, path):
    """
    Attach the uploaded file at 'path' to the record 'recid'.

    The document name is the last '/'-separated component of 'path' cut at
    its first dot; the format is the text after the last dot of 'path', cut
    at the first ';'.

    @param recid: id of the record
    @param path: uploaded document to store
    @return: always the empty string
    """
    record_docs = BibRecDocs(recid)

    basename = path.rpartition('/')[2]
    docname = basename.partition('.')[0]

    extension = path.rpartition('.')[2]
    doctype = extension.partition(';')[0]

    record_docs.add_new_file(path,
                             CFG_DOCTYPE_UPLOAD_COLLECTION,
                             docname,
                             format=doctype)
    return ''
Exemple #34
0
 def _bibdocfile_update(obj, eng):
     """Run ``process`` on the deposition's record documents, then save it.

     ``process`` is not defined in this block; it is read from the
     surrounding scope. Nothing happens when it is falsy or when the latest
     unsealed SIP carries no 'recid'.
     """
     if not process:
         return
     deposition = Deposition(obj)
     recid = deposition.get_latest_sip(sealed=False).metadata.get('recid')
     if not recid:
         return
     record_docs = BibRecDocs(int(recid))
     process(deposition, record_docs)
     deposition.update()
Exemple #35
0
def xapian_add_all(lower_recid, upper_recid):
    """
    Add the field values of every record in a recid range to Xapian.

    For each recid from lower_recid to upper_recid (both included) the
    abstract, author, fulltext, keyword and title values are passed to
    xapian_add(). Extraction is best-effort: a field that cannot be
    retrieved or decoded is added as the empty string.

    @param lower_recid: first record ID of the range
    @param upper_recid: last record ID of the range (included)
    """
    xapian_init_databases()
    for recid in range(lower_recid, upper_recid + 1):
        # NOTE: every fallback below catches ``Exception`` rather than using
        # a bare ``except`` so that KeyboardInterrupt/SystemExit can still
        # stop the batch.
        try:
            abstract = unicode(
                get_fieldvalues(recid, CFG_MARC_ABSTRACT)[0], 'utf-8')
        except Exception:
            abstract = ""
        xapian_add(recid, "abstract", abstract)

        try:
            first_author = get_fieldvalues(recid, CFG_MARC_AUTHOR_NAME)[0]
            # reduce() with '' as seed prefixes every value with a space.
            additional_authors = reduce(
                lambda x, y: x + " " + y,
                get_fieldvalues(recid, CFG_MARC_ADDITIONAL_AUTHOR_NAME), '')
            author = unicode(first_author + " " + additional_authors, 'utf-8')
        except Exception:
            author = ""
        xapian_add(recid, "author", author)

        try:
            bibrecdocs = BibRecDocs(recid)
            fulltext = unicode(bibrecdocs.get_text(), 'utf-8')
        except Exception:
            fulltext = ""
        xapian_add(recid, "fulltext", fulltext)

        try:
            keyword = unicode(
                reduce(lambda x, y: x + " " + y,
                       get_fieldvalues(recid, CFG_MARC_KEYWORD), ''), 'utf-8')
        except Exception:
            keyword = ""
        xapian_add(recid, "keyword", keyword)

        try:
            title = unicode(get_fieldvalues(recid, CFG_MARC_TITLE)[0], 'utf-8')
        except Exception:
            title = ""
        xapian_add(recid, "title", title)
def bst_openaire_check_rights():
    """
    Tasklet to verify access rights consistency.

    For every access-rights key configured in CFG_ACCESS_RIGHTS_KEYS, the
    records found by searching that key in field 542__l are inspected and
    any bibdoc whose status differs from the expected one is updated.

    Raises Exception when an embargoed record has no value in 942__a.
    """
    status_templates = {
        'cc0': '',
        'openAccess': '',
        'closedAccess': 'status: closedAccess',
        'restrictedAccess': 'status: restrictedAccess',
        'embargoedAccess': 'firerole: deny until "%(date)s"\nallow any',
    }

    # Nothing in this function appends to this list; it is only read below.
    errors = []

    configured_keys = dict(current_app.config['CFG_ACCESS_RIGHTS_KEYS']).keys()

    for access_rights in configured_keys:
        write_message(
            "Checking records with access rights '%s'" % access_rights)
        matching_recids = search_pattern(p=access_rights, f="542__l")

        for recid in matching_recids:
            embargo_date = ''
            if access_rights == 'embargoedAccess':
                try:
                    embargo_date = get_fieldvalues(recid, "942__a")[0]
                except IndexError:
                    raise Exception(
                        "Embargoed record %s is missing embargo date in 942__a"
                        % recid
                    )
            expected_status = status_templates[access_rights] % {
                'date': embargo_date}

            for bibdoc in BibRecDocs(recid).list_bibdocs():
                current_status = bibdoc.get_status()
                if current_status == expected_status:
                    continue
                bibdoc.set_status(expected_status)
                write_message(
                    "Fixed record %s with wrong status. From: %s To: %s" %
                    (recid, current_status, expected_status))

    for error in errors:
        write_message(error)
Exemple #37
0
def download_one(recid, version):
    """Download given version of the PDF from arxiv

    Only the first arXiv id of the record is handled; a warning is written
    for every additional one. Raises PdfNotAvailable when a small download
    contains the text 'PDF unavailable', and FoundExistingPdf when the
    download matches the md5 of the existing file.
    """
    write_message('fetching %s' % recid)
    for count, arxiv_id in enumerate(extract_arxiv_ids_from_recid(recid)):
        if count != 0:
            write_message("Warning: %s has multiple arxiv #" % recid)
            continue

        url_for_pdf = build_arxiv_url(arxiv_id, version)
        filename_arxiv_id = arxiv_id.replace('/', '_')
        temp_file = NamedTemporaryFile(prefix="arxiv-pdf-checker",
                                       dir=CFG_TMPSHAREDDIR,
                                       suffix="%s.pdf" % filename_arxiv_id)
        write_message('downloading pdf from %s' % url_for_pdf)
        path = download_external_url(url_for_pdf,
                                     temp_file.name,
                                     content_type='pdf')

        # Small downloads may be an html "not found" page instead of a PDF
        if os.path.getsize(path) < 25000:
            with open(path) as downloaded:
                for line in downloaded:
                    if 'PDF unavailable' in line:
                        raise PdfNotAvailable()

        docs = BibRecDocs(recid)
        bibdocfiles = docs.list_latest_files(doctype="arXiv")

        try:
            bibdocfile = bibdocfiles[0]
        except IndexError:
            # No arXiv file attached yet: the download is always needed.
            bibdocfile = None
            needs_update = True
        else:
            md5_existing = calculate_md5(bibdocfile.fullpath)
            md5_downloaded = calculate_md5(path.encode('utf-8'))
            needs_update = md5_downloaded != md5_existing
            if needs_update:
                write_message('md5 differs updating')
            else:
                write_message('md5 matches existing pdf, skipping')

        if not needs_update:
            raise FoundExistingPdf()

        if bibdocfiles:
            write_message('adding as new version')
            docs.add_new_version(path, docname=bibdocfile.name)
        else:
            write_message('adding as new file')
            docs.add_new_file(path,
                              doctype="arXiv",
                              docname="arXiv:%s" % filename_arxiv_id)
 def setUp(self):
     """Attach data/test.jpg to record 2 under a newly proposed docname."""
     from invenio.legacy.bibdocfile.api import BibRecDocs
     record_docs = BibRecDocs(2)
     self.my_bibrecdoc = record_docs
     docname = record_docs.propose_unique_docname('file')
     self.unique_name = docname
     jpg_path = pkg_resources.resource_filename(
         'invenio_demosite.testsuite.regression', 'data/test.jpg')
     new_bibdoc = record_docs.add_new_file(jpg_path, docname=docname)
     self.my_bibdoc = new_bibdoc
     self.my_bibdoc_id = new_bibdoc.id
Exemple #39
0
def bst_openaire_check_rights():
    """
    Tasklet to verify access rights consistency.

    Walks the access-rights keys from CFG_ACCESS_RIGHTS_KEYS, looks up the
    records carrying each key in 542__l and rewrites the status of every
    bibdoc that does not match the status expected for that key.

    Raises Exception when an embargoed record has no 942__a value.
    """
    restrictions = {
        'cc0': '',
        'openAccess': '',
        'closedAccess': 'status: closedAccess',
        'restrictedAccess': 'status: restrictedAccess',
        'embargoedAccess': 'firerole: deny until "%(date)s"\nallow any',
    }

    def embargo_date_of(recid):
        # First 942__a value of the record; its absence is fatal.
        try:
            return get_fieldvalues(recid, "942__a")[0]
        except IndexError:
            raise Exception(
                "Embargoed record %s is missing embargo date in 942__a"
                % recid)

    # This list is never filled in this function; the final loop only reads it.
    errors = []

    keys = dict(current_app.config['CFG_ACCESS_RIGHTS_KEYS']).keys()

    for access_rights in keys:
        write_message("Checking records with access rights '%s'" %
                      access_rights)

        for recid in search_pattern(p=access_rights, f="542__l"):
            if access_rights == 'embargoedAccess':
                date = embargo_date_of(recid)
            else:
                date = ''
            expected_status = restrictions[access_rights] % {'date': date}

            for bibdoc in BibRecDocs(recid).list_bibdocs():
                real_status = bibdoc.get_status()
                if real_status != expected_status:
                    bibdoc.set_status(expected_status)
                    write_message(
                        "Fixed record %s with wrong status. From: %s To: %s" %
                        (recid, real_status, expected_status))

    for message in errors:
        write_message(message)
Exemple #40
0
 def _get_text(self, recid):
     """Return the fulltext document dict for ``recid``, or None without text.

     The text is read with ``BibRecDocs(recid).get_text(True)``; when it is
     empty a debug message is logged through ``self.app.logger``.
     """
     from invenio.legacy.bibdocfile.api import BibRecDocs
     fulltext = BibRecDocs(recid).get_text(True)
     if fulltext:
         return {
             "fulltext": fulltext,
             "recid": recid,
             "_id": recid,
             "_parent": recid
         }
     self.app.logger.debug("No text for:%s" % recid)
     return None
def solr_add_range(lower_recid, upper_recid, tags_to_index, next_commit_counter):
    """
    Add the field values of every existing record in a recid range to Solr.

    Records for which record_exists() is falsy are skipped. The fulltext is
    extracted on a best-effort basis: if it cannot be read or decoded, the
    record is indexed with an empty fulltext.

    @param lower_recid: first record ID of the range
    @param upper_recid: last record ID of the range (included)
    @param tags_to_index: passed through to get_field_content_in_utf8()
    @param next_commit_counter: counter threaded through
        solr_commit_if_necessary() after every added record
    @return: the commit counter after the last processed record
    """
    for recid in range(lower_recid, upper_recid + 1):
        if not record_exists(recid):
            continue

        abstract = get_field_content_in_utf8(recid, 'abstract', tags_to_index)
        author = get_field_content_in_utf8(recid, 'author', tags_to_index)
        keyword = get_field_content_in_utf8(recid, 'keyword', tags_to_index)
        title = get_field_content_in_utf8(recid, 'title', tags_to_index)
        try:
            bibrecdocs = BibRecDocs(recid)
            fulltext = unicode(bibrecdocs.get_text(), 'utf-8')
        except Exception:
            # ``Exception`` (not a bare except) so that KeyboardInterrupt and
            # SystemExit can still stop the indexing run.
            fulltext = ''

        solr_add(recid, abstract, author, fulltext, keyword, title)
        next_commit_counter = solr_commit_if_necessary(next_commit_counter, recid=recid)

    return next_commit_counter
Exemple #42
0
def format_element(bfo, template='record_hb.html', subformat_re='icon.*', as_url=False, **kwargs):
    """
    Render the icon of the first bibdoc attached to the record.

    The icon matching 'subformat_re' is preferred; when there is none, the
    document's default icon (``get_icon()`` without arguments) is used.
    Previously that fallback icon was fetched but never rendered.

    @param bfo: BibFormat object; only ``bfo.recID`` is read directly here
    @param template: not used by this function body
    @param subformat_re: regular expression selecting the icon subformat
    @param as_url: when true, return the icon URL instead of rendered HTML
    @return: the icon URL or the output of ``template_icon.render``; ""
        when the record has no bibdoc or the bibdoc has no icon
    """
    docs = BibRecDocs(bfo.recID).list_bibdocs()
    if not docs:
        return ""

    doc = docs[0]
    icon = doc.get_icon(subformat_re=re.compile(subformat_re))
    if not icon:
        # No icon for the requested subformat: fall back to the default one.
        icon = doc.get_icon()
        if not icon:
            return ""

    if as_url:
        return icon.get_url()

    ctx = {
        'icon': icon,
        'bfo': bfo,
        'CFG_SITE_URL': current_app.config['CFG_SITE_URL'],
    }
    return template_icon.render(**ctx)
Exemple #43
0
    def _add_record_variables():
        """Build the template context for the record named in ``kwargs``.

        ``kwargs`` is read from the surrounding scope. The context holds the
        latest non-hidden, non-icon files and the record itself.
        """
        from invenio.legacy.bibdocfile.api import BibRecDocs
        from invenio.modules.records.api import get_record

        recid = kwargs['recid']
        latest_files = BibRecDocs(
            recid, human_readable=True).list_latest_files(list_hidden=False)
        ctx = {
            'daap_files': [
                bibdocfile for bibdocfile in latest_files
                if not bibdocfile.is_icon()
            ],
            # this updates the DB, but avoids ugly caching
            'daap_record': get_record(recid, True),
        }
        return ctx
Exemple #44
0
 def tearDown(self):
     """Delete the 'img_test', 'file' and 'test' bibdocs of record 2."""
     from invenio.legacy.bibdocfile.api import BibRecDocs
     record_docs = BibRecDocs(2)
     for docname in ('img_test', 'file', 'test'):
         record_docs.delete_bibdoc(docname)
def Move_Files_Archive(parameters, curdir, form, user_info=None):
    """DEPRECATED: Use FFT instead.

    Synchronise the record's 'Main' and 'Additional' bibdocs with the files
    found in ``<curdir>/files/MainFiles`` and ``<curdir>/files/AdditionalFiles``:
    bibdocs of the matching doctype whose docname has no file in the
    directory are deleted, then every file of the directory is added.

    The file path handed to ``add_new_file`` is now the real directory
    entry. Previously the *list* of formats was interpolated into the path
    (giving e.g. ``name['.pdf']``), so the path could never exist.

    @param parameters: not used by this function body
    @param curdir: submission directory containing the ``files`` folder
    @param form: not used by this function body
    @param user_info: not used by this function body
    @return: always the empty string
    """
    watcheddirs = {
        'Main': "%s/files/MainFiles" % curdir,
        'Additional': "%s/files/AdditionalFiles" % curdir,
    }
    for doctype, dirpath in iteritems(watcheddirs):
        if not os.path.exists(dirpath):
            continue
        # Group the directory entries by the name decompose_file() extracts
        # (presumably the document name without extension -- confirm).
        entries_by_docname = {}
        for entry in sorted(os.listdir(dirpath)):
            dummy, docname, dummy_extension = decompose_file(entry)
            entries_by_docname.setdefault(docname, []).append(entry)
        # first delete all missing files
        # NOTE(review): ``sysno`` is not defined in this block; presumably it
        # is a module-level global provided by the caller -- confirm.
        bibarchive = BibRecDocs(sysno)
        for existing_bibdoc in bibarchive.list_bibdocs(doctype):
            if bibarchive.get_docname(existing_bibdoc.id) not in entries_by_docname:
                existing_bibdoc.delete()
        # then create/update the new ones
        # NOTE(review): each file is added through add_new_file(); whether
        # extra formats of one document should use another API call needs
        # confirming against the BibRecDocs API.
        for docname in entries_by_docname:
            for entry in entries_by_docname[docname]:
                bibarchive.add_new_file('%s/%s' % (dirpath, entry), doctype=doctype, never_fail=True)
    return ""
def get_files_from_bibdoc(recid):
    """
    Collect, through BibRecDocs, a description of each latest file of a record.

    @param recid: ID of the record; a falsy or negative value yields []

    @return List of dictionaries, one per file returned by
            list_latest_files(), holding the values of the file's getters;
            the empty list when recid is rejected or BibRecDocs raises
            InvenioBibDocFileError
    """
    if not recid or recid < 0:
        return []

    from invenio.legacy.bibdocfile.api import BibRecDocs, InvenioBibDocFileError
    try:
        bibrecdocs = BibRecDocs(int(recid))
    except InvenioBibDocFileError:
        return []

    return [
        {
            'comment': afile.get_comment(),
            'description': afile.get_description(),
            'eformat': afile.get_format(),
            'full_name': afile.get_full_name(),
            'full_path': afile.get_full_path(),
            'magic': afile.get_magic(),
            'name': afile.get_name(),
            'path': afile.get_path(),
            'size': afile.get_size(),
            'status': afile.get_status(),
            'subformat': afile.get_subformat(),
            'superformat': afile.get_superformat(),
            'type': afile.get_type(),
            'url': afile.get_url(),
            'version': afile.get_version(),
        }
        for afile in bibrecdocs.list_latest_files()
    ]
def format_element(bfo):
    """Format element function to create the select and option elements
    with HTML5 data attributes that store all the necessary metadata to
    construct video sources with JavaScript.

    Every file of every bibdoc of the record is inspected: video files
    (.mp4/.webm/.ogv) and poster images carrying one of the known size
    subformats contribute their URL to the option of that size.
    """
    video_formats = ('.mp4', '.webm', '.ogv')
    video_sizes = ('360p', '480p', '720p', '1080p')
    poster_comments = ('SUGGESTIONTUMB', 'BIGTHUMB', 'POSTER', 'SMALLTHUMB')

    def empty_entry(width, height):
        # Size description with no poster and no source URL found yet.
        return {'width': width, 'height': height, 'poster': None,
                'mp4': None, 'webm': None, 'ogv': None}

    videos = {
        '360p': empty_entry(640, 360),
        '480p': empty_entry(854, 480),
        '720p': empty_entry(1280, 720),
        '1080p': empty_entry(1920, 1080),
    }

    ## Go through all the BibDocs and search for video related signatures
    for bibdoc in BibRecDocs(bfo.recID).list_bibdocs():
        for bibdocfile in bibdoc.list_all_files():
            if bibdocfile.get_superformat() in video_formats and bibdocfile.get_subformat() in video_sizes:
                ## Video signature: remember the url under its size and codec
                url = bibdocfile.get_url()
                codec = bibdocfile.get_superformat()[1:]
                size = bibdocfile.get_subformat()
                videos[size][codec] = url
            elif bibdocfile.get_comment() in poster_comments and bibdocfile.get_subformat() in video_sizes:
                ## Poster signature: remember the url as poster of its size
                url = bibdocfile.get_url()
                size = bibdocfile.get_subformat()
                videos[size]['poster'] = url

    ## One option element per size for which at least one video url exists
    select_options = [
        create_option_element(url_webm=entry['webm'], url_ogv=entry['ogv'], url_mp4=entry['mp4'],
                              url_poster=entry['poster'], width=entry['width'], height=entry['height'],
                              subformat=size)
        for size, entry in iteritems(videos)
        if entry['mp4'] or entry['webm'] or entry['ogv']
    ]
    return create_select_element(select_options)
def format_element(bfo):
    """Format element function to create the select and option elements
    with HTML5 data attributes that store all the necessary metadata to
    construct video sources with JavaScript.

    Every file of every bibdoc of the record is inspected: video files
    (.mp4/.webm/.ogv) and files commented 'POSTER' that carry one of the
    known size subformats contribute their URL to the option of that size.
    """
    video_formats = ('.mp4', '.webm', '.ogv')
    video_sizes = ('360p', '480p', '720p', '1080p')
    videos = {
             '360p': {'width': 640, 'height': 360, 'poster': None, 'mp4': None, 'webm': None, 'ogv': None},
             '480p': {'width': 854, 'height': 480, 'poster': None, 'mp4': None, 'webm': None, 'ogv': None},
             '720p': {'width': 1280, 'height': 720, 'poster': None, 'mp4': None, 'webm': None, 'ogv': None},
             '1080p': {'width': 1920, 'height': 1080, 'poster': None, 'mp4': None, 'webm': None, 'ogv': None}
             }
    recdoc = BibRecDocs(bfo.recID)
    bibdocs = recdoc.list_bibdocs()
    ## Go through all the BibDocs and search for video related signatures
    for bibdoc in bibdocs:
        bibdocfiles = bibdoc.list_all_files()
        for bibdocfile in bibdocfiles:
            ## When a video signature is found, add the url to the videos dictionary
            if bibdocfile.get_superformat() in video_formats and bibdocfile.get_subformat() in video_sizes:
                src = bibdocfile.get_url()
                codec = bibdocfile.get_superformat()[1:]
                size = bibdocfile.get_subformat()
                videos[size][codec] = src
            ## When a poster signature is found, add the url to the videos dictionary
            ## Compare for equality: ``in ('POSTER')`` tested against a plain
            ## string, so empty or partial comments matched as well.
            elif bibdocfile.get_comment() == 'POSTER' and bibdocfile.get_subformat() in video_sizes:
                src = bibdocfile.get_url()
                size = bibdocfile.get_subformat()
                videos[size]['poster'] = src
    ## Build video select options for every video size format that was found
    select_options = []
    for key, options in iteritems(videos):
        ## If we have at least one url, the format is available
        if options['mp4'] or options['webm'] or options['ogv']:
            ## create an option element
            option_element = create_option_element(url_webm=options['webm'], url_ogv=options['ogv'], url_mp4=options['mp4'],
                                                   url_poster=options['poster'], width=options['width'], height=options['height'],
                                                   subformat=key)
            select_options.append(option_element)
    select_element = create_select_element(select_options)
    return select_element
def xapian_add_all(lower_recid, upper_recid):
    """
    Add the field values of every record in a recid range to Xapian.

    For each recid from lower_recid to upper_recid (both included) the
    abstract, author, fulltext, keyword and title values are passed to
    xapian_add(). Extraction is best-effort: a field that cannot be
    retrieved or decoded is added as the empty string.

    @param lower_recid: first record ID of the range
    @param upper_recid: last record ID of the range (included)
    """
    xapian_init_databases()
    for recid in range(lower_recid, upper_recid + 1):
        # NOTE: the fallbacks catch ``Exception`` instead of using a bare
        # ``except`` so KeyboardInterrupt/SystemExit still abort the batch.
        try:
            abstract = unicode(get_fieldvalues(recid, CFG_MARC_ABSTRACT)[0], 'utf-8')
        except Exception:
            abstract = ""
        xapian_add(recid, "abstract", abstract)

        try:
            first_author = get_fieldvalues(recid, CFG_MARC_AUTHOR_NAME)[0]
            # reduce() with '' as seed prefixes every value with a space.
            additional_authors = reduce(lambda x, y: x + " " + y, get_fieldvalues(recid, CFG_MARC_ADDITIONAL_AUTHOR_NAME), '')
            author = unicode(first_author + " " + additional_authors, 'utf-8')
        except Exception:
            author = ""
        xapian_add(recid, "author", author)

        try:
            bibrecdocs = BibRecDocs(recid)
            fulltext = unicode(bibrecdocs.get_text(), 'utf-8')
        except Exception:
            fulltext = ""
        xapian_add(recid, "fulltext", fulltext)

        try:
            keyword = unicode(reduce(lambda x, y: x + " " + y, get_fieldvalues(recid, CFG_MARC_KEYWORD), ''), 'utf-8')
        except Exception:
            keyword = ""
        xapian_add(recid, "keyword", keyword)

        try:
            title = unicode(get_fieldvalues(recid, CFG_MARC_TITLE)[0], 'utf-8')
        except Exception:
            title = ""
        xapian_add(recid, "title", title)
Exemple #50
0
    def _getfile_py(req,
                    recid=0,
                    docid=0,
                    version="",
                    name="",
                    docformat="",
                    ln=CFG_SITE_LANG):
        """Redirect a legacy getfile request to the record's files URL.

        The record is identified by ``recid`` or, failing that, looked up
        from ``docid``. When no ``name`` is given it is resolved from
        ``docid`` in both cases. A warning page is returned when neither
        identifier is usable or a lookup fails.

        @param req: request object handed to warning_page/redirect_to_url
        @param recid: record ID, or 0 to derive it from docid
        @param docid: bibdoc ID, or 0
        @param version: file version; appended as ``&version=...`` when set
        @param name: document name; resolved from docid when empty
        @param docformat: file format, normalised with normalize_format()
        @param ln: language code put in the ``ln`` URL argument
        """
        if not recid:
            if not docid:
                return warning_page(
                    _('Not enough information to retrieve the document'), req,
                    ln)
            ## Let's obtain the recid from the docid
            try:
                bibdoc = BibDoc(docid=docid)
                recid = bibdoc.bibrec_links[0]["recid"]
            except (InvenioBibDocFileError, IndexError):
                # IndexError: the bibdoc has no record link at all.
                return warning_page(
                    _("An error has happened in trying to retrieve the requested file."
                      ), req, ln)

        if not name and docid:
            ## Let's obtain the name from the docid (also when the recid
            ## itself was derived from the docid just above)
            try:
                name = BibRecDocs(recid).get_docname(docid)
            except InvenioBibDocFileError:
                return warning_page(
                    _("An error has happened in trying to retrieving the requested file."
                      ), req, ln)

        docformat = normalize_format(docformat)

        # The version is a second query argument and needs its own '&'.
        version_arg = '&version=%s' % version if version else ''
        redirect_to_url(
            req, '%s/%s/%s/files/%s%s?ln=%s%s' %
            (CFG_SITE_URL, CFG_SITE_RECORD, recid, name, docformat, ln,
             version_arg),
            apache.HTTP_MOVED_PERMANENTLY)
def get_files_from_bibdoc(recid):
    """
    Describe every latest file attached to a record, using BibRecDocs.

    @param recid: ID of the record; a falsy or negative value yields []

    @return List of dictionaries, one per file returned by
            list_latest_files(), holding the values of the file's getters;
            the empty list when recid is rejected or BibRecDocs raises
            InvenioBibDocFileError
    """
    if not recid or recid < 0:
        return []

    from invenio.legacy.bibdocfile.api import BibRecDocs, InvenioBibDocFileError
    try:
        record_docs = BibRecDocs(int(recid))
    except InvenioBibDocFileError:
        return []

    described_files = []
    for bibdocfile in record_docs.list_latest_files():
        described_files.append({
            'comment': bibdocfile.get_comment(),
            'description': bibdocfile.get_description(),
            'eformat': bibdocfile.get_format(),
            'full_name': bibdocfile.get_full_name(),
            'full_path': bibdocfile.get_full_path(),
            'magic': bibdocfile.get_magic(),
            'name': bibdocfile.get_name(),
            'path': bibdocfile.get_path(),
            'size': bibdocfile.get_size(),
            'status': bibdocfile.get_status(),
            'subformat': bibdocfile.get_subformat(),
            'superformat': bibdocfile.get_superformat(),
            'type': bibdocfile.get_type(),
            'url': bibdocfile.get_url(),
            'version': bibdocfile.get_version(),
        })
    return described_files
Exemple #52
0
    def _getfile_py(req, recid=0, docid=0, version="", name="", docformat="", ln=CFG_SITE_LANG):
        """Redirect a legacy getfile request to the record's files URL.

        The record is identified by ``recid`` or, failing that, looked up
        from ``docid``. When no ``name`` is given it is resolved from
        ``docid`` in both cases. A warning page is returned when neither
        identifier is usable or a lookup fails.

        @param req: request object handed to warning_page/redirect_to_url
        @param recid: record ID, or 0 to derive it from docid
        @param docid: bibdoc ID, or 0
        @param version: file version; appended as ``&version=...`` when set
        @param name: document name; resolved from docid when empty
        @param docformat: file format, normalised with normalize_format()
        @param ln: language code put in the ``ln`` URL argument
        """
        if not recid:
            if not docid:
                return warning_page(_('Not enough information to retrieve the document'), req, ln)
            ## Let's obtain the recid from the docid
            try:
                bibdoc = BibDoc(docid=docid)
                recid = bibdoc.bibrec_links[0]["recid"]
            except (InvenioBibDocFileError, IndexError):
                # IndexError: the bibdoc has no record link at all.
                return warning_page(_("An error has happened in trying to retrieve the requested file."), req, ln)

        if not name and docid:
            ## Let's obtain the name from the docid (also when the recid
            ## itself was derived from the docid just above)
            try:
                name = BibRecDocs(recid).get_docname(docid)
            except InvenioBibDocFileError:
                return warning_page(_("An error has happened in trying to retrieving the requested file."), req, ln)

        docformat = normalize_format(docformat)

        # The version is a second query argument and needs its own '&'.
        version_arg = '&version=%s' % version if version else ''
        redirect_to_url(req, '%s/%s/%s/files/%s%s?ln=%s%s' % (CFG_SITE_URL, CFG_SITE_RECORD, recid, name, docformat, ln, version_arg), apache.HTTP_MOVED_PERMANENTLY)
Exemple #53
0
def _bibdoc_file_list(recid):
    """List the bibdocs of a record as plain dictionaries.

    For each bibdoc that has at least one latest file, the first latest
    file provides the name (split into base name and extension) and size.

    @param recid: ID of the record
    @return: list of dicts with keys 'id', 'name', 'type' (extension
        without the leading dot) and 'size'; [] when the BibRecDocs object
        cannot be built
    """
    import os
    from invenio.legacy.bibdocfile.api import BibRecDocs
    try:
        recdocs = BibRecDocs(recid)
    except Exception:
        # %s rather than %d: a non-integer recid must not make the error
        # report itself raise TypeError.
        current_app.logger.error("REST API: Error while building BibRecDocs for record %s" % (recid,))
        return []
    files = []
    for d in recdocs.list_bibdocs():
        df = d.list_latest_files()
        if not df:
            # bibdoc without any file: nothing to report
            continue
        filename = df[0].get_full_name().decode('utf-8')
        docname, doctype = os.path.splitext(filename)
        if doctype.startswith('.'):
            doctype = doctype[1:]
        files.append({
            'id': d.get_id(),
            'name': docname,
            'type': doctype,
            'size': df[0].get_size(),
        })
    return files
def fix_recid(recid, logfile):
    """Fix a given recid.

    Calls ``fix`` on the record's BibRecDocs for every docname it reports,
    printing short progress to stdout and a detailed trace (including the
    BibRecDocs representation before and after) to ``logfile``.

    @param recid: ID of the record to fix
    @param logfile: writable file-like object receiving the detailed trace
    @return: True when all docnames were processed, False when an
        InvenioBibDocFileError interrupted the run
    """
    print("Upgrading record %s ->" % recid, end=' ')
    print("Upgrading record %s:" % recid, file=logfile)

    bibrec = BibRecDocs(recid)
    print(bibrec, file=logfile)
    docnames = bibrec.get_bibdoc_names()
    try:
        for docname in docnames:
            print(docname, end=' ')
            new_bibdocs = bibrec.fix(docname)
            new_bibdocnames = [bibrec.get_docname(bibdoc.id) for bibdoc in new_bibdocs]
            if new_bibdocnames:
                created = "(created bibdocs: '%s')" % "', '".join(new_bibdocnames)
                print(created, end=' ')
                print(created, file=logfile)
    except InvenioBibDocFileError as e:
        print(BibRecDocs(recid), file=logfile)
        # Interpolate the error; passing it as a second print() argument
        # would output the literal "%s" placeholder.
        print("%s -> ERROR" % (e, ))
        return False
    else:
        print(BibRecDocs(recid), file=logfile)
        print("-> OK")
        return True
Exemple #55
0
def download_one(recid, version):
    """Download the given version of a record's arXiv PDF and attach it.

    Only the first arXiv id extracted from the record is processed; each
    additional id merely produces a warning message.

    The downloaded file is attached as a new version when the record already
    has a latest file of doctype "arXiv" with a different md5, or as a new
    file when it has none.

    Raises PdfNotAvailable when a small download (under 25000 bytes) contains
    the text "PDF unavailable", and FoundExistingPdf when the md5 of the
    download matches the existing file.
    """
    write_message("fetching %s" % recid)
    for position, arxiv_id in enumerate(extract_arxiv_ids_from_recid(recid)):
        if position > 0:
            write_message("Warning: %s has multiple arxiv #" % recid)
            continue

        pdf_url = build_arxiv_url(arxiv_id, version)
        safe_arxiv_id = arxiv_id.replace("/", "_")
        # The temporary file object stays bound to a local while its name is
        # used as the download target.
        temp_file = NamedTemporaryFile(
            prefix="arxiv-pdf-checker",
            dir=CFG_TMPSHAREDDIR,
            suffix="%s.pdf" % safe_arxiv_id,
        )
        write_message("downloading pdf from %s" % pdf_url)
        path = download_external_url(pdf_url, temp_file.name, content_type="pdf")

        # A small file may be an html "not found" page rather than a PDF
        if os.path.getsize(path) < 25000:
            with open(path) as downloaded:
                if any("PDF unavailable" in line for line in downloaded):
                    raise PdfNotAvailable()

        docs = BibRecDocs(recid)
        latest_files = docs.list_latest_files(doctype="arXiv")

        try:
            current = latest_files[0]
        except IndexError:
            # Nothing attached yet: the download always has to be stored.
            current = None
            outdated = True
        else:
            current_md5 = calculate_md5(current.fullpath)
            downloaded_md5 = calculate_md5(path.encode("utf-8"))
            outdated = downloaded_md5 != current_md5
            if outdated:
                write_message("md5 differs updating")
            else:
                write_message("md5 matches existing pdf, skipping")

        if not outdated:
            raise FoundExistingPdf()

        if latest_files:
            write_message("adding as new version")
            docs.add_new_version(path, docname=current.name)
        else:
            write_message("adding as new file")
            docs.add_new_file(path, doctype="arXiv", docname="arXiv:%s" % safe_arxiv_id)
Exemple #56
0
def format_element(bfo, separator=" ", style='', img_style='', text_style='font-size:small',
                   print_links='yes', max_photos='', show_comment='yes',
                   img_max_width='250px', display_all_version_links='yes'):
    """
    Display the photos attached to a record as BibDoc files.

    Only documents for which at least one icon file exists are shown. The
    smallest icon is used as the image; when several icons exist the second
    smallest one is the link target, otherwise the image links to the last
    non-icon file found.

    @param separator: separator between each photo
    @param print_links: if 'yes', print links to the original photo
    @param style: style attributes of the whole image block. Eg: "padding:2px;border:1px"
    @param img_style: style attributes of the images. Eg: "width:50px;border:none"
    @param text_style: style attributes of the text. Eg: "font-size:small"
    @param max_photos: the maximum number of photos to display
    @param show_comment: if 'yes', display the comment of each photo
    @param img_max_width: maximum width of each image and of its block. Eg: "250px"
    @param display_all_version_links: if 'yes', print links to additional (sub)formats
    """
    bibarchive = BibRecDocs(bfo.recID)
    bibdocs = bibarchive.list_bibdocs()

    # A non-numeric max_photos (such as the default '') means "no limit".
    limit = int(max_photos) if max_photos.isdigit() else len(bibdocs)

    photos = []
    for doc in bibdocs[:limit]:
        # Separate the icons from the other files of the document.
        icons = []
        original_url = ''
        for docfile in doc.list_latest_files():
            if not docfile.is_icon():
                original_url = get_relative_url(docfile.get_url())
                continue
            icons.append((docfile.get_size(),
                          get_relative_url(docfile.get_url())))
        icons.sort()

        if not icons:
            # No preview available for this document: nothing to display.
            continue

        version_links = ''
        name = bibarchive.get_docname(doc.id)
        comment = doc.list_latest_files()[0].get_comment()

        preview_url = None
        if len(icons) > 1:
            preview_url = get_relative_url(icons[1][1])
            # One link per non-icon file, ordered by size.
            alternatives = sorted(
                (candidate.get_size(),
                 get_relative_url(candidate.get_url()),
                 candidate.get_superformat(),
                 candidate.get_subformat())
                for candidate in doc.list_latest_files()
                if not candidate.is_icon())
            version_links = []
            for size, url, superformat, subformat in alternatives:
                version_links.append(create_html_link(
                    url,
                    urlargd={},
                    linkattrd={'style': 'font-size:x-small'},
                    link_label="%s %s (%s)" % (superformat.strip('.').upper(),
                                               subformat,
                                               format_size(size))))

        img_values = {
            'icon_url': cgi.escape(get_relative_url(icons[0][1]), True),
            'name': cgi.escape(name, True),
            'img_style': img_style,
            'img_max_width': img_max_width,
        }
        markup = '<img src="%(icon_url)s" alt="%(name)s" style="max-width:%(img_max_width)s;_width:%(img_max_width)s;%(img_style)s" />' % img_values

        if print_links.lower() == 'yes':
            target = cgi.escape(preview_url or original_url, True)
            markup = '<a href="%s">%s</a>' % (target, markup)
        if display_all_version_links.lower() == 'yes' and version_links:
            markup += '<br />' + '&nbsp;'.join(version_links) + '<br />'

        if show_comment.lower() == 'yes' and comment:
            comment_values = {
                'comment': comment.replace('\n', '<br/>'),
                'text_style': text_style,
            }
            markup += '<div style="margin-auto;text-align:center;%(text_style)s">%(comment)s</div>' % comment_values

        block_values = {
            'img_max_width': img_max_width,
            'style': style,
            'img': markup,
        }
        markup = '<div style="vertical-align: middle;text-align:center;display:inline-block;display: -moz-inline-stack;zoom: 1;*display: inline;max-width:%(img_max_width)s;_width:%(img_max_width)s;text-align:center;%(style)s">%(img)s</div>' % block_values

        photos.append(markup)

    return '<div>' + separator.join(photos) + '</div>'