def test_BibRecDocs(self):
    """bibdocfile - BibRecDocs functions"""
    # Walks through the record-level BibRecDocs API on record 2. Each step
    # mutates the documents attached to the record, so statement order matters.
    my_bibrecdoc = BibRecDocs(2)
    # add bibdoc: attach test.jpg as 'img_test', then create an empty 'file'
    my_bibrecdoc.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg',
                              'Main', 'img_test', False,
                              'test add new file', 'test', '.jpg')
    my_bibrecdoc.add_bibdoc(doctype='Main', docname='file', never_fail=False)
    self.assertEqual(len(my_bibrecdoc.list_bibdocs()), 3)
    my_added_bibdoc = my_bibrecdoc.get_bibdoc('file')
    # add bibdocfile in empty bibdoc
    my_added_bibdoc.add_file_new_version(
        CFG_PREFIX + '/lib/webtest/invenio/test.gif',
        description='added in empty bibdoc', comment=None, format=None,
        flags=['PERFORM_HIDE_PREVIOUS'])
    # propose unique docname
    self.assertEqual(my_bibrecdoc.propose_unique_docname('file'), 'file_2')
    # has docname
    self.assertEqual(my_bibrecdoc.has_docname_p('file'), True)
    # merge 2 bibdocs: 'img_test' should then hold both files
    my_bibrecdoc.merge_bibdocs('img_test', 'file')
    self.assertEqual(
        len(my_bibrecdoc.get_bibdoc("img_test").list_all_files()), 2)
    # check file exists
    self.assertEqual(
        my_bibrecdoc.check_file_exists(
            CFG_PREFIX + '/lib/webtest/invenio/test.jpg'), True)
    # get bibdoc names: compare as a set so the test does not depend on the
    # order in which the names are returned
    names = set(my_bibrecdoc.get_bibdoc_names('Main')[:2])
    self.assertTrue('0104007_02' in names)
    self.assertTrue('img_test' in names)
    # get total size
    self.assertEqual(my_bibrecdoc.get_total_size(), 1647591)
    # get total size latest version
    self.assertEqual(my_bibrecdoc.get_total_size_latest_version(), 1647591)
    # display
    value = my_bibrecdoc.display(docname='img_test', version='', doctype='',
                                 ln='en', verbose=0, display_hidden=True)
    self.assertTrue("<small><b>Main</b>" in value)
    # get xml 8564
    value = my_bibrecdoc.get_xml_8564()
    self.assertTrue('/record/2/files/img_test.jpg</subfield>' in value)
    # check duplicate docnames
    self.assertEqual(my_bibrecdoc.check_duplicate_docnames(), True)
def _goto_circular_url(collection, pattern, number, document, sep):
    """Return the URL of the '.pdf' file of one document of a circular.

    The circular is the last record id returned by a search in ``collection``
    for ``pattern % number`` (sorted on field 925__a). Each of its document
    names is classified by substring and registered under a key such as
    ``'implementation' + sep + 'en'``, ``'annex' + sep + 'fr'``, ``'en'`` or
    ``'fr'``; the document registered under ``document`` is then resolved.

    :param collection: name of the collection to search in.
    :param pattern: search pattern containing one ``%s`` for ``number``.
    :param number: circular number substituted into ``pattern``.
    :param document: key of the wanted document.
    :param sep: separator between document kind and language in the keys
                ('_' for operational, '-' for administrative circulars).
    :raises IndexError: if the search returns no record.
    :raises KeyError: if nothing was registered under ``document``.
    """
    recids = perform_request_search(cc=collection, p=pattern % number,
                                    sf="925__a")
    recid = recids[-1]
    documents = {}
    bibrecdocs = BibRecDocs(recid)
    for docname in bibrecdocs.get_bibdoc_names():
        ldocname = docname.lower()
        if 'implementation' in ldocname:
            key = 'implementation' + sep + 'en'
        elif 'application' in ldocname:
            key = 'implementation' + sep + 'fr'
        elif 'archiving' in ldocname:
            key = 'archiving' + sep + 'en'
        elif 'archivage' in ldocname:
            key = 'archiving' + sep + 'fr'
        elif 'annexes_en' in ldocname:
            # Must be tested before 'annexe': 'annexes_en' contains 'annexe'
            # and would otherwise be registered as a French annex.
            key = 'annex' + sep + 'en'
        elif 'annexe' in ldocname:
            # Also covers 'annexes_fr'.
            key = 'annex' + sep + 'fr'
        elif 'annex' in ldocname:
            key = 'annex' + sep + 'en'
        elif '_en_' in ldocname or '_eng_' in ldocname or '_angl_' in ldocname:
            key = 'en'
        elif '_fr_' in ldocname:
            key = 'fr'
        else:
            # Unrecognised document name: nothing to register.
            continue
        _register_document(documents, docname, key)
    return bibrecdocs.get_bibdoc(documents[document]).get_file('.pdf').get_url()


def goto(type, document='', number=0, lang='en', modif=0):
    """Resolve the URL of a CERN HR document.

    :param type: kind of document: 'SSR' (Staff Rules and Regulations),
                 'OPER-CIRC' (Operational Circulars) or 'ADMIN-CIRC'
                 (Administrative Circulars).
    :param document: key of the wanted document; used for circulars only.
    :param number: circular number; used for circulars only.
    :param lang: language passed to make_cern_ssr_docname; 'SSR' only.
    :param modif: modification passed to make_cern_ssr_docname; 'SSR' only.
    :returns: the URL of the '.pdf' file, or None when ``type`` is not one of
              the three supported values.
    :raises IndexError: if the underlying search returns no record.
    :raises KeyError: for circulars, if ``document`` was not found.
    """
    if type == 'SSR':
        ## We would like a CERN Staff Rules and Regulations
        today = time.strftime('%Y-%m-%d')
        recids = perform_request_search(cc='Staff Rules and Regulations', f="925__a:1996-01-01->%s 925__b:%s->9999-99-99" % (today, today))
        recid = recids[-1]
        reportnumber = get_fieldvalues(recid, '037__a')[0]
        edition = int(reportnumber[-2:]) ## e.g. CERN-STAFF-RULES-ED08
        return BibRecDocs(recid).get_bibdoc(make_cern_ssr_docname(lang, edition, modif)).get_file('.pdf').get_url()
    elif type == "OPER-CIRC":
        return _goto_circular_url("Operational Circulars",
                                  "reportnumber=\"CERN-OPER-CIRC-%s-*\"",
                                  number, document, '_')
    elif type == 'ADMIN-CIRC':
        return _goto_circular_url("Administrative Circulars",
                                  "reportnumber=\"CERN-ADMIN-CIRC-%s-*\"",
                                  number, document, '-')
    return None
    def test_BibRecDocs(self):
        """bibdocfile - BibRecDocs functions"""
        # Walks through the record-level BibRecDocs API on record 2. Each step
        # mutates the documents attached to the record, so order matters.
        from invenio.bibdocfile import BibRecDocs
        my_bibrecdoc = BibRecDocs(2)
        # add bibdoc: attach test.jpg as 'img_test', then create empty 'file'
        my_bibrecdoc.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg',
                                  'Main', 'img_test', False,
                                  'test add new file', 'test', '.jpg')
        my_bibrecdoc.add_bibdoc(doctype='Main',
                                docname='file',
                                never_fail=False)
        self.assertEqual(len(my_bibrecdoc.list_bibdocs()), 3)
        my_added_bibdoc = my_bibrecdoc.get_bibdoc('file')
        # add bibdocfile in empty bibdoc
        my_added_bibdoc.add_file_new_version(
            CFG_PREFIX + '/lib/webtest/invenio/test.gif',
            description='added in empty bibdoc', comment=None,
            docformat=None, flags=['PERFORM_HIDE_PREVIOUS'])
        # propose unique docname
        self.assertEqual(my_bibrecdoc.propose_unique_docname('file'), 'file_2')
        # has docname
        self.assertEqual(my_bibrecdoc.has_docname_p('file'), True)
        # merge 2 bibdocs: 'img_test' should then hold both files
        my_bibrecdoc.merge_bibdocs('img_test', 'file')
        self.assertEqual(
            len(my_bibrecdoc.get_bibdoc("img_test").list_all_files()), 2)
        # check file exists
        self.assertEqual(
            my_bibrecdoc.check_file_exists(
                CFG_PREFIX + '/lib/webtest/invenio/test.jpg', '.jpg'), True)
        # get bibdoc names
        # we can not rely on the order, so compare the first two as a set
        names = set(my_bibrecdoc.get_bibdoc_names('Main')[:2])
        self.assertTrue('0104007_02' in names)
        self.assertTrue('img_test' in names)

        # get total size
        self.assertEqual(my_bibrecdoc.get_total_size(), 1647591)
        # get total size latest version
        self.assertEqual(my_bibrecdoc.get_total_size_latest_version(), 1647591)
        # NOTE: the former my_bibrecdoc.display(...) check is disabled here.
        # get xml 8564
        value = my_bibrecdoc.get_xml_8564()
        self.assertTrue('/' + CFG_SITE_RECORD +
                        '/2/files/img_test.jpg</subfield>' in value)
        # check duplicate docnames
        self.assertEqual(my_bibrecdoc.check_duplicate_docnames(), True)
Ejemplo n.º 4
0
def check_records(records):
    """Align field 980__c of Hindawi records with the subject of their XML.

    For every record whose 980__b is "Hindawi", the "xml" file of the first
    attached document is parsed and its "subject" value is read:

    * for Editorial / Erratum / Corrigendum / Addendum / Letter to the Editor
      the upper-cased subject is written to 980__c (amending the first
      existing 980__c when it already holds one of those types, otherwise
      adding a 'c' subfield to the first 980 field);
    * Review Article / Research Article / Retraction are left untouched;
    * any other subject raises an Exception.

    Records whose XML file cannot be opened get a warning and are skipped.

    :param records: iterable of bibcheck record objects.
    :raises Exception: if a subject is not one of the known values.
    """
    retyped_subjects = ("Editorial", "Erratum", "Corrigendum", "Addendum",
                        "Letter to the Editor")
    known_types = ('ERRATUM', 'ADDENDUM', 'EDITORIAL', 'CORRIGENDUM',
                   'LETTER TO THE EDITOR')
    untouched_subjects = ("Review Article", "Research Article", "Retraction")

    for record in records:
        ## Stupid hack because bibcheck filters does not work as expected
        if record_get_field_value(record, '980', code='b') != "Hindawi":
            continue
        record.warn("Working on this record")
        recdoc = BibRecDocs(int(record.record_id))
        doc = recdoc.get_bibdoc(recdoc.get_bibdoc_names()[0])
        try:
            xml_file = open(doc.get_file("xml").get_full_path())
        except Exception:
            # Best effort: a missing or unreadable XML file only skips the
            # record. KeyboardInterrupt/SystemExit are no longer swallowed.
            record.warn("No document can be found")
            continue
        # Close the file as soon as its content has been read.
        with xml_file:
            xml_content = xml_file.read()
        xml2 = xml.dom.minidom.parseString(xml_content)
        subject = get_value_in_tag(xml2, "subject")
        if subject in retyped_subjects:
            field = record_get_field_value(record, '980', code='c')
            if field in known_types:
                # Replace the first existing 980__c value.
                for position, value in record.iterfield('980__c'):
                    record.amend_field(position, subject.upper())
                    break
            else:
                # No 980__c yet, or one holding an unrelated value: add a
                # new 'c' subfield to the first 980 field instead.
                for position, value in record.iterfield('980__%'):
                    record.add_subfield(position, 'c', subject.upper())
                    break
        elif subject not in untouched_subjects:
            raise Exception("This subject: %s does not exit in SCOAP3 system" % (subject,))
Ejemplo n.º 5
0
 def test_BibDocFiles(self):
     """bibdocfile - BibDocFile functions """
     # Attaches test.jpg to record 2 as 'img_test' and checks every accessor
     # of the resulting BibDocFile, then deletes the document again.
     #add bibdoc
     my_bibrecdoc = BibRecDocs(2)
     my_bibrecdoc.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg',
                               'Main', 'img_test', False,
                               'test add new file', 'test', '.jpg')
     my_new_bibdoc = my_bibrecdoc.get_bibdoc("img_test")
     my_new_bibdocfile = my_new_bibdoc.list_all_files()[0]
     #get url
     self.assertEqual(
         my_new_bibdocfile.get_url(),
         CFG_SITE_URL + '/%s/2/files/img_test.jpg' % CFG_SITE_RECORD)
     #get type
     self.assertEqual(my_new_bibdocfile.get_type(), 'Main')
     #get path
     self.assertTrue(
         my_new_bibdocfile.get_path().startswith(CFG_WEBSUBMIT_FILEDIR))
     self.assertTrue(
         my_new_bibdocfile.get_path().endswith('/img_test.jpg;1'))
     #get bibdocid
     self.assertEqual(my_new_bibdocfile.get_bibdocid(),
                      my_new_bibdoc.get_id())
     #get name
     self.assertEqual(my_new_bibdocfile.get_name(), 'img_test')
     #get full name
     self.assertEqual(my_new_bibdocfile.get_full_name(), 'img_test.jpg')
     #get full path
     self.assertTrue(my_new_bibdocfile.get_full_path().startswith(
         CFG_WEBSUBMIT_FILEDIR))
     self.assertTrue(
         my_new_bibdocfile.get_full_path().endswith('/img_test.jpg;1'))
     #get format
     self.assertEqual(my_new_bibdocfile.get_format(), '.jpg')
     #get version
     self.assertEqual(my_new_bibdocfile.get_version(), 1)
     #get description
     self.assertEqual(my_new_bibdocfile.get_description(),
                      my_new_bibdoc.get_description('.jpg', version=1))
     #get comment
     self.assertEqual(my_new_bibdocfile.get_comment(),
                      my_new_bibdoc.get_comment('.jpg', version=1))
     #get recid
     self.assertEqual(my_new_bibdocfile.get_recid(), 2)
     #get status
     self.assertEqual(my_new_bibdocfile.get_status(), '')
     #get size
     self.assertEqual(my_new_bibdocfile.get_size(), 91750)
     #get checksum
     self.assertEqual(my_new_bibdocfile.get_checksum(),
                      '28ec893f9da735ad65de544f71d4ad76')
     #check
     self.assertEqual(my_new_bibdocfile.check(), True)
     #display
     value = my_new_bibdocfile.display(ln='en')
     # Use a unittest assertion: a bare assert is stripped under python -O.
     self.assertTrue('files/img_test.jpg?version=1">' in value)
     #hidden?
     self.assertEqual(my_new_bibdocfile.hidden_p(), False)
     #delete
     my_new_bibdoc.delete()
     self.assertEqual(my_new_bibdoc.deleted_p(), True)
Ejemplo n.º 6
0
    def test_BibDocFiles(self):
        """bibdocfile - BibDocFile functions """
        # Attaches test.jpg to record 2 as 'img_test' with a fixed
        # modification date, checks every accessor of the resulting
        # BibDocFile, then deletes the document again.
        #add bibdoc
        from invenio.bibdocfile import BibRecDocs
        my_bibrecdoc = BibRecDocs(2)
        timestamp = datetime.strptime("2010-09-08 07:06:05",
                                      "%Y-%m-%d %H:%M:%S")
        my_bibrecdoc.add_new_file(
            CFG_PREFIX + '/lib/webtest/invenio/test.jpg', 'Main', 'img_test',
            False, 'test add new file', 'test', '.jpg',
            modification_date=timestamp)

        my_new_bibdoc = my_bibrecdoc.get_bibdoc("img_test")
        my_new_bibdocfile = my_new_bibdoc.list_all_files()[0]
        #get url
        self.assertEqual(
            my_new_bibdocfile.get_url(),
            CFG_SITE_URL + '/%s/2/files/img_test.jpg' % CFG_SITE_RECORD)
        #get type
        self.assertEqual(my_new_bibdocfile.get_type(), 'Main')
        # get_path() / get_full_path() are deliberately not asserted: the
        # concrete path belongs to the underlying implementation, not to the
        # interface that should be tested.
        #get bibdocid
        self.assertEqual(my_new_bibdocfile.get_bibdocid(),
                         my_new_bibdoc.get_id())
        #get name
        self.assertEqual(my_new_bibdocfile.get_name(), 'img_test')
        #get full name
        self.assertEqual(my_new_bibdocfile.get_full_name(), 'img_test.jpg')
        #get format
        self.assertEqual(my_new_bibdocfile.get_format(), '.jpg')
        #get version
        self.assertEqual(my_new_bibdocfile.get_version(), 1)
        #get description
        self.assertEqual(my_new_bibdocfile.get_description(),
                         my_new_bibdoc.get_description('.jpg', version=1))
        #get comment
        self.assertEqual(my_new_bibdocfile.get_comment(),
                         my_new_bibdoc.get_comment('.jpg', version=1))
        #get recid
        self.assertEqual(my_new_bibdocfile.get_recid(), 2)
        #get status
        self.assertEqual(my_new_bibdocfile.get_status(), '')
        #get size
        self.assertEqual(my_new_bibdocfile.get_size(), 91750)
        #get checksum
        self.assertEqual(my_new_bibdocfile.get_checksum(),
                         '28ec893f9da735ad65de544f71d4ad76')
        #check
        self.assertEqual(my_new_bibdocfile.check(), True)
        #display (rendered through the bibdocfile template)
        import invenio.template
        tmpl = invenio.template.load("bibdocfile")
        value = tmpl.tmpl_display_bibdocfile(my_new_bibdocfile, ln='en')
        # Use a unittest assertion: a bare assert is stripped under python -O.
        self.assertTrue('files/img_test.jpg?version=1">' in value)
        #hidden?
        self.assertEqual(my_new_bibdocfile.hidden_p(), False)
        #check modification date
        self.assertEqual(my_new_bibdocfile.md, timestamp)
        #delete
        my_new_bibdoc.delete()
        self.assertEqual(my_new_bibdoc.deleted_p(), True)
Ejemplo n.º 7
0
    def test_BibRecDocs(self):
        """bibdocfile - BibRecDocs functions"""
        # Walks through the record-level BibRecDocs API on record 2. Each step
        # mutates the documents attached to the record, so order matters.
        my_bibrecdoc = BibRecDocs(2)
        # add bibdoc: attach test.jpg as "img_test", then create empty "file"
        my_bibrecdoc.add_new_file(
            CFG_PREFIX + "/lib/webtest/invenio/test.jpg",
            "Main",
            "img_test",
            False,
            "test add new file",
            "test",
            ".jpg",
        )
        my_bibrecdoc.add_bibdoc(doctype="Main", docname="file", never_fail=False)
        self.assertEqual(len(my_bibrecdoc.list_bibdocs()), 3)
        my_added_bibdoc = my_bibrecdoc.get_bibdoc("file")
        # add bibdocfile in empty bibdoc
        my_added_bibdoc.add_file_new_version(
            CFG_PREFIX + "/lib/webtest/invenio/test.gif",
            description="added in empty bibdoc",
            comment=None,
            docformat=None,
            flags=["PERFORM_HIDE_PREVIOUS"],
        )
        # propose unique docname
        self.assertEqual(my_bibrecdoc.propose_unique_docname("file"), "file_2")
        # has docname
        self.assertEqual(my_bibrecdoc.has_docname_p("file"), True)
        # merge 2 bibdocs: "img_test" should then hold both files
        my_bibrecdoc.merge_bibdocs("img_test", "file")
        self.assertEqual(
            len(my_bibrecdoc.get_bibdoc("img_test").list_all_files()), 2
        )
        # check file exists
        self.assertEqual(
            my_bibrecdoc.check_file_exists(
                CFG_PREFIX + "/lib/webtest/invenio/test.jpg", ".jpg"
            ),
            True,
        )
        # get bibdoc names
        # we can not rely on the order, so compare the first two as a set
        names = set(my_bibrecdoc.get_bibdoc_names("Main")[:2])
        self.assertTrue("0104007_02" in names)
        self.assertTrue("img_test" in names)

        # get total size
        self.assertEqual(my_bibrecdoc.get_total_size(), 1647591)
        # get total size latest version
        self.assertEqual(my_bibrecdoc.get_total_size_latest_version(), 1647591)
        # NOTE: the former my_bibrecdoc.display(...) check is disabled here.
        # get xml 8564
        value = my_bibrecdoc.get_xml_8564()
        self.assertTrue(
            "/" + CFG_SITE_RECORD + "/2/files/img_test.jpg</subfield>" in value
        )
        # check duplicate docnames
        self.assertEqual(my_bibrecdoc.check_duplicate_docnames(), True)
    def test_BibDocFiles(self):
        """bibdocfile - BibDocFile functions """
        # Attaches test.jpg to record 2 as 'img_test' with a fixed
        # modification date, checks every accessor of the resulting
        # BibDocFile, then deletes the document again.
        #add bibdoc
        my_bibrecdoc = BibRecDocs(2)
        timestamp = datetime.strptime("2010-09-08 07:06:05",
                                      "%Y-%m-%d %H:%M:%S")
        my_bibrecdoc.add_new_file(
            CFG_PREFIX + '/lib/webtest/invenio/test.jpg', 'Main', 'img_test',
            False, 'test add new file', 'test', '.jpg',
            modification_date=timestamp)

        my_new_bibdoc = my_bibrecdoc.get_bibdoc("img_test")
        my_new_bibdocfile = my_new_bibdoc.list_all_files()[0]
        #get url
        self.assertEqual(
            my_new_bibdocfile.get_url(),
            CFG_SITE_URL + '/%s/2/files/img_test.jpg' % CFG_SITE_RECORD)
        #get type
        self.assertEqual(my_new_bibdocfile.get_type(), 'Main')
        # get_path() / get_full_path() are deliberately not asserted: the
        # concrete path belongs to the underlying implementation, not to the
        # interface that should be tested.
        #get bibdocid
        self.assertEqual(my_new_bibdocfile.get_bibdocid(),
                         my_new_bibdoc.get_id())
        #get name
        self.assertEqual(my_new_bibdocfile.get_name(), 'img_test')
        #get full name
        self.assertEqual(my_new_bibdocfile.get_full_name(), 'img_test.jpg')
        #get format
        self.assertEqual(my_new_bibdocfile.get_format(), '.jpg')
        #get version
        self.assertEqual(my_new_bibdocfile.get_version(), 1)
        #get description
        self.assertEqual(my_new_bibdocfile.get_description(),
                         my_new_bibdoc.get_description('.jpg', version=1))
        #get comment
        self.assertEqual(my_new_bibdocfile.get_comment(),
                         my_new_bibdoc.get_comment('.jpg', version=1))
        #get recid
        self.assertEqual(my_new_bibdocfile.get_recid(), 2)
        #get status
        self.assertEqual(my_new_bibdocfile.get_status(), '')
        #get size
        self.assertEqual(my_new_bibdocfile.get_size(), 91750)
        #get checksum
        self.assertEqual(my_new_bibdocfile.get_checksum(),
                         '28ec893f9da735ad65de544f71d4ad76')
        #check
        self.assertEqual(my_new_bibdocfile.check(), True)
        #display (rendered through the bibdocfile template)
        tmpl = invenio.template.load("bibdocfile")
        value = tmpl.tmpl_display_bibdocfile(my_new_bibdocfile, ln='en')
        # Use a unittest assertion: a bare assert is stripped under python -O.
        self.assertTrue('files/img_test.jpg?version=1">' in value)
        #hidden?
        self.assertEqual(my_new_bibdocfile.hidden_p(), False)
        #check modification date
        self.assertEqual(my_new_bibdocfile.md, timestamp)
        #delete
        my_new_bibdoc.delete()
        self.assertEqual(my_new_bibdoc.deleted_p(), True)
 def test_BibDocFiles(self):
     """bibdocfile - BibDocFile functions """
     # Attaches test.jpg to record 2 as 'img_test' and checks every accessor
     # of the resulting BibDocFile, then deletes the document again.
     #add bibdoc
     my_bibrecdoc = BibRecDocs(2)
     my_bibrecdoc.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg',
                               'Main', 'img_test', False,
                               'test add new file', 'test', '.jpg')
     my_new_bibdoc = my_bibrecdoc.get_bibdoc("img_test")
     my_new_bibdocfile = my_new_bibdoc.list_all_files()[0]
     #get url
     self.assertEqual(my_new_bibdocfile.get_url(),
                      CFG_SITE_URL + '/record/2/files/img_test.jpg')
     #get type
     self.assertEqual(my_new_bibdocfile.get_type(), 'Main')
     #get path
     self.assertTrue(
         my_new_bibdocfile.get_path().startswith(CFG_WEBSUBMIT_FILEDIR))
     self.assertTrue(
         my_new_bibdocfile.get_path().endswith('/img_test.jpg;1'))
     #get bibdocid
     self.assertEqual(my_new_bibdocfile.get_bibdocid(),
                      my_new_bibdoc.get_id())
     #get name
     self.assertEqual(my_new_bibdocfile.get_name(), 'img_test')
     #get full name
     self.assertEqual(my_new_bibdocfile.get_full_name(), 'img_test.jpg')
     #get full path
     self.assertTrue(
         my_new_bibdocfile.get_full_path().startswith(CFG_WEBSUBMIT_FILEDIR))
     self.assertTrue(
         my_new_bibdocfile.get_full_path().endswith('/img_test.jpg;1'))
     #get format
     self.assertEqual(my_new_bibdocfile.get_format(), '.jpg')
     #get version
     self.assertEqual(my_new_bibdocfile.get_version(), 1)
     #get description
     self.assertEqual(my_new_bibdocfile.get_description(),
                      my_new_bibdoc.get_description('.jpg', version=1))
     #get comment
     self.assertEqual(my_new_bibdocfile.get_comment(),
                      my_new_bibdoc.get_comment('.jpg', version=1))
     #get recid
     self.assertEqual(my_new_bibdocfile.get_recid(), 2)
     #get status
     self.assertEqual(my_new_bibdocfile.get_status(), '')
     #get size
     self.assertEqual(my_new_bibdocfile.get_size(), 91750)
     #get checksum
     self.assertEqual(my_new_bibdocfile.get_checksum(),
                      '28ec893f9da735ad65de544f71d4ad76')
     #check
     self.assertEqual(my_new_bibdocfile.check(), True)
     #display
     value = my_new_bibdocfile.display(ln='en')
     # Use a unittest assertion: a bare assert is stripped under python -O.
     self.assertTrue('files/img_test.jpg?version=1">' in value)
     #hidden?
     self.assertEqual(my_new_bibdocfile.hidden_p(), False)
     #delete
     my_new_bibdoc.delete()
     self.assertEqual(my_new_bibdoc.deleted_p(), True)
Ejemplo n.º 10
0
def check_records(records):
    """Align field 980__c of Hindawi records with the subject of their XML.

    For every record whose 980__b is "Hindawi", the "xml" file of the first
    attached document is parsed and its "subject" value is read:

    * for Editorial / Erratum / Corrigendum / Addendum / Letter to the Editor
      the upper-cased subject is written to 980__c (amending the first
      existing 980__c when it already holds one of those types, otherwise
      adding a 'c' subfield to the first 980 field);
    * Review Article / Research Article / Retraction are left untouched;
    * any other subject raises an Exception.

    Records whose XML file cannot be opened get a warning and are skipped.

    :param records: iterable of bibcheck record objects.
    :raises Exception: if a subject is not one of the known values.
    """
    retyped_subjects = (
        "Editorial", "Erratum", "Corrigendum", "Addendum",
        "Letter to the Editor",
    )
    known_types = (
        'ERRATUM', 'ADDENDUM', 'EDITORIAL', 'CORRIGENDUM',
        'LETTER TO THE EDITOR',
    )
    untouched_subjects = ("Review Article", "Research Article", "Retraction")

    for record in records:
        ## Stupid hack because bibcheck filters does not work as expected
        if record_get_field_value(record, '980', code='b') != "Hindawi":
            continue
        record.warn("Working on this record")
        recdoc = BibRecDocs(int(record.record_id))
        doc = recdoc.get_bibdoc(recdoc.get_bibdoc_names()[0])
        try:
            xml_file = open(doc.get_file("xml").get_full_path())
        except Exception:
            # Best effort: a missing or unreadable XML file only skips the
            # record. KeyboardInterrupt/SystemExit are no longer swallowed.
            record.warn("No document can be found")
            continue
        # Close the file as soon as its content has been read.
        with xml_file:
            xml_content = xml_file.read()
        xml2 = xml.dom.minidom.parseString(xml_content)
        subject = get_value_in_tag(xml2, "subject")
        if subject in retyped_subjects:
            field = record_get_field_value(record, '980', code='c')
            if field in known_types:
                # Replace the first existing 980__c value.
                for position, value in record.iterfield('980__c'):
                    record.amend_field(position, subject.upper())
                    break
            else:
                # No 980__c yet, or one holding an unrelated value: add a
                # new 'c' subfield to the first 980 field instead.
                for position, value in record.iterfield('980__%'):
                    record.add_subfield(position, 'c', subject.upper())
                    break
        elif subject not in untouched_subjects:
            raise Exception(
                "This subject: %s does not exit in SCOAP3 system" %
                (subject, ))
Ejemplo n.º 11
0
    def get_record(self, path=None, no_pdf=False,
                   test=False, refextract_callback=None):
        """Convert a record to MARCXML format.

        :param path: path to a record.
        :type path: string
        :param test: flag to determine if it is a test call.
        :type test: bool
        :param refextract_callback: callback to be used to extract
                                    unstructured references. It should
                                    return a marcxml formated string
                                    of the reference.
        :type refextract_callback: callable

        :returns: marcxml formated string.
        """
        xml_doc = self.get_article(path)
        rec = create_record()
        title = self.get_title(xml_doc)
        if title:
            record_add_field(rec, '245', subfields=[('a', title)])
        (journal, dummy, volume, issue, first_page, last_page, year,
         start_date, doi) = self.get_publication_information(xml_doc, path)
        if not journal:
            journal = self.get_article_journal(xml_doc)
        if start_date:
            record_add_field(rec, '260', subfields=[('c', start_date),
                                                    ('t', 'published')])
        else:
            record_add_field(
                rec, '260', subfields=[('c', time.strftime('%Y-%m-%d'))])
        if doi:
            record_add_field(rec, '024', ind1='7', subfields=[('a', doi),
                                                              ('2', 'DOI')])
        license, license_url = self.get_license(xml_doc)
        if license and license_url:
            record_add_field(rec, '540', subfields=[('a', license),
                                                    ('u', license_url)])
        elif license_url:
            record_add_field(rec, '540', subfields=[('u', license_url)])
        self.logger.info("Creating record: %s %s" % (path, doi))
        authors = self.get_authors(xml_doc)
        first_author = True
        for author in authors:
            author_name = (author['surname'], author.get(
                'given_name') or author.get('initials'))
            subfields = [('a', '%s, %s' % author_name)]
            if 'orcid' in author:
                subfields.append(('j', author['orcid']))
            if 'affiliation' in author:
                for aff in author["affiliation"]:
                    subfields.append(('v', aff))

                if self.extract_nations:
                    add_nations_field(subfields)

            if author.get('email'):
                subfields.append(('m', author['email']))
            if first_author:
                record_add_field(rec, '100', subfields=subfields)
                first_author = False
            else:
                record_add_field(rec, '700', subfields=subfields)

        abstract = self.get_abstract(xml_doc)
        if abstract:
            record_add_field(rec, '520', subfields=[('a', abstract),
                                                    ('9', 'Elsevier')])
        record_copyright = self.get_copyright(xml_doc)
        if record_copyright:
            record_add_field(rec, '542', subfields=[('f', record_copyright)])
        keywords = self.get_keywords(xml_doc)
        if self.CONSYN:
            for tag in xml_doc.getElementsByTagName('ce:collaboration'):
                collaboration = get_value_in_tag(tag, 'ce:text')
                if collaboration:
                    record_add_field(rec, '710',
                                     subfields=[('g', collaboration)])

            # We add subjects also as author keywords
            subjects = xml_doc.getElementsByTagName('dct:subject')
            for subject in subjects:
                for listitem in subject.getElementsByTagName('rdf:li'):
                    keyword = xml_to_text(listitem)
                    if keyword not in keywords:
                        keywords.append(keyword)
            if keywords:
                for keyword in keywords:
                    record_add_field(rec, '653', ind1='1',
                                     subfields=[('a', keyword),
                                                ('9', 'author')])
            journal, dummy = fix_journal_name(journal.strip(),
                                              self.journal_mappings)
            subfields = []
            doctype = self.get_doctype(xml_doc)
            try:
                page_count = int(last_page) - int(first_page) + 1
                record_add_field(rec, '300',
                                 subfields=[('a', str(page_count))])
            except ValueError:  # do nothing
                pass
            if doctype == 'err':
                subfields.append(('m', 'Erratum'))
            elif doctype == 'add':
                subfields.append(('m', 'Addendum'))
            elif doctype == 'pub':
                subfields.append(('m', 'Publisher Note'))
            elif doctype == 'rev':
                record_add_field(rec, '980', subfields=[('a', 'Review')])
            if journal:
                subfields.append(('p', journal))
            if first_page and last_page:
                subfields.append(('c', '%s-%s' %
                                       (first_page, last_page)))
            elif first_page:
                subfields.append(('c', first_page))
            if volume:
                subfields.append(('v', volume))
            if year:
                subfields.append(('y', year))
            record_add_field(rec, '773', subfields=subfields)
            if not test:
                if license:
                    url = 'http://www.sciencedirect.com/science/article/pii/'\
                          + path.split('/')[-1][:-4]
                    record_add_field(rec, '856', ind1='4',
                                     subfields=[('u', url),
                                                ('y', 'Elsevier server')])
                    record_add_field(rec, 'FFT', subfields=[('a', path),
                                                            ('t', 'INSPIRE-PUBLIC'),
                                                            ('d', 'Fulltext')])
                else:
                    record_add_field(rec, 'FFT', subfields=[('a', path),
                                                            ('t', 'Elsevier'),
                                                            ('o', 'HIDDEN')])
            record_add_field(rec, '980', subfields=[('a', 'HEP')])
            record_add_field(rec, '980', subfields=[('a', 'Citeable')])
            record_add_field(rec, '980', subfields=[('a', 'Published')])
            self._add_references(xml_doc, rec, refextract_callback)
        else:
            licence = 'http://creativecommons.org/licenses/by/3.0/'
            record_add_field(rec,
                             '540',
                             subfields=[('a', 'CC-BY-3.0'), ('u', licence)])
            if keywords:
                for keyword in keywords:
                    record_add_field(
                        rec, '653', ind1='1', subfields=[('a', keyword),
                                    ('9', 'author')])

            pages = ''
            if first_page and last_page:
                pages = '{0}-{1}'.format(first_page, last_page)
            elif first_page:
                pages = first_page

            subfields = filter(lambda x: x[1] and x[1] != '-', [('p', journal),
                                                                ('v', volume),
                                                                ('n', issue),
                                                                ('c', pages),
                                                                ('y', year)])

            record_add_field(rec, '773', subfields=subfields)
            if not no_pdf:
                from invenio.search_engine import perform_request_search
                query = '0247_a:"%s" AND NOT 980:DELETED"' % (doi,)
                prev_version = perform_request_search(p=query)

                old_pdf = False

                if prev_version:
                    from invenio.bibdocfile import BibRecDocs
                    prev_rec = BibRecDocs(prev_version[0])
                    try:
                        pdf_path = prev_rec.get_bibdoc('main')
                        pdf_path = pdf_path.get_file(
                            ".pdf;pdfa", exact_docformat=True)
                        pdf_path = pdf_path.fullpath
                        old_pdf = True
                        record_add_field(rec, 'FFT',
                                         subfields=[('a', pdf_path),
                                                    ('n', 'main'),
                                                    ('f', '.pdf;pdfa')])
                        message = ('Leaving previously delivered PDF/A for: '
                                   + doi)
                        self.logger.info(message)
                    except:
                        pass
                try:
                    if exists(join(path, 'main_a-2b.pdf')):
                        pdf_path = join(path, 'main_a-2b.pdf')
                        record_add_field(rec, 'FFT',
                                         subfields=[('a', pdf_path),
                                                    ('n', 'main'),
                                                    ('f', '.pdf;pdfa')])
                        self.logger.debug('Adding PDF/A to record: %s'
                                          % (doi,))
                    elif exists(join(path, 'main.pdf')):
                        pdf_path = join(path, 'main.pdf')
                        record_add_field(rec, 'FFT', subfields=[('a', pdf_path)])
                    else:
                        if not old_pdf:
                            message = "Record " + doi
                            message += " doesn't contain PDF file."
                            self.logger.warning(message)
                            raise MissingFFTError(message)
                except MissingFFTError:
                    message = "Elsevier paper: %s is missing PDF." % (doi,)
                    register_exception(alert_admin=True, prefix=message)
                version = self.get_elsevier_version(find_package_name(path))
                record_add_field(rec, '583', subfields=[('l', version)])
                xml_path = join(path, 'main.xml')
                record_add_field(rec, 'FFT', subfields=[('a', xml_path)])
                record_add_field(rec, '980', subfields=[('a', 'SCOAP3'),
                                                        ('b', 'Elsevier')])
        try:
            return record_xml_output(rec)
        except UnicodeDecodeError:
            message = "Found a bad char in the file for the article " + doi
            sys.stderr.write(message)
            return ""
 def test_BibDocFiles(self):
     """bibdocfile - BibDocFile functions """
     # Attaches a JPEG to record 2 with a fixed modification date, checks
     # every accessor of the resulting BibDocFile, then deletes the bibdoc.
     # The steps are stateful, so their order matters.
     # add bibdoc
     my_bibrecdoc = BibRecDocs(2)
     timestamp = datetime(*(time.strptime("2010-09-08 07:06:05", "%Y-%m-%d %H:%M:%S")[:6]))
     my_bibrecdoc.add_new_file(
         CFG_PREFIX + "/lib/webtest/invenio/test.jpg",
         "Main",
         "img_test",
         False,
         "test add new file",
         "test",
         ".jpg",
         modification_date=timestamp,
     )
     my_new_bibdoc = my_bibrecdoc.get_bibdoc("img_test")
     my_new_bibdocfile = my_new_bibdoc.list_all_files()[0]
     # get url
     self.assertEqual(my_new_bibdocfile.get_url(), CFG_SITE_URL + "/%s/2/files/img_test.jpg" % CFG_SITE_RECORD)
     # get type
     self.assertEqual(my_new_bibdocfile.get_type(), "Main")
     # get path
     self.assert_(my_new_bibdocfile.get_path().startswith(CFG_BIBDOCFILE_FILEDIR))
     self.assert_(my_new_bibdocfile.get_path().endswith("/img_test.jpg;1"))
     # get bibdocid
     self.assertEqual(my_new_bibdocfile.get_bibdocid(), my_new_bibdoc.get_id())
     # get name
     self.assertEqual(my_new_bibdocfile.get_name(), "img_test")
     # get full name
     self.assertEqual(my_new_bibdocfile.get_full_name(), "img_test.jpg")
     # get full path
     self.assert_(my_new_bibdocfile.get_full_path().startswith(CFG_BIBDOCFILE_FILEDIR))
     self.assert_(my_new_bibdocfile.get_full_path().endswith("/img_test.jpg;1"))
     # get format
     self.assertEqual(my_new_bibdocfile.get_format(), ".jpg")
     # get version
     self.assertEqual(my_new_bibdocfile.get_version(), 1)
     # get description (must agree with what the owning bibdoc reports)
     self.assertEqual(my_new_bibdocfile.get_description(), my_new_bibdoc.get_description(".jpg", version=1))
     # get comment (must agree with what the owning bibdoc reports)
     self.assertEqual(my_new_bibdocfile.get_comment(), my_new_bibdoc.get_comment(".jpg", version=1))
     # get recid
     self.assertEqual(my_new_bibdocfile.get_recid(), 2)
     # get status
     self.assertEqual(my_new_bibdocfile.get_status(), "")
     # get size
     self.assertEqual(my_new_bibdocfile.get_size(), 91750)
     # get checksum
     self.assertEqual(my_new_bibdocfile.get_checksum(), "28ec893f9da735ad65de544f71d4ad76")
     # check
     self.assertEqual(my_new_bibdocfile.check(), True)
     # display
     value = my_new_bibdocfile.display(ln="en")
     # Use the TestCase assertion rather than a bare ``assert`` so the
     # check is not stripped when Python runs with -O.
     self.assert_('files/img_test.jpg?version=1">' in value)
     # hidden?
     self.assertEqual(my_new_bibdocfile.hidden_p(), False)
     # check modification date
     self.assertEqual(my_new_bibdocfile.md, timestamp)
     # delete
     my_new_bibdoc.delete()
     self.assertEqual(my_new_bibdoc.deleted_p(), True)
 def test_BibDocs(self):
     """bibdocfile - BibDocs functions"""
     # Walks one bibdoc attached to record 2 through its whole life cycle:
     # creation, renaming, versioning, reverting, metadata updates, format
     # and icon handling, deletion and expunging. The steps are stateful,
     # so their order matters.
     # add file
     my_bibrecdoc = BibRecDocs(2)
     timestamp1 = datetime(*(time.strptime("2011-10-09 08:07:06", "%Y-%m-%d %H:%M:%S")[:6]))
     my_bibrecdoc.add_new_file(
         CFG_PREFIX + "/lib/webtest/invenio/test.jpg",
         "Main",
         "img_test",
         False,
         "test add new file",
         "test",
         ".jpg",
         modification_date=timestamp1,
     )
     my_new_bibdoc = my_bibrecdoc.get_bibdoc("img_test")
     value = my_bibrecdoc.list_bibdocs()
     self.assertEqual(len(value), 2)
     # get total file (bibdoc)
     self.assertEqual(my_new_bibdoc.get_total_size(), 91750)
     # get recid
     self.assertEqual(my_new_bibdoc.get_recid(), 2)
     # change name
     my_new_bibdoc.change_name("new_name")
     # get docname
     self.assertEqual(my_new_bibdoc.get_docname(), "new_name")
     # get type
     self.assertEqual(my_new_bibdoc.get_type(), "Main")
     # get id
     self.assert_(my_new_bibdoc.get_id() > 80)
     # set status
     my_new_bibdoc.set_status("new status")
     # get status
     self.assertEqual(my_new_bibdoc.get_status(), "new status")
     # get base directory
     self.assert_(my_new_bibdoc.get_base_dir().startswith(CFG_BIBDOCFILE_FILEDIR))
     # get file number
     self.assertEqual(my_new_bibdoc.get_file_number(), 1)
     # add file new version
     timestamp2 = datetime(*(time.strptime("2010-09-08 07:06:05", "%Y-%m-%d %H:%M:%S")[:6]))
     my_new_bibdoc.add_file_new_version(
         CFG_PREFIX + "/lib/webtest/invenio/test.jpg",
         description="the new version",
         comment=None,
         format=None,
         flags=["PERFORM_HIDE_PREVIOUS"],
         modification_date=timestamp2,
     )
     self.assertEqual(my_new_bibdoc.list_versions(), [1, 2])
     # revert
     timestamp3 = datetime.now()
     time.sleep(2)  # so we can see a difference between now() and the time of the revert
     my_new_bibdoc.revert(1)
     self.assertEqual(my_new_bibdoc.list_versions(), [1, 2, 3])
     self.assertEqual(my_new_bibdoc.get_description(".jpg", version=3), "test add new file")
     # get total size latest version
     self.assertEqual(my_new_bibdoc.get_total_size_latest_version(), 91750)
     # get latest version
     self.assertEqual(my_new_bibdoc.get_latest_version(), 3)
     # list latest files
     self.assertEqual(len(my_new_bibdoc.list_latest_files()), 1)
     self.assertEqual(my_new_bibdoc.list_latest_files()[0].get_version(), 3)
     # list version files
     self.assertEqual(len(my_new_bibdoc.list_version_files(1, list_hidden=True)), 1)
     # display
     value = my_new_bibdoc.display(version="", ln="en", display_hidden=True)
     self.assert_(">test add new file<" in value)
     # format already exist
     self.assertEqual(my_new_bibdoc.format_already_exists_p(".jpg"), True)
     # get file
     self.assertEqual(my_new_bibdoc.get_file(".jpg", version="1").get_version(), 1)
     # set description
     my_new_bibdoc.set_description("new description", ".jpg", version=1)
     # get description
     self.assertEqual(my_new_bibdoc.get_description(".jpg", version=1), "new description")
     # set comment
     # (this step used to call set_description/get_description again, so
     # the comment accessors were never actually exercised)
     my_new_bibdoc.set_comment("new comment", ".jpg", version=1)
     # get comment
     self.assertEqual(my_new_bibdoc.get_comment(".jpg", version=1), "new comment")
     # get history
     self.assert_(len(my_new_bibdoc.get_history()) > 0)
     # check modification date
     self.assertEqual(my_new_bibdoc.get_file(".jpg", version=1).md, timestamp1)
     self.assertEqual(my_new_bibdoc.get_file(".jpg", version=2).md, timestamp2)
     # version 3 was created by the revert, i.e. after timestamp3 was taken
     self.assert_(my_new_bibdoc.get_file(".jpg", version=3).md > timestamp3)
     # delete file
     my_new_bibdoc.delete_file(".jpg", 2)
     # list all files
     self.assertEqual(len(my_new_bibdoc.list_all_files()), 2)
     # delete file
     my_new_bibdoc.delete_file(".jpg", 3)
     # add new format
     timestamp4 = datetime(*(time.strptime("2012-11-10 09:08:07", "%Y-%m-%d %H:%M:%S")[:6]))
     my_new_bibdoc.add_file_new_format(
         CFG_PREFIX + "/lib/webtest/invenio/test.gif",
         version=None,
         description=None,
         comment=None,
         format=None,
         modification_date=timestamp4,
     )
     self.assertEqual(len(my_new_bibdoc.list_all_files()), 2)
     # check modification time
     self.assertEqual(my_new_bibdoc.get_file(".jpg", version=1).md, timestamp1)
     self.assertEqual(my_new_bibdoc.get_file(".gif", version=1).md, timestamp4)
     # delete file
     my_new_bibdoc.delete_file(".jpg", 1)
     # delete file
     my_new_bibdoc.delete_file(".gif", 1)
     # empty bibdoc
     self.assertEqual(my_new_bibdoc.empty_p(), True)
     # hidden?
     self.assertEqual(my_new_bibdoc.hidden_p(".jpg", version=1), False)
     # hide
     my_new_bibdoc.set_flag("HIDDEN", ".jpg", version=1)
     # hidden?
     self.assertEqual(my_new_bibdoc.hidden_p(".jpg", version=1), True)
     # add and get icon
     my_new_bibdoc.add_icon(CFG_PREFIX + "/lib/webtest/invenio/icon-test.gif", modification_date=timestamp4)
     value = my_bibrecdoc.list_bibdocs()[1]
     self.assertEqual(value.get_icon(), my_new_bibdoc.get_icon())
     # check modification time
     self.assertEqual(my_new_bibdoc.get_icon().md, timestamp4)
     # delete icon
     my_new_bibdoc.delete_icon()
     # get icon
     self.assertEqual(my_new_bibdoc.get_icon(), None)
     # delete
     my_new_bibdoc.delete()
     self.assertEqual(my_new_bibdoc.deleted_p(), True)
     # undelete
     my_new_bibdoc.undelete(previous_status="")
     # expunging
     my_new_bibdoc.expunge()
     my_bibrecdoc.build_bibdoc_list()
     self.failIf("new_name" in my_bibrecdoc.get_bibdoc_names())
     self.failUnless(my_bibrecdoc.get_bibdoc_names())
Ejemplo n.º 14
0
def Stamp_Replace_Single_File_Approval(parameters,
                                       curdir,
                                       form,
                                       user_info=None):
    """
    This function is intended to be called when a document has been
    approved and needs to be stamped.
    The function should be used when there is ONLY ONE file to be
    stamped after approval (for example, the "main file").
    The name of the file to be stamped should be known and should be stored
    in a file in the submission's working directory (without the extension).
    Generally, this will work out fine as the main file is named after the
    report number of the document, this will be stored in the report number
    file.

    @param parameters: (dictionary) - must contain:

         + latex_template: (string) - the name of the LaTeX template that
            should be used for the creation of the stamp.

         + latex_template_vars: (string) - a string-ified dictionary
            of variables to be replaced in the LaTeX template and the
            values (or names of files in curdir containing the values)
            with which to replace them. Use prefix 'FILE:' to specify
            that the stamped value must be read from a file in
            submission directory instead of being a fixed value to
            stamp.
            E.G.:
               { 'TITLE' : 'FILE:DEMOTHESIS_TITLE',
                 'DATE'  : 'FILE:DEMOTHESIS_DATE'
               }

         + file_to_be_stamped: (string) - this is the name of a file in the
            submission's working directory that contains the name of the
            bibdocfile that is to be stamped.

         + new_file_name: (string) - this is the name of a file in the
            submission's working directory that contains the name that is to
            be given to the file after it has been stamped. If empty, or if
            that file doesn't exist, the file will not be renamed after
            stamping.

         + switch_file: (string) - when this value is set, specifies
            the name of a file that will switch on/off the
            stamping. The stamp will be applied if the file exists in
            the submission directory and is not empty. If the file
            cannot be found or is empty, the stamp is not applied.
            Useful for eg. if you want to let your users control the
            stamping with a checkbox on your submission page.
            Leave this parameter empty to always stamp by default.

         + stamp: (string) - the type of stamp to be applied to the file.
            should be one of:
              + first (only the first page is stamped);
              + all (all pages are stamped);
              + coverpage (a separate cover-page is added to the file as a
                 first page);

         + layer: (string) - the position of the stamp. Should be one of:
              + background (invisible if original file has a white
                -not transparent- background layer)
              + foreground (on top of the stamped file.  If the stamp
                does not have a transparent background, will hide all
                of the document layers)
           The default value is 'background'.

    @param curdir: (string) - path of the submission's working directory;
         the "access" and "SN" files and every file named by the
         parameters above are read from it.

    @param form: not used by this function.

    @param user_info: (dictionary or None) - when given, its 'req' entry is
         passed to register_exception if a template-variable file cannot
         be read.

    @return: (string) - always the empty string.

    @raise InvenioWebSubmitFunctionError: if the record ID cannot be read
         from curdir, or a 'FILE:' template variable names an invalid or
         unreadable file.

    @raise InvenioWebSubmitFunctionWarning: if the name of the file to stamp
         is empty, the bibdoc or its PDF cannot be found, stamping fails,
         or the stamped file cannot be added as a new version.
    """
    ############
    ## Definition of important variables:
    ############
    ## The file stamper needs to be called with a dictionary of options of
    ## the following format:
    file_stamper_options = {
        'latex-template': "",      ## TEMPLATE_NAME
        'latex-template-var': {},  ## TEMPLATE VARIABLES
        'input-file': "",          ## INPUT FILE
        'output-file': "",         ## OUTPUT FILE
        'stamp': "",               ## STAMP TYPE
        'layer': "",               ## LAYER TO STAMP
        'verbosity': 0,            ## VERBOSITY (we don't care about it)
    }

    ## Check if stamping is enabled
    switch_file = parameters.get('switch_file', '')
    if switch_file:
        # Good, a "switch file" was specified. Check if it exists, and
        # if its value is not empty.
        if not _read_in_file(os.path.join(curdir, switch_file)):
            # File does not exist, or is empty. Silently abort
            # stamping.
            return ""

    ## Submission access number:
    access = _read_in_file("%s/access" % curdir)
    ## record ID for the current submission. It is found in the special file
    ## "SN" (sysno) in curdir:
    recid = _read_in_file("%s/SN" % curdir)
    try:
        recid = int(recid)
    except ValueError:
        ## No record ID. Cannot continue.
        err_msg = "Error in Stamp_Replace_Single_File_Approval: " \
                  "Cannot recover record ID from the submission's working " \
                  "directory. Stamping cannot be carried out. The " \
                  "submission ID is [%s]." % cgi.escape(access)
        register_exception(prefix=err_msg)
        raise InvenioWebSubmitFunctionError(err_msg)

    ############
    ## Resolution of function parameters:
    ############
    ## The name of the LaTeX template to be used for stamp creation
    ## (anything that is not a string is treated as "no template"):
    latex_template = parameters['latex_template']
    if not isinstance(latex_template, str):
        latex_template = ""
    ## A string containing the variables/values that should be substituted
    ## in the final (working) LaTeX template:
    latex_template_vars_string = parameters['latex_template_vars']
    if not isinstance(latex_template_vars_string, str):
        latex_template_vars_string = ""
    ## The type of stamp to be applied to the file(s):
    stamp = parameters['stamp']
    if isinstance(stamp, str):
        stamp = stamp.lower()
    else:
        stamp = ""
    ## The layer to use for stamping (anything unknown falls back to the
    ## documented default):
    layer = parameters.get('layer', "background")
    if layer not in ('background', 'foreground'):
        layer = "background"
    ## Get the name of the file to be stamped from the file indicated in
    ## the file_to_be_stamped parameter:
    file_to_stamp_file = parameters.get('file_to_be_stamped')
    if file_to_stamp_file is None:
        file_to_stamp_file = ""
    ## Get the "basename" for the file to be stamped (it's mandatory that it
    ## be in curdir):
    file_to_stamp_file = os.path.basename(file_to_stamp_file).strip()
    name_file_to_stamp = _read_in_file("%s/%s" % (curdir, file_to_stamp_file))
    ## Strings are immutable: the cleaned value must be assigned back,
    ## otherwise the line breaks are never actually removed.
    name_file_to_stamp = \
        name_file_to_stamp.replace("\n", "").replace("\r", "")
    ##
    ## Get the name to be given to the file after it has been stamped (if there
    ## is one.) Once more, it will be found in a file in curdir:
    new_file_name_file = parameters.get('new_file_name')
    if new_file_name_file is None:
        new_file_name_file = ""
    ## Get the "basename" for the file containing the new file name. (It's
    ## mandatory that it be in curdir):
    new_file_name_file = os.path.basename(new_file_name_file).strip()
    new_file_name = _read_in_file("%s/%s" % (curdir, new_file_name_file))

    ############
    ## Begin:
    ############
    ##
    ## If no name for the file to stamp, warning.
    if name_file_to_stamp == "":
        wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                  "It was not possible to recover a valid name for the " \
                  "file to be stamped. Stamping could not, therefore, be " \
                  "carried out. The submission ID is [%s]." \
                  % access
        raise InvenioWebSubmitFunctionWarning(wrn_msg)
    ##
    ## The file to be stamped is a bibdoc. We will only stamp it (a) if it
    ## exists; and (b) if it is a PDF file. So, get the path (in the bibdocs
    ## tree) to the file to be stamped:
    ##
    ## First get the object representing the bibdocs belonging to this record:
    bibrecdocs = BibRecDocs(recid)
    try:
        bibdoc_file_to_stamp = bibrecdocs.get_bibdoc("%s" % name_file_to_stamp)
    except InvenioBibDocFileError:
        ## Couldn't get a bibdoc object for this filename. Probably the file
        ## that we wanted to stamp wasn't attached to this record.
        wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                  "It was not possible to recover a bibdoc object for the " \
                  "filename [%s] when trying to stamp the main file. " \
                  "Stamping could not be carried out. The submission ID is " \
                  "[%s] and the record ID is [%s]." \
                  % (name_file_to_stamp, access, recid)
        register_exception(prefix=wrn_msg)
        raise InvenioWebSubmitFunctionWarning(wrn_msg)
    ## Get the BibDocFile object for the PDF version of the bibdoc to be
    ## stamped:
    try:
        bibdocfile_file_to_stamp = bibdoc_file_to_stamp.get_file("pdf")
    except InvenioBibDocFileError:
        ## This bibdoc doesn't have a physical file with the extension ".pdf"
        ## (take note of the lower-case extension - the bibdocfile library
        ## is case-sensitive with respect to filenames.  Log that there was
        ## no "pdf" and check for a file with extension "PDF":
        wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                  "It wasn't possible to recover a PDF BibDocFile object " \
                  "for the file with the name [%s], using the extension " \
                  "[pdf] - note the lower case - the bibdocfile library " \
                  "relies upon the case of an extension. The submission ID " \
                  "is [%s] and the record ID is [%s]. Going to try " \
                  "looking for a file with a [PDF] extension before giving " \
                  "up . . . " \
                  % (name_file_to_stamp, access, recid)
        register_exception(prefix=wrn_msg)
        try:
            bibdocfile_file_to_stamp = bibdoc_file_to_stamp.get_file("PDF")
        except InvenioBibDocFileError:
            wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                      "It wasn't possible to recover a PDF " \
                      "BibDocFile object for the file with the name [%s], " \
                      "using the extension [PDF] - note the upper case. " \
                      "Had previously tried searching for [pdf] - now " \
                      "giving up. Stamping could not be carried out. " \
                      "The submission ID is [%s] and the record ID is [%s]." \
                      % (name_file_to_stamp, access, recid)
            register_exception(prefix=wrn_msg)
            raise InvenioWebSubmitFunctionWarning(wrn_msg)

    ############
    ## Go ahead and prepare the details for the LaTeX stamp template and its
    ## variables:
    ############
    ## Strip the LaTeX filename into the basename (All templates should be
    ## in the template repository):
    latex_template = os.path.basename(latex_template)

    ## Convert the string of latex template variables into a dictionary
    ## of search-term/replacement-term pairs:
    latex_template_vars = get_dictionary_from_string(
        latex_template_vars_string)
    ## For each of the latex variables, check in `CURDIR' for a file with that
    ## name. If found, use its contents as the template-variable's value.
    ## If not, just use the raw value string already held by the template
    ## variable. (Iterate over a snapshot of the keys because the values are
    ## replaced while looping.)
    for varname in list(latex_template_vars.keys()):
        ## Get this variable's value:
        varvalue = latex_template_vars[varname].strip()
        ## We don't want to interfere with date() or include() directives,
        ## nor with empty values, so those are left exactly as they are:
        is_directive = varvalue.endswith(")") and \
                       (varvalue.startswith("date(") or \
                        varvalue.startswith("include("))
        if is_directive or varvalue == "":
            continue
        ## Is this variable value the name of a file in the current
        ## submission's working directory, from which a literal value for
        ## use in the template should be extracted? If yes, it will
        ## begin with "FILE:". If no, we leave the value exactly as it is.
        if not varvalue.upper().startswith("FILE:"):
            continue
        ## The value to be used is to be taken from a file. Clean the
        ## file name and if it's OK, extract that value from the file.
        seekvalue_fname = os.path.basename(varvalue[5:].strip()).strip()
        if seekvalue_fname == "":
            ## The filename was not valid.
            err_msg = "Error in Stamp_Replace_Single_File_Approval: " \
                      "The function was configured to read a LaTeX " \
                      "template variable from a file with the " \
                      "following instruction: [%s --> %s]. The " \
                      "filename, however, was not considered valid. " \
                      "Please report this to the administrator." \
                      % (varname, varvalue)
            raise InvenioWebSubmitFunctionError(err_msg)
        seekvalue_path = "%s/%s" % (curdir, seekvalue_fname)
        if not os.access(seekvalue_path, os.R_OK|os.F_OK):
            ## The file didn't actually exist in the current
            ## submission's working directory. Use an empty
            ## value:
            latex_template_vars[varname] = ""
            continue
        ## The file exists. Extract its value, making sure the handle is
        ## closed whatever happens:
        try:
            fh_value = open(seekvalue_path, "r")
            try:
                final_varval = fh_value.read()
            finally:
                fh_value.close()
        except IOError:
            ## The file was unreadable.
            err_msg = "Error in Stamp_Replace_Single_File_" \
                      "Approval: The function attempted to " \
                      "read a LaTex template variable " \
                      "value from the following file in the " \
                      "current submission's working " \
                      "directory: [%s]. However, an " \
                      "unexpected error was encountered " \
                      "when doing so. Please inform the " \
                      "administrator." \
                      % seekvalue_fname
            ## user_info defaults to None, so it must not be indexed
            ## blindly here or a TypeError would hide the real error:
            req = None
            if user_info:
                req = user_info.get('req')
            register_exception(req=req)
            raise InvenioWebSubmitFunctionError(err_msg)
        ## Replace the variable value with that which has been read from
        ## the file (trailing whitespace removed):
        latex_template_vars[varname] = final_varval.rstrip()

    ## Put the 'fixed' values into the file_stamper_options dictionary:
    file_stamper_options['latex-template'] = latex_template
    file_stamper_options['latex-template-var'] = latex_template_vars
    file_stamper_options['stamp'] = stamp
    file_stamper_options['layer'] = layer

    ## Put the input file and output file into the file_stamper_options
    ## dictionary:
    file_stamper_options['input-file'] = bibdocfile_file_to_stamp.fullpath
    file_stamper_options['output-file'] = \
        bibdocfile_file_to_stamp.get_full_name()
    ##
    ## Before attempting to stamp the file, log the dictionary of arguments
    ## that will be passed to websubmit_file_stamper. Failing to log is
    ## reported but does not prevent stamping:
    try:
        fh_log = open("%s/websubmit_file_stamper-calls-options.log" \
                      % curdir, "a+")
        try:
            fh_log.write("%s\n" % file_stamper_options)
            fh_log.flush()
        finally:
            fh_log.close()
    except IOError:
        ## Unable to log the file stamper options.
        exception_prefix = "Unable to write websubmit_file_stamper " \
                           "options to log file " \
                           "%s/websubmit_file_stamper-calls-options.log" \
                           % curdir
        register_exception(prefix=exception_prefix)

    try:
        ## Try to stamp the file:
        (stamped_file_path_only, stamped_file_name) = \
                websubmit_file_stamper.stamp_file(file_stamper_options)
    except InvenioWebSubmitFileStamperError:
        ## It wasn't possible to stamp this file.
        ## Register the exception along with an informational message:
        wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                  "There was a problem stamping the file with the name [%s] " \
                  "and the fullpath [%s]. The file has not been stamped. " \
                  "The submission ID is [%s] and the record ID is [%s]." \
                  % (name_file_to_stamp,
                     file_stamper_options['input-file'],
                     access,
                     recid)
        register_exception(prefix=wrn_msg)
        raise InvenioWebSubmitFunctionWarning(wrn_msg)

    ## Stamping was successful. The BibDocFile must now be revised with
    ## the latest (stamped) version of the file:
    file_comment = "Stamped by WebSubmit: %s" \
                   % time.strftime("%d/%m/%Y", time.localtime())
    try:
        bibrecdocs.add_new_version("%s/%s" % (stamped_file_path_only,
                                              stamped_file_name),
                                   name_file_to_stamp,
                                   comment=file_comment,
                                   flags=('STAMPED', ))
    except InvenioBibDocFileError:
        ## Unable to revise the file with the newly stamped version.
        wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                  "After having stamped the file with the name [%s] " \
                  "and the fullpath [%s], it wasn't possible to revise " \
                  "that file with the newly stamped version. Stamping " \
                  "was unsuccessful. The submission ID is [%s] and the " \
                  "record ID is [%s]." \
                  % (name_file_to_stamp,
                     file_stamper_options['input-file'],
                     access,
                     recid)
        register_exception(prefix=wrn_msg)
        raise InvenioWebSubmitFunctionWarning(wrn_msg)

    ## File revised. If the file should be renamed after stamping,
    ## do so.
    if new_file_name != "":
        try:
            bibrecdocs.change_name(newname=new_file_name,
                                   docid=bibdoc_file_to_stamp.id)
        except (IOError, InvenioBibDocFileError):
            ## Unable to change the name
            wrn_msg = "Warning in Stamp_Replace_Single_File_Approval" \
                      ": After having stamped and revised the file " \
                      "with the name [%s] and the fullpath [%s], it " \
                      "wasn't possible to rename it to [%s]. The " \
                      "submission ID is [%s] and the record ID is " \
                      "[%s]." \
                      % (name_file_to_stamp,
                         file_stamper_options['input-file'],
                         new_file_name,
                         access,
                         recid)
            ## The file has already been stamped and revised at this point,
            ## so the failed rename is reported rather than raised; the
            ## message must not be silently thrown away.
            register_exception(prefix=wrn_msg)
    ## Finished.
    return ""
Ejemplo n.º 15
0
def process_batch_job(batch_job_file):
    """ Processes a batch job description dictionary

    The batch description is decoded and sanitised, the video master is
    located or attached, every job of the batch ('encode' or 'extract') is
    run, the BibDoc/MARC of the record is fixed up and finally the optional
    metadata uploads, input deletion and notifications are performed.

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: always 1 when the function returns normally (also when the
        video master could not be found); failures of individual jobs are
        only reported through the notification e-mails
    @rtype: int
    @raise Exception: if the record does not exist or if an encoding job
        has no container/extension defined
    """
    global _BATCH_STEPS
    global _BATCH_STEP

    def upload_marcxml_file(marcxml):
        """ Creates a temporary marcxml file and sends it to bibupload
        """
        xml_filename = 'bibencode_' + str(batch_job['recid']) + '_' + str(
            uuid.uuid4()) + '.xml'
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR,
                                    xml_filename)
        xml_file = open(xml_filename, 'w')
        try:
            xml_file.write(marcxml)
        finally:
            ## Never leak the handle, even if the write fails
            xml_file.close()
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    if record_exists(batch_job['recid']) < 1:
        raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                ## A criterion missing from the batch description defaults
                ## to the file's own value, i.e. it always matches
                m_comment = getval(batch_job, 'bibdoc_master_comment', comment)
                m_description = getval(batch_job, 'bibdoc_master_description',
                                       description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat',
                                     subformat)
                if (comment == m_comment and description == m_description
                        and subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect ratio from the record being processed
                    ## (the record ID used to be hard-coded here)
                    try:
                        ## Assumes pbcore metadata mapping
                        batch_job['aspect'] = get_fieldvalues(
                            batch_job['recid'],
                            CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        ## No aspect stored in the record: leave it unset
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found" %
                                batch_job['recid'])
            task_update_progress("Video master for record %d not found" %
                                 batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(
        batch_job['input'])[1:]
    if not bibdoc_video_extension or getval(batch_job,
                                            'bibdoc_master_extension'):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory of the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if (not getval(batch_job, 'update_from_master')
            and getval(batch_job, 'add_master')):
        ## Generate the right name for the master
        ## The master should be hidden first an then renamed
        ## when it is really available
        ## !!! FIX !!!
        _task_write_message("Adding %s master to the BibDoc" %
                            bibdoc_video_docname)
        master_format = compose_format(
            bibdoc_video_extension,
            getval(batch_job, 'bibdoc_master_subformat', 'master'))
        ## If a file of the same format is there, something is wrong, remove it!
        ## it might be caused by a previous corrupted submission etc.
        if bibdoc_video.format_already_exists_p(master_format):
            bibdoc_video.delete_file(master_format, 1)
        bibdoc_video.add_file_new_format(
            batch_job['input'],
            version=1,
            description=getval(batch_job, 'bibdoc_master_description'),
            comment=getval(batch_job, 'bibdoc_master_comment'),
            docformat=master_format)

    #-----------#
    # JOBS LOOP #
    #-----------#

    ## 1 as long as every job succeeded, 0 as soon as one of them failed
    return_code = 1

    for job in batch_job['jobs']:

        _task_write_message("----------- Job %s of %s -----------" %
                            (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(
                job['bibdoc_docname']).safe_substitute(
                    {'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as fallback
            ## NOTE: a skipped job does not advance _BATCH_STEP
            if not getval(batch_job, 'assure_quality') and getval(
                    job, 'fallback'):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None
            ## We need an extension defined for the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")
            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname',
                                                bibdoc_video_docname)
            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            bibdoc_video_fullpath = compose_file(bibdoc_video_directory,
                                                 bibdoc_slave_video_docname,
                                                 bibdoc_video_extension)
            _task_write_message(
                "Transcoding %s to %s;%s" %
                (bibdoc_slave_video_docname, bibdoc_video_extension,
                 bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory.
            ## The audio bitrate comes from 'audiobitrate' and the video
            ## bitrate from 'videobitrate' (these two used to be swapped).
            encoding_result = encode_video(
                input_file=batch_job['input'],
                output_file=bibdoc_video_fullpath,
                acodec=getval(job, 'audiocodec'),
                vcodec=getval(job, 'videocodec'),
                abitrate=getval(job, 'audiobitrate'),
                vbitrate=getval(job, 'videobitrate'),
                resolution=getval(job, 'resolution'),
                passes=getval(job, 'passes', 1),
                special=getval(job, 'special'),
                specialfirst=getval(job, 'specialfirst'),
                specialsecond=getval(job, 'specialsecond'),
                metadata=getval(job, 'metadata'),
                width=getval(job, 'width'),
                height=getval(job, 'height'),
                aspect=getval(batch_job, 'aspect'),  # Aspect for every job
                profile=getval(job, 'profile'),
                update_fnc=_task_update_overall_status,
                message_fnc=_task_write_message)
            return_code &= encoding_result
            ## only on success
            if encoding_result:
                ## Rename it, adding the subformat
                os.rename(
                    bibdoc_video_fullpath,
                    compose_file(bibdoc_video_directory,
                                 bibdoc_video_extension,
                                 bibdoc_video_subformat, 1,
                                 bibdoc_slave_video_docname))
                #bibdoc_video._build_file_list()
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                             bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(
                        getval(job, 'bibdoc_description'), bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            ## From here on the temporary directory must be removed again,
            ## whatever happens during extraction or while attaching frames
            try:
                #Move this to the batch description
                bibdoc_frame_docname = getval(job, 'bibdoc_docname',
                                              bibdoc_video_docname)
                tmpfname = (
                    tmpdir + "/" + bibdoc_frame_docname + '.' +
                    getval(profile, 'extension',
                           getval(job, 'extension', 'jpg')))
                extraction_result = extract_frames(
                    input_file=batch_job['input'],
                    output_file=tmpfname,
                    size=getval(job, 'size'),
                    positions=getval(job, 'positions'),
                    numberof=getval(job, 'numberof'),
                    width=getval(job, 'width'),
                    height=getval(job, 'height'),
                    aspect=getval(batch_job, 'aspect'),
                    profile=getval(job, 'profile'),
                    update_fnc=_task_update_overall_status,
                )
                return_code &= extraction_result

                ## only on success:
                if extraction_result:
                    ## for every filename in the directory, create a bibdoc
                    ## that contains all sizes of the frame
                    files = os.listdir(tmpdir)
                    for filename in files:
                        ## The docname was altered by BibEncode extract
                        ## through substitution.
                        ## Retrieve it from the filename again
                        bibdoc_frame_docname, bibdoc_frame_extension = \
                            os.path.splitext(filename)
                        _task_write_message("Creating new bibdoc for %s" %
                                            bibdoc_frame_docname)
                        ## If the bibdoc exists, receive it
                        if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                            bibdoc_frame = recdoc.get_bibdoc(
                                bibdoc_frame_docname)
                        ## Create a new bibdoc if it does not exist
                        else:
                            bibdoc_frame = recdoc.add_bibdoc(
                                docname=bibdoc_frame_docname)

                        ## The filename including path from tmpdir
                        fname = os.path.join(tmpdir, filename)

                        bibdoc_frame_format = compose_format(
                            bibdoc_frame_extension, bibdoc_frame_subformat)
                        ## Same as with the master, if the format already
                        ## exists, override it, because something went
                        ## wrong before
                        if bibdoc_frame.format_already_exists_p(
                                bibdoc_frame_format):
                            bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                        _task_write_message("Adding %s jpg;%s to BibDoc" %
                                            (bibdoc_frame_docname,
                                             getval(job, 'bibdoc_subformat')))
                        bibdoc_frame.add_file_new_format(
                            fname,
                            version=1,
                            description=getval(job, 'bibdoc_description'),
                            comment=getval(job, 'bibdoc_comment'),
                            docformat=bibdoc_frame_format)
            finally:
                ## Remove the temporary folders
                _task_write_message("Removing temporary directory")
                shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving in from the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file=getval(batch_job, 'input'),
                                 pbcoreIdentifier=batch_job['recid'],
                                 aspect_override=getval(batch_job, 'aspect'))
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        marc_snippet = open(getval(batch_job, 'marc_snippet'))
        try:
            marcxml = marc_snippet.read()
        finally:
            marc_snippet.close()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    if getval(batch_job, 'delete_input'):
        _task_write_message("Deleting input file")
        # only if successful: return_code is still 1 when no job failed.
        # (The test used to be inverted and removed the input on failure.)
        if return_code:
            # only if input matches pattern
            if getval(batch_job, 'delete_input_pattern',
                      '') in getval(batch_job, 'input'):
                try:
                    os.remove(getval(batch_job, 'input'))
                except OSError:
                    ## Best effort: a missing/locked input is not an error
                    pass

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(
                getval(batch_job, 'notify_user'),
                getval(batch_job, 'submission_filename', batch_job['input']),
                getval(batch_job, 'recid'),
                getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        if getval(batch_job, 'notify_admin'):
            _task_write_message("Notify admin because of an error")
            notify_admin = getval(batch_job, 'notify_admin')
            ## Only a string value is passed on as second argument; any
            ## other truthy value (e.g. True) uses the one-argument call.
            ## (The old check had a misplaced parenthesis and was always
            ## true.)
            if isinstance(notify_admin, (str, type(u''))):
                _notify_error_admin(batch_job, notify_admin)
            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(
                getval(batch_job, 'notify_user'),
                getval(batch_job, 'submission_filename', batch_job['input']),
                getval(batch_job, 'recid'),
                getval(batch_job, 'submission_title', ""))
    return 1
def Move_Files_to_Storage(parameters, curdir, form, user_info=None):
    """
    The function moves files received from the standard submission's
    form through file input element(s). The document are assigned a
    'doctype' (or category) corresponding to the file input element
    (eg. a file uploaded throught 'DEMOPIC_FILE' will go to
    'DEMOPIC_FILE' doctype/category).

    Websubmit engine builds the following file organization in the
    directory curdir/files:

                  curdir/files
                        |
      _____________________________________________________________________
            |                                   |                          |
      ./file input 1 element's name      ./file input 2 element's name    ....
         (for eg. 'DEMOART_MAILFILE')       (for eg. 'DEMOART_APPENDIX')
         |                                     |
      test1.pdf                             test2.pdf


    There is only one instance of all possible extension(pdf, gz...) in each part
    otherwise we may encounter problems when renaming files.

    + parameters['rename']: if given, all the files in curdir/files
      are renamed.  parameters['rename'] is of the form:
      <PA>elemfilename[re]</PA>* where re is an regexp to select(using
      re.sub) what part of the elem file has to be selected.
      e.g: <PA>file:TEST_FILE_RN</PA>

    + parameters['documenttype']: if given, other formats are created.
      It has 2 possible values: - if "picture" icon in gif format is created
                                - if "fulltext" ps, gz .... formats are created

    + parameters['paths_and_suffixes']: directories to look into and
      corresponding suffix to add to every file inside. It must have
      the same structure as a Python dictionnary of the following form
      {'FrenchAbstract':'french', 'EnglishAbstract':''}

      The keys are the file input element name from the form <=>
      directories in curdir/files The values associated are the
      suffixes which will be added to all the files in
      e.g. curdir/files/FrenchAbstract

    + parameters['iconsize'] need only if 'icon' is selected in
      parameters['documenttype']

    + parameters['paths_and_restrictions']: the restrictions to apply
      to each uploaded file. The parameter must have the same
      structure as a Python dictionnary of the following form:
      {'DEMOART_APPENDIX':'restricted'}
      Files not specified in this parameter are not restricted.
      The specified restrictions can include a variable that can be
      replaced at runtime, for eg:
      {'DEMOART_APPENDIX':'restricted to <PA>file:SuE</PA>'}

    + parameters['paths_and_doctypes']: if a doctype is specified,
      the file will be saved under the 'doctype/collection' instead
      of under the default doctype/collection given by the name
      of the upload element that was used on the websubmit interface.
      to configure the doctype in websubmit, enter the value as in a
      dictionnary, for eg:
      {'PATHS_SWORD_UPL' : 'PUSHED_TO_ARXIV'} -> from
      Demo_Export_Via_Sword [DEMOSWR] Document Types
    """

    global sysno
    paths_and_suffixes = parameters['paths_and_suffixes']
    paths_and_restrictions = parameters['paths_and_restrictions']
    rename = parameters['rename']
    documenttype = parameters['documenttype']
    iconsizes = parameters['iconsize'].split(',')
    paths_and_doctypes = parameters['paths_and_doctypes']

    ## Create an instance of BibRecDocs for the current recid(sysno)
    bibrecdocs = BibRecDocs(sysno)

    paths_and_suffixes = get_dictionary_from_string(paths_and_suffixes)

    paths_and_restrictions = get_dictionary_from_string(paths_and_restrictions)

    paths_and_doctypes = get_dictionary_from_string(paths_and_doctypes)

    ## Go through all the directories specified in the keys
    ## of parameters['paths_and_suffixes']
    for path in paths_and_suffixes.keys():
        ## Check if there is a directory for the current path
        if os.path.exists("%s/files/%s" % (curdir, path)):
            ## Retrieve the restriction to apply to files in this
            ## directory
            restriction = paths_and_restrictions.get(path, '')
            restriction = re.sub('<PA>(?P<content>[^<]*)</PA>',
                                 get_pa_tag_content, restriction)

            ## Go through all the files in curdir/files/path
            for current_file in os.listdir("%s/files/%s" % (curdir, path)):
                ## retrieve filename and extension
                dummy, filename, extension = decompose_file(current_file)
                if extension and extension[0] != ".":
                    extension = '.' + extension
                if len(paths_and_suffixes[path]) != 0:
                    extension = "_%s%s" % (paths_and_suffixes[path], extension)
                ## Build the new file name if rename parameter has been given
                if rename:
                    filename = re.sub('<PA>(?P<content>[^<]*)</PA>', \
                                      get_pa_tag_content, \
                                      parameters['rename'])

                if rename or len(paths_and_suffixes[path]) != 0:
                    ## Rename the file
                    try:
                        # Write the log rename_cmd
                        fd = open("%s/rename_cmd" % curdir, "a+")
                        fd.write("%s/files/%s/%s" % (curdir, path, current_file) + " to " +\
                                  "%s/files/%s/%s%s" % (curdir, path, filename, extension) + "\n\n")
                        ## Rename
                        os.rename("%s/files/%s/%s" % (curdir, path, current_file), \
                                  "%s/files/%s/%s%s" % (curdir, path, filename, extension))

                        fd.close()
                        ## Save the new name in a text file in curdir so that
                        ## the new filename can be used by templates to created the recmysl
                        fd = open("%s/%s_RENAMED" % (curdir, path), "w")
                        fd.write("%s%s" % (filename, extension))
                        fd.close()
                    except OSError, err:
                        msg = "Cannot rename the file.[%s]"
                        msg %= str(err)
                        raise InvenioWebSubmitFunctionWarning(msg)
                fullpath = "%s/files/%s/%s%s" % (curdir, path, filename,
                                                 extension)
                ## Check if there is any existing similar file
                if not bibrecdocs.check_file_exists(fullpath):
                    bibdoc = bibrecdocs.add_new_file(
                        fullpath,
                        doctype=paths_and_doctypes.get(path, path),
                        never_fail=True)
                    bibdoc.set_status(restriction)
                    ## Fulltext
                    if documenttype == "fulltext":
                        additionalformats = createRelatedFormats(fullpath)
                        if len(additionalformats) > 0:
                            for additionalformat in additionalformats:
                                try:
                                    bibrecdocs.add_new_format(additionalformat)
                                except InvenioWebSubmitFileError:
                                    pass
                    ## Icon
                    elif documenttype == "picture":
                        has_added_default_icon_subformat_p = False
                        for iconsize in iconsizes:
                            try:
                                iconpath, iconname = create_icon({
                                    'input-file':
                                    fullpath,
                                    'icon-scale':
                                    iconsize,
                                    'icon-name':
                                    None,
                                    'icon-file-format':
                                    None,
                                    'multipage-icon':
                                    False,
                                    'multipage-icon-delay':
                                    100,
                                    'verbosity':
                                    0,
                                })
                            except Exception, e:
                                register_exception(
                                    prefix=
                                    'Impossible to create icon for %s (record %s)'
                                    % (fullpath, sysno),
                                    alert_admin=True)
                                continue
                            iconpath = os.path.join(iconpath, iconname)
                            docname = decompose_file(fullpath)[1]
                            try:
                                mybibdoc = bibrecdocs.get_bibdoc(docname)
                            except InvenioWebSubmitFileError:
                                mybibdoc = None
                            if iconpath is not None and mybibdoc is not None:
                                try:
                                    icon_suffix = iconsize.replace(
                                        '>', '').replace('<', '').replace(
                                            '^', '').replace('!', '')
                                    if not has_added_default_icon_subformat_p:
                                        mybibdoc.add_icon(iconpath)
                                        has_added_default_icon_subformat_p = True
                                    else:
                                        mybibdoc.add_icon(
                                            iconpath,
                                            subformat=
                                            CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT
                                            + "-" + icon_suffix)
                                    ## Save the new icon filename in a text file in curdir so that
                                    ## it can be used by templates to created the recmysl
                                    try:
                                        if not has_added_default_icon_subformat_p:
                                            fd = open(
                                                "%s/%s_ICON" % (curdir, path),
                                                "w")
                                        else:
                                            fd = open(
                                                "%s/%s_ICON_%s" %
                                                (curdir, path,
                                                 iconsize + '_' + icon_suffix),
                                                "w")
                                        fd.write(os.path.basename(iconpath))
                                        fd.close()
                                    except OSError, err:
                                        msg = "Cannot store icon filename.[%s]"
                                        msg %= str(err)
                                        raise InvenioWebSubmitFunctionWarning(
                                            msg)
                                except InvenioWebSubmitFileError, e:
                                    # Most probably icon already existed.
                                    pass
                            elif mybibdoc is not None:
                                mybibdoc.delete_icon()
 def test_BibDocs(self):
     """bibdocfile - BibDocs functions"""
     # Walks one BibDoc of record 2 through its whole life cycle: creation,
     # renaming, status, versions, descriptions/comments, formats, flags,
     # icons and finally deletion/undeletion. The steps depend on each
     # other, so their order matters.
     #add file
     my_bibrecdoc = BibRecDocs(2)
     my_bibrecdoc.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg',
                               'Main', 'img_test', False,
                               'test add new file', 'test', '.jpg')
     my_new_bibdoc = my_bibrecdoc.get_bibdoc("img_test")
     value = my_bibrecdoc.list_bibdocs()
     self.assertEqual(len(value), 2)
     #get total file (bibdoc)
     self.assertEqual(my_new_bibdoc.get_total_size(), 91750)
     #get recid
     self.assertEqual(my_new_bibdoc.get_recid(), 2)
     #change name
     my_new_bibdoc.change_name('new_name')
     #get docname
     self.assertEqual(my_new_bibdoc.get_docname(), 'new_name')
     #get type
     self.assertEqual(my_new_bibdoc.get_type(), 'Main')
     #get id
     self.assert_(my_new_bibdoc.get_id() > 80)
     #set status
     my_new_bibdoc.set_status('new status')
     #get status
     self.assertEqual(my_new_bibdoc.get_status(), 'new status')
     #get base directory
     self.assert_(
         my_new_bibdoc.get_base_dir().startswith(CFG_WEBSUBMIT_FILEDIR))
     #get file number
     self.assertEqual(my_new_bibdoc.get_file_number(), 1)
     #add file new version
     my_new_bibdoc.add_file_new_version(CFG_PREFIX +
                                        '/lib/webtest/invenio/test.jpg',
                                        description='the new version',
                                        comment=None,
                                        format=None,
                                        flags=["PERFORM_HIDE_PREVIOUS"])
     self.assertEqual(my_new_bibdoc.list_versions(), [1, 2])
     #revert (reverting to version 1 creates a new version 3)
     my_new_bibdoc.revert(1)
     self.assertEqual(my_new_bibdoc.list_versions(), [1, 2, 3])
     self.assertEqual(my_new_bibdoc.get_description('.jpg', version=3),
                      'test add new file')
     #get total size latest version
     self.assertEqual(my_new_bibdoc.get_total_size_latest_version(), 91750)
     #get latest version
     self.assertEqual(my_new_bibdoc.get_latest_version(), 3)
     #list latest files
     self.assertEqual(len(my_new_bibdoc.list_latest_files()), 1)
     self.assertEqual(my_new_bibdoc.list_latest_files()[0].get_version(), 3)
     #list version files
     self.assertEqual(
         len(my_new_bibdoc.list_version_files(1, list_hidden=True)), 1)
     #display
     value = my_new_bibdoc.display(version='', ln='en', display_hidden=True)
     self.assert_('>test add new file<' in value)
     #format already exist
     self.assertEqual(my_new_bibdoc.format_already_exists_p('.jpg'), True)
     #get file
     self.assertEqual(
         my_new_bibdoc.get_file('.jpg', version='1').get_version(), 1)
     #set description
     my_new_bibdoc.set_description('new description', '.jpg', version=1)
     #get description
     self.assertEqual(my_new_bibdoc.get_description('.jpg', version=1),
                      'new description')
     #set comment (this step used to call set_description/get_description
     #again, so the comment API was never exercised)
     my_new_bibdoc.set_comment('new comment', '.jpg', version=1)
     #get comment
     self.assertEqual(my_new_bibdoc.get_comment('.jpg', version=1),
                      'new comment')
     #get history (unittest assertion: a bare assert is stripped under -O)
     self.assert_(len(my_new_bibdoc.get_history()) > 0)
     #delete file
     my_new_bibdoc.delete_file('.jpg', 2)
     #list all files
     self.assertEqual(len(my_new_bibdoc.list_all_files()), 2)
     #delete file
     my_new_bibdoc.delete_file('.jpg', 3)
     #add new format
     my_new_bibdoc.add_file_new_format(CFG_PREFIX +
                                       '/lib/webtest/invenio/test.gif',
                                       version=None,
                                       description=None,
                                       comment=None,
                                       format=None)
     self.assertEqual(len(my_new_bibdoc.list_all_files()), 2)
     #delete file
     my_new_bibdoc.delete_file('.jpg', 1)
     #delete file
     my_new_bibdoc.delete_file('.gif', 1)
     #empty bibdoc
     self.assertEqual(my_new_bibdoc.empty_p(), True)
     #hidden?
     self.assertEqual(my_new_bibdoc.hidden_p('.jpg', version=1), False)
     #hide
     my_new_bibdoc.set_flag('HIDDEN', '.jpg', version=1)
     #hidden?
     self.assertEqual(my_new_bibdoc.hidden_p('.jpg', version=1), True)
     #add and get icon
     my_new_bibdoc.add_icon(CFG_PREFIX +
                            '/lib/webtest/invenio/icon-test.gif')
     value = my_bibrecdoc.list_bibdocs()[1]
     self.assertEqual(value.get_icon(), my_new_bibdoc.get_icon())
     #delete icon
     my_new_bibdoc.delete_icon()
     #get icon
     self.assertEqual(my_new_bibdoc.get_icon(), None)
     #delete
     my_new_bibdoc.delete()
     self.assertEqual(my_new_bibdoc.deleted_p(), True)
     #undelete
     my_new_bibdoc.undelete(previous_status='')
# Example no. 18
# 0
def process_batch_job(batch_job_file):
    """Process a batch job description file.

    Decodes and sanitises the batch job, resolves the video master (either
    an already attached file or batch_job['input']), runs every entry of
    batch_job['jobs'] ('encode' or 'extract' mode), fixes the MARC of the
    record, uploads the optional MARCXML additions, optionally deletes the
    input file and finally sends the notifications.

    @param batch_job_file: a fullpath to a batch job file
    @type batch_job_file: string
    @return: 1 whenever the function returns, including the case where the
             video master could not be found; failed jobs are only
             reported through the notifications
    @rtype: int
    @raise Exception: if the record does not exist or if an encoding job
                      has no container/extension defined
    """
    global _BATCH_STEP, _BATCH_STEPS

    def upload_marcxml_file(marcxml):
        """Create a temporary MARCXML file and send it to bibupload."""
        xml_filename = 'bibencode_'+ str(batch_job['recid']) + '_' + str(uuid.uuid4()) + '.xml'
        xml_filename = os.path.join(invenio.config.CFG_TMPSHAREDDIR, xml_filename)
        xml_file = open(xml_filename, 'w')
        try:
            xml_file.write(marcxml)
        finally:
            ## Close the handle even when the write fails
            xml_file.close()
        targs = ['-c', xml_filename]
        task_low_level_submission('bibupload', 'bibencode', *targs)

    #---------#
    # GENERAL #
    #---------#

    _task_write_message("----------- Handling Master -----------")

    ## Check the validity of the batch file here
    batch_job = json_decode_file(batch_job_file)

    ## Sanitise batch description and raise errors
    batch_job = sanitise_batch_job(batch_job)

    ## Check if the record exists
    if record_exists(batch_job['recid']) < 1:
        raise Exception("Record not found")

    recdoc = BibRecDocs(batch_job['recid'])

    #--------------------#
    # UPDATE FROM MASTER #
    #--------------------#

    ## We want to add new stuff to the video's record, using the master as input
    if getval(batch_job, 'update_from_master'):
        found_master = False
        bibdocs = recdoc.list_bibdocs()
        for bibdoc in bibdocs:
            bibdocfiles = bibdoc.list_all_files()
            for bibdocfile in bibdocfiles:
                comment = bibdocfile.get_comment()
                description = bibdocfile.get_description()
                subformat = bibdocfile.get_subformat()
                ## A criterion missing from the batch job falls back to the
                ## file's own value and therefore always matches
                m_comment = getval(batch_job, 'bibdoc_master_comment', comment)
                m_description = getval(batch_job, 'bibdoc_master_description', description)
                m_subformat = getval(batch_job, 'bibdoc_master_subformat', subformat)
                if (comment == m_comment and
                    description == m_description and
                    subformat == m_subformat):
                    found_master = True
                    batch_job['input'] = bibdocfile.get_full_path()
                    ## Get the aspect ratio from the record being processed
                    try:
                        ## Assumes pbcore metadata mapping
                        batch_job['aspect'] = get_fieldvalues(batch_job['recid'], CFG_BIBENCODE_ASPECT_RATIO_MARC_FIELD)[0]
                    except IndexError:
                        ## No aspect ratio stored in the record
                        pass
                    break
            if found_master:
                break
        if not found_master:
            _task_write_message("Video master for record %d not found"
                          % batch_job['recid'])
            task_update_progress("Video master for record %d not found"
                                 % batch_job['recid'])
            ## Maybe send an email?
            return 1

    ## Clean the job to do no upscaling etc
    if getval(batch_job, 'assure_quality'):
        batch_job = clean_job_for_quality(batch_job)

    _BATCH_STEPS = len(batch_job['jobs'])

    ## Generate the docname from the input filename's name or given name
    bibdoc_video_docname, bibdoc_video_extension = decompose_file(batch_job['input'])[1:]
    if not bibdoc_video_extension or getval(batch_job, 'bibdoc_master_extension'):
        bibdoc_video_extension = getval(batch_job, 'bibdoc_master_extension')
    if getval(batch_job, 'bibdoc_master_docname'):
        bibdoc_video_docname = getval(batch_job, 'bibdoc_master_docname')

    write_message("Creating BibDoc for %s" % bibdoc_video_docname)
    ## If the bibdoc exists, receive it
    if bibdoc_video_docname in recdoc.get_bibdoc_names():
        bibdoc_video = recdoc.get_bibdoc(bibdoc_video_docname)
    ## Create a new bibdoc if it does not exist
    else:
        bibdoc_video = recdoc.add_bibdoc(docname=bibdoc_video_docname)

    ## Get the directory of the newly created bibdoc to copy stuff there
    bibdoc_video_directory = bibdoc_video.get_base_dir()

    #--------#
    # MASTER #
    #--------#
    if not getval(batch_job, 'update_from_master'):
        if getval(batch_job, 'add_master'):
            ## Generate the right name for the master
            ## The master should be hidden first and then renamed
            ## when it is really available
            ## !!! FIX !!!
            _task_write_message("Adding %s master to the BibDoc"
                          % bibdoc_video_docname)
            master_format = compose_format(
                                    bibdoc_video_extension,
                                    getval(batch_job, 'bibdoc_master_subformat', 'master')
                                    )
            ## If a file of the same format is there, something is wrong, remove it!
            ## it might be caused by a previous corrupted submission etc.
            if bibdoc_video.format_already_exists_p(master_format):
                bibdoc_video.delete_file(master_format, 1)
            bibdoc_video.add_file_new_format(
                    batch_job['input'],
                    version=1,
                    description=getval(batch_job, 'bibdoc_master_description'),
                    comment=getval(batch_job, 'bibdoc_master_comment'),
                    docformat=master_format
                    )

    #-----------#
    # JOBS LOOP #
    #-----------#

    ## Stays 1 only as long as every executed job reports success
    return_code = 1

    for job in batch_job['jobs']:

        _task_write_message("----------- Job %s of %s -----------"
                           % (_BATCH_STEP, _BATCH_STEPS))

        ## Try to substitute docname with master docname
        if getval(job, 'bibdoc_docname'):
            job['bibdoc_docname'] = Template(job['bibdoc_docname']).safe_substitute({'bibdoc_master_docname': bibdoc_video_docname})

        #-------------#
        # TRANSCODING #
        #-------------#

        if job['mode'] == 'encode':

            ## Skip the job if assure_quality is not set and marked as fallback
            if not getval(batch_job, 'assure_quality') and getval(job, 'fallback'):
                continue

            if getval(job, 'profile'):
                profile = get_encoding_profile(job['profile'])
            else:
                profile = None
            ## We need an extension defined for the video container
            bibdoc_video_extension = getval(job, 'extension',
                                            getval(profile, 'extension'))
            if not bibdoc_video_extension:
                raise Exception("No container/extension defined")
            ## Get the docname and subformat
            bibdoc_video_subformat = getval(job, 'bibdoc_subformat')
            bibdoc_slave_video_docname = getval(job, 'bibdoc_docname', bibdoc_video_docname)
            ## The subformat is incompatible with ffmpegs name convention
            ## We do the encoding without and rename it afterwards
            ## NOTE(review): this call passes (directory, docname, extension)
            ## while the rename below passes (directory, extension, subformat,
            ## version, docname) -- confirm both match compose_file's signature
            bibdoc_video_fullpath = compose_file(
                                                 bibdoc_video_directory,
                                                 bibdoc_slave_video_docname,
                                                 bibdoc_video_extension
                                                 )
            _task_write_message("Transcoding %s to %s;%s" % (bibdoc_slave_video_docname,
                                bibdoc_video_extension,
                                bibdoc_video_subformat))
            ## We encode now directly into the bibdocs directory
            encoding_result = encode_video(
                 input_file=batch_job['input'],
                 output_file=bibdoc_video_fullpath,
                 acodec=getval(job, 'audiocodec'),
                 vcodec=getval(job, 'videocodec'),
                 abitrate=getval(job, 'audiobitrate'),
                 vbitrate=getval(job, 'videobitrate'),
                 resolution=getval(job, 'resolution'),
                 passes=getval(job, 'passes', 1),
                 special=getval(job, 'special'),
                 specialfirst=getval(job, 'specialfirst'),
                 specialsecond=getval(job, 'specialsecond'),
                 metadata=getval(job, 'metadata'),
                 width=getval(job, 'width'),
                 height=getval(job, 'height'),
                 aspect=getval(batch_job, 'aspect'), # Aspect for every job
                 profile=getval(job, 'profile'),
                 update_fnc=_task_update_overall_status,
                 message_fnc=_task_write_message
                 )
            return_code &= encoding_result
            ## only on success
            if encoding_result:
                ## Rename it, adding the subformat
                os.rename(bibdoc_video_fullpath,
                          compose_file(bibdoc_video_directory,
                                       bibdoc_video_extension,
                                       bibdoc_video_subformat,
                                       1,
                                       bibdoc_slave_video_docname)
                          )
                bibdoc_video.touch()
                bibdoc_video._sync_to_db()
                bibdoc_video_format = compose_format(bibdoc_video_extension,
                                                     bibdoc_video_subformat)
                if getval(job, 'bibdoc_comment'):
                    bibdoc_video.set_comment(getval(job, 'bibdoc_comment'),
                                              bibdoc_video_format)
                if getval(job, 'bibdoc_description'):
                    bibdoc_video.set_description(getval(job, 'bibdoc_description'),
                                                 bibdoc_video_format)

        #------------#
        # EXTRACTION #
        #------------#

        # if there are multiple extraction jobs, all the produced files
        # with the same name will be in the same bibdoc! Make sure that
        # you use different subformats or docname templates to avoid
        # conflicts.

        if job['mode'] == 'extract':
            if getval(job, 'profile'):
                profile = get_extract_profile(job['profile'])
            else:
                profile = {}
            bibdoc_frame_subformat = getval(job, 'bibdoc_subformat')
            _task_write_message("Extracting frames to temporary directory")
            tmpdir = invenio.config.CFG_TMPDIR + "/" + str(uuid.uuid4())
            os.mkdir(tmpdir)
            ## The temporary directory is removed in the finally clause, so
            ## it does not leak when the extraction or the attaching raises
            try:
                #Move this to the batch description
                bibdoc_frame_docname = getval(job, 'bibdoc_docname', bibdoc_video_docname)
                tmpfname = (tmpdir + "/" + bibdoc_frame_docname + '.'
                            + getval(profile, 'extension',
                            getval(job, 'extension', 'jpg')))
                extraction_result = extract_frames(input_file=batch_job['input'],
                               output_file=tmpfname,
                               size=getval(job, 'size'),
                               positions=getval(job, 'positions'),
                               numberof=getval(job, 'numberof'),
                               width=getval(job, 'width'),
                               height=getval(job, 'height'),
                               aspect=getval(batch_job, 'aspect'),
                               profile=getval(job, 'profile'),
                               update_fnc=_task_update_overall_status,
                               )
                return_code &= extraction_result

                ## only on success:
                if extraction_result:
                    ## for every filename in the directory, get or create
                    ## the bibdoc named after it and attach the frame
                    files = os.listdir(tmpdir)
                    for filename in files:
                        ## The docname was altered by BibEncode extract through substitution
                        ## Retrieve it from the filename again
                        bibdoc_frame_docname, bibdoc_frame_extension = os.path.splitext(filename)
                        _task_write_message("Creating new bibdoc for %s" % bibdoc_frame_docname)
                        ## If the bibdoc exists, receive it
                        if bibdoc_frame_docname in recdoc.get_bibdoc_names():
                            bibdoc_frame = recdoc.get_bibdoc(bibdoc_frame_docname)
                        ## Create a new bibdoc if it does not exist
                        else:
                            bibdoc_frame = recdoc.add_bibdoc(docname=bibdoc_frame_docname)

                        ## The filename including path from tmpdir
                        fname = os.path.join(tmpdir, filename)

                        bibdoc_frame_format = compose_format(bibdoc_frame_extension, bibdoc_frame_subformat)
                        ## Same as with the master, if the format already exists,
                        ## override it, because something went wrong before
                        if bibdoc_frame.format_already_exists_p(bibdoc_frame_format):
                            bibdoc_frame.delete_file(bibdoc_frame_format, 1)
                        _task_write_message("Adding %s jpg;%s to BibDoc"
                                      % (bibdoc_frame_docname,
                                         getval(job, 'bibdoc_subformat')))
                        bibdoc_frame.add_file_new_format(
                                        fname,
                                        version=1,
                                        description=getval(job, 'bibdoc_description'),
                                        comment=getval(job, 'bibdoc_comment'),
                                        docformat=bibdoc_frame_format)
            finally:
                ## Remove the temporary folders
                _task_write_message("Removing temporary directory")
                shutil.rmtree(tmpdir)

        _BATCH_STEP = _BATCH_STEP + 1

    #-----------------#
    # FIX BIBDOC/MARC #
    #-----------------#

    _task_write_message("----------- Handling MARCXML -----------")

    ## Fix the BibDoc for all the videos previously created
    _task_write_message("Updating BibDoc of %s" % bibdoc_video_docname)
    bibdoc_video._build_file_list()

    ## Fix the MARC
    _task_write_message("Fixing MARC")
    cli_fix_marc({}, [batch_job['recid']], False)

    if getval(batch_job, 'collection'):
        ## Make the record visible by moving in from the collection
        marcxml = ("<record><controlfield tag=\"001\">%d</controlfield>"
                   "<datafield tag=\"980\" ind1=\" \" ind2=\" \">"
                   "<subfield code=\"a\">%s</subfield></datafield></record>"
                   ) % (batch_job['recid'], batch_job['collection'])
        upload_marcxml_file(marcxml)

    #---------------------#
    # ADD MASTER METADATA #
    #---------------------#

    if getval(batch_job, 'add_master_metadata'):
        _task_write_message("Adding master metadata")
        pbcore = pbcore_metadata(input_file = getval(batch_job, 'input'),
                                 pbcoreIdentifier = batch_job['recid'],
                                 aspect_override = getval(batch_job, 'aspect'))
        marcxml = format(pbcore, CFG_BIBENCODE_PBCORE_MARC_XSLT)
        upload_marcxml_file(marcxml)

    #------------------#
    # ADD MARC SNIPPET #
    #------------------#

    if getval(batch_job, 'marc_snippet'):
        marc_snippet = open(getval(batch_job, 'marc_snippet'))
        try:
            marcxml = marc_snippet.read()
        finally:
            marc_snippet.close()
        upload_marcxml_file(marcxml)

    #--------------#
    # DELETE INPUT #
    #--------------#

    if getval(batch_job, 'delete_input'):
        _task_write_message("Deleting input file")
        ## only if successful: return_code is still 1 when no job failed
        if return_code:
            ## only if input matches pattern
            if getval(batch_job, 'delete_input_pattern', '') in getval(batch_job, 'input'):
                try:
                    os.remove(getval(batch_job, 'input'))
                except OSError:
                    ## Best effort: a file that cannot be removed is left behind
                    pass

    #--------------#
    # NOTIFICATION #
    #--------------#

    ## Send Notification emails on errors
    if not return_code:
        if getval(batch_job, 'notify_user'):
            _notify_error_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename', batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
            _task_write_message("Notify user because of an error")
        notify_admin = getval(batch_job, 'notify_admin')
        if notify_admin:
            _task_write_message("Notify admin because of an error")
            ## A string is handed over as second argument, any other
            ## truthy value uses the one-argument call
            if isinstance(notify_admin, str):
                _notify_error_admin(batch_job, notify_admin)
            else:
                _notify_error_admin(batch_job)
    else:
        if getval(batch_job, 'notify_user'):
            _task_write_message("Notify user because of success")
            _notify_success_user(getval(batch_job, 'notify_user'),
                               getval(batch_job, 'submission_filename', batch_job['input']),
                               getval(batch_job, 'recid'),
                               getval(batch_job, 'submission_title', ""))
    return 1
    def test_BibDocs(self):
        """bibdocfile - BibDocs functions

        Attaches a test image to record 2 and drives the resulting BibDoc
        through renaming, status changes, versioning, reverting,
        description/comment editing, format handling, hiding, icons,
        deletion, undeletion and expunging, checking the modification
        dates given along the way.
        """
        test_jpg = CFG_PREFIX + '/lib/webtest/invenio/test.jpg'
        test_gif = CFG_PREFIX + '/lib/webtest/invenio/test.gif'
        test_icon = CFG_PREFIX + '/lib/webtest/invenio/icon-test.gif'
        #add file
        my_bibrecdoc = BibRecDocs(2)
        timestamp1 = datetime(*(time.strptime("2011-10-09 08:07:06", "%Y-%m-%d %H:%M:%S")[:6]))
        my_bibrecdoc.add_new_file(test_jpg,
                                  'Main',
                                  'img_test',
                                  False,
                                  'test add new file',
                                  'test',
                                  '.jpg',
                                  modification_date=timestamp1)
        my_new_bibdoc = my_bibrecdoc.get_bibdoc("img_test")
        value = my_bibrecdoc.list_bibdocs()
        self.assertEqual(len(value), 2)
        #get total file (bibdoc)
        self.assertEqual(my_new_bibdoc.get_total_size(), 91750)
        #get recid
        self.assertEqual(my_new_bibdoc.bibrec_links[0]["recid"], 2)
        #change name
        my_new_bibdoc.change_name(2, 'new_name')
        #get docname (through a fresh BibRecDocs for the same record)
        my_bibrecdoc = BibRecDocs(2)
        self.assertEqual(my_bibrecdoc.get_docname(my_new_bibdoc.id), 'new_name')
        #get type
        self.assertEqual(my_new_bibdoc.get_type(), 'Main')
        #get id
        self.assertTrue(my_new_bibdoc.get_id() > 80)
        #set status
        my_new_bibdoc.set_status('new status')
        #get status
        self.assertEqual(my_new_bibdoc.get_status(), 'new status')
        #get base directory
        self.assertTrue(
            my_new_bibdoc.get_base_dir().startswith(CFG_BIBDOCFILE_FILEDIR))
        #get file number
        self.assertEqual(my_new_bibdoc.get_file_number(), 1)
        #add file new version
        timestamp2 = datetime(*(time.strptime("2010-09-08 07:06:05", "%Y-%m-%d %H:%M:%S")[:6]))
        my_new_bibdoc.add_file_new_version(test_jpg,
                                           description='the new version',
                                           comment=None,
                                           docformat=None,
                                           flags=["PERFORM_HIDE_PREVIOUS"],
                                           modification_date=timestamp2)
        self.assertEqual(my_new_bibdoc.list_versions(), [1, 2])
        #revert
        timestamp3 = datetime.now()
        time.sleep(2) # so we can see a difference between now() and the time of the revert
        my_new_bibdoc.revert(1)
        self.assertEqual(my_new_bibdoc.list_versions(), [1, 2, 3])
        self.assertEqual(my_new_bibdoc.get_description('.jpg', version=3),
                         'test add new file')
        #get total size latest version
        self.assertEqual(my_new_bibdoc.get_total_size_latest_version(), 91750)
        #get latest version
        self.assertEqual(my_new_bibdoc.get_latest_version(), 3)
        #list latest files
        self.assertEqual(len(my_new_bibdoc.list_latest_files()), 1)
        self.assertEqual(my_new_bibdoc.list_latest_files()[0].get_version(), 3)
        #list version files
        self.assertEqual(
            len(my_new_bibdoc.list_version_files(1, list_hidden=True)), 1)
        #display is not checked: there is no display facility inside of an object
        #format already exist
        self.assertEqual(my_new_bibdoc.format_already_exists_p('.jpg'), True)
        #get file
        self.assertEqual(
            my_new_bibdoc.get_file('.jpg', version='1').get_version(), 1)
        #set description
        my_new_bibdoc.set_description('new description', '.jpg', version=1)
        #get description
        self.assertEqual(my_new_bibdoc.get_description('.jpg', version=1),
                         'new description')
        #set comment (the comment accessors, not the description ones again)
        my_new_bibdoc.set_comment('new comment', '.jpg', version=1)
        #get comment
        self.assertEqual(my_new_bibdoc.get_comment('.jpg', version=1),
                         'new comment')
        #get history
        self.assertTrue(len(my_new_bibdoc.get_history()) > 0)
        #check modification date
        self.assertEqual(my_new_bibdoc.get_file('.jpg', version=1).md, timestamp1)
        self.assertEqual(my_new_bibdoc.get_file('.jpg', version=2).md, timestamp2)
        self.assertTrue(my_new_bibdoc.get_file('.jpg', version=3).md > timestamp3)
        #delete file
        my_new_bibdoc.delete_file('.jpg', 2)
        #list all files
        self.assertEqual(len(my_new_bibdoc.list_all_files()), 2)
        #delete file
        my_new_bibdoc.delete_file('.jpg', 3)
        #add new format
        timestamp4 = datetime(*(time.strptime("2012-11-10 09:08:07", "%Y-%m-%d %H:%M:%S")[:6]))
        my_new_bibdoc.add_file_new_format(test_gif,
                                          version=None,
                                          description=None,
                                          comment=None,
                                          docformat=None,
                                          modification_date=timestamp4)
        self.assertEqual(len(my_new_bibdoc.list_all_files()), 2)
        #check modification time
        self.assertEqual(my_new_bibdoc.get_file('.jpg', version=1).md, timestamp1)
        self.assertEqual(my_new_bibdoc.get_file('.gif', version=1).md, timestamp4)
        #change the format name
        my_new_bibdoc.change_docformat('.gif', '.gif;icon-640')
        self.assertEqual(my_new_bibdoc.format_already_exists_p('.gif'), False)
        self.assertEqual(
            my_new_bibdoc.format_already_exists_p('.gif;icon-640'), True)
        #delete file
        my_new_bibdoc.delete_file('.jpg', 1)
        #delete file
        my_new_bibdoc.delete_file('.gif;icon-640', 1)
        #empty bibdoc
        self.assertEqual(my_new_bibdoc.empty_p(), True)
        #hidden?
        self.assertEqual(my_new_bibdoc.hidden_p('.jpg', version=1), False)
        #hide
        my_new_bibdoc.set_flag('HIDDEN', '.jpg', version=1)
        #hidden?
        self.assertEqual(my_new_bibdoc.hidden_p('.jpg', version=1), True)
        #add and get icon
        my_new_bibdoc.add_icon(test_icon, modification_date=timestamp4)

        #the icon seen through a freshly loaded bibdoc must be the same one
        my_bibrecdoc = BibRecDocs(2)
        value = my_bibrecdoc.get_bibdoc("new_name")
        self.assertEqual(value.get_icon().docid, my_new_bibdoc.get_icon().docid)
        self.assertEqual(value.get_icon().version, my_new_bibdoc.get_icon().version)
        self.assertEqual(value.get_icon().format, my_new_bibdoc.get_icon().format)

        #check modification time
        self.assertEqual(my_new_bibdoc.get_icon().md, timestamp4)
        #delete icon
        my_new_bibdoc.delete_icon()
        #get icon
        self.assertEqual(my_new_bibdoc.get_icon(), None)
        #delete
        my_new_bibdoc.delete()
        self.assertEqual(my_new_bibdoc.deleted_p(), True)
        #undelete
        my_new_bibdoc.undelete(previous_status='', recid=2)
        #expunging
        my_new_bibdoc.expunge()
        my_bibrecdoc.build_bibdoc_list()
        self.assertFalse('new_name' in my_bibrecdoc.get_bibdoc_names())
        self.assertTrue(my_bibrecdoc.get_bibdoc_names())
Ejemplo n.º 20
0
    def get_record(self, path=None, no_pdf=False):
        """Build the MARCXML record of the article found at path.

        The article is loaded with self.get_article(path) and its title,
        publication information, DOI, authors, abstract, copyright,
        keywords and references are turned into MARC fields. When
        self.CONSYN is set, path itself is attached as a hidden FFT;
        otherwise the PDF (PDF/A preferred) and 'main.xml' found under
        path are attached.

        @param path: location handed to self.get_article; outside CONSYN
                     mode it is joined with file names such as 'main.pdf',
                     i.e. it is used as the directory of the article
        @param no_pdf: outside CONSYN mode, when true no file is attached
                       and the 583 and 980 (SCOAP3) fields are left out
        @return: the MARCXML produced by record_xml_output, or an empty
                 string when that call raises UnicodeDecodeError
        """
        xml_doc = self.get_article(path)
        rec = {}
        title = self.get_title(xml_doc)
        if title:
            record_add_field(rec, '245', subfields=[('a', title)])
        (journal, dummy, volume, issue, first_page, last_page, year,
         start_date, doi) = self.get_publication_information(xml_doc)
        if not journal:
            journal = self.get_article_journal(xml_doc)
        if start_date:
            record_add_field(rec, '260', subfields=[('c', start_date)])
        else:
            # No publication date available: fall back to today's date
            record_add_field(
                rec, '260', subfields=[('c', time.strftime('%Y-%m-%d'))])
        if doi:
            record_add_field(rec, '024', ind1='7', subfields=[('a', doi),
                                                              ('2', 'DOI')])
        self.logger.info("Creating record: %s %s" % (path, doi))
        authors = self.get_authors(xml_doc)
        first_author = True
        for author in authors:
            author_name = (author['surname'], author.get(
                'given_name') or author.get('initials'))
            subfields = [('a', '%s, %s' % author_name)]
            if 'orcid' in author:
                subfields.append(('j', author['orcid']))
            if 'affiliation' in author:
                for aff in author["affiliation"]:
                    subfields.append(('v', aff))
            if author.get('email'):
                subfields.append(('m', author['email']))
            # The first author goes to 100, every further one to 700
            if first_author:
                record_add_field(rec, '100', subfields=subfields)
                first_author = False
            else:
                record_add_field(rec, '700', subfields=subfields)
        abstract = self.get_abstract(xml_doc)
        if abstract:
            record_add_field(rec, '520', subfields=[('a', abstract),
                                                    ('9', 'Elsevier')])
        # Test the extracted value: the bare name `copyright` is a builtin
        # that is always true
        copyright_statement = self.get_copyright(xml_doc)
        if copyright_statement:
            record_add_field(rec, '542',
                             subfields=[('f', copyright_statement)])
        keywords = self.get_keywords(xml_doc)
        if self.CONSYN:
            if keywords:
                for keyword in keywords:
                    record_add_field(
                        rec, '653', ind1='1', subfields=[('a', keyword),
                                    ('9', 'author')])
            journal, dummy = fix_journal_name(journal.strip(), self.journal_mappings)
            subfields = []
            doctype = self.get_doctype(xml_doc)
            if doctype == 'err':
                subfields.append(('m', 'Erratum'))
            elif doctype == 'add':
                subfields.append(('m', 'Addendum'))
            elif doctype == 'pub':
                subfields.append(('m', 'Publisher Note'))
            if journal:
                subfields.append(('p', journal))
            if first_page and last_page:
                subfields.append(('c', '%s-%s' % (first_page, last_page)))
            elif first_page:
                subfields.append(('c', first_page))
            if volume:
                subfields.append(('v', volume))
            if issue:
                subfields.append(('n', issue))
            if year:
                subfields.append(('y', year))
            record_add_field(rec, '773', subfields=subfields)
        else:
            licence = 'http://creativecommons.org/licenses/by/3.0/'
            record_add_field(rec, '540', subfields=[('a', 'CC-BY-3.0'),
                                                    ('u', licence)])
            if keywords:
                for keyword in keywords:
                    record_add_field(
                        rec, '653', ind1='1', subfields=[('a', keyword),
                                    ('9', 'author')])
            record_add_field(rec, '773', subfields=[('p', journal),
                                                    ('v', volume),
                                                    ('n', issue),
                                                    ('c', '%s-%s' % (
                                                        first_page, last_page)),
                                                    ('y', year)])
        self._add_references(xml_doc, rec)
        if self.CONSYN:
            record_add_field(rec, 'FFT', subfields=[('a', path),
                                                    ('t', 'Elsevier'),
                                                    ('o', 'HIDDEN')])
            record_add_field(rec, '980', subfields=[('a', 'HEP')])
            record_add_field(rec, '980', subfields=[('a', 'Citeable')])
            record_add_field(rec, '980', subfields=[('a', 'Published')])
            if doctype == 'rev':
                record_add_field(rec, '980', subfields=[('a', 'Review')])
        else:
            if not no_pdf:
                from invenio.search_engine import search_pattern
                query = '0247_a:"%s" AND NOT 980:DELETED' % (doi,)
                prev_version = search_pattern(p=query)
                from invenio.bibdocfile import BibRecDocs
                old_pdf = False

                if prev_version:
                    # Keep the PDF/A attached to the first matching record
                    prev_rec = BibRecDocs(prev_version[0])
                    try:
                        old_pdf_path = prev_rec.get_bibdoc('main').get_file(
                            ".pdf;pdfa", exact_docformat=True).fullpath
                        old_pdf = True
                        record_add_field(rec, 'FFT',
                                         subfields=[('a', old_pdf_path),
                                                    ('n', 'main'),
                                                    ('f', '.pdf;pdfa')])
                        message = ('Leaving previously delivered PDF/A for: %s'
                                   % (doi,))
                        self.logger.info(message)
                    except Exception:
                        # Best effort: carry on without the old PDF/A
                        self.logger.debug(
                            'No previously delivered PDF/A reused for: %s'
                            % (doi,))
                try:
                    # `path` is left untouched here because it is still
                    # needed below for the package name and for main.xml
                    if exists(join(path, 'main_a-2b.pdf')):
                        pdf_path = join(path, 'main_a-2b.pdf')
                        record_add_field(rec, 'FFT',
                                         subfields=[('a', pdf_path),
                                                    ('n', 'main'),
                                                    ('f', '.pdf;pdfa')])
                        self.logger.debug('Adding PDF/A to record: %s' % (doi,))
                    elif exists(join(path, 'main.pdf')):
                        pdf_path = join(path, 'main.pdf')
                        record_add_field(rec, 'FFT',
                                         subfields=[('a', pdf_path)])
                    else:
                        if not old_pdf:
                            message = ("Record %s doesn't contain PDF file."
                                       % (doi,))
                            self.logger.warning(message)
                            # Raised and caught right below, so that
                            # register_exception runs inside an except block
                            raise MissingFFTError(message)
                except MissingFFTError:
                    message = "Elsevier paper: %s is missing PDF." % (doi,)
                    register_exception(alert_admin=True, prefix=message)
                version = self.get_elsevier_version(find_package_name(path))
                record_add_field(rec, '583', subfields=[('l', version)])
                xml_path = join(path, 'main.xml')
                record_add_field(rec, 'FFT', subfields=[('a', xml_path)])
                record_add_field(rec, '980', subfields=[('a', 'SCOAP3'),
                                                        ('b', 'Elsevier')])
        try:
            return record_xml_output(rec)
        except UnicodeDecodeError:
            message = ("Found a bad char in the file for the article %s"
                       % (doi,))
            sys.stderr.write(message)
            return ""
Ejemplo n.º 21
0
    def test_BibDocs(self):
        """bibdocfile - BibDocs functions

        Walks a single bibdoc attached to record 2 through its whole life
        cycle: creation, renaming, status changes, versioning and revert,
        descriptions and comments, modification dates, formats, flags,
        icons and finally delete / undelete / expunge.

        The steps share state (each one builds on what the previous steps
        left behind), so they must run in exactly the order written here.
        """
        # --- create a bibdoc with an explicit modification date -----------
        my_bibrecdoc = BibRecDocs(2)
        timestamp1 = datetime(
            *(time.strptime("2011-10-09 08:07:06", "%Y-%m-%d %H:%M:%S")[:6]))
        my_bibrecdoc.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg',
                                  'Main',
                                  'img_test',
                                  False,
                                  'test add new file',
                                  'test',
                                  '.jpg',
                                  modification_date=timestamp1)
        my_new_bibdoc = my_bibrecdoc.get_bibdoc("img_test")
        value = my_bibrecdoc.list_bibdocs()
        self.assertEqual(len(value), 2)
        # total size of the bibdoc
        self.assertEqual(my_new_bibdoc.get_total_size(), 91750)
        # record the bibdoc is linked to
        self.assertEqual(my_new_bibdoc.bibrec_links[0]["recid"], 2)
        # rename the bibdoc
        my_new_bibdoc.change_name(2, 'new_name')
        # the new docname must be visible through a freshly built BibRecDocs
        my_bibrecdoc = BibRecDocs(2)
        self.assertEqual(my_bibrecdoc.get_docname(my_new_bibdoc.id),
                         'new_name')
        # doctype
        self.assertEqual(my_new_bibdoc.get_type(), 'Main')
        # docid
        self.assert_(my_new_bibdoc.get_id() > 80)
        # status round trip
        my_new_bibdoc.set_status('new status')
        self.assertEqual(my_new_bibdoc.get_status(), 'new status')
        # base directory
        self.assert_(
            my_new_bibdoc.get_base_dir().startswith(CFG_BIBDOCFILE_FILEDIR))
        # number of files
        self.assertEqual(my_new_bibdoc.get_file_number(), 1)

        # --- versions ------------------------------------------------------
        timestamp2 = datetime(
            *(time.strptime("2010-09-08 07:06:05", "%Y-%m-%d %H:%M:%S")[:6]))
        my_new_bibdoc.add_file_new_version(CFG_PREFIX +
                                           '/lib/webtest/invenio/test.jpg',
                                           description='the new version',
                                           comment=None,
                                           docformat=None,
                                           flags=["PERFORM_HIDE_PREVIOUS"],
                                           modification_date=timestamp2)
        self.assertEqual(my_new_bibdoc.list_versions(), [1, 2])
        # revert to version 1, which creates version 3
        timestamp3 = datetime.now()
        # Sleep so that the revert's modification date is strictly later
        # than timestamp3 (checked further down).
        time.sleep(2)
        my_new_bibdoc.revert(1)
        self.assertEqual(my_new_bibdoc.list_versions(), [1, 2, 3])
        self.assertEqual(my_new_bibdoc.get_description('.jpg', version=3),
                         'test add new file')
        # total size of the latest version
        self.assertEqual(my_new_bibdoc.get_total_size_latest_version(), 91750)
        # latest version number
        self.assertEqual(my_new_bibdoc.get_latest_version(), 3)
        # files of the latest version
        self.assertEqual(len(my_new_bibdoc.list_latest_files()), 1)
        self.assertEqual(my_new_bibdoc.list_latest_files()[0].get_version(), 3)
        # files of a given version
        self.assertEqual(
            len(my_new_bibdoc.list_version_files(1, list_hidden=True)), 1)
        # display: not tested, there is no display facility on the object.
        # format already exists
        self.assertEqual(my_new_bibdoc.format_already_exists_p('.jpg'), True)
        # get file
        self.assertEqual(
            my_new_bibdoc.get_file('.jpg', version='1').get_version(), 1)

        # --- description and comment --------------------------------------
        my_new_bibdoc.set_description('new description', '.jpg', version=1)
        self.assertEqual(my_new_bibdoc.get_description('.jpg', version=1),
                         'new description')
        # The comment step used to call set_description/get_description
        # again, so the comment accessors were never exercised.
        my_new_bibdoc.set_comment('new comment', '.jpg', version=1)
        self.assertEqual(my_new_bibdoc.get_comment('.jpg', version=1),
                         'new comment')
        # history must not be empty
        self.assert_(len(my_new_bibdoc.get_history()) > 0)

        # --- modification dates -------------------------------------------
        self.assertEqual(
            my_new_bibdoc.get_file('.jpg', version=1).md, timestamp1)
        self.assertEqual(
            my_new_bibdoc.get_file('.jpg', version=2).md, timestamp2)
        self.assert_(my_new_bibdoc.get_file('.jpg', version=3).md > timestamp3)

        # --- deleting files and adding a new format -----------------------
        my_new_bibdoc.delete_file('.jpg', 2)
        self.assertEqual(len(my_new_bibdoc.list_all_files()), 2)
        my_new_bibdoc.delete_file('.jpg', 3)
        timestamp4 = datetime(
            *(time.strptime("2012-11-10 09:08:07", "%Y-%m-%d %H:%M:%S")[:6]))
        my_new_bibdoc.add_file_new_format(CFG_PREFIX +
                                          '/lib/webtest/invenio/test.gif',
                                          version=None,
                                          description=None,
                                          comment=None,
                                          docformat=None,
                                          modification_date=timestamp4)
        self.assertEqual(len(my_new_bibdoc.list_all_files()), 2)
        # modification dates after adding the new format
        self.assertEqual(
            my_new_bibdoc.get_file('.jpg', version=1).md, timestamp1)
        self.assertEqual(
            my_new_bibdoc.get_file('.gif', version=1).md, timestamp4)
        # rename the format
        my_new_bibdoc.change_docformat('.gif', '.gif;icon-640')
        self.assertEqual(my_new_bibdoc.format_already_exists_p('.gif'), False)
        self.assertEqual(
            my_new_bibdoc.format_already_exists_p('.gif;icon-640'), True)
        # delete the remaining files
        my_new_bibdoc.delete_file('.jpg', 1)
        my_new_bibdoc.delete_file('.gif;icon-640', 1)
        # the bibdoc is now empty
        self.assertEqual(my_new_bibdoc.empty_p(), True)

        # --- flags ----------------------------------------------------------
        self.assertEqual(my_new_bibdoc.hidden_p('.jpg', version=1), False)
        my_new_bibdoc.set_flag('HIDDEN', '.jpg', version=1)
        self.assertEqual(my_new_bibdoc.hidden_p('.jpg', version=1), True)

        # --- icon -----------------------------------------------------------
        my_new_bibdoc.add_icon(CFG_PREFIX +
                               '/lib/webtest/invenio/icon-test.gif',
                               modification_date=timestamp4)

        # The icon seen through a freshly built BibRecDocs must match the
        # one reported by the object we have been working on.
        my_bibrecdoc = BibRecDocs(2)
        value = my_bibrecdoc.get_bibdoc("new_name")
        self.assertEqual(value.get_icon().docid,
                         my_new_bibdoc.get_icon().docid)
        self.assertEqual(value.get_icon().version,
                         my_new_bibdoc.get_icon().version)
        self.assertEqual(value.get_icon().format,
                         my_new_bibdoc.get_icon().format)

        # modification date of the icon
        self.assertEqual(my_new_bibdoc.get_icon().md, timestamp4)
        # delete the icon
        my_new_bibdoc.delete_icon()
        self.assertEqual(my_new_bibdoc.get_icon(), None)

        # --- delete / undelete / expunge ----------------------------------
        my_new_bibdoc.delete()
        self.assertEqual(my_new_bibdoc.deleted_p(), True)
        my_new_bibdoc.undelete(previous_status='', recid=2)
        my_new_bibdoc.expunge()
        my_bibrecdoc.build_bibdoc_list()
        # The expunged bibdoc is gone, but the record still has other docs.
        self.failIf('new_name' in my_bibrecdoc.get_bibdoc_names())
        self.failUnless(my_bibrecdoc.get_bibdoc_names())
def Stamp_Replace_Single_File_Approval(parameters, \
                                       curdir, \
                                       form, \
                                       user_info=None):
    """
    This function is intended to be called when a document has been
    approved and needs to be stamped.
    The function should be used when there is ONLY ONE file to be
    stamped after approval (for example, the "main file").
    The name of the file to be stamped should be known and should be stored
    in a file in the submission's working directory (without the extension).
    Generally, this will work out fine as the main file is named after the
    report number of the document, this will be stored in the report number
    file.

    @param parameters: (dictionary) - must contain:

         + latex_template: (string) - the name of the LaTeX template that
            should be used for the creation of the stamp.

         + latex_template_vars: (string) - a string-ified dictionary
            of variables to be replaced in the LaTeX template and the
            values (or names of files in curdir containing the values)
            with which to replace them. Use prefix 'FILE:' to specify
            that the stamped value must be read from a file in
            submission directory instead of being a fixed value to
            stamp.
            E.G.:
               { 'TITLE' : 'FILE:DEMOTHESIS_TITLE',
                 'DATE'  : 'FILE:DEMOTHESIS_DATE'
               }

         + file_to_be_stamped: (string) - this is the name of a file in the
            submission's working directory that contains the name of the
            bibdocfile that is to be stamped.

         + new_file_name: (string) - this is the name of a file in the
            submission's working directory that contains the name that is to
            be given to the file after it has been stamped. If empty, or if
            that file doesn't exist, the file will not be renamed after
            stamping.

         + switch_file: (string) - when this value is set, specifies
            the name of a file that will switch on/off the
            stamping. The stamp will be applied if the file exists in
            the submission directory and is not empty. If the file
            cannot be found or is empty, the stamp is not applied.
            Useful for eg. if you want to let your users control the
            stamping with a checkbox on your submission page.
            Leave this parameter empty to always stamp by default.

         + stamp: (string) - the type of stamp to be applied to the file.
            should be one of:
              + first (only the first page is stamped);
              + all (all pages are stamped);
              + coverpage (a separate cover-page is added to the file as a
                 first page);

         + layer: (string) - the position of the stamp. Should be one of:
              + background (invisible if original file has a white
                -not transparent- background layer)
              + foreground (on top of the stamped file.  If the stamp
                does not have a transparent background, will hide all
                of the document layers)
           The default value is 'background'.

    @param curdir: (string) - the submission's working directory.

    @param form: not used by this function.

    @param user_info: optional mapping; when it provides a 'req' entry,
        that value is passed on to register_exception if a template
        variable file cannot be read.

    @return: (string) - always the empty string.

    @raise InvenioWebSubmitFunctionError: when the record ID cannot be
        recovered from curdir, or when a 'FILE:' template variable names
        an invalid or unreadable file.

    @raise InvenioWebSubmitFunctionWarning: when the name of the file to
        stamp is empty, when no matching bibdoc or PDF file is found, or
        when stamping or revising the file fails.
    """
    ############
    ## Definition of important variables:
    ############
    ## The file stamper needs to be called with a dictionary of options of
    ## the following format:
    ##  { 'latex-template'      : "", ## TEMPLATE_NAME
    ##    'latex-template-var'  : {}, ## TEMPLATE VARIABLES
    ##    'input-file'          : "", ## INPUT FILE
    ##    'output-file'         : "", ## OUTPUT FILE
    ##    'stamp'               : "", ## STAMP TYPE
    ##    'layer'               : "", ## LAYER TO STAMP
    ##    'verbosity'           : 0,  ## VERBOSITY (we don't care about it)
    ##  }
    file_stamper_options = { 'latex-template'      : "",
                             'latex-template-var'  : { },
                             'input-file'          : "",
                             'output-file'         : "",
                             'stamp'               : "",
                             'layer'               : "",
                             'verbosity'           : 0,
                           }

    ## Check if stamping is enabled
    switch_file = parameters.get('switch_file', '')
    if switch_file:
        # Good, a "switch file" was specified. Check if it exists, and
        # if its value is not empty.
        if not _read_in_file(os.path.join(curdir, switch_file)):
            # File does not exist, or is empty. Silently abort
            # stamping.
            return ""

    ## Submission access number:
    access = _read_in_file("%s/access" % curdir)
    ## record ID for the current submission. It is found in the special file
    ## "SN" (sysno) in curdir:
    recid = _read_in_file("%s/SN" % curdir)
    try:
        recid = int(recid)
    except ValueError:
        ## No record ID. Cannot continue.
        err_msg = "Error in Stamp_Replace_Single_File_Approval: " \
                  "Cannot recover record ID from the submission's working " \
                  "directory. Stamping cannot be carried out. The " \
                  "submission ID is [%s]." % cgi.escape(access)
        register_exception(prefix=err_msg)
        raise InvenioWebSubmitFunctionError(err_msg)

    ############
    ## Resolution of function parameters:
    ############
    ## The name of the LaTeX template to be used for stamp creation
    ## (anything that is not a string is treated as empty):
    latex_template = parameters['latex_template']
    if not isinstance(latex_template, str):
        latex_template = ""
    ## A string containing the variables/values that should be substituted
    ## in the final (working) LaTeX template:
    latex_template_vars_string = parameters['latex_template_vars']
    if not isinstance(latex_template_vars_string, str):
        latex_template_vars_string = ""
    ## The type of stamp to be applied to the file(s):
    stamp = parameters['stamp']
    if isinstance(stamp, str):
        stamp = stamp.lower()
    else:
        stamp = ""
    ## The layer to use for stamping (anything unknown means "background"):
    layer = parameters.get('layer', "background")
    if layer not in ('background', 'foreground'):
        layer = "background"

    ## Get the name of the file to be stamped from the file indicated in
    ## the file_to_be_stamped parameter:
    file_to_stamp_file = parameters.get('file_to_be_stamped', "")
    if file_to_stamp_file is None:
        file_to_stamp_file = ""
    ## Get the "basename" for the file to be stamped (it's mandatory that it
    ## be in curdir):
    file_to_stamp_file = os.path.basename(file_to_stamp_file).strip()
    name_file_to_stamp = _read_in_file("%s/%s" % (curdir, file_to_stamp_file))
    ## Strings are immutable: the cleaned-up value has to be assigned back,
    ## otherwise the line breaks stay in the name.
    name_file_to_stamp = \
        name_file_to_stamp.replace("\n", "").replace("\r", "")

    ## Get the name to be given to the file after it has been stamped (if there
    ## is one.) Once more, it will be found in a file in curdir:
    new_file_name_file = parameters.get('new_file_name', "")
    if new_file_name_file is None:
        new_file_name_file = ""
    ## Get the "basename" for the file containing the new file name. (It's
    ## mandatory that it be in curdir):
    new_file_name_file = os.path.basename(new_file_name_file).strip()
    new_file_name = _read_in_file("%s/%s" % (curdir, new_file_name_file))

    ############
    ## Begin:
    ############
    ##
    ## If no name for the file to stamp, warning.
    if name_file_to_stamp == "":
        wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                  "It was not possible to recover a valid name for the " \
                  "file to be stamped. Stamping could not, therefore, be " \
                  "carried out. The submission ID is [%s]." \
                  % access
        raise InvenioWebSubmitFunctionWarning(wrn_msg)
    ##
    ## The file to be stamped is a bibdoc. We will only stamp it (a) if it
    ## exists; and (b) if it is a PDF file. So, get the path (in the bibdocs
    ## tree) to the file to be stamped:
    ##
    ## First get the object representing the bibdocs belonging to this record:
    bibrecdocs = BibRecDocs(recid)
    try:
        bibdoc_file_to_stamp = bibrecdocs.get_bibdoc("%s" % name_file_to_stamp)
    except InvenioWebSubmitFileError:
        ## Couldn't get a bibdoc object for this filename. Probably the file
        ## that we wanted to stamp wasn't attached to this record.
        wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                  "It was not possible to recover a bibdoc object for the " \
                  "filename [%s] when trying to stamp the main file. " \
                  "Stamping could not be carried out. The submission ID is " \
                  "[%s] and the record ID is [%s]." \
                  % (name_file_to_stamp, access, recid)
        register_exception(prefix=wrn_msg)
        raise InvenioWebSubmitFunctionWarning(wrn_msg)
    ## Get the BibDocFile object for the PDF version of the bibdoc to be
    ## stamped:
    try:
        bibdocfile_file_to_stamp = bibdoc_file_to_stamp.get_file("pdf")
    except InvenioWebSubmitFileError:
        ## This bibdoc doesn't have a physical file with the extension ".pdf"
        ## (take note of the lower-case extension - the bibdocfile library
        ## is case-sensitive with respect to filenames.  Log that there was
        ## no "pdf" and check for a file with extension "PDF":
        wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                  "It wasn't possible to recover a PDF BibDocFile object " \
                  "for the file with the name [%s], using the extension " \
                  "[pdf] - note the lower case - the bibdocfile library " \
                  "relies upon the case of an extension. The submission ID " \
                  "is [%s] and the record ID is [%s]. Going to try " \
                  "looking for a file with a [PDF] extension before giving " \
                  "up . . . " \
                  % (name_file_to_stamp, access, recid)
        register_exception(prefix=wrn_msg)
        try:
            bibdocfile_file_to_stamp = bibdoc_file_to_stamp.get_file("PDF")
        except InvenioWebSubmitFileError:
            wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                      "It wasn't possible to recover a PDF " \
                      "BibDocFile object for the file with the name [%s], " \
                      "using the extension [PDF] - note the upper case. " \
                      "Had previously tried searching for [pdf] - now " \
                      "giving up. Stamping could not be carried out. " \
                      "The submission ID is [%s] and the record ID is [%s]." \
                      % (name_file_to_stamp, access, recid)
            register_exception(prefix=wrn_msg)
            raise InvenioWebSubmitFunctionWarning(wrn_msg)

    ############
    ## Go ahead and prepare the details for the LaTeX stamp template and its
    ## variables:
    ############
    ## Strip the LaTeX filename into the basename (All templates should be
    ## in the template repository):
    latex_template = os.path.basename(latex_template)

    ## Convert the string of latex template variables into a dictionary
    ## of search-term/replacement-term pairs:
    latex_template_vars = get_dictionary_from_string(latex_template_vars_string)
    ## Values starting with "FILE:" name a file in curdir whose contents
    ## become the variable's value. Every other value is left untouched.
    ## (Only existing keys are re-assigned below, so the set of keys taken
    ## here stays valid during the loop.)
    for varname in list(latex_template_vars.keys()):
        ## Get this variable's value:
        varvalue = latex_template_vars[varname].strip()
        if varvalue == "":
            continue
        if varvalue.endswith(")") and (varvalue.startswith("date(") or \
                                       varvalue.startswith("include(")):
            ## We don't want to interfere with date() or include()
            ## directives.
            continue
        if not varvalue.upper().startswith("FILE:"):
            ## A literal value: leave it exactly as it is.
            continue

        ## The value to be used is to be taken from a file. Clean the
        ## file name and if it's OK, extract that value from the file.
        seekvalue_fname = os.path.basename(varvalue[5:].strip()).strip()
        if seekvalue_fname == "":
            ## The filename was not valid.
            err_msg = "Error in Stamp_Replace_Single_File_Approval: " \
                      "The function was configured to read a LaTeX " \
                      "template variable from a file with the " \
                      "following instruction: [%s --> %s]. The " \
                      "filename, however, was not considered valid. " \
                      "Please report this to the administrator." \
                      % (varname, varvalue)
            raise InvenioWebSubmitFunctionError(err_msg)

        seekvalue_path = "%s/%s" % (curdir, seekvalue_fname)
        if not os.access(seekvalue_path, os.R_OK|os.F_OK):
            ## The file didn't actually exist in the current submission's
            ## working directory. Use an empty value:
            latex_template_vars[varname] = ""
            continue

        ## The file exists. Extract its value, making sure that the file
        ## handle is closed whatever happens:
        try:
            fh_value = open(seekvalue_path, "r")
            try:
                final_varval = fh_value.read()
            finally:
                fh_value.close()
        except IOError:
            ## The file was unreadable.
            err_msg = "Error in Stamp_Replace_Single_File_" \
                      "Approval: The function attempted to " \
                      "read a LaTex template variable " \
                      "value from the following file in the " \
                      "current submission's working " \
                      "directory: [%s]. However, an " \
                      "unexpected error was encountered " \
                      "when doing so. Please inform the " \
                      "administrator." \
                      % seekvalue_fname
            ## user_info defaults to None, so only pass the request object
            ## on when one is really available. Subscripting None here
            ## would replace the error below with a TypeError.
            try:
                req = user_info['req']
            except (TypeError, KeyError):
                req = None
            if req is not None:
                register_exception(req=req, prefix=err_msg)
            else:
                register_exception(prefix=err_msg)
            raise InvenioWebSubmitFunctionError(err_msg)
        ## Replace the variable value with that which has been read from
        ## the file:
        latex_template_vars[varname] = final_varval.rstrip()

    ## Put the 'fixed' values into the file_stamper_options dictionary:
    file_stamper_options['latex-template'] = latex_template
    file_stamper_options['latex-template-var'] = latex_template_vars
    file_stamper_options['stamp'] = stamp
    file_stamper_options['layer'] = layer

    ## Put the input file and output file into the file_stamper_options
    ## dictionary:
    file_stamper_options['input-file'] = bibdocfile_file_to_stamp.fullpath
    file_stamper_options['output-file'] = bibdocfile_file_to_stamp.fullname
    ##
    ## Before attempting to stamp the file, log the dictionary of arguments
    ## that will be passed to websubmit_file_stamper. Failing to write the
    ## log is reported but does not stop the stamping:
    try:
        fh_log = open("%s/websubmit_file_stamper-calls-options.log" \
                      % curdir, "a+")
        try:
            fh_log.write("%s\n" % file_stamper_options)
            fh_log.flush()
        finally:
            fh_log.close()
    except IOError:
        ## Unable to log the file stamper options.
        exception_prefix = "Unable to write websubmit_file_stamper " \
                           "options to log file " \
                           "%s/websubmit_file_stamper-calls-options.log" \
                           % curdir
        register_exception(prefix=exception_prefix)

    try:
        ## Try to stamp the file:
        (stamped_file_path_only, stamped_file_name) = \
                websubmit_file_stamper.stamp_file(file_stamper_options)
    except InvenioWebSubmitFileStamperError:
        ## It wasn't possible to stamp this file.
        ## Register the exception along with an informational message:
        wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                  "There was a problem stamping the file with the name [%s] " \
                  "and the fullpath [%s]. The file has not been stamped. " \
                  "The submission ID is [%s] and the record ID is [%s]." \
                  % (name_file_to_stamp, \
                     file_stamper_options['input-file'], \
                     access, \
                     recid)
        register_exception(prefix=wrn_msg)
        raise InvenioWebSubmitFunctionWarning(wrn_msg)

    ## Stamping was successful. The BibDocFile must now be revised with
    ## the latest (stamped) version of the file:
    file_comment = "Stamped by WebSubmit: %s" \
                   % time.strftime("%d/%m/%Y", time.localtime())
    try:
        bibrecdocs.add_new_version("%s/%s" \
                                   % (stamped_file_path_only, \
                                      stamped_file_name), \
                                   name_file_to_stamp, \
                                   comment=file_comment, \
                                   flags=('STAMPED', ))
    except InvenioWebSubmitFileError:
        ## Unable to revise the file with the newly stamped version.
        wrn_msg = "Warning in Stamp_Replace_Single_File_Approval: " \
                  "After having stamped the file with the name [%s] " \
                  "and the fullpath [%s], it wasn't possible to revise " \
                  "that file with the newly stamped version. Stamping " \
                  "was unsuccessful. The submission ID is [%s] and the " \
                  "record ID is [%s]." \
                  % (name_file_to_stamp, \
                     file_stamper_options['input-file'], \
                     access, \
                     recid)
        register_exception(prefix=wrn_msg)
        raise InvenioWebSubmitFunctionWarning(wrn_msg)

    ## File revised. If the file should be renamed after stamping, do so.
    if new_file_name != "":
        try:
            bibdoc_file_to_stamp.change_name(new_file_name)
        except (IOError, InvenioWebSubmitFileError):
            ## Unable to change the name. The file has been stamped and
            ## revised by now, so the failure is reported but the function
            ## still finishes normally.
            wrn_msg = "Warning in Stamp_Replace_Single_File_Approval" \
                      ": After having stamped and revised the file " \
                      "with the name [%s] and the fullpath [%s], it " \
                      "wasn't possible to rename it to [%s]. The " \
                      "submission ID is [%s] and the record ID is " \
                      "[%s]." \
                      % (name_file_to_stamp, \
                         file_stamper_options['input-file'], \
                         new_file_name, \
                         access, \
                         recid)
            register_exception(prefix=wrn_msg)
    ## Finished.
    return ""
Ejemplo n.º 23
0
    def get_record(self, path=None, no_pdf=False):
        """Build the record structure for the Elsevier article found at *path*.

        The article XML is obtained through ``self.get_article(path)`` and
        its metadata is added to a fresh record with ``record_add_field``:
        title (245), date (260), DOI (024), authors (100/700), abstract
        (520), license (540), copyright (542), keywords (653), publication
        information (773) and references (999C5).

        Unless *no_pdf* is true, FFT fields are added for the PDF files:
        a PDF/A already attached to a previous, non-deleted version of the
        record (looked up by DOI) is kept, and ``main_a-2b.pdf`` or else
        ``main.pdf`` from *path* is attached when it exists. When no PDF
        at all is available, the problem is registered with an alert to
        the administrator and logged as a warning; it is not raised to the
        caller.

        :param path: location handed to ``self.get_article``; it is also
            joined with the PDF file names, so it has to be a directory
            path whenever *no_pdf* is false.
        :param no_pdf: when true, skip everything related to PDF files.
        """
        xml = self.get_article(path)
        rec = {}
        title = self.get_title(xml)
        if title:
            record_add_field(rec, '245', subfields=[('a', title)])
        (journal, issn, volume, issue, first_page, last_page, year,
         start_date, doi) = self.get_publication_information(xml)
        if not journal:
            journal = self.get_article_journal(xml)
        # Fall back on today's date when the article carries no start date.
        if start_date:
            record_add_field(rec, '260', subfields=[('c', start_date)])
        else:
            record_add_field(rec, '260',
                             subfields=[('c', time.strftime('%Y-%m-%d'))])
        if doi:
            record_add_field(rec, '024', ind1='7',
                             subfields=[('a', doi), ('2', 'DOI')])
        self.logger.info("Creating record: %s %s" % (path, doi))

        # The first author goes to 100, all the other ones to 700.
        author_tag = '100'
        for author in self.get_authors(xml):
            first_name = author.get('given_name') or author.get('initials')
            subfields = [('a', '%s, %s' % (author['surname'], first_name))]
            if 'orcid' in author:
                subfields.append(('j', author['orcid']))
            if 'affiliation' in author:
                for aff in author["affiliation"]:
                    subfields.append(('v', aff))
            if author.get('email'):
                subfields.append(('m', author['email']))
            record_add_field(rec, author_tag, subfields=subfields)
            author_tag = '700'

        abstract = self.get_abstract(xml)
        if abstract:
            record_add_field(rec, '520',
                             subfields=[('a', abstract), ('9', 'Elsevier')])
        record_add_field(rec, '540',
                         subfields=[('a', 'CC-BY-3.0'),
                                    ('u', 'http://creativecommons.org/licenses/by/3.0/')])
        copyright = self.get_copyright(xml)
        if copyright:
            record_add_field(rec, '542', subfields=[('f', copyright)])
        keywords = self.get_keywords(xml)
        if keywords:
            for keyword in keywords:
                record_add_field(rec, '653', ind1='1',
                                 subfields=[('a', keyword), ('9', 'author')])
        record_add_field(rec, '773',
                         subfields=[('p', journal),
                                    ('v', volume),
                                    ('n', issue),
                                    ('c', '%s-%s' % (first_page, last_page)),
                                    ('y', year)])

        # The reference fields get their own names so that they cannot be
        # mixed up with (or overwrite) the article's own title, volume,
        # year, issue and DOI.
        for (label, ref_authors, ref_doi, ref_issue, ref_page, ref_title,
             ref_volume, ref_year, textref, ext_link) in self.get_references(xml):
            subfields = []
            # It is the DOI of the reference that matters here, not the
            # DOI of the article.
            if ref_doi:
                subfields.append(('a', ref_doi))
            for ref_author in ref_authors:
                subfields.append(('h', ref_author))
            if ref_issue:
                subfields.append(('n', ref_issue))
            if label:
                subfields.append(('o', label))
            if ref_page:
                subfields.append(('p', ref_page))
            if ext_link:
                subfields.append(('r', ext_link))
            if ref_title and ref_volume and ref_year and ref_page:
                subfields.append(('s', '%s %s (%s) %s' % (ref_title,
                                                          ref_volume,
                                                          ref_year,
                                                          ref_page)))
            elif textref:
                subfields.append(('m', textref))
            if ref_title:
                subfields.append(('t', ref_title))
            if ref_volume:
                subfields.append(('v', ref_volume))
            if ref_year:
                subfields.append(('y', ref_year))
            if subfields:
                record_add_field(rec, '999', ind1='C', ind2='5',
                                 subfields=subfields)

        if not no_pdf:
            from invenio.search_engine import search_pattern
            from invenio.bibdocfile import BibRecDocs
            # The pattern used to end with a stray, unbalanced double quote.
            prev_version = search_pattern(
                p='0247_a:"%s" AND NOT 980:DELETED' % (doi,))
            old_pdf = False

            if prev_version:
                prev_rec = BibRecDocs(prev_version[0])
                try:
                    pdf_path = prev_rec.get_bibdoc('main').get_file(
                        ".pdf;pdfa", exact_docformat=True).fullpath
                except Exception:
                    # Best effort: the previous version simply may not have
                    # a PDF/A attached. Leave a trace and carry on.
                    self.logger.debug(
                        'No previously delivered PDF/A for: %s' % (doi,))
                else:
                    old_pdf = True
                    record_add_field(rec, 'FFT',
                                     subfields=[('a', pdf_path),
                                                ('n', 'main'),
                                                ('f', '.pdf;pdfa')])
                    self.logger.info(
                        'Leaving previously delivered PDF/A for: %s' % (doi,))

            try:
                if exists(join(path, 'main_a-2b.pdf')):
                    record_add_field(rec, 'FFT',
                                     subfields=[('a', join(path, 'main_a-2b.pdf')),
                                                ('n', 'main'),
                                                ('f', '.pdf;pdfa')])
                    self.logger.debug('Adding PDF/A to record: %s' % (doi,))
                elif exists(join(path, 'main.pdf')):
                    record_add_field(rec, 'FFT',
                                     subfields=[('a', join(path, 'main.pdf'))])
                elif not old_pdf:
                    # Raised and caught right below, so that the exception
                    # is active when it is registered.
                    raise MissingFFTError(
                        "Record %s doesn't contain PDF file." % (doi,))
            except MissingFFTError:
                register_exception(
                    alert_admin=True,
                    prefix="Elsevier paper: %s is missing PDF." % (doi,))
                self.logger.warning(
                    "Record %s doesn't contain PDF file." % (doi,))
        # NOTE(review): the method as shown ends here without returning
        # `rec` or any serialised form of it - confirm this is intended.
 def test_BibDocs(self):
     """bibdocfile - BibDocs functions"""
     # Drives a single BibDoc attached to record 2 through a sequence of
     # operations (rename, status, versions, revert, descriptions, deletion
     # of files, hiding, icons, delete/undelete).  Every step relies on the
     # state left behind by the previous ones, so the order matters.

     # attach test.jpg to record 2 as a new 'Main' document named 'img_test'
     my_bibrecdoc = BibRecDocs(2)
     my_bibrecdoc.add_new_file(CFG_PREFIX + '/lib/webtest/invenio/test.jpg', 'Main', 'img_test', False, 'test add new file', 'test', '.jpg')
     my_new_bibdoc = my_bibrecdoc.get_bibdoc("img_test")
     value = my_bibrecdoc.list_bibdocs()
     # the record is expected to hold exactly two bibdocs at this point
     self.assertEqual(len(value), 2)
     # total size of the bibdoc (91750 expected with the single test.jpg)
     self.assertEqual(my_new_bibdoc.get_total_size(), 91750)
     # the bibdoc reports the record id it was created for
     self.assertEqual(my_new_bibdoc.get_recid(), 2)
     # rename the document ...
     my_new_bibdoc.change_name('new_name')
     # ... and read the new docname back
     self.assertEqual(my_new_bibdoc.get_docname(), 'new_name')
     # doctype given at creation time
     self.assertEqual(my_new_bibdoc.get_type(), 'Main')
     # bibdoc id; the threshold of 80 presumably reflects the bibdocs that
     # already exist on the demo site -- TODO confirm
     self.assert_(my_new_bibdoc.get_id() > 80)
     # set status ...
     my_new_bibdoc.set_status('new status')
     # ... and read it back
     self.assertEqual(my_new_bibdoc.get_status(), 'new status')
     # the base directory must live under CFG_WEBSUBMIT_FILEDIR
     self.assert_(my_new_bibdoc.get_base_dir().startswith(CFG_WEBSUBMIT_FILEDIR))
     # number of files in the bibdoc so far
     self.assertEqual(my_new_bibdoc.get_file_number(), 1)
     # add the same image again as a new version (flagged PERFORM_HIDE_PREVIOUS)
     my_new_bibdoc.add_file_new_version(CFG_PREFIX + '/lib/webtest/invenio/test.jpg', description= 'the new version', comment=None, format=None, flags=["PERFORM_HIDE_PREVIOUS"])
     self.assertEqual(my_new_bibdoc.list_versions(), [1, 2])
     # reverting to version 1 is expected to produce a new version 3 that
     # carries the description of version 1
     my_new_bibdoc.revert(1)
     self.assertEqual(my_new_bibdoc.list_versions(), [1, 2, 3])
     self.assertEqual(my_new_bibdoc.get_description('.jpg', version=3), 'test add new file')
     # total size counting only the latest version
     self.assertEqual(my_new_bibdoc.get_total_size_latest_version(), 91750)
     # latest version number
     self.assertEqual(my_new_bibdoc.get_latest_version(), 3)
     # the latest version holds a single file, and it is version 3
     self.assertEqual(len(my_new_bibdoc.list_latest_files()), 1)
     self.assertEqual(my_new_bibdoc.list_latest_files()[0].get_version(), 3)
     # files of version 1, hidden ones included
     self.assertEqual(len(my_new_bibdoc.list_version_files(1, list_hidden=True)), 1)
     # the rendered display must contain the original description
     value = my_new_bibdoc.display(version='', ln='en', display_hidden=True)
     self.assert_('>test add new file<' in value)
     # a .jpg format already exists in the bibdoc
     self.assertEqual(my_new_bibdoc.format_already_exists_p('.jpg'), True)
     # fetch a specific version of the .jpg file
     self.assertEqual(my_new_bibdoc.get_file('.jpg', version='1').get_version(), 1)
     # set description of version 1 ...
     my_new_bibdoc.set_description('new description', '.jpg', version=1)
     # ... and read it back
     self.assertEqual(my_new_bibdoc.get_description('.jpg', version=1), 'new description')
     # NOTE(review): this step was labelled "set comment"/"get comment" but
     # it calls set_description/get_description again; presumably
     # set_comment/get_comment were intended -- confirm against the BibDoc API
     my_new_bibdoc.set_description('new comment', '.jpg', version=1)
     # read back the value written just above
     self.assertEqual(my_new_bibdoc.get_description('.jpg', version=1), 'new comment')
     # the history must not be empty after the operations above
     assert len(my_new_bibdoc.get_history()) > 0
     # delete the .jpg of version 2
     my_new_bibdoc.delete_file('.jpg', 2)
     # two files are expected to remain
     self.assertEqual(len(my_new_bibdoc.list_all_files()), 2)
     # delete the .jpg of version 3
     my_new_bibdoc.delete_file('.jpg', 3)
     # add test.gif as an additional format (no explicit version given)
     my_new_bibdoc.add_file_new_format(CFG_PREFIX + '/lib/webtest/invenio/test.gif', version=None, description=None, comment=None, format=None)
     self.assertEqual(len(my_new_bibdoc.list_all_files()), 2)
     # delete the .jpg of version 1
     my_new_bibdoc.delete_file('.jpg', 1)
     # delete the .gif of version 1
     my_new_bibdoc.delete_file('.gif', 1)
     # with every file deleted the bibdoc must report itself as empty
     self.assertEqual(my_new_bibdoc.empty_p(), True)
     # version 1 of the .jpg is not hidden yet
     self.assertEqual(my_new_bibdoc.hidden_p('.jpg', version=1), False)
     # flag it as HIDDEN
     my_new_bibdoc.set_flag('HIDDEN', '.jpg', version=1)
     # now it must be reported as hidden
     self.assertEqual(my_new_bibdoc.hidden_p('.jpg', version=1), True)
     # add an icon and compare it with the icon seen through the record's
     # bibdoc list (index 1 is assumed to be this bibdoc -- TODO confirm)
     my_new_bibdoc.add_icon( CFG_PREFIX + '/lib/webtest/invenio/icon-test.gif')
     value =  my_bibrecdoc.list_bibdocs()[1]
     self.assertEqual(value.get_icon(), my_new_bibdoc.get_icon())
     # delete the icon ...
     my_new_bibdoc.delete_icon()
     # ... after which no icon is returned
     self.assertEqual(my_new_bibdoc.get_icon(), None)
     # delete the whole bibdoc and check the deleted flag
     my_new_bibdoc.delete()
     self.assertEqual(my_new_bibdoc.deleted_p(), True)
     # undelete it again so the bibdoc is not left in the deleted state
     my_new_bibdoc.undelete(previous_status='')
Ejemplo n.º 25
0
def goto(type, document='', number=0, lang='en', modif=0):
    """Return the URL of the PDF of a CERN official document.

    ``type`` selects the kind of lookup:

    * ``'SRR'``: Staff Rules and Regulations currently in force; ``lang``
      and ``modif`` (together with the edition read from the report
      number) are turned into the docname by ``make_cern_ssr_docname``.
    * ``'OPER-CIRC'`` / ``'ADMIN-CIRC'``: Operational / Administrative
      Circular number ``number``; ``document`` is the label of the wanted
      attachment (e.g. ``'en'``, ``'annex-fr'``, ``'implementation-en'``).

    Any other ``type`` yields ``None``.
    """
    today = time.strftime('%Y-%m-%d')

    if type == 'SRR':
        ## We would like a CERN Staff Rules and Regulations
        validity = "925__a:1996-01-01->%s 925__b:%s->9999-99-99" % (today, today)
        matches = perform_request_search(cc='Staff Rules and Regulations', f=validity)
        recid = matches[-1]
        reportnumber = get_fieldvalues(recid, '037__a')[0]
        ## e.g. CERN-STAFF-RULES-ED08
        edition = int(reportnumber[-2:])
        ssr_docs = BibRecDocs(recid)
        ssr_docname = make_cern_ssr_docname(lang, edition, modif)
        return ssr_docs.get_bibdoc(ssr_docname).get_file('.pdf').get_url()

    if type == "OPER-CIRC":
        collection = "Operational Circulars"
        pattern = 'reportnumber:"CERN-OPER-CIRC-%s-*"' % number
    elif type == 'ADMIN-CIRC':
        collection = "Administrative Circulars"
        pattern = 'reportnumber:"CERN-ADMIN-CIRC-%s-*"' % number
    else:
        return None

    ## Ordered (needles, label) pairs: the first pair with a needle found in
    ## the lower-cased docname wins, so e.g. 'annexe' is tried before 'annex'.
    classification = (
        (('implementation',), 'implementation-en'),
        (('application',), 'implementation-fr'),
        (('archiving',), 'archiving-en'),
        (('archivage',), 'archiving-fr'),
        (('annexe', 'annexes_fr'), 'annex-fr'),
        (('annexes_en', 'annex'), 'annex-en'),
        (('_en_', '_eng_', '_angl_'), 'en'),
        (('_fr_',), 'fr'),
    )

    matches = perform_request_search(cc=collection, p=pattern, sf="925__a")
    recid = matches[-1]
    documents = {}
    bibrecdocs = BibRecDocs(recid)
    for docname in bibrecdocs.get_bibdoc_names():
        lowered = docname.lower()
        for needles, label in classification:
            if any(needle in lowered for needle in needles):
                _register_document(documents, docname, label)
                break

    ## Some attachments carry an upper-case extension: retry with '.PDF'.
    try:
        return bibrecdocs.get_bibdoc(documents[document]).get_file('.pdf').get_url()
    except InvenioBibDocFileError:
        return bibrecdocs.get_bibdoc(documents[document]).get_file('.PDF').get_url()
Ejemplo n.º 26
0
def Move_Files_to_Storage(parameters, curdir, form, user_info=None):
    """
    The function moves files received from the standard submission's
    form through file input element(s). The documents are assigned a
    'doctype' (or category) corresponding to the file input element
    (eg. a file uploaded through 'DEMOPIC_FILE' will go to
    'DEMOPIC_FILE' doctype/category).

    Websubmit engine builds the following file organization in the
    directory curdir/files:

                  curdir/files
                        |
      _____________________________________________________________________
            |                                   |                          |
      ./file input 1 element's name      ./file input 2 element's name    ....
         (for eg. 'DEMOART_MAILFILE')       (for eg. 'DEMOART_APPENDIX')
         |                                     |
      test1.pdf                             test2.pdf


    There is only one instance of all possible extension(pdf, gz...) in each part
    otherwise we may encounter problems when renaming files.

    + parameters['rename']: if given, all the files in curdir/files
      are renamed.  parameters['rename'] is of the form:
      <PA>elemfilename[re]</PA>* where re is a regexp to select (using
      re.sub) what part of the elem file has to be selected.
      e.g: <PA>file:TEST_FILE_RN</PA>

    + parameters['documenttype']: if given, other formats are created.
      It has 2 possible values: - if "picture" icon in gif format is created
                                - if "fulltext" ps, gz .... formats are created

    + parameters['paths_and_suffixes']: directories to look into and
      corresponding suffix to add to every file inside. It must have
      the same structure as a Python dictionary of the following form
      {'FrenchAbstract':'french', 'EnglishAbstract':''}

      The keys are the file input element name from the form <=>
      directories in curdir/files The values associated are the
      suffixes which will be added to all the files in
      e.g. curdir/files/FrenchAbstract

    + parameters['iconsize']: comma-separated list of icon sizes. It is
      only used when parameters['documenttype'] is "picture", but the
      key must always be present.

    + parameters['paths_and_restrictions']: the restrictions to apply
      to each uploaded file. The parameter must have the same
      structure as a Python dictionary of the following form:
      {'DEMOART_APPENDIX':'restricted'}
      Files not specified in this parameter are not restricted.
      The specified restrictions can include a variable that can be
      replaced at runtime, for eg:
      {'DEMOART_APPENDIX':'restricted to <PA>file:SuE</PA>'}

    + parameters['paths_and_doctypes']: if a doctype is specified,
      the file will be saved under the 'doctype/collection' instead
      of under the default doctype/collection given by the name
      of the upload element that was used on the websubmit interface.
      to configure the doctype in websubmit, enter the value as in a
      dictionary, for eg:
      {'PATHS_SWORD_UPL' : 'PUSHED_TO_ARXIV'} -> from
      Demo_Export_Via_Sword [DEMOSWR] Document Types

    @param curdir: submission working directory; uploads are read from
        curdir/files and the helper files (rename_cmd, <path>_RENAMED,
        <path>_ICON*) are written directly into it.
    @param form: not used by this function.
    @param user_info: not used by this function.
    @raise InvenioWebSubmitFunctionWarning: when a file cannot be renamed
        or an icon filename cannot be stored in curdir.
    """

    global sysno
    paths_and_suffixes = parameters['paths_and_suffixes']
    paths_and_restrictions = parameters['paths_and_restrictions']
    rename = parameters['rename']
    documenttype = parameters['documenttype']
    iconsizes = parameters['iconsize'].split(',')
    paths_and_doctypes = parameters['paths_and_doctypes']

    ## Create an instance of BibRecDocs for the current recid(sysno)
    bibrecdocs = BibRecDocs(sysno)

    paths_and_suffixes = get_dictionary_from_string(paths_and_suffixes)
    paths_and_restrictions = get_dictionary_from_string(paths_and_restrictions)
    paths_and_doctypes = get_dictionary_from_string(paths_and_doctypes)

    ## Go through all the directories specified in the keys
    ## of parameters['paths_and_suffixes']
    for path in paths_and_suffixes:
        suffix = paths_and_suffixes[path]
        upload_dir = "%s/files/%s" % (curdir, path)
        ## Skip input elements for which nothing was uploaded
        if not os.path.exists(upload_dir):
            continue

        ## Retrieve the restriction to apply to files in this directory
        restriction = re.sub('<PA>(?P<content>[^<]*)</PA>',
                             get_pa_tag_content,
                             paths_and_restrictions.get(path, ''))

        ## Go through all the files in curdir/files/path
        for current_file in os.listdir(upload_dir):
            ## retrieve filename and extension
            dummy, filename, extension = decompose_file(current_file)
            if extension and extension[0] != ".":
                extension = '.' + extension
            if suffix:
                extension = "_%s%s" % (suffix, extension)
            ## Build the new file name if rename parameter has been given
            if rename:
                filename = re.sub('<PA>(?P<content>[^<]*)</PA>',
                                  get_pa_tag_content,
                                  rename)
            fullpath = "%s/files/%s/%s%s" % (curdir, path, filename, extension)

            if rename or suffix:
                ## Rename the file
                old_fullpath = "%s/files/%s/%s" % (curdir, path, current_file)
                try:
                    ## Write the log rename_cmd; 'with' guarantees the
                    ## handle is released even when the rename fails.
                    with open("%s/rename_cmd" % curdir, "a+") as fd:
                        fd.write(old_fullpath + " to " + fullpath + "\n\n")
                    ## Rename
                    os.rename(old_fullpath, fullpath)
                    ## Save the new name in a text file in curdir so that
                    ## the new filename can be used by templates to create the recmysl
                    with open("%s/%s_RENAMED" % (curdir, path), "w") as fd:
                        fd.write("%s%s" % (filename, extension))
                except (IOError, OSError) as err:
                    ## open()/write() raise IOError on Python 2, os.rename
                    ## raises OSError: report both the same way.
                    msg = "Cannot rename the file.[%s]"
                    msg %= str(err)
                    raise InvenioWebSubmitFunctionWarning(msg)

            ## Check if there is any existing similar file
            if bibrecdocs.check_file_exists(fullpath):
                continue

            bibdoc = bibrecdocs.add_new_file(fullpath, doctype=paths_and_doctypes.get(path, path), never_fail=True)
            bibdoc.set_status(restriction)

            ## Fulltext
            if documenttype == "fulltext":
                for additionalformat in createRelatedFormats(fullpath):
                    try:
                        bibrecdocs.add_new_format(additionalformat)
                    except InvenioWebSubmitFileError:
                        ## Best effort: a format that cannot be attached
                        ## must not abort the submission.
                        pass
            ## Icon
            elif documenttype == "picture":
                has_added_default_icon_subformat_p = False
                for iconsize in iconsizes:
                    try:
                        iconpath, iconname = create_icon({
                            'input-file' : fullpath,
                            'icon-scale' : iconsize,
                            'icon-name' : None,
                            'icon-file-format' : None,
                            'multipage-icon' : False,
                            'multipage-icon-delay' : 100,
                            'verbosity' : 0,
                        })
                    except Exception:
                        register_exception(prefix='Impossible to create icon for %s (record %s)' % (fullpath, sysno), alert_admin=True)
                        continue
                    iconpath = os.path.join(iconpath, iconname)
                    docname = decompose_file(fullpath)[1]
                    try:
                        mybibdoc = bibrecdocs.get_bibdoc(docname)
                    except InvenioWebSubmitFileError:
                        mybibdoc = None
                    if mybibdoc is None:
                        ## No document to attach the icon to
                        continue

                    icon_suffix = iconsize.replace('>', '').replace('<', '').replace('^', '').replace('!', '')
                    ## Remember whether THIS icon is the default one before
                    ## the flag is flipped; testing the flag afterwards (as
                    ## was done previously) can never select the default branch.
                    is_default_icon = not has_added_default_icon_subformat_p
                    try:
                        if is_default_icon:
                            mybibdoc.add_icon(iconpath)
                            has_added_default_icon_subformat_p = True
                        else:
                            mybibdoc.add_icon(iconpath, subformat=CFG_WEBSUBMIT_DEFAULT_ICON_SUBFORMAT + "-" + icon_suffix)
                    except InvenioWebSubmitFileError:
                        # Most probably icon already existed.
                        continue

                    ## Save the new icon filename in a text file in curdir so that
                    ## it can be used by templates to create the recmysl.
                    ## The size-specific file is always written (unchanged
                    ## behaviour); the default icon is additionally stored
                    ## under <path>_ICON.
                    icon_name_files = ["%s/%s_ICON_%s" % (curdir, path, iconsize + '_' + icon_suffix)]
                    if is_default_icon:
                        icon_name_files.append("%s/%s_ICON" % (curdir, path))
                    try:
                        for icon_name_file in icon_name_files:
                            with open(icon_name_file, "w") as fd:
                                fd.write(os.path.basename(iconpath))
                    except (IOError, OSError) as err:
                        msg = "Cannot store icon filename.[%s]"
                        msg %= str(err)
                        raise InvenioWebSubmitFunctionWarning(msg)
Ejemplo n.º 27
0
    def get_record(self,
                   path=None,
                   no_pdf=False,
                   test=False,
                   refextract_callback=None):
        """Convert a record to MARCXML format.

        Title, dates, DOI, license, authors, abstract and copyright are
        added in every case.  The rest depends on ``self.CONSYN``:

        * CONSYN: collaborations (710), keywords and subjects (653), page
          count (300), publication info (773), the fulltext link/FFT
          (unless ``test``), the 980 collections and the references.
        * otherwise: a CC-BY-3.0 license (540), keywords (653),
          publication info (773) and, unless ``no_pdf``, the PDF/XML
          attachments, the package version (583) and the SCOAP3 980 field.

        :param path: path to a record.
        :type path: string
        :param no_pdf: when true (non-CONSYN only), skip the PDF lookup
                       together with the 583 version field, the main.xml
                       FFT and the 980 SCOAP3 field.
        :type no_pdf: bool
        :param test: flag to determine if it is a test call.
        :type test: bool
        :param refextract_callback: callback to be used to extract
                                    unstructured references. It should
                                    return a marcxml formated string
                                    of the reference.
        :type refextract_callback: callable

        :returns: marcxml formated string, or an empty string when the
                  record cannot be serialised (UnicodeDecodeError).
        """
        xml_doc = self.get_article(path)
        rec = create_record()
        title = self.get_title(xml_doc)
        if title:
            record_add_field(rec, '245', subfields=[('a', title)])
        (journal, dummy, volume, issue, first_page, last_page, year,
         start_date, doi) = self.get_publication_information(xml_doc, path)
        if not journal:
            journal = self.get_article_journal(xml_doc)
        if start_date:
            record_add_field(rec,
                             '260',
                             subfields=[('c', start_date), ('t', 'published')])
        else:
            # No publication date available: fall back to today's date.
            record_add_field(rec,
                             '260',
                             subfields=[('c', time.strftime('%Y-%m-%d'))])
        if doi:
            record_add_field(rec,
                             '024',
                             ind1='7',
                             subfields=[('a', doi), ('2', 'DOI')])
        license_name, license_url = self.get_license(xml_doc)
        if license_name and license_url:
            record_add_field(rec,
                             '540',
                             subfields=[('a', license_name),
                                        ('u', license_url)])
        elif license_url:
            record_add_field(rec, '540', subfields=[('u', license_url)])
        self.logger.info("Creating record: %s %s" % (path, doi))

        # The first author goes to 100, every following one to 700.
        for position, author in enumerate(self.get_authors(xml_doc)):
            author_name = (author['surname'], author.get('given_name')
                           or author.get('initials'))
            subfields = [('a', '%s, %s' % author_name)]
            if 'orcid' in author:
                subfields.append(('j', author['orcid']))
            if 'affiliation' in author:
                for aff in author["affiliation"]:
                    subfields.append(('v', aff))

                if self.extract_nations:
                    add_nations_field(subfields)

            if author.get('email'):
                subfields.append(('m', author['email']))
            author_tag = '100' if position == 0 else '700'
            record_add_field(rec, author_tag, subfields=subfields)

        abstract = self.get_abstract(xml_doc)
        if abstract:
            record_add_field(rec,
                             '520',
                             subfields=[('a', abstract), ('9', 'Elsevier')])
        record_copyright = self.get_copyright(xml_doc)
        if record_copyright:
            record_add_field(rec, '542', subfields=[('f', record_copyright)])
        keywords = self.get_keywords(xml_doc)
        if self.CONSYN:
            for tag in xml_doc.getElementsByTagName('ce:collaboration'):
                collaboration = get_value_in_tag(tag, 'ce:text')
                if collaboration:
                    record_add_field(rec,
                                     '710',
                                     subfields=[('g', collaboration)])

            # We add subjects also as author keywords
            subjects = xml_doc.getElementsByTagName('dct:subject')
            for subject in subjects:
                for listitem in subject.getElementsByTagName('rdf:li'):
                    keyword = xml_to_text(listitem)
                    if keyword not in keywords:
                        keywords.append(keyword)
            for keyword in keywords:
                record_add_field(rec,
                                 '653',
                                 ind1='1',
                                 subfields=[('a', keyword), ('9', 'author')])
            journal, dummy = fix_journal_name(journal.strip(),
                                              self.journal_mappings)
            subfields = []
            doctype = self.get_doctype(xml_doc)
            try:
                page_count = int(last_page) - int(first_page) + 1
                record_add_field(rec,
                                 '300',
                                 subfields=[('a', str(page_count))])
            except (TypeError, ValueError):
                # Missing (None) or non-numeric pages: no page count.
                pass
            if doctype == 'err':
                subfields.append(('m', 'Erratum'))
            elif doctype == 'add':
                subfields.append(('m', 'Addendum'))
            elif doctype == 'pub':
                subfields.append(('m', 'Publisher Note'))
            elif doctype == 'rev':
                record_add_field(rec, '980', subfields=[('a', 'Review')])
            if journal:
                subfields.append(('p', journal))
            if first_page and last_page:
                subfields.append(('c', '%s-%s' % (first_page, last_page)))
            elif first_page:
                subfields.append(('c', first_page))
            if volume:
                subfields.append(('v', volume))
            if year:
                subfields.append(('y', year))
            record_add_field(rec, '773', subfields=subfields)
            if not test:
                if license_name:
                    # The URL is built from the file name of ``path`` with
                    # its last four characters removed.
                    url = 'http://www.sciencedirect.com/science/article/pii/'\
                          + path.split('/')[-1][:-4]
                    record_add_field(rec,
                                     '856',
                                     ind1='4',
                                     subfields=[('u', url),
                                                ('y', 'Elsevier server')])
                    record_add_field(rec,
                                     'FFT',
                                     subfields=[('a', path),
                                                ('t', 'INSPIRE-PUBLIC'),
                                                ('d', 'Fulltext')])
                else:
                    record_add_field(rec,
                                     'FFT',
                                     subfields=[('a', path), ('t', 'Elsevier'),
                                                ('o', 'HIDDEN')])
            record_add_field(rec, '980', subfields=[('a', 'HEP')])
            record_add_field(rec, '980', subfields=[('a', 'Citeable')])
            record_add_field(rec, '980', subfields=[('a', 'Published')])
            self._add_references(xml_doc, rec, refextract_callback)
        else:
            licence = 'http://creativecommons.org/licenses/by/3.0/'
            record_add_field(rec,
                             '540',
                             subfields=[('a', 'CC-BY-3.0'), ('u', licence)])
            if keywords:
                for keyword in keywords:
                    record_add_field(rec,
                                     '653',
                                     ind1='1',
                                     subfields=[('a', keyword),
                                                ('9', 'author')])

            pages = ''
            if first_page and last_page:
                pages = '{0}-{1}'.format(first_page, last_page)
            elif first_page:
                pages = first_page

            # Drop empty values and the '-' placeholder.  A list
            # comprehension (rather than filter) always yields a list.
            subfields = [subfield
                         for subfield in [('p', journal),
                                          ('v', volume),
                                          ('n', issue),
                                          ('c', pages),
                                          ('y', year)]
                         if subfield[1] and subfield[1] != '-']

            record_add_field(rec, '773', subfields=subfields)
            if not no_pdf:
                from invenio.search_engine import perform_request_search
                # The former query ended with an unbalanced '"'.
                query = '0247_a:"%s" AND NOT 980:DELETED' % (doi, )
                prev_version = perform_request_search(p=query)

                old_pdf = False

                if prev_version:
                    from invenio.bibdocfile import BibRecDocs
                    prev_rec = BibRecDocs(prev_version[0])
                    try:
                        pdf_path = prev_rec.get_bibdoc('main')
                        pdf_path = pdf_path.get_file(".pdf;pdfa",
                                                     exact_docformat=True)
                        pdf_path = pdf_path.fullpath
                        old_pdf = True
                        record_add_field(rec,
                                         'FFT',
                                         subfields=[('a', pdf_path),
                                                    ('n', 'main'),
                                                    ('f', '.pdf;pdfa')])
                        message = ('Leaving previously delivered PDF/A for: '
                                   '%s' % (doi, ))
                        self.logger.info(message)
                    except Exception:
                        # Best effort: the previous version may simply have
                        # no PDF/A.  Unlike a bare except this lets
                        # KeyboardInterrupt/SystemExit through.
                        self.logger.debug('No reusable PDF/A found in the '
                                          'previous version of: %s' % (doi, ))
                try:
                    if exists(join(path, 'main_a-2b.pdf')):
                        pdf_path = join(path, 'main_a-2b.pdf')
                        record_add_field(rec,
                                         'FFT',
                                         subfields=[('a', pdf_path),
                                                    ('n', 'main'),
                                                    ('f', '.pdf;pdfa')])
                        self.logger.debug('Adding PDF/A to record: %s' %
                                          (doi, ))
                    elif exists(join(path, 'main.pdf')):
                        pdf_path = join(path, 'main.pdf')
                        record_add_field(rec,
                                         'FFT',
                                         subfields=[('a', pdf_path)])
                    else:
                        if not old_pdf:
                            message = ("Record %s doesn't contain PDF file."
                                       % (doi, ))
                            self.logger.warning(message)
                            # Raised and caught right below so that
                            # register_exception is called from inside an
                            # except block.
                            raise MissingFFTError(message)
                except MissingFFTError:
                    message = "Elsevier paper: %s is missing PDF." % (doi, )
                    register_exception(alert_admin=True, prefix=message)
                version = self.get_elsevier_version(find_package_name(path))
                record_add_field(rec, '583', subfields=[('l', version)])
                xml_path = join(path, 'main.xml')
                record_add_field(rec, 'FFT', subfields=[('a', xml_path)])
                record_add_field(rec,
                                 '980',
                                 subfields=[('a', 'SCOAP3'),
                                            ('b', 'Elsevier')])
        try:
            return record_xml_output(rec)
        except UnicodeDecodeError:
            message = ("Found a bad char in the file for the article %s"
                       % (doi, ))
            sys.stderr.write(message)
            return ""