def test_add_identical_conflict_field(self):
    """ BibUpload Revision Verifier - Rev3-100/970/888, Added identical 888 to Rev2(100/970), No conflict Expected"""
    upload_conf_rec = xml_marc_to_records(self.rev2_add_conf_field)
    orig_recs = xml_marc_to_records(self.data['rev3'][0])
    rev_verifier = RevisionVerifier()
    # The verifier must hand back a truthy result instead of raising;
    # assertTrue replaces the deprecated assert_ alias.
    self.assertTrue(rev_verifier.verify_revision(upload_conf_rec[0],
                                                 orig_recs[0],
                                                 'replace'))
def test_invalid_operation(self):
    """ BibUpload Revision Verifier - Unsupported opt_mode values yield None."""
    uploaded = xml_marc_to_records(self.rev2_modified)
    stored = xml_marc_to_records(self.data["rev3"][0])
    verifier = RevisionVerifier()
    # None of these modes is expected to be handled by the verifier.
    unsupported_modes = ("append", "format", "insert", "delete", "reference")
    for mode in unsupported_modes:
        outcome = verifier.verify_revision(uploaded[0], stored[0], mode)
        self.assertEqual(outcome, None)
def test_add_new_field(self):
    """ BibUpload Revision Verifier - Rev3 holds 100/970/888; uploading Rev2(100/970) plus a new 300 must yield a patch for 300"""
    uploaded = xml_marc_to_records(self.rev2_add_field)
    stored = xml_marc_to_records(self.data["rev3"][0])
    verifier = RevisionVerifier()
    outcome = verifier.verify_revision(uploaded[0], stored[0], "replace")
    mode, generated_patch, dummy_affected_tags = outcome
    self.assertEqual("correct", mode)
    # compare_xmbuffers gives back an empty string when both buffers agree.
    difference = compare_xmbuffers(record_xml_output(generated_patch), self.patch)
    self.assertEqual(difference, "")
def test_add_identical_conflict_field(self):
    """ BibUpload Revision Verifier - Rev3-100/970/888, Added identical 888 to Rev2(100/970), No conflict Expected"""
    upload_conf_rec = xml_marc_to_records(self.rev2_add_conf_field)
    orig_recs = xml_marc_to_records(self.data['rev3'][0])
    rev_verifier = RevisionVerifier()
    verification = rev_verifier.verify_revision(upload_conf_rec[0],
                                                orig_recs[0],
                                                'replace')
    # A truthy result (and no exception) means no conflict was reported;
    # assertTrue replaces the deprecated assert_ alias.
    self.assertTrue(verification)
def test_correcting_added_field_with_diff_ind(self):
    """ BibUpload Revision Verifier - Rev3 holds 100/970__/888; adding 970CP to Rev2(100/970__) must yield a patch for 970CP"""
    uploaded = xml_marc_to_records(self.rev2_mod_field_diff_ind)
    stored = xml_marc_to_records(self.data["rev3"][0])
    verifier = RevisionVerifier()
    outcome = verifier.verify_revision(uploaded[0], stored[0], "replace")
    mode, generated_patch, dummy_affected_tags = outcome
    self.assertEqual("correct", mode)
    # An empty comparison result means the generated patch equals the fixture.
    difference = compare_xmbuffers(record_xml_output(generated_patch), self.patch_diff_ind)
    self.assertEqual(difference, "")
def test_interchanged_fields(self):
    """ BibUpload Revision Verifier - Rev1 has 100-1/100-2/100-3/970/888, the upload reorders them to 100-2/100-3/100-1; a patch for 100 is expected"""
    uploaded = xml_marc_to_records(self.rev1_mod)
    stored = xml_marc_to_records(self.rev1)
    verifier = RevisionVerifier()
    outcome = verifier.verify_revision(uploaded[0], stored[0], "replace")
    mode, generated_patch, dummy_affected_tags = outcome
    self.assertEqual("correct", mode)
    patch_xml = record_xml_output(generated_patch)
    self.assertEqual(compare_xmbuffers(patch_xml, self.patch), "")
def test_unchanged_record_upload(self):
    """ BibUpload Revision Verifier - Uploading a record identical to the stored one raises UnchangedRecordError"""
    rev3_xml = self.data["rev3"][0]
    # Parsed twice on purpose: upload and stored record are separate objects.
    uploaded = xml_marc_to_records(rev3_xml)
    stored = xml_marc_to_records(rev3_xml)
    verifier = RevisionVerifier()
    call_args = (uploaded[0], stored[0], "replace")
    self.assertRaises(InvenioBibUploadUnchangedRecordError,
                      verifier.verify_revision,
                      *call_args)
def test_add_conflict_field(self):
    """ BibUpload Revision Verifier - Rev3 holds 100/970/888; adding 888 to Rev2(100/970) must be reported as a conflict"""
    uploaded = xml_marc_to_records(self.rev2_add_conf_field)
    stored = xml_marc_to_records(self.data['rev3'][0])
    verifier = RevisionVerifier()
    call_args = (uploaded[0], stored[0], 'replace')
    self.assertRaises(InvenioBibUploadConflictingRevisionsError,
                      verifier.verify_revision,
                      *call_args)
def test_add_identical_field(self):
    """ BibUpload Revision Verifier - Rev3 holds 100/970/888; adding a 100 to Rev2(100/970) must yield a patch for 100"""
    uploaded = xml_marc_to_records(self.rev2_add_sim_field)
    stored = xml_marc_to_records(self.data['rev3'][0])
    verifier = RevisionVerifier()
    outcome = verifier.verify_revision(uploaded[0], stored[0], 'replace')
    mode, generated_patch, dummy_affected_tags = outcome
    self.assertEqual('correct', mode)
    patch_xml = record_xml_output(generated_patch)
    self.assertEqual(compare_xmbuffers(patch_xml, self.patch_identical_field), '')
def test_conflicting_modfield(self):
    """ BibUpload Revision Verifier - A 100 is added to Rev2(100/970) while Rev3 has its 100 modified; a conflict is expected"""
    uploaded = xml_marc_to_records(self.rev2_add_sim_field)
    stored = xml_marc_to_records(self.rev3_mod)
    verifier = RevisionVerifier()
    call_args = (uploaded[0], stored[0], 'replace')
    self.assertRaises(InvenioBibUploadConflictingRevisionsError,
                      verifier.verify_revision,
                      *call_args)
def test_invalid_operation(self):
    """ BibUpload Revision Verifier - verify_revision returns None for opt_mode values it does not support."""
    uploaded = xml_marc_to_records(self.rev2_modified)
    stored = xml_marc_to_records(self.data['rev3'][0])
    verifier = RevisionVerifier()
    for bad_mode in ('append', 'format', 'insert', 'delete', 'reference'):
        verdict = verifier.verify_revision(uploaded[0], stored[0], bad_mode)
        self.assertEqual(verdict, None)
def test_unchanged_record_upload(self):
    """ BibUpload Revision Verifier - Re-uploading Rev3 unchanged must raise UnchangedRecordError"""
    uploaded = xml_marc_to_records(self.data['rev3'][0])
    stored = xml_marc_to_records(self.data['rev3'][0])
    verifier = RevisionVerifier()
    expected_error = InvenioBibUploadUnchangedRecordError
    self.assertRaises(expected_error,
                      verifier.verify_revision,
                      uploaded[0],
                      stored[0],
                      'replace')
def test_missing_revision(self):
    """ BibUpload Revision Verifier - An upload without the 005 tag must raise Missing005Error."""
    # Strip the 005 controlfield from the fixture before parsing it.
    revision_field = '<controlfield tag="005">20110101000000.0</controlfield>'
    self.rev2_modified = self.rev2_modified.replace(revision_field, "")
    uploaded = xml_marc_to_records(self.rev2_modified)
    stored = xml_marc_to_records(self.data["rev3"][0])
    verifier = RevisionVerifier()
    call_args = (uploaded[0], stored[0], "replace")
    self.assertRaises(InvenioBibUploadMissing005Error,
                      verifier.verify_revision,
                      *call_args)
def test_for_special_delete_field(self):
    """ BibUpload Revision Verifier - Rev1-100/300, Modified 100 in Rev1-Mod, Deleted 300 in Rev1-Mod (100/300), Patch for DELETE generated"""
    upload_rec = xml_marc_to_records(self.rev1_mod)
    orig_rec = xml_marc_to_records(self.rev1)
    rev_verifier = RevisionVerifier()
    (opt_mode, final_patch, dummy_affected_tags) = rev_verifier.verify_revision(upload_rec[0],
                                                                                orig_rec[0],
                                                                                'replace')
    self.assertEqual('correct', opt_mode)
    patch_xml = record_xml_output(final_patch)
    # The generated patch may list the deleted fields in either order, so it
    # has to MATCH one of the two expected patches.  compare_xmbuffers gives
    # '' for equal buffers; the previous "!= '' or != ''" check passed for
    # any patch that differed from at least one fixture, i.e. practically
    # always.
    self.assertTrue(compare_xmbuffers(self.patch_1, patch_xml) == '' or
                    compare_xmbuffers(self.patch_2, patch_xml) == '')
def test_conflicting_deleted_field(self):
    """ BibUpload Revision Verifier - 970 is modified in Rev2(100/970) but was removed in Rev3; a conflict is expected"""
    uploaded = xml_marc_to_records(self.rev2_mod_field)
    stored = xml_marc_to_records(self.rev3_deleted)
    verifier = RevisionVerifier()
    call_args = (uploaded[0], stored[0], 'replace')
    self.assertRaises(InvenioBibUploadConflictingRevisionsError,
                      verifier.verify_revision,
                      *call_args)
def test_add_new_field(self):
    """ BibUpload Revision Verifier - Adding 300 to Rev2(100/970) against Rev3(100/970/888) must produce a patch for 300"""
    uploaded = xml_marc_to_records(self.rev2_add_field)
    stored = xml_marc_to_records(self.data['rev3'][0])
    verifier = RevisionVerifier()
    mode, generated_patch, dummy_affected_tags = verifier.verify_revision(
        uploaded[0], stored[0], 'replace')
    self.assertEqual('correct', mode)
    patch_xml = record_xml_output(generated_patch)
    # Equal buffers compare to the empty string.
    self.assertEqual(compare_xmbuffers(patch_xml, self.patch), '')
def test_add_different_conflict_field(self):
    """ BibUpload Revision Verifier - Rev3 holds 100/970/888; adding a different 888 to Rev2(100/970) must be reported as a conflict"""
    uploaded = xml_marc_to_records(self.rev2_add_conf_field_diff)
    stored = xml_marc_to_records(self.data["rev3"][0])
    verifier = RevisionVerifier()
    call_args = (uploaded[0], stored[0], "replace")
    self.assertRaises(InvenioBibUploadConflictingRevisionsError,
                      verifier.verify_revision,
                      *call_args)
def test_conflicting_similarfield(self):
    """ BibUpload Revision Verifier - A 100 is added both to Rev2(100/970) and to Rev3; a conflict is expected"""
    uploaded = xml_marc_to_records(self.rev2_add_sim_field)
    stored = xml_marc_to_records(self.rev3_add_sim_field)
    verifier = RevisionVerifier()
    expected_error = InvenioBibUploadConflictingRevisionsError
    self.assertRaises(expected_error,
                      verifier.verify_revision,
                      uploaded[0],
                      stored[0],
                      "replace")
def test_interchanged_fields(self):
    """ BibUpload Revision Verifier - Upload reorders Rev1's 100 fields (100-1/100-2/100-3 -> 100-2/100-3/100-1); a patch for 100 is expected"""
    uploaded = xml_marc_to_records(self.rev1_mod)
    stored = xml_marc_to_records(self.rev1)
    verifier = RevisionVerifier()
    mode, generated_patch, dummy_affected_tags = verifier.verify_revision(
        uploaded[0], stored[0], 'replace')
    self.assertEqual('correct', mode)
    difference = compare_xmbuffers(record_xml_output(generated_patch), self.patch)
    self.assertEqual(difference, '')
def test_correcting_added_field_with_diff_ind(self):
    """ BibUpload Revision Verifier - 970CP added to Rev2(100/970__) against Rev3(100/970__/888); a patch for 970CP is expected"""
    uploaded = xml_marc_to_records(self.rev2_mod_field_diff_ind)
    stored = xml_marc_to_records(self.data['rev3'][0])
    verifier = RevisionVerifier()
    mode, generated_patch, dummy_affected_tags = verifier.verify_revision(
        uploaded[0], stored[0], 'replace')
    self.assertEqual('correct', mode)
    patch_xml = record_xml_output(generated_patch)
    self.assertEqual(compare_xmbuffers(patch_xml, self.patch_diff_ind), '')
def test_invalid_revision(self):
    """ BibUpload Revision Verifier - An upload carrying a changed 005 value must raise InvalidRevisionError"""
    # Swap the fixture's 005 controlfield for one with a different timestamp.
    original_005 = '<controlfield tag="005">20110101000000.0</controlfield>'
    altered_005 = '<controlfield tag="005">20110101020304.0</controlfield>'
    self.rev2_modified = self.rev2_modified.replace(original_005, altered_005)
    verifier = RevisionVerifier()
    uploaded = xml_marc_to_records(self.rev2_modified)
    stored = xml_marc_to_records(self.data["rev3"][0])
    call_args = (uploaded[0], stored[0], "replace")
    self.assertRaises(InvenioBibUploadInvalidRevisionError,
                      verifier.verify_revision,
                      *call_args)
def test_invalid_revision(self):
    """ BibUpload Revision Verifier - A wrong revision (005) in the uploaded record must raise InvalidRevisionError"""
    wrong_revision_xml = self.rev2_modified.replace(
        '<controlfield tag="005">20110101000000.0</controlfield>',
        '<controlfield tag="005">20110101020304.0</controlfield>')
    self.rev2_modified = wrong_revision_xml
    verifier = RevisionVerifier()
    uploaded = xml_marc_to_records(self.rev2_modified)
    stored = xml_marc_to_records(self.data['rev3'][0])
    expected_error = InvenioBibUploadInvalidRevisionError
    self.assertRaises(expected_error,
                      verifier.verify_revision,
                      uploaded[0],
                      stored[0],
                      'replace')
def test_correcting_del_field_add_field_diff_ind(self):
    """ BibUpload Revision Verifier - Rev3-100/970__/888, Deleted 970__ and Added 970CP in Rev2(100/970__), Patch Generated for 970__/970CP"""
    upload_recs = xml_marc_to_records(self.rev2_mod_del_one_add_one)
    orig_recs = xml_marc_to_records(self.data["rev3"][0])
    rev_verifier = RevisionVerifier()
    (opt_mode, patch, dummy_affected_tags) = rev_verifier.verify_revision(upload_recs[0], orig_recs[0], "replace")
    self.assertEqual("correct", opt_mode)
    # NOTE: for multiple fields in the patch it is better to compare with the
    # different possible patch strings, because the key-value pairs of the
    # generated patch dictionary are unsorted.
    patch_xml = record_xml_output(patch)
    # The patch has to MATCH one of the accepted serialisations
    # (compare_xmbuffers gives "" for equal buffers).  The previous
    # '!= "" or != ""' form passed whenever the patch differed from at least
    # one of them, so it could not detect a wrong patch.
    self.assertTrue(
        compare_xmbuffers(patch_xml, self.patch_del_one_add_one) == ""
        or compare_xmbuffers(patch_xml, self.patch_del_one_add_one_2) == ""
    )
def test_missing_revision(self):
    """ BibUpload Revision Verifier - Uploading a record whose 005 tag was removed must raise Missing005Error."""
    without_005 = self.rev2_modified.replace(
        '<controlfield tag="005">20110101000000.0</controlfield>', '')
    self.rev2_modified = without_005
    uploaded = xml_marc_to_records(self.rev2_modified)
    stored = xml_marc_to_records(self.data['rev3'][0])
    verifier = RevisionVerifier()
    expected_error = InvenioBibUploadMissing005Error
    self.assertRaises(expected_error,
                      verifier.verify_revision,
                      uploaded[0],
                      stored[0],
                      'replace')
def create_delete(message):
    """Insert a test record flagged DELETED and report its record id.

    Reads the optional 'diff' parameter from message (5 when absent or
    falsy), uploads a fixed MARCXML record in 'insert' mode, hands the new
    record id to message.setResults() and finally calls change_date() on it
    with that diff.
    """
    requested_diff = message.getParam('diff')
    if requested_diff:
        diff = int(str(requested_diff))
    else:
        diff = 5  # 5 secs older
    # NOTE(review): the id returned here is never used (the id comes from the
    # upload below); the call is kept only for whatever side effect it
    # has -- confirm whether it is still needed.
    bibupload.create_new_record()
    xml_to_delete = (
        ' <record> '
        '<datafield tag="100" ind1=" " ind2=" "> '
        '<subfield code="a">Test, Jane</subfield> '
        '<subfield code="u">Test Institute</subfield> '
        '</datafield> '
        '<datafield tag="100" ind1="4" ind2="7"> '
        '<subfield code="a">Test, Johnson</subfield> '
        '<subfield code="u">Test University</subfield> '
        '</datafield> '
        '<datafield tag="980" ind1="" ind2=""> '
        '<subfield code="c">DELETED</subfield> '
        '</datafield> '
        '</record> '
    )
    parsed = bibupload.xml_marc_to_records(xml_to_delete)
    upload_result = bibupload.bibupload(parsed[0], opt_mode='insert')
    recid = upload_result[1]
    message.setResults(Integer(recid))
    change_date(recid, diff=diff)
def perform_upload_check(xml_record, mode):
    """
    Performs an upload simulation with the given record and mode.

    While the check runs, bibupload_module.write_message is replaced by a
    collector that gathers the messages; the original writer is restored on
    every exit path (early return, normal completion or exception).

    @param xml_record: MARCXML handed to perform_basic_upload_checks and
        xml_marc_to_records
    @param mode: mode string; its first two characters are dropped to obtain
        the bibupload opt_mode
    @return: string describing errors (collected messages joined by newlines)
    @rtype: string
    """
    error_cache = []

    def my_writer(msg, stream=sys.stdout, verbose=1):
        # Keep only verbosity-1 messages and skip the 'DONE' notices.
        if verbose == 1 and 'DONE' not in msg:
            error_cache.append(msg.strip())

    orig_writer = bibupload_module.write_message
    bibupload_module.write_message = my_writer
    try:
        error_cache.extend(perform_basic_upload_checks(xml_record))
        if error_cache:
            # There has been some critical error
            return '\n'.join(error_cache)
        recs = xml_marc_to_records(xml_record)
        upload_mode = mode[2:]
        # Adapt input data for bibupload function
        if upload_mode == "r insert-or-replace":
            upload_mode = "replace_or_insert"
        for record in recs:
            if record:
                record_strip_empty_volatile_subfields(record)
                record_strip_empty_fields(record)
                bibupload(record, opt_mode=upload_mode, pretend=True)
    finally:
        # Always undo the monkeypatch, including on the early return above.
        bibupload_module.write_message = orig_writer
    return '\n'.join(error_cache)
def generate_keywords(req, recid, argd):
    """Extracts keywords from the fulltexts (if found) for the given recid.

    It first checks whether the keywords are not already stored in the temp
    file (maybe from the previous run).  Otherwise it writes a status page to
    req and, when the user asked for it and nothing is in progress, schedules
    an extraction.

    @var req: req object
    @var recid: record id
    @var argd: arguments passed from web; 'ln' and 'generate' are read here
    @return: the result of record_get_keywords() when the temp file could be
        loaded, otherwise the tuple (0, {}, None).
        NOTE(review): the two return shapes differ -- confirm what the
        callers expect.
    """
    ln = argd['ln']
    _ = gettext_set_language(ln)
    keywords = {}

    # check the files were not already generated
    abs_path = bibclassify_engine.get_tmp_file(recid)
    if os.path.exists(abs_path):
        try:
            # Try to load the data from the tmp file
            recs = bibupload.xml_marc_to_records(bibupload.open_marc_file(abs_path))
            return record_get_keywords(recs[0])
        except Exception:
            # Best effort: an unreadable temp file falls through to the
            # regular flow.  Narrowed from a bare except so that
            # SystemExit/KeyboardInterrupt are no longer swallowed.
            pass

    # check it is allowed (for this user) to generate pages
    (exit_stat, msg) = acce.acc_authorize_action(req, 'runbibclassify')
    if exit_stat != 0:
        log.info('Access denied: ' + msg)
        msg = _("The site settings do not allow automatic keyword extraction")
        req.write(template.tmpl_page_msg(msg=msg))
        return 0, keywords, None

    # register generation
    bibdocfiles = BibRecDocs(recid).list_latest_files()
    if bibdocfiles:
        # User arrived at a page, but no keywords are available
        inprogress, msg = _doc_already_submitted(recid)
        if argd['generate'] != 'yes':
            # Display a form and give them possibility to generate keywords
            if inprogress:
                req.write(template.tmpl_page_msg(msg='<div class="warningbox">%s</div>' % _(msg)))
            else:
                req.write(template.tmpl_page_generate_keywords(req=req, **argd))
            return 0, keywords, None
        else:  # after user clicked on "generate" button
            if inprogress:
                req.write(template.tmpl_page_msg(msg='<div class="warningbox">%s</div>' % _(msg)))
            else:
                schedule_extraction(recid, taxonomy=bconfig.CFG_EXTRACTION_TAXONOMY)
                # NOTE(review): the two literals join without a space
                # ("automatedkeyword").  Left untouched because the text is a
                # gettext message id; fix it together with the catalogs.
                req.write(template.tmpl_page_msg(msg='<div class="warningbox">%s</div>' %
                          _('We have registered your request, the automated'
                            'keyword extraction will run after some time. Please return back in a while.')))
    else:
        # NOTE(review): the original literal used a backslash continuation
        # whose indentation could not be recovered; verify the exact
        # whitespace of this message id against the translation catalogs.
        req.write(template.tmpl_page_msg(msg='<div class="warningbox">%s</div>' %
                  _("Unfortunately, we don't have a PDF fulltext for this record in the storage, "
                    "keywords cannot be generated using an automated process.")))
    return 0, keywords, None
def tearDown(self):
    """Undo the fixture: run bibupload in 'delete' mode for the test record,
    put back the patched OAI harvest functions and reset the record's
    modification date."""
    fixture_records = bibupload.xml_marc_to_records(self.bibupload_xml)
    bibupload.bibupload(fixture_records[0], opt_mode='delete')
    # Restore the callables that were swapped for mocks.
    oai_harvest_daemon.oai_harvest_get = self.oai_harvest_get
    oai_harvest_dblayer.get_oai_src = self.get_oai_src
    query = "UPDATE bibrec SET modification_date=%s WHERE id=%s"
    query_params = (self.original_modification_date, self.recid)
    run_sql(query, query_params)
def main(argv):
    """Fetch a record as MARCXML, let the user edit it and optionally save it.

    Options read from argv:
        -i <recID>  record to edit (0 when not given)
        -d          verbose/debug output, also dumps the edit to /tmp/debug

    The record is converted with the "marcxmltoplain.xsl" template, passed
    through convert_edit() and to_marc(), and uploaded in 'replace' mode if
    the user answers Y.
    """
    recID = 0
    opts, dummy_pargs = getopt.getopt(argv, 'di:')
    verbose = False
    for opt, arg in opts:
        if opt == '-i':
            recID = arg
        if opt == '-d':
            verbose = True
    result = format_record(recID=recID, of='xm')
    if result:
        # change the result to MARC by applying a template
        if verbose:
            print(result)
            raw_input("go on?")
        result = bibconvert_xslt_engine.convert(result, "marcxmltoplain.xsl")
        # call a sub that changes the stuff to editable form, calls editor,
        # returns a string
        new = convert_edit(result)
        newr = to_marc(new)
        if verbose:
            # debug: make sure the dump file is closed even if a write fails
            f = open('/tmp/debug', 'w')
            try:
                f.write(new)
                f.write(newr)
            finally:
                f.close()
            print(newr)
        if upper(raw_input("Save to DB Y/N:")) == 'Y':
            recs = xml_marc_to_records(''.join(newr))
            response = bibupload(recs[0], opt_mode='replace')
            if response[0]:
                # %-formatting instead of "+": the first element of the
                # bibupload result is compared against integers elsewhere in
                # this file, and str + int raises TypeError.
                print("Error updating record: %s" % (response[0],))
def setUp(self, recid=RECID, arxiv_id=ARXIV_ID):
    """Upload a minimal arXiv record in 'correct' mode and replace the OAI
    harvest functions with mocks; the originals are remembered on self."""
    self.recid = recid
    self.arxiv_id = arxiv_id
    self.arxiv_version = 1
    xml_template = (
        '<record> '
        '<controlfield tag="001">%s</controlfield> '
        '<datafield tag="037" ind1=" " ind2=" "> '
        '<subfield code="a">arXiv:%s</subfield> '
        '<subfield code="9">arXiv</subfield> '
        '<subfield code="c">hep-ph</subfield> '
        '</datafield> '
        '</record>'
    )
    self.bibupload_xml = xml_template % (recid, arxiv_id)

    bibtask.setup_loggers()
    bibtask.task_set_task_param('verbose', 0)
    parsed = bibupload.xml_marc_to_records(self.bibupload_xml)
    status, dummy, err = bibupload.bibupload(parsed[0], opt_mode='correct')
    assert status == 0, err.strip()
    assert len(get_fieldvalues(recid, '037__a')) == 1

    def mocked_oai_harvest_get(prefix, baseurl, harvestpath, verb, identifier):
        # Writes the canned response (for the current arxiv_version) to a
        # temporary file and returns its path in a list.
        handle, path = mkstemp()
        os.write(handle, ARXIV_OAI_RESPONSE % self.arxiv_version)
        os.close(handle)
        return [path]

    def mocked_get_oai_src(params={}):
        return [{'baseurl': ''}]

    # Keep the real callables so they can be put back later.
    self.oai_harvest_get = oai_harvest_daemon.oai_harvest_get
    oai_harvest_daemon.oai_harvest_get = mocked_oai_harvest_get
    self.get_oai_src = oai_harvest_dblayer.get_oai_src
    oai_harvest_dblayer.get_oai_src = mocked_get_oai_src
def test_correcting_del_field_add_field_diff_ind(self):
    """ BibUpload Revision Verifier - Rev3-100/970__/888, Deleted 970__ and Added 970CP in Rev2(100/970__), Patch Generated for 970__/970CP"""
    upload_recs = xml_marc_to_records(self.rev2_mod_del_one_add_one)
    orig_recs = xml_marc_to_records(self.data['rev3'][0])
    rev_verifier = RevisionVerifier()
    (opt_mode, patch, dummy_affected_tags) = rev_verifier.verify_revision(upload_recs[0],
                                                                          orig_recs[0],
                                                                          'replace')
    self.assertEqual('correct', opt_mode)
    # NOTE: for multiple fields in the patch it is better to compare with the
    # different possible patch strings, because the key-value pairs of the
    # generated patch dictionary are unsorted.
    patch_xml = record_xml_output(patch)
    # Require a MATCH with one of the accepted patches (compare_xmbuffers
    # gives '' for equal buffers).  The earlier "!= '' or != ''" condition
    # held for any patch that differed from at least one fixture, so the
    # assertion could never catch a wrong patch.
    self.assertTrue(compare_xmbuffers(patch_xml, self.patch_del_one_add_one) == ''
                    or compare_xmbuffers(patch_xml, self.patch_del_one_add_one_2) == '')
def tearDown(self):
    """Clean up after a test: bibupload the fixture record in 'delete' mode,
    reinstate the original OAI harvest functions and write the saved
    modification date back to bibrec."""
    parsed = bibupload.xml_marc_to_records(self.bibupload_xml)
    first_record = parsed[0]
    bibupload.bibupload(first_record, opt_mode='delete')
    oai_harvest_daemon.oai_harvest_get = self.oai_harvest_get
    oai_harvest_dblayer.get_oai_src = self.get_oai_src
    restore_params = (self.original_modification_date, self.recid)
    run_sql("UPDATE bibrec SET modification_date=%s WHERE id=%s", restore_params)
def test_BibUpload_revision_verifier(self):
    """ BibUpload Revision Verifier - Called from BibUpload Operation - Patch & Conflict Scenarios"""
    # --> Revision 1 submitted
    recs = xml_marc_to_records(self.rev1)
    error, self.recid, dummy_msg = bibupload(recs[0], opt_mode='insert')
    self.check_record_consistency(self.recid)
    record = get_record(self.recid)
    rev = record_get_field_value(record, '005', '', '')

    # Point the remaining fixtures at the new record id and its revision.
    self.rev2 = self.rev2.replace('123456789', str(self.recid))
    self.rev2 = self.rev2.replace('20110101000000.0', rev)
    self.rev1_modified = self.rev1_modified.replace('123456789', str(self.recid))
    self.rev1_modified = self.rev1_modified.replace('20110101000000.0', rev)
    self.final_xm = self.final_xm.replace('123456789', str(self.recid))

    # --> Revision 2 submitted
    recs = xml_marc_to_records(self.rev2)
    error, self.recid, dummy_msg = bibupload(recs[0], opt_mode='replace')
    self.check_record_consistency(self.recid)
    record = get_record(self.recid)
    self.rev2 = self.rev2.replace(
        rev, record_get_field_value(record, '005', '', ''))
    self.rev2_modified = self.rev2_modified.replace('123456789', str(self.recid))
    self.rev2_modified = self.rev2_modified.replace(
        '20110101000000.0', record_get_field_value(record, '005', '', ''))

    # --> Revision 1 modified submitted
    recs = xml_marc_to_records(self.rev1_modified)
    error, self.recid, dummy_msg = bibupload(recs[0], opt_mode='replace')
    self.check_record_consistency(self.recid)
    record = get_record(self.recid)
    rev = record_get_field_value(record, '005', '', '')
    self.final_xm = self.final_xm.replace('20110101000000.0', rev)
    self.assertEqual(
        compare_xmbuffers(self.final_xm, print_record(self.recid, 'xm')), '')

    # --> Revision 2 modified submitted
    recs = xml_marc_to_records(self.rev2_modified)
    error, self.recid, dummy_msg = bibupload(recs[0], opt_mode='replace')
    self.check_record_consistency(self.recid)
    # This last upload is expected to come back with error code 2.
    self.assertEqual(error, 2)
def test_BibUpload_revision_verifier(self):
    """ BibUpload Revision Verifier - Called from BibUpload Operation - Patch & Conflict Scenarios"""
    recid_placeholder = "123456789"
    rev_placeholder = "20110101000000.0"

    # --> Revision 1 submitted
    recs = xml_marc_to_records(self.rev1)
    error, self.recid, dummy_msg = bibupload(recs[0], opt_mode="insert")
    self.check_record_consistency(self.recid)
    record = get_record(self.recid)
    rev = record_get_field_value(record, "005", "", "")

    # Substitute the real record id / revision into the later fixtures.
    self.rev2 = self.rev2.replace(recid_placeholder, str(self.recid))
    self.rev2 = self.rev2.replace(rev_placeholder, rev)
    self.rev1_modified = self.rev1_modified.replace(recid_placeholder, str(self.recid))
    self.rev1_modified = self.rev1_modified.replace(rev_placeholder, rev)
    self.final_xm = self.final_xm.replace(recid_placeholder, str(self.recid))

    # --> Revision 2 submitted
    recs = xml_marc_to_records(self.rev2)
    error, self.recid, dummy_msg = bibupload(recs[0], opt_mode="replace")
    self.check_record_consistency(self.recid)
    record = get_record(self.recid)
    self.rev2 = self.rev2.replace(rev, record_get_field_value(record, "005", "", ""))
    self.rev2_modified = self.rev2_modified.replace(recid_placeholder, str(self.recid))
    self.rev2_modified = self.rev2_modified.replace(
        rev_placeholder, record_get_field_value(record, "005", "", "")
    )

    # --> Revision 1 modified submitted
    recs = xml_marc_to_records(self.rev1_modified)
    error, self.recid, dummy_msg = bibupload(recs[0], opt_mode="replace")
    self.check_record_consistency(self.recid)
    record = get_record(self.recid)
    rev = record_get_field_value(record, "005", "", "")
    self.final_xm = self.final_xm.replace(rev_placeholder, rev)
    self.assertEqual(compare_xmbuffers(self.final_xm, print_record(self.recid, "xm")), "")

    # --> Revision 2 modified submitted
    recs = xml_marc_to_records(self.rev2_modified)
    error, self.recid, dummy_msg = bibupload(recs[0], opt_mode="replace")
    self.check_record_consistency(self.recid)
    # This last upload is expected to come back with error code 2.
    self.assertEqual(error, 2)
def setUp(self):
    """ Builds the fixtures for the special delete-field scenario: Rev 1
    (100 plus three 300 fields), its replacement holding only the 100 field,
    and the two accepted delete patches; Rev 1 is inserted and the fixtures
    are updated with the resulting record id and 005 value."""
    GenericBibUploadTest.setUp(self)
    # Rev 1
    self.rev1 = (
        '<record> '
        '<controlfield tag="001">123456789</controlfield> '
        '<controlfield tag="005">20110101000000.0</controlfield> '
        '<datafield tag ="100" ind1=" " ind2=" "> '
        '<subfield code="a">Tester, T</subfield> '
        '<subfield code="u">DESY</subfield> '
        '</datafield> '
        '<datafield tag ="300" ind1=" " ind2=" "> '
        '<subfield code="a">Test, Field-1</subfield> '
        '</datafield> '
        '<datafield tag ="300" ind1=" " ind2=" "> '
        '<subfield code="a">Test, Field-2</subfield> '
        '</datafield> '
        '<datafield tag ="300" ind1="C" ind2="P"> '
        '<subfield code="a">Test, Field-3</subfield> '
        '</datafield> '
        '</record>'
    )
    # Rev 1 -- To Replace
    self.rev1_mod = (
        '<record> '
        '<controlfield tag="001">123456789</controlfield> '
        '<controlfield tag="005">20110101000000.0</controlfield> '
        '<datafield tag ="100" ind1=" " ind2=" "> '
        '<subfield code="a">Tester, T</subfield> '
        '<subfield code="u">DESY</subfield> '
        '</datafield> '
        '</record>'
    )
    # Patch with SPECIAL DELETE FIELD-1
    self.patch_1 = (
        '<record> '
        '<controlfield tag="001">123456789</controlfield> '
        '<datafield tag ="300" ind1=" " ind2=" "> '
        '<subfield code="0">__DELETE_FIELDS__</subfield> '
        '</datafield> '
        '<datafield tag ="300" ind1="C" ind2="P"> '
        '<subfield code="0">__DELETE_FIELDS__</subfield> '
        '</datafield> '
        '</record>'
    )
    # Patch with SPECIAL DELETE FIELD-2
    self.patch_2 = (
        '<record> '
        '<controlfield tag="001">123456789</controlfield> '
        '<datafield tag ="300" ind1="C" ind2="P"> '
        '<subfield code="0">__DELETE_FIELDS__</subfield> '
        '</datafield> '
        '<datafield tag ="300" ind1=" " ind2=" "> '
        '<subfield code="0">__DELETE_FIELDS__</subfield> '
        '</datafield> '
        '</record>'
    )

    # The record to insert is Rev 1 without its 001 and 005 controlfields.
    stripped = self.rev1.replace('<controlfield tag="001">123456789</controlfield>', '')
    stripped = stripped.replace('<controlfield tag="005">20110101000000.0</controlfield>', '')
    self.rev_to_insert = stripped
    parsed = xml_marc_to_records(self.rev_to_insert)
    dummy_error, self.recid, dummy_msg = bibupload(parsed[0], opt_mode='insert')
    self.check_record_consistency(self.recid)

    # Replace the placeholder id in every fixture with the real record id.
    recid_text = str(self.recid)
    for attr_name in ('rev1', 'rev1_mod', 'patch_1', 'patch_2'):
        setattr(self, attr_name, getattr(self, attr_name).replace('123456789', recid_text))

    # Replace the placeholder revision with the stored record's 005 value.
    stored = get_record(self.recid)
    current_rev = record_get_field_value(stored, '005')
    self.rev1 = self.rev1.replace('20110101000000.0', current_rev)
    self.rev1_mod = self.rev1_mod.replace('20110101000000.0', current_rev)
def init_test_records():
    """
    Initializes test records for revision verifying scenarios.

    Inserts the 1st version (tag 100), then appends tag 970 and tag 888 in
    two further uploads to create the 2nd and 3rd version of the record.

    Returns a dict of the following format:
        {'id': (recid_as_string,),
         'rev1': (rev1_rec, rev1_005tag),
         'rev2': (rev2_rec, rev2_005tag),
         'rev3': (rev3_rec, rev3_005tag)}
    """
    id_placeholder = '123456789'
    rev_placeholder = '20110101000000.0'

    # Rev 1 -- tag 100
    rev1 = (
        ' <record> '
        '<controlfield tag="001">123456789</controlfield> '
        '<controlfield tag="005">20110101000000.0</controlfield> '
        '<datafield tag ="100" ind1=" " ind2=" "> '
        '<subfield code="a">Tester, T</subfield> '
        '<subfield code="u">DESY</subfield> '
        '</datafield> '
        '</record>'
    )
    # Append 970 to Rev1
    rev1_append = (
        '<record> '
        '<controlfield tag="001">123456789</controlfield> '
        '<datafield tag ="970" ind1=" " ind2=" "> '
        '<subfield code="a">0003719PHOPHO</subfield> '
        '</datafield> '
        '</record>'
    )
    # Rev 2 -- Rev 1 + tag 970
    rev2 = (
        '<record> '
        '<controlfield tag="001">123456789</controlfield> '
        '<controlfield tag="005">20110101000000.0</controlfield> '
        '<datafield tag ="100" ind1=" " ind2=" "> '
        '<subfield code="a">Tester, T</subfield> '
        '<subfield code="u">DESY</subfield> '
        '</datafield> '
        '<datafield tag ="970" ind1=" " ind2=" "> '
        '<subfield code="a">0003719PHOPHO</subfield> '
        '</datafield> '
        '</record>'
    )
    # Append 888 to Rev2
    rev2_append = (
        '<record> '
        '<controlfield tag="001">123456789</controlfield> '
        '<datafield tag="888" ind1=" " ind2=" "> '
        '<subfield code="a">dumb text</subfield> '
        '</datafield> '
        '</record>'
    )
    # Rev 3 -- Rev 2 + tag 888
    rev3 = (
        '<record> '
        '<controlfield tag="001">123456789</controlfield> '
        '<controlfield tag="005">20110101000000.0</controlfield> '
        '<datafield tag ="100" ind1=" " ind2=" "> '
        '<subfield code="a">Tester, T</subfield> '
        '<subfield code="u">DESY</subfield> '
        '</datafield> '
        '<datafield tag ="970" ind1=" " ind2=" "> '
        '<subfield code="a">0003719PHOPHO</subfield> '
        '</datafield> '
        '<datafield tag="888" ind1=" " ind2=" "> '
        '<subfield code="a">dumb text</subfield> '
        '</datafield> '
        '</record>'
    )

    init_details = {}

    # The record to insert is Rev 1 without its 001 and 005 controlfields.
    insert_record = rev1.replace('<controlfield tag="001">123456789</controlfield>', '')
    insert_record = insert_record.replace(
        '<controlfield tag="005">20110101000000.0</controlfield>', '')

    # --> Revision 1 submitted
    parsed = xml_marc_to_records(insert_record)
    upload_result = bibupload(parsed[0], opt_mode='insert')
    recid = upload_result[1]
    init_details['id'] = (str(recid), )
    stored = get_record(recid)
    rev_tag = record_get_field_value(stored, '005', '', '')

    # update the test data
    recid_text = str(recid)
    rev1 = rev1.replace(id_placeholder, recid_text)
    rev1 = rev1.replace(rev_placeholder, rev_tag)
    rev1_append = rev1_append.replace(id_placeholder, recid_text)
    rev2 = rev2.replace(id_placeholder, recid_text)
    rev2 = rev2.replace(rev_placeholder, rev_tag)
    rev2_append = rev2_append.replace(id_placeholder, recid_text)
    rev3 = rev3.replace(id_placeholder, recid_text)
    init_details['rev1'] = (rev1, rev_tag)
    previous_rev_tag = rev_tag

    # --> Revision 2 submitted
    parsed = xml_marc_to_records(rev1_append)
    upload_result = bibupload(parsed[0], opt_mode='append')
    stored = get_record(recid)
    rev_tag = record_get_field_value(stored, '005')
    rev2 = rev2.replace(previous_rev_tag, rev_tag)
    rev3 = rev3.replace(rev_placeholder, rev_tag)
    init_details['rev2'] = (rev2, rev_tag)
    previous_rev_tag = rev_tag

    # --> Revision 3 submitted
    parsed = xml_marc_to_records(rev2_append)
    upload_result = bibupload(parsed[0], opt_mode='append')
    stored = get_record(recid)
    rev_tag = record_get_field_value(stored, '005')
    rev3 = rev3.replace(previous_rev_tag, rev_tag)
    init_details['rev3'] = (rev3, rev_tag)
    return init_details
def tearDown(self):
    """Undo the fixture: bibupload the test record in 'delete' mode and put
    the original OAI harvest functions back in place."""
    fixture_records = bibupload.xml_marc_to_records(self.bibupload_xml)
    record_to_delete = fixture_records[0]
    bibupload.bibupload(record_to_delete, opt_mode='delete')
    # Reinstate the callables that were replaced by mocks.
    oai_harvest_daemon.oai_harvest_get = self.oai_harvest_get
    oai_harvest_dblayer.get_oai_src = self.get_oai_src
def setUp(self):
    """ Prepares the fixtures for the Modified Fields scenarios.

    Builds three MARCXML strings (self.rev1, self.rev1_mod, self.patch),
    inserts rev1 without its 001/005 control fields, stores the new record
    id in self.recid and then replaces the placeholder id / 005 values in
    the fixtures with the ones read back from the inserted record.
    """
    GenericBibUploadTest.setUp(self)

    placeholder_id = '123456789'
    placeholder_005 = '20110101000000.0'
    ctrl_001 = '<controlfield tag="001">123456789</controlfield>'
    ctrl_005 = '<controlfield tag="005">20110101000000.0</controlfield>'

    author_tmpl = ('<datafield tag ="100" ind1=" " ind2=" "> '
                   '<subfield code="a">Tester%d, T</subfield> '
                   '<subfield code="u">DESY%d</subfield> '
                   '</datafield>')
    first, second, third = [author_tmpl % (n, n) for n in (1, 2, 3)]
    trailing_fields = [
        ('<datafield tag ="970" ind1=" " ind2=" "> '
         '<subfield code="a">0003719PHYPHY</subfield> '
         '</datafield>'),
        ('<datafield tag="888" ind1=" " ind2=" "> '
         '<subfield code="a">dumb text</subfield> '
         '</datafield>'),
    ]
    rotated = [second, third, first]

    # Rev 1 -- 100-1/100-2/100-3
    self.rev1 = ' '.join(['<record>', ctrl_001, ctrl_005]
                         + [first, second, third]
                         + trailing_fields
                         + ['</record>'])
    # Rev 1 Modified -- 100-2/100-3/100-1
    self.rev1_mod = ' '.join(['<record>', ctrl_001, ctrl_005]
                             + rotated
                             + trailing_fields
                             + ['</record>'])
    # Expected patch -- only the reordered 100 fields
    self.patch = ' '.join(['<record>', ctrl_001] + rotated + ['</record>'])

    # --> Revision 1 submitted (placeholder 001/005 stripped beforehand)
    to_insert = self.rev1.replace(ctrl_001, '').replace(ctrl_005, '')
    parsed = xml_marc_to_records(to_insert)
    upload_result = bibupload(parsed[0], opt_mode='insert')
    self.recid = upload_result[1]
    self.check_record_consistency(self.recid)
    stored_005 = record_get_field_value(get_record(self.recid),
                                        '005', '', '')

    # update the test data
    recid_str = str(self.recid)
    for attr in ('rev1', 'rev1_mod', 'patch'):
        setattr(self, attr,
                getattr(self, attr).replace(placeholder_id, recid_str))
    for attr in ('rev1', 'rev1_mod'):
        setattr(self, attr,
                getattr(self, attr).replace(placeholder_005, stored_005))
def setUp(self):
    """ Prepares the fixtures for the deleted-fields scenarios.

    Builds self.rev1 (tag 100 plus three 300 fields), self.rev1_mod (tag
    100 only) and two expected patches that carry the __DELETE_FIELDS__
    marker for the 300 fields in both orders. rev1 is inserted without its
    001/005 control fields (kept in self.rev_to_insert), the new record id
    is stored in self.recid, and the placeholder id / 005 values in the
    fixtures are replaced with the real ones.
    """
    GenericBibUploadTest.setUp(self)

    placeholder_id = '123456789'
    placeholder_005 = '20110101000000.0'
    ctrl_001 = '<controlfield tag="001">123456789</controlfield>'
    ctrl_005 = '<controlfield tag="005">20110101000000.0</controlfield>'

    field_100 = ('<datafield tag ="100" ind1=" " ind2=" "> '
                 '<subfield code="a">Tester, T</subfield> '
                 '<subfield code="u">DESY</subfield> '
                 '</datafield>')
    # 300 field template: ind1, ind2, subfield code, subfield value
    tmpl_300 = ('<datafield tag ="300" ind1="%s" ind2="%s"> '
                '<subfield code="%s">%s</subfield> '
                '</datafield>')
    delete_blank = tmpl_300 % (' ', ' ', '0', '__DELETE_FIELDS__')
    delete_cp = tmpl_300 % ('C', 'P', '0', '__DELETE_FIELDS__')

    # Rev 1
    self.rev1 = ' '.join([
        '<record>', ctrl_001, ctrl_005, field_100,
        tmpl_300 % (' ', ' ', 'a', 'Test, Field-1'),
        tmpl_300 % (' ', ' ', 'a', 'Test, Field-2'),
        tmpl_300 % ('C', 'P', 'a', 'Test, Field-3'),
        '</record>'])
    # Rev 1 -- To Replace
    self.rev1_mod = ' '.join(
        ['<record>', ctrl_001, ctrl_005, field_100, '</record>'])
    # Patch with SPECIAL DELETE FIELD-1
    self.patch_1 = ' '.join(
        ['<record>', ctrl_001, delete_blank, delete_cp, '</record>'])
    # Patch with SPECIAL DELETE FIELD-2
    self.patch_2 = ' '.join(
        ['<record>', ctrl_001, delete_cp, delete_blank, '</record>'])

    # Insert rev1 with the placeholder 001/005 control fields stripped.
    self.rev_to_insert = self.rev1.replace(ctrl_001, '').replace(ctrl_005, '')
    parsed = xml_marc_to_records(self.rev_to_insert)
    dummy_error, self.recid, dummy_msg = bibupload(parsed[0],
                                                   opt_mode='insert')
    self.check_record_consistency(self.recid)

    recid_str = str(self.recid)
    for attr in ('rev1', 'rev1_mod', 'patch_1', 'patch_2'):
        setattr(self, attr,
                getattr(self, attr).replace(placeholder_id, recid_str))

    stored_005 = record_get_field_value(get_record(self.recid), '005')
    for attr in ('rev1', 'rev1_mod'):
        setattr(self, attr,
                getattr(self, attr).replace(placeholder_005, stored_005))
def generate_keywords(req, recid, argd):
    """Extracts keywords from the fulltexts (if found) for the given recid.

    It first checks whether the keywords are not already stored in the
    temp file (maybe from the previous run). If they cannot be loaded from
    there, one page fragment is written to req, depending on the situation:
      - the user is not authorized for 'runbibclassify': a refusal message
      - the record has no latest files: a "no fulltext" message
      - argd['generate'] is not 'yes': the generation form, or the message
        returned by _doc_already_submitted() when that call reports an
        extraction in progress
      - argd['generate'] is 'yes': that same in-progress message, or the
        extraction is scheduled and a confirmation is written

    @var req: req object
    @var recid: record id
    @var argd: arguments passed from web; the keys 'ln' and 'generate'
        are read here and the whole dict is forwarded to the form template
    @return: whatever record_get_keywords() returns when the temp file
        could be loaded; in every other case the tuple (0, {}, None).
        NOTE(review): the two shapes differ -- confirm that callers cope
        with both before unifying them.
    """
    ln = argd['ln']
    _ = gettext_set_language(ln)
    keywords = {}

    # check the files were not already generated
    abs_path = bibclassify_engine.get_tmp_file(recid)
    if os.path.exists(abs_path):
        try:
            # Try to load the data from the tmp file
            recs = bibupload.xml_marc_to_records(
                bibupload.open_marc_file(abs_path))
            return record_get_keywords(recs[0])
        except Exception:
            # Best effort only: an unreadable temp file must not break the
            # page, but it should leave a trace. Unlike the former bare
            # ``except:``, SystemExit/KeyboardInterrupt now propagate.
            log.info('Could not load stored keywords from %s' % (abs_path,))

    # check it is allowed (for this user) to generate pages
    (exit_stat, msg) = acce.acc_authorize_action(req, 'runbibclassify')
    if exit_stat != 0:
        log.info('Access denied: ' + msg)
        msg = _("The site settings do not allow automatic keyword extraction")
        req.write(template.tmpl_page_msg(msg=msg))
        return 0, keywords, None

    # register generation
    bibdocfiles = BibRecDocs(recid).list_latest_files()
    if bibdocfiles:
        # User arrived at a page, but no keywords are available
        inprogress, msg = _doc_already_submitted(recid)
        if argd['generate'] != 'yes':
            # Display a form and give them possibility to generate keywords
            if inprogress:
                req.write(
                    template.tmpl_page_msg(
                        msg='<div class="warningbox">%s</div>' % _(msg)))
            else:
                req.write(template.tmpl_page_generate_keywords(req=req,
                                                               **argd))
            return 0, keywords, None
        else:
            # after user clicked on "generate" button
            if inprogress:
                req.write(
                    template.tmpl_page_msg(
                        msg='<div class="warningbox">%s</div>' % _(msg)))
            else:
                schedule_extraction(recid,
                                    taxonomy=bconfig.CFG_EXTRACTION_TAXONOMY)
                # The two literals used to be joined without a blank,
                # rendering "automatedkeyword". This changes the gettext
                # msgid, so translation catalogs need a refresh.
                req.write(
                    template.tmpl_page_msg(
                        msg='<div class="warningbox">%s</div>' % _(
                            'We have registered your request, the automated '
                            'keyword extraction will run after some time. '
                            'Please return back in a while.')))
    else:
        req.write(
            template.tmpl_page_msg(
                msg='<div class="warningbox">%s</div>' % _(
                    "Unfortunately, we don't have a PDF fulltext for this "
                    "record in the storage, "
                    "keywords cannot be generated using an automated "
                    "process.")))
    return 0, keywords, None