def _get_formated_record(record_id, output_format, update_commands, language,
                         outputTags="", run_diff=True, checked=True):
    """Format one record for display, optionally previewing pending updates.

    @param record_id: the ID of the record to format
    @param output_format: an output format code; "hm" produces a textmarc
        listing (or a diff), anything else is delegated to bibformat
    @param update_commands: list of commands used to update record contents
    @param language: the language passed on to bibformat
    @param outputTags: tags to show in the "hm" listing; the listing is
        unfiltered when this is empty or contains "All tags"
    @param run_diff: when false, the "hm" output skips _get_record_diff
        (which can be slow) and lists the stored record instead
    @param checked: the update commands are only applied when this is true

    @return: the formatted record as a string
    """
    apply_updates = bool(update_commands and checked)
    if apply_updates:
        # Build the modified bibrecord object up front; both output
        # branches below may need it.
        updated_record = _get_updated_record(record_id, update_commands)

    textmarc_options = {
        "aleph-marc": 0,
        "correct-mode": 1,
        "append-mode": 0,
        "delete-mode": 0,
        "insert-mode": 0,
        "replace-mode": 0,
        "text-marc": 1,
    }

    old_record = search_engine.get_record(recid=record_id)
    old_record_textmarc = xmlmarc2textmarc.create_marc_record(
        old_record, sysno="", options=textmarc_options)

    if output_format != "hm":
        # No coloring of modifications for non-textmarc formats.
        if apply_updates:
            record_to_render = updated_record
        else:
            record_to_render = old_record
        return bibformat.format_record(
            recID=None,
            of=output_format,
            xml_record=bibrecord.record_xml_output(record_to_render),
            ln=language)

    if apply_updates and run_diff:
        updated_record_textmarc = xmlmarc2textmarc.create_marc_record(
            updated_record, sysno="", options=textmarc_options)
        return _get_record_diff(old_record_textmarc,
                                updated_record_textmarc,
                                outputTags,
                                record_id)

    # Plain listing of the stored record, optionally restricted to the
    # requested tags.  The final textmarc line is dropped.
    restrict_tags = "All tags" not in outputTags and outputTags
    rows = ['<pre>']
    for marc_line in old_record_textmarc.splitlines()[:-1]:
        if restrict_tags and \
           marc_line.split()[0].replace('_', '') not in outputTags:
            continue
        rows.append("%09d " % record_id + marc_line.strip())
    rows.append('</pre>')
    return '\n'.join(rows)
def _get_formated_record(record_id, output_format, update_commands, language, outputTags="", checked=True, displayed_records=None): """Returns a record in a given format @param record_id: the ID of record to format @param output_format: an output format code (or short identifier for the output format) @param update_commands: list of commands used to update record contents @param language: the language to use to format the record @param outputTags: the tags to be shown to the user @param checked: is the record checked by the user? @param displayed_records: records to be displayed on a given page @returns: record formated to be displayed or None """ if update_commands and checked: # Modify the bibrecord object with the appropriate actions updated_record = _get_updated_record(record_id, update_commands) textmarc_options = {"aleph-marc":0, "correct-mode":1, "append-mode":0, "delete-mode":0, "insert-mode":0, "replace-mode":0, "text-marc":1} if record_id not in displayed_records: return old_record = search_engine.get_record(recid=record_id) old_record_textmarc = xmlmarc2textmarc.create_marc_record(old_record, sysno="", options=textmarc_options) if "hm" == output_format: if update_commands and checked: updated_record_textmarc = xmlmarc2textmarc.create_marc_record(updated_record, sysno="", options=textmarc_options) result = _get_record_diff(old_record_textmarc, updated_record_textmarc, outputTags, record_id) else: filter_tags = "All tags" not in outputTags and outputTags result = ['<pre>'] for line in old_record_textmarc.splitlines(): if not filter_tags or line.split()[0].replace('_', '') in outputTags: result.append("%09d " % record_id + line.strip()) result.append('</pre>') result = '\n'.join(result) else: if update_commands and checked: # No coloring of modifications in this case xml_record = bibrecord.record_xml_output(updated_record) else: xml_record = bibrecord.record_xml_output(old_record) result = bibformat.format_record(recID=None, of=output_format, 
xml_record=xml_record, ln=language) return result
def _create_marc(records_xml):
    """Convert a MARCXML document into its textmarc representation.

    @param records_xml: MARCXML containing information about the records

    @return: string with the textmarc of every record, concatenated in
        the order the records were parsed
    """
    marc_chunks = []
    for record, _status_code, _errors in bibrecord.create_records(records_xml):
        # A fresh options dict is built for every record, as the converter
        # receives it by reference.
        conversion_options = {"aleph-marc":0, "correct-mode":1,
                              "append-mode":0, "delete-mode":0,
                              "insert-mode":0, "replace-mode":0,
                              "text-marc":1}
        marc_chunks.append(
            xmlmarc2textmarc.create_marc_record(record, "",
                                                conversion_options))
    return "".join(marc_chunks)
def _create_marc(records_xml):
    """Render every record found in a MARCXML string as textmarc.

    @param records_xml: MARCXML containing information about the records

    @return: one string holding the textmarc of all records, in parse order
    """
    def _record_to_textmarc(parsed_record):
        # No system number is supplied; the options dict is rebuilt for
        # each record so no state is shared between conversions.
        return xmlmarc2textmarc.create_marc_record(
            parsed_record,
            "",
            {
                "aleph-marc": 0,
                "correct-mode": 1,
                "append-mode": 0,
                "delete-mode": 0,
                "insert-mode": 0,
                "replace-mode": 0,
                "text-marc": 1
            })

    parsed_records = bibrecord.create_records(records_xml)
    return "".join([_record_to_textmarc(record)
                    for (record, _status_code, _errors) in parsed_records])
def _create_marc(records_xml):
    """Creates MARC from MARCXML.

    @param records_xml: MARCXML containing information about the records

    @return: string containing information about the records in MARC format
    """
    marc_parts = []
    records = bibrecord.create_records(records_xml)
    for (record, _status_code, _list_of_errors) in records:
        # The system number is in field 970a
        # By this reason it should exist in the MARC XML
        # otherwise it will be None in the output ALEPH marc
        sysno_options = {"text-marc": 0}
        sysno = xmlmarc2textmarc.get_sysno_from_record(record, sysno_options)
        if sysno is None:
            # Fall back to an empty system number rather than passing
            # None on to the converter.
            sysno = ""

        options = {
            "aleph-marc": 0,
            "correct-mode": 1,
            "append-mode": 0,
            "delete-mode": 0,
            "insert-mode": 0,
            "replace-mode": 0,
            "text-marc": 1
        }
        marc_parts.append(
            xmlmarc2textmarc.create_marc_record(record, sysno, options))

    # Join once instead of growing a string inside the loop.
    return "".join(marc_parts)
def _create_marc(records_xml):
    """Creates MARC from MARCXML.

    @param records_xml: MARCXML containing information about the records

    @return: string containing information about the records in MARC format
    """
    converted = []
    for (record, _status_code, _list_of_errors) in \
            bibrecord.create_records(records_xml):
        # The system number is in field 970a
        # By this reason it should exist in the MARC XML
        # otherwise it will be None in the output ALEPH marc
        sysno_options = {"text-marc":0}
        sysno = xmlmarc2textmarc.get_sysno_from_record(record, sysno_options)
        if sysno is None:
            # Identity test: only a genuinely missing system number is
            # replaced by the empty string.
            sysno = ""

        options = {"aleph-marc":0, "correct-mode":1, "append-mode":0,
                   "delete-mode":0, "insert-mode":0, "replace-mode":0,
                   "text-marc":1}
        converted.append(
            xmlmarc2textmarc.create_marc_record(record, sysno, options))

    # Single join rather than repeated string concatenation.
    return "".join(converted)
def _get_formated_record(record_id, output_format, update_commands, language,
                         outputTags="", run_diff=True):
    """Format one record, previewing the given update commands if any.

    @param record_id: the ID of the record to format
    @param output_format: an output format code; "hm" yields a textmarc
        listing or diff, other codes are rendered through bibformat
    @param update_commands: list of commands used to update record contents
    @param language: the language to use to format the record
    @param outputTags: tags to keep in the "hm" listing; no filtering is
        done when this is empty or contains "All tags"
    @param run_diff: set to false to skip _get_record_diff, which
        sometimes takes too much time

    @return: the formatted record as a string
    """
    has_updates = bool(update_commands)
    if has_updates:
        # Modify the bibrecord object with the appropriate actions
        updated_record = _get_updated_record(record_id, update_commands)

    textmarc_options = {"aleph-marc":0, "correct-mode":1, "append-mode":0,
                        "delete-mode":0, "insert-mode":0, "replace-mode":0,
                        "text-marc":1}

    old_record = search_engine.get_record(recid=record_id)
    old_record_textmarc = xmlmarc2textmarc.create_marc_record(
        old_record, sysno="", options=textmarc_options)

    if "hm" == output_format:
        if has_updates and run_diff:
            updated_record_textmarc = xmlmarc2textmarc.create_marc_record(
                updated_record, sysno="", options=textmarc_options)
            return _get_record_diff(old_record_textmarc,
                                    updated_record_textmarc,
                                    outputTags,
                                    record_id)

        # Listing of the stored record without its last textmarc line,
        # keeping only the requested tags when a filter is active.
        wanted_tags = "All tags" not in outputTags and outputTags
        listing = ["%09d " % record_id + text.strip()
                   for text in old_record_textmarc.splitlines()[:-1]
                   if not wanted_tags
                   or text.split()[0].replace('_', '') in outputTags]
        return '\n'.join(['<pre>'] + listing + ['</pre>'])

    # No coloring of modifications for the other output formats.
    if has_updates:
        xml_record = bibrecord.record_xml_output(updated_record)
    else:
        xml_record = bibrecord.record_xml_output(old_record)
    return bibformat.format_record(recID=None,
                                   of=output_format,
                                   xml_record=xml_record,
                                   ln=language)
def transform_record_to_marc(record, options=None):
    """
    This function will transform a given bibrec record into marc using
    methods from xmlmarc2textmarc in invenio.textutils. The function returns
    the record as a MARC string.

    @param record: bibrec structure for record to transform
    @type record: dict

    @param options: dictionary describing type of MARC record. Defaults to
        textmarc ({'text-marc': 1, 'aleph-marc': 0}) when omitted or None.
    @type options: dict

    @return resulting MARC record as string
    """
    if options is None:
        # Build the default per call: a dict literal in the signature would
        # be one shared object handed to the helpers on every call.
        options = {'text-marc': 1, 'aleph-marc': 0}
    sysno = get_sysno_from_record(record, options)
    # Note: Record dict is copied as create_marc_record() perform deletions
    return create_marc_record(record.copy(), sysno, options)
def _first_capture_from_patterns(patterns, text):
    """Return [value] for the first pattern that captures something in
    text, or [] when no pattern does."""
    captured = []
    for pattern in patterns:
        for hit in pattern.findall(text):
            if isinstance(hit, tuple):
                # Several groups in the pattern: keep the first non-empty one.
                # NOTE(review): scanning goes on after a tuple hit, so a
                # later hit of the same pattern overwrites this value;
                # kept as in the original, confirm whether it is intended.
                for group in hit:
                    if group != "":
                        captured = [group]
                        break
            elif isinstance(hit, str):
                captured = [hit]
                break
        if captured:
            break
    return captured


def parse_noresultfile(data, recid_patterns=(re_original_id,), sysno_patterns=None):
    """
    This function will look for the original recid in 001 and any matching
    recids from given regular expression patterns in the textmarc format of
    given record.

    @param data: iterable of MARCXML strings, one record each
    @param recid_patterns: compiled patterns tried in order against the
        textmarc of the record to find a matching recid
    @param sysno_patterns: compiled patterns tried in order to find a
        matching system number; None (the default) means no patterns

    @return: list of (rec_id, matching_recids, matching_sysnos) tuples,
        where the last two items are lists with at most one element
    """
    if sysno_patterns is None:
        # The default used to be iterated directly, which raised TypeError.
        sysno_patterns = ()

    record_pairs = []
    sysno_gen = get_sysno_generator()
    options = {'text-marc':1, 'aleph-marc':0}
    for raw_record in data:
        original_record_bibrec = create_records(raw_record)[0][0]
        rec_id = record_get_field_value(original_record_bibrec, '001')
        sysno = sysno_gen.next()
        original_record_marc = create_marc_record(original_record_bibrec,
                                                  sysno, options)

        matching_result_recids = _first_capture_from_patterns(
            recid_patterns, original_record_marc)
        matching_result_sysnos = _first_capture_from_patterns(
            sysno_patterns, original_record_marc)

        record_pairs.append((rec_id,
                             matching_result_recids,
                             matching_result_sysnos))
    return record_pairs
def transform_record_to_marc(record, options=None):
    """
    This function will transform a given bibrec record into marc using
    methods from xmlmarc2textmarc in invenio.textutils. The function returns
    the record as a MARC string.

    @param record: bibrec structure for record to transform
    @type record: dict

    @param options: dictionary describing type of MARC record. When omitted
        or None, textmarc is produced ({'text-marc': 1, 'aleph-marc': 0}).
    @type options: dict

    @return resulting MARC record as string
    """
    if options is None:
        # A fresh default for every call; a mutable default in the
        # signature would be shared between all calls.
        options = {
            'text-marc': 1,
            'aleph-marc': 0
        }
    sysno = get_sysno_from_record(record, options)
    # Note: Record dict is copied as create_marc_record() perform deletions
    return create_marc_record(record.copy(), sysno, options)
def parse_resultfile(data, recid_patterns=(re_original_id,), recids=(), sysno_patterns=None, preserved_tags=()):
    """
    This function will look for the original recid and any matching recids in
    a BibMatch result file containing references to matching records in
    comments before every record in MARCXML format.

    @param data: iterable of result-file entries (comment plus MARCXML),
        one record each
    @param recid_patterns: compiled patterns tried in order against the
        textmarc of a record that has no 001 field, to recover its recid
    @param recids: optional sequence parallel to data; when non-empty, the
        item at the same index is used as the single matching recid instead
        of parsing it from the entry
    @param sysno_patterns: currently unused by this function
    @param preserved_tags: tags whose fields are copied from the original
        record into the result; tags absent from the record are skipped

    @return: list of (rec_id, matching_recids, matching_sysnos,
        preserved_fields) tuples; matching_sysnos is always an empty list
    """
    record_pairs = []
    sysno_gen = get_sysno_generator()
    options = {'text-marc':1, 'aleph-marc':0}
    for index, match in enumerate(data):
        original_record_bibrec = create_records(match)[0][0]
        if record_has_field(original_record_bibrec, '001'):
            rec_id = record_get_field_value(original_record_bibrec, '001')
        else:
            # No 001 field: look for the recid in the textmarc instead.
            sysno = sysno_gen.next()
            original_record_marc = create_marc_record(original_record_bibrec,
                                                      sysno, options)
            rec_id = ""
            for pattern in recid_patterns:
                matches = pattern.findall(original_record_marc)
                if matches:
                    rec_id = matches[0]
                    break

        if recids:
            matching_result_recids = [recids[index]]
        else:
            matching_result_recids = re_matched_recid.findall(match)
        matching_result_sysnos = []

        preserved_fields = {}
        for tag in preserved_tags:
            try:
                preserved_fields[tag] = original_record_bibrec[tag]
            except KeyError:
                # Best effort: a tag missing from this record is skipped.
                pass

        record_pairs.append((rec_id,
                             matching_result_recids,
                             matching_result_sysnos,
                             preserved_fields))
    return record_pairs
def from_bibrec_to_marc(record, sysno="", options=None):
    """
    This function will convert a BibRec object into textmarc string

    @param record: BibRec structure to convert
    @param sysno: system number to use; when empty, one is taken from a
        newly created sysno generator
    @param options: dictionary describing type of MARC record. Defaults to
        textmarc ({'text-marc': 1, 'aleph-marc': 0}) when omitted or None.

    @return: the record as a textmarc string
    """
    if options is None:
        # Fresh default per call instead of a shared mutable default.
        options = {'text-marc': 1, 'aleph-marc': 0}
    if not sysno:
        sysno_gen = get_sysno_generator()
        sysno = sysno_gen.next()
    # NOTE(review): record is handed over without copying; confirm whether
    # create_marc_record() modifies the structure it receives.
    return create_marc_record(record, sysno, options)
sys.stderr.write("\n Matched records : %d" % (len(match_results[1]), )) sys.stderr.write("\n Ambiguous records : %d" % (len(match_results[2]), )) sys.stderr.write("\n Fuzzy records : %d\n" % (len(match_results[3]), )) sys.stderr.write("=" * 35) sys.stderr.write("\n Total records : %d\n" % (len(records), )) if not noprocess: options = {'text-marc': 1, 'aleph-marc': 0} for record, results in recs_out: if textmarc_output: # FIXME: textmarc output does not print matching results sysno = get_sysno_from_record(record, options) print create_marc_record(record, sysno, options) else: print results print record_xml_output(record) if batch_output: i = 0 options = {'text-marc': 1, 'aleph-marc': 0} outputs = ['new', 'matched', 'ambiguous', 'fuzzy'] for result in match_results: filename = "%s.%s" % (batch_output, outputs[i]) file_fd = open(filename, "w") for record, results in result: out = [] if textmarc_output: # FIXME: textmarc output does not print matching results
if verbose: sys.stderr.write("\n\n Bibmatch report\n") sys.stderr.write("=" * 35) sys.stderr.write("\n New records : %d" % len(match_results[0])) sys.stderr.write("\n Matched records : %d" % len(match_results[1])) sys.stderr.write("\n Ambiguous records : %d" % len(match_results[2])) sys.stderr.write("\n Fuzzy records : %d\n" % len(match_results[3])) sys.stderr.write("=" * 35) sys.stderr.write("\n Total records : %d\n" % len(records)) if not noprocess: options = {'text-marc':1, 'aleph-marc':0} for record in recs_out: if textmarc_output: sysno = get_sysno_from_record(record[0], options) print create_marc_record(record[0], sysno, options) else: print record[3] print record_xml_output(record[0]) if batch_output: i = 0 options = {'text-marc':1, 'aleph-marc':0} for result in match_results: filename = "%s.%i" % (batch_output, i) file_fd = open(filename,"w") for record in result: out = "" if textmarc_output: sysno = get_sysno_from_record(record[0], options) out += create_marc_record(record[0], sysno, options)
sys.stderr.write("\n\n Bibmatch report\n") sys.stderr.write("=" * 35) sys.stderr.write("\n New records : %d" % (len(match_results[0]),)) sys.stderr.write("\n Matched records : %d" % (len(match_results[1]),)) sys.stderr.write("\n Ambiguous records : %d" % (len(match_results[2]),)) sys.stderr.write("\n Fuzzy records : %d\n" % (len(match_results[3]),)) sys.stderr.write("=" * 35) sys.stderr.write("\n Total records : %d\n" % (len(records),)) if not noprocess: options = {'text-marc':1, 'aleph-marc':0} for record, results in recs_out: if textmarc_output: # FIXME: textmarc output does not print matching results sysno = get_sysno_from_record(record, options) print create_marc_record(record, sysno, options) else: print results print record_xml_output(record) if batch_output: i = 0 options = {'text-marc':1, 'aleph-marc':0} outputs = ['new', 'matched', 'ambiguous', 'fuzzy'] for result in match_results: filename = "%s.%s" % (batch_output, outputs[i]) file_fd = open(filename, "w") for record, results in result: out = [] if textmarc_output: # FIXME: textmarc output does not print matching results