def parse_reference_line(ref_line, kbs, bad_titles_count=None):
    """Parse one reference line into citation elements.

    Args:
        ref_line: a string representing a single reference bullet.
        kbs: knowledge bases, forwarded to the tagging step and to the
            element transformations that need them.
        bad_titles_count: optional dict forwarded to ``tag_reference_line``.
            When omitted, a fresh dict is created for this call so that
            separate calls never share state through the default value.

    Returns:
        A 4-tuple ``(splitted_citations, line_marker, counts,
        bad_titles_count)``:
        the citations left after splitting and filtering the line, the
        line marker as returned by ``parse_tagged_reference_line``, the
        stats it collected for the line, and the ``bad_titles_count``
        value returned by ``tag_reference_line``.
    """
    # A ``{}`` default would be created once at definition time and shared
    # by every call; use a None sentinel and build a new dict per call.
    if bad_titles_count is None:
        bad_titles_count = {}

    # Strip the 'marker' (e.g. [1]) from this reference line:
    (line_marker, ref_line) = remove_reference_line_marker(ref_line)
    # Find DOI sections in citation
    (ref_line, identified_dois) = identify_and_tag_DOI(ref_line)
    # Identify and replace URLs in the line:
    (ref_line, identified_urls) = identify_and_tag_URLs(ref_line)
    # Tag <cds.JOURNAL>, etc.
    tagged_line, bad_titles_count = tag_reference_line(ref_line,
                                                       kbs,
                                                       bad_titles_count)

    # Debug print tagging (authors, titles, volumes, etc.)
    write_message('* tags %r' % tagged_line, verbose=9)

    # Using the recorded information, create a MARC XML representation
    # of the rebuilt line:
    # At the same time, get stats of citations found in the reference line
    # (titles, urls, etc):
    citation_elements, line_marker, counts = \
        parse_tagged_reference_line(line_marker,
                                    tagged_line,
                                    identified_dois,
                                    identified_urls)

    # Transformations on elements (applied in this fixed order)
    citation_elements = split_volume_from_journal(citation_elements)
    citation_elements = format_volume(citation_elements)
    citation_elements = handle_special_journals(citation_elements, kbs)
    citation_elements = format_report_number(citation_elements)
    citation_elements = format_author_ed(citation_elements)
    citation_elements = look_for_books(citation_elements, kbs)
    citation_elements = format_hep(citation_elements)
    citation_elements = remove_b_for_nucl_phys(citation_elements)
    citation_elements = mangle_volume(citation_elements)

    # Split the reference in multiple ones if needed
    splitted_citations = split_citations(citation_elements)

    # Remove references with only misc text
    splitted_citations = remove_invalid_references(splitted_citations)

    # Find year
    splitted_citations = add_year_elements(splitted_citations)

    # For debugging purposes
    print_citations(splitted_citations, line_marker)

    return splitted_citations, line_marker, counts, bad_titles_count
def extract_journal_reference(line):
    """Pull the journal element out of a raw journal reference string.

    Parameter:
        line - field 773__x, the raw journal ref

    Return:
        the first parsed element whose "type" is "JOURNAL", or None when
        tagging yields None or no such element is present.
    """
    # NOTE(review): this name is defined a second time further down in the
    # file; the later definition is the one in effect after import.
    tagged = tag_reference_line(line, get_kbs(), {})[0]
    if tagged is None:
        return None

    parsed, _marker, _stats = parse_tagged_reference_line("", tagged, [], [])

    # Stop at the first JOURNAL element; fall back to None if none exists.
    return next(
        (candidate for candidate in parsed if candidate["type"] == "JOURNAL"),
        None,
    )
def extract_journal_reference(line):
    """Extract the journal part of a MARC field 773 reference.

    Parameter:
        line - field 773__x, the raw journal ref

    Return:
        the first element of type 'JOURNAL' found in the parsed line, or
        None if the line could not be tagged or holds no journal element.
    """
    # NOTE(review): an earlier definition with the same name precedes this
    # one in the file; this later definition overrides it.
    tagging_result = tag_reference_line(line, get_kbs(), {})
    tagged_text = tagging_result[0]
    if tagged_text is None:
        return None

    # Only the parsed elements are needed; marker and stats are discarded.
    found_elements, _unused_marker, _unused_stats = \
        parse_tagged_reference_line('', tagged_text, [], [])

    for item in found_elements:
        if item['type'] != 'JOURNAL':
            continue
        return item

    return None