Exemple #1
0
def build_record(counts, fields, recid=None, status_code=0):
    """Assemble a BibRecord holding the extracted references plus job stats.

       The record is created for the given record-id, the supplied reference
       fields are stored under its '999' key, and one extra field (tag
       CFG_REFEXTRACT_TAG_ID_EXTRACTION_STATS) is appended carrying three
       subfields:
        * the extraction statistics, formatted as
          status-reportnum-title-author-url-doi-misc
        * the time of extraction, formatted as "YYYY-MM-DD HH:MM:SS"
        * CFG_REFEXTRACT_VERSION

       @param counts: mapping of recognition counters. It is indexed with the
        keys 'reportnum', 'title', 'auth_group', 'url', 'doi' and 'misc'; a
        missing key raises KeyError.
       @param fields: the reference fields, assigned as-is to record['999'].
       @param recid: the record-id of the given document, handed to the
        BibRecord constructor.
       @param status_code: (integer) the status of reference-extraction for
        the given record: 0 = no error; 1 = error. It becomes the first
        component of the statistics string.
       @return: the populated BibRecord.
    """
    rec = BibRecord(recid=recid)
    rec['999'] = fields
    stats_field = rec.add_field(CFG_REFEXTRACT_TAG_ID_EXTRACTION_STATS)

    # Gather the counters first, then render them with a single template.
    stats_values = {
        'status': status_code,
        'reportnum': counts['reportnum'],
        'title': counts['title'],
        'author': counts['auth_group'],
        'url': counts['url'],
        'doi': counts['doi'],
        'misc': counts['misc'],
    }
    stats_template = "%(status)s-%(reportnum)s-%(title)s-%(author)s-%(url)s-%(doi)s-%(misc)s"
    summary = stats_template % stats_values

    stats_field.add_subfield(CFG_REFEXTRACT_SUBFIELD_EXTRACTION_STATS, summary)

    # The timestamp is taken at the moment its subfield is written.
    stats_field.add_subfield(
        CFG_REFEXTRACT_SUBFIELD_EXTRACTION_TIME,
        datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    )
    stats_field.add_subfield(
        CFG_REFEXTRACT_SUBFIELD_EXTRACTION_VERSION,
        CFG_REFEXTRACT_VERSION,
    )

    return rec
    def test_simple(self):
        """Check that convert_journals rewrites journal names in 773__p and 999C5s.

        A record carrying 'Test Journal Name' in 100__a, 773__p and (with
        volume/page suffix) 999C5s is passed through convert_journals with
        self.kb. The expected result has 'Converted' in 773__p and 999C5s,
        while 100__a keeps its original value.
        """
        from invenio.legacy.docextract.record import BibRecord
        from invenio.legacy.docextract.convert_journals import convert_journals

        source_subfields = (
            ('100__a', 'Test Journal Name'),
            ('773__p', 'Test Journal Name'),
            ('999C5s', 'Test Journal Name,100,10'),
        )
        expected_subfields = (
            ('100__a', 'Test Journal Name'),
            ('773__p', 'Converted'),
            ('999C5s', 'Converted,100,10'),
        )

        source = BibRecord()
        for tag, value in source_subfields:
            source.add_subfield(tag, value)
        actual = convert_journals(self.kb, source)

        wanted = BibRecord()
        for tag, value in expected_subfields:
            wanted.add_subfield(tag, value)

        self.assertEqual(wanted, actual)
    def test_simple(self):
        """convert_journals should translate the journal name but not the author.

        Input record: 100__a, 773__p and 999C5s all mention
        'Test Journal Name'. After conversion with self.kb, 773__p and the
        journal part of 999C5s are expected to read 'Converted'; 100__a is
        expected to be untouched.
        """
        from invenio.legacy.docextract.record import BibRecord
        from invenio.legacy.docextract.convert_journals import convert_journals

        journal = 'Test Journal Name'

        # Record handed to the converter.
        input_rec = BibRecord()
        input_rec.add_subfield('100__a', journal)
        input_rec.add_subfield('773__p', journal)
        input_rec.add_subfield('999C5s', 'Test Journal Name,100,10')
        result_rec = convert_journals(self.kb, input_rec)

        # Record the converter is expected to produce.
        reference_rec = BibRecord()
        reference_rec.add_subfield('100__a', journal)
        reference_rec.add_subfield('773__p', 'Converted')
        reference_rec.add_subfield('999C5s', 'Converted,100,10')

        self.assertEqual(reference_rec, result_rec)