def generateMARCXML(MARC21input, MARCXMLoutput):
    """Write a MARCXML file for the given MARC21 binary file.

    Every record read from ``MARC21input`` is written into one MARCXML
    collection stored at ``MARCXMLoutput``.

    Args:
        MARC21input: Path of the binary MARC21 file to read.
        MARCXMLoutput: Path of the MARCXML file to create; it is opened in
            ``'wb'`` mode, so an existing file is overwritten.
    """
    with open(MARC21input, 'rb') as marc_file:
        # One writer for the whole run: re-opening the output in 'wb' mode for
        # every record truncated the file, so only the last record survived.
        writer = pymarc.XMLWriter(open(MARCXMLoutput, 'wb'))
        try:
            for record in pymarc.MARCReader(marc_file):
                writer.write(record)
        finally:
            # Closing the writer finishes the collection and releases the
            # output handle even when reading or writing fails.
            writer.close()
Beispiel #2
0
 def test_writing_1_record(self):
     """A record with a 100 and a 245 field is written as one XML record."""
     expected = r"""
         <?xml version="1.0" encoding="UTF-8"?>
         <collection xmlns="http://www.loc.gov/MARC21/slim">
         <record>
         <leader>          22        4500</leader>
         <datafield ind1="0" ind2="0" tag="100">
         <subfield code="a">me</subfield>
         </datafield>
         <datafield ind1="0" ind2="0" tag="245">
         <subfield code="a">Foo /</subfield>
         <subfield code="c">by me.</subfield>
         </datafield>
         </record>
         </collection>
     """
     # Drop the leading newline, remove the common indent and join all lines:
     # the comparison below expects the XML without any line breaks.
     expected = textwrap.dedent(expected[1:]).replace("\n", "").encode()
     file_handle = BytesIO()
     try:
         writer = pymarc.XMLWriter(file_handle)
         record = pymarc.Record()
         record.add_field(pymarc.Field("100", ["0", "0"], ["a", "me"]))
         record.add_field(
             pymarc.Field("245", ["0", "0"], ["a", "Foo /", "c", "by me."]))
         writer.write(record)
         # Keep the buffer open so its content can still be inspected.
         writer.close(close_fh=False)
         # assertEqual replaces the deprecated assertEquals alias, which was
         # removed in Python 3.12.
         self.assertEqual(file_handle.getvalue(), expected)
     finally:
         file_handle.close()
Beispiel #3
0
 def test_writing_1_record(self):
     """A record with a 100 and a 245 field is written as one XML record."""
     expected = r"""
         <?xml version="1.0" encoding="UTF-8"?>
         <collection xmlns="http://www.loc.gov/MARC21/slim">
         <record>
         <leader>          22        4500</leader>
         <datafield ind1="0" ind2="0" tag="100">
         <subfield code="a">me</subfield>
         </datafield>
         <datafield ind1="0" ind2="0" tag="245">
         <subfield code="a">Foo /</subfield>
         <subfield code="c">by me.</subfield>
         </datafield>
         </record>
         </collection>
     """
     # Drop the leading newline, remove the common indent and join all lines:
     # the comparison below expects the XML without any line breaks.
     expected = textwrap.dedent(expected[1:]).replace('\n', '')
     # Only encode when str and the binary type differ; the writer output
     # read back from the BytesIO buffer is compared against this value.
     if str != binary_type:
         expected = expected.encode()
     file_handle = BytesIO()
     try:
         writer = pymarc.XMLWriter(file_handle)
         record = pymarc.Record()
         record.add_field(pymarc.Field('100', ['0', '0'], ['a', u('me')]))
         record.add_field(
             pymarc.Field(
                 '245', ['0', '0'],
                 ['a', u('Foo /'), 'c', u('by me.')]))
         writer.write(record)
         # Keep the buffer open so its content can still be inspected.
         writer.close(close_fh=False)
         # assertEqual replaces the deprecated assertEquals alias, which was
         # removed in Python 3.12.
         self.assertEqual(file_handle.getvalue(), expected)
     finally:
         file_handle.close()
Beispiel #4
0
 def test_close_true(self):
     """Closing the writer without arguments also closes its file handle."""
     stream = BytesIO()
     self.assertFalse(stream.closed, msg="The file handle should be open")

     # Wrapping the stream in a writer must not close it.
     xml_writer = pymarc.XMLWriter(stream)
     self.assertFalse(
         stream.closed,
         msg="The file handle should still be open",
     )

     xml_writer.close()
     self.assertTrue(
         stream.closed,
         msg="The file handle should close when the writer closes",
     )
Beispiel #5
0
def write_collection(records, write_location):
    '''Write an array/generator of records into an XML collection file.

    Args:
        records: Iterable of ``pymarc.record.Record`` objects.
        write_location: Path of the XML file to create; it is opened in
            ``'wb'`` mode, so an existing file is overwritten.

    Raises:
        TypeError: If an element of ``records`` is not a pymarc record.
            ``TypeError`` derives from ``Exception``, so callers catching the
            generic exception raised previously keep working.
    '''
    writer = pymarc.XMLWriter(open(write_location, 'wb'))
    try:
        for record in records:
            # isinstance (rather than an exact type comparison) also accepts
            # subclasses of Record.
            if not isinstance(record, pymarc.record.Record):
                raise TypeError(
                    'attempted to pass non-record object into record writer')
            writer.write(record)
    finally:
        # Always close the writer so the file handle is released even when a
        # non-record object aborts the loop.
        writer.close()
Beispiel #6
0
 def test_close_false(self):
     """Closing the writer with close_fh=False leaves the file handle open."""
     stream = BytesIO()
     self.assertFalse(stream.closed, msg="The file handle should be open")

     # Wrapping the stream in a writer must not close it.
     xml_writer = pymarc.XMLWriter(stream)
     self.assertFalse(
         stream.closed,
         msg="The file handle should still be open",
     )

     xml_writer.close(close_fh=False)
     self.assertFalse(
         stream.closed,
         msg="The file handle should NOT close when the writer closes",
     )
Beispiel #7
0
 def test_writing_0_records(self):
     """A writer closed without any record still emits an empty collection."""
     expected = r"""
         <?xml version="1.0" encoding="UTF-8"?>
         <collection xmlns="http://www.loc.gov/MARC21/slim">
         </collection>
     """
     # Drop the leading newline, remove the common indent and join all lines:
     # the comparison below expects the XML without any line breaks.
     expected = textwrap.dedent(expected[1:]).replace("\n", "").encode()
     file_handle = BytesIO()
     try:
         writer = pymarc.XMLWriter(file_handle)
         # Keep the buffer open so its content can still be inspected.
         writer.close(close_fh=False)
         # assertEqual replaces the deprecated assertEquals alias, which was
         # removed in Python 3.12.
         self.assertEqual(file_handle.getvalue(), expected)
     finally:
         file_handle.close()
def main():
    '''Parse args pointing to record xml paths, specify the output path, and apply "pull_arabic".

    Command-line arguments:
        input_directory: path to the directory containing records.
        -f / --sub_directory_filter: optional subdirectory selector that is
            passed on to ``get_xml_paths``.
        -n / --name: optional source name; defaults to the last component of
            ``input_directory``.

    Every record of every XML path found is handed to ``pull_arabic`` together
    with one shared writer targeting ``data/arabic_records/{name}.xml``.
    '''
    # Module-level counters; presumably maintained by pull_arabic -- confirm.
    global counter008
    global counter880

    logger = logging.getLogger(__name__)
    logger.info(
        'collecting arabic records and extracting parallel Arabic/Romanized representations'
    )

    parser = argparse.ArgumentParser()

    parser.add_argument('input_directory',
                        help='path to directory containing records')
    parser.add_argument(
        '-f',
        '--sub_directory_filter',
        help=
        'select a particular subdirectory inside a complex directory structure'
    )
    parser.add_argument(
        '-n',
        '--name',
        help='optional source name, otherwise take directory name')

    args = parser.parse_args()

    # Strip trailing slashes first: "some/dir/" used to yield an empty name
    # and therefore an output file called ".xml".
    name = args.name or args.input_directory.rstrip('/').split('/')[-1]
    logger.info(f'source: {name}')

    record_paths = get_xml_paths(args.input_directory,
                                 args.sub_directory_filter)

    writer = pymarc.XMLWriter(open(f'data/arabic_records/{name}.xml', 'wb'))
    try:
        # The callback does not depend on the path, so build it only once.
        handle_record = lambda record: pull_arabic(record, writer=writer)
        for path in record_paths:
            pymarc.map_xml(handle_record, path)
    finally:
        # Release the output file even when parsing one of the inputs fails.
        writer.close()

    logger.info(
        f'# of Arabic records ("ara" in language field 008): {counter008}')
def write_to_file(reclist, filename="output", form="bin"):
    """Write records to a file in the requested format.

    Args:
        reclist: Iterable of records. The "bin" format calls ``as_marc()`` on
            each record; the other formats hand the records to a pymarc writer.
        filename: Output path without extension; ".mrc", ".xml" or ".txt" is
            appended depending on ``form``.
        form: "bin" for binary MARC, "xml" for MARCXML or "text" for the
            pymarc text representation. Any other value writes nothing.
    """
    if form == "bin":
        filename = filename + ".mrc"
        with open(filename, "wb") as out:
            for record in reclist:
                out.write(record.as_marc())
    elif form == "xml":
        filename = filename + ".xml"
        writer = pymarc.XMLWriter(open(filename, "wb"))
        try:
            for record in reclist:
                writer.write(record)
        finally:
            # Close the writer (and its file handle) even when a write fails;
            # previously an exception leaked the open file.
            writer.close()
    elif form == "text":
        filename = filename + ".txt"
        # The with-block owns the file handle, so the writer is not closed
        # separately here.
        with open(filename, "wt", encoding="utf-8") as out:
            writer = pymarc.TextWriter(out)
            for record in reclist:
                writer.write(record)
Beispiel #10
0
 def test_writing_empty_record(self):
     """A record without fields is written with only its leader element."""
     expected = r"""
         <?xml version="1.0" encoding="UTF-8"?>
         <collection xmlns="http://www.loc.gov/MARC21/slim">
         <record>
         <leader>          22        4500</leader>
         </record>
         </collection>
     """
     # Drop the leading newline, remove the common indent and join all lines:
     # the comparison below expects the XML without any line breaks.
     expected = textwrap.dedent(expected[1:]).replace('\n', '')
     # Only encode when str and the binary type differ; the writer output
     # read back from the BytesIO buffer is compared against this value.
     if str != binary_type:
         expected = expected.encode()
     file_handle = BytesIO()
     try:
         writer = pymarc.XMLWriter(file_handle)
         record = pymarc.Record()
         writer.write(record)
         # Keep the buffer open so its content can still be inspected.
         writer.close(close_fh=False)
         # assertEqual replaces the deprecated assertEquals alias, which was
         # removed in Python 3.12.
         self.assertEqual(file_handle.getvalue(), expected)
     finally:
         file_handle.close()
Beispiel #11
0
 def write_marcxml(self, record, filename):
     """Serialize a single record as MARCXML into the file at *filename*.

     The target is opened in binary write mode, so an existing file is
     overwritten.
     """
     with open(filename, 'wb') as xml_file:
         xml_writer = pymarc.XMLWriter(xml_file)
         xml_writer.write(record)
         # Closing the writer completes the output before the with-block exits.
         xml_writer.close()
Beispiel #12
0
	for root, dirs, files in os.walk(rootdir):
		for name in files:
			if waiting:
				print(name,restart_file)
				if name == restart_file:
					waiting = False

			if not waiting:		
				if name[0] != '.':
					start_time = datetime.datetime.now().time()

					print(read_format)
					# Convert JSON files into MARCXML
					if read_format == 'json':
						print("Starting output writer")
						output_writer = pymarc.XMLWriter(open(results_folder_name + SLASH + name + '.xml','wb'))
						print("Opening file")
						readFile(rootdir + SLASH + name,output_writer)
						output_writer.close()

					print("Opening output file")
					# Convert MARCXML into BIBFRAME
					if read_format == 'json':
						bibf_output_file = bibf_results_folder_name + SLASH + 'BIBF_' + name + '.xml'
						xml_input_file = results_folder_name + SLASH + name + '.xml'
					else:
						bibf_output_file = bibf_results_folder_name + SLASH + 'BIBF_' + name
						xml_input_file = root + SLASH + name
						print(xml_input_file)

					bibf_output = open(bibf_output_file,'w')