def break_up_record( start_record=0, end_record=0 ):
    """ Splits big marc file into smaller files.
        Copies records `start_record` through `end_record` (1-based, inclusive)
        from settings.INPUT_FILEPATH to settings.OUTPUT_FILEPATH.
        This can successfully re-write the whole errant `rec_19.mrc` file. """
    log.debug( 'start_record, `{st}`; end_record, `{en}`'.format( st=start_record, en=end_record ) )
    BIG_MARC_FILEPATH = settings.INPUT_FILEPATH
    SMALLER_OUTPUT_FILEPATH = settings.OUTPUT_FILEPATH
    # (fixed previously-unbalanced backticks in the two log messages below)
    log.debug( 'processing file, ```{}```'.format(BIG_MARC_FILEPATH) )
    log.debug( 'output file, ```{}```'.format(SMALLER_OUTPUT_FILEPATH) )

    start_time = datetime.datetime.now()
    count = 0

    with open( BIG_MARC_FILEPATH, 'rb' ) as input_fh:
        # utf8_handling='ignore' skips badly-encoded bytes instead of raising
        reader = pymarc.MARCReader( input_fh, to_unicode=True, utf8_handling='ignore' )  # works!

        with open( SMALLER_OUTPUT_FILEPATH, 'wb' ) as output_fh:
            writer = pymarc.MARCWriter( output_fh )

            for record in reader:
                count += 1
                if count % 10000 == 0:
                    print( '`{}` records processed'.format(count) )
                if count >= start_record:
                    writer.write( record )
                    if count >= end_record:  # with the 0/0 defaults this stops after the first record
                        break

    end_time = datetime.datetime.now()
    log.debug( 'records processed, `{}`'.format(count) )
    log.debug( 'time_taken, `{}`'.format(end_time-start_time) )
Beispiel #2
0
def file_create(record_type, parametres):
    """
    Création du fichier en sortie : XML, iso2709 ou tabulé.

    Creates the output file for `record_type`: tab-separated text
    (format_file == 3, with a header row), XML (format_file == 2, honouring
    an optional 'xml_encoding_option'), or binary iso2709 (anything else).
    Returns the open file handle (a MARCWriter for iso2709); the caller is
    responsible for closing it.
    """
    # (removed the dead `file = object` placeholder -- every branch assigns)
    id_filename = "-".join([parametres["outputID"], record_type])
    if parametres["format_file"] == 3:
        filename = id_filename + ".txt"
        file = open(filename, "w", encoding="utf-8")
        headers = ["Numéro de notice", "Type de notice"
                   ] + parametres["select_fields"].split(";")
        funcs.line2report(headers, file, display=False)
    elif parametres["format_file"] == 2:
        # honour an optional caller-supplied encoding; default to utf-8
        output_encoding = parametres.get("xml_encoding_option", "utf-8")
        filename = id_filename + ".xml"

        file = open(filename, "w", encoding=output_encoding)

        file.write(f"<?xml version='1.0' encoding='{output_encoding}'?>\n")
        file.write("<collection>")
    else:
        filename = id_filename + ".iso2709"
        file = mc.MARCWriter(open(filename, "wb"))
    return file
Beispiel #3
0
def break_up_record( start_record=0, end_record=0 ):
    """ Splits big marc file into smaller files.
        Reads the file named by the PYMARC_EXP__BIG_MARC_FILEPATH env-var and
        writes records `start_record` through `end_record` to the file named
        by PYMARC_EXP__SMALLER_OUTPUT_MARC_FILEPATH, logging (but surviving)
        per-record read errors.
        This can successfully re-write the whole errant `rec_19.mrc` file. """
    log.debug( 'start_record, `{st}`; end_record, `{en}`'.format( st=start_record, en=end_record ) )
    BIG_MARC_FILEPATH = os.environ['PYMARC_EXP__BIG_MARC_FILEPATH']
    SMALLER_OUTPUT_FILEPATH = os.environ['PYMARC_EXP__SMALLER_OUTPUT_MARC_FILEPATH']
    log.debug( 'processing file, ```{}```'.format(BIG_MARC_FILEPATH) )
    log.debug( 'output file, ```{}```'.format(SMALLER_OUTPUT_FILEPATH) )

    start_time = datetime.datetime.now()
    count = 0
    last_record = 'init'

    with open( BIG_MARC_FILEPATH, 'rb' ) as input_fh:
        reader = pymarc.MARCReader( input_fh, to_unicode=True, force_utf8=True, utf8_handling='ignore' )

        with open( SMALLER_OUTPUT_FILEPATH, 'wb' ) as output_fh:
            writer = pymarc.MARCWriter( output_fh )

            processing_flag = True
            while processing_flag is True:
                try:
                    record = next(reader)
                except StopIteration:
                    # input exhausted -- previously this fell into the generic
                    # handler below and the loop spun until count hit end_record
                    log.debug( 'input exhausted at count, `{}`'.format(count) )
                    break
                except Exception as e:
                    record = None
                    # repr(e) -- the old `unicode(repr(e))` was a NameError under python3
                    log.error( 'exception looping through records; ```{}```'.format( repr(e) ) )
                    log.error( 'e info, ```{}```'.format(e) )
                    e_type, e_value, e_traceback = sys.exc_info()  # <http://stackoverflow.com/a/15890953>
                    log.error( 'e_type, ```{}``'.format(e_type) )
                    log.error( 'e_value, ```{}``'.format(e_value) )
                    log.error( 'e_traceback, ```{}```'.format(e_traceback) )
                    log.error( 'traceback info, ```{}```'.format( traceback.format_exc() ) )
                    log.error( 'current count, `{}`'.format(count) )

                last_record = record
                count += 1
                if count % 10000 == 0:
                    print( '`{}` records processed'.format(count) )
                if count >= start_record:
                    log.debug( 'count, `{}`'.format(count) )
                    if record:
                        log.debug( 'count is, `{cnt}`, so will write record.as_json()[0:100], ```{rcd}```'.format( cnt=count, rcd=record.as_json()[0:100] ) )
                        writer.write( record )
                    if count >= end_record:
                        processing_flag = False

    end_time = datetime.datetime.now()
    log.debug( 'records processed, `{}`'.format(count) )
    log.debug( 'time_taken, `{}`'.format(end_time-start_time) )
Beispiel #4
0
def get_resource():
    """Serialize each row's `json` field as MARC to <out-path>/marc.dat,
    yielding the rows through unchanged.

    Consumes only the next iterable from the module-level `resources`;
    paths come from the module-level `parameters` dict.
    """
    os.makedirs(parameters["out-path"], exist_ok=True)
    dat_file = open(parameters["out-path"] + "/{}.dat".format('marc'), 'wb')
    marc_writer = pymarc.MARCWriter(dat_file)
    try:
        for row in next(resources):
            # normalize through json, then wrap in a list for JSONReader
            json_records_string = '[' + json.dumps(json.loads(row["json"])) + ']'
            for record in pymarc.JSONReader(json_records_string):
                marc_writer.write(record)
            yield row
    finally:
        # previously the writer (and its file handle) leaked
        marc_writer.close()
Beispiel #5
0
 def test_close_true(self):
     """Closing the writer with default close_fh must close the handle too."""
     fh = BytesIO()
     self.assertFalse(fh.closed, "The file handle should be open")
     marc_writer = pymarc.MARCWriter(fh)
     self.assertFalse(fh.closed, "The file handle should still be open")
     marc_writer.close()
     self.assertTrue(fh.closed,
                     "The file handle should close when the writer closes")
Beispiel #6
0
def update_marc_file(infile, outfile, cdlpath):
    """add cdlpath info to all the MARC records in a file

    Python 2 only (print statements, ``file()``, ``StringIO.StringIO``).
    Appends a 941 field (indicators '0','1', subfield $a = cdlpath) to every
    record of `infile`, buffering the rewritten records in memory and writing
    them to `outfile` in one pass at the end.
    """
    # open MARC file for reading
    reader = pymarc.MARCReader(
      file(infile),
      to_unicode=True, 
      force_utf8=True, 
      utf8_handling='ignore'
    )

    # keep the new file in memory
    string = StringIO.StringIO()
    writer = pymarc.MARCWriter(string)

    # main look through all the records
    count = 0
    for record in reader:
        count += 1
        # create new MARC field and add it to the record
        field = pymarc.Field(
            tag = '941', 
            indicators = ['0','1'],
            subfields = [ 'a', cdlpath ]
        )
        record.add_field(field)

        try:				# try to write the record
            writer.write(record)
        except UnicodeDecodeError as inst:   # catch Unicode errors
            # record is reported and SKIPPED -- it never reaches the output
            title = ''
            recordId = ''
            if record['245'] is not None:
                title = record['245']
            if record['001'] is not None:
                recordId = record['001']
            print "--- error with record %s %s" % (count, recordId) 
            print "leader9 = %s" % record.leader[9]
            print title
            print inst
            # set leader9 to 'a' (indicates unicode) and try again
            ## this didn't work
#           try:
#               l = list(record.leader)
#               l[9] = 'a' # UTF-8 encoding
#               record.leader = "".join(l)
#               writer.write(record)
#           except UnicodeDecodeError as inst2:
#               print "tried again and failed again"
#               print "leader9 = %s" % record.leader[9]
#               print inst2

    # NOTE(review): output is produced by redirecting sys.stdout to the output
    # file and print-ing the whole buffer; sys.stdout is never restored and
    # `out` is never closed -- confirm this is intentional.
    out  = open(outfile, mode="w")
    sys.stdout = out
    print string.getvalue()
    string.close()
Beispiel #7
0
 def test_close_false(self):
     """Passing close_fh=False to close() must leave the handle open."""
     fh = BytesIO()
     self.assertFalse(fh.closed, "The file handle should be open")
     marc_writer = pymarc.MARCWriter(fh)
     self.assertFalse(fh.closed, "The file handle should still be open")
     marc_writer.close(close_fh=False)
     self.assertFalse(
         fh.closed,
         "The file handle should NOT close when the writer closes",
     )
Beispiel #8
0
def get_resource(resource):
    """Stream `resource` rows through, writing each row's MARC serialization
    to a per-query .dat file.

    A new output file is started whenever `first_ccl_query` changes; the
    previous file is closed.  Rows are yielded unchanged.
    """
    # initialize marc_writer so a row whose first_ccl_query equals the initial
    # sentinel trips the assert below instead of raising NameError
    last_ccl_query, dat_file, marc_writer = None, None, None
    try:
        for row in resource:
            if row['first_ccl_query'] != last_ccl_query:
                if dat_file:
                    dat_file.close()
                dat_file = open(get_dat_file_name(row), 'wb')
                marc_writer = pymarc.MARCWriter(dat_file)
                last_ccl_query = row['first_ccl_query']
            assert marc_writer
            for record in pymarc.JSONReader(json.dumps([json.loads(row['json'])])):
                marc_writer.write(record)
            yield row
    finally:
        # previously the final dat_file was never closed
        if dat_file:
            dat_file.close()
Beispiel #9
0
def convert_xml_to_marc(hostenv):
    """Convert MARC XML to MARC formatted .mrc file

    For every ``*xml`` file in the configured marc_dir, parses the MARCXML
    and writes a binary ``*-marc.mrc`` file alongside it.
    """
    marc_dir = app_configs[hostenv]['marc_dir']
    for marcfilename in os.listdir(marc_dir):
        if marcfilename.endswith('xml'):
            newfilename = re.sub("-orig.xml", "-marc.mrc", marcfilename)
            logging.info("Converting to MARC %s", marcfilename)
            # os.path.join fixes the original inconsistency: the writer path
            # added "/" while the parse path concatenated with no separator
            marc_recs_out = pymarc.MARCWriter(
                open(os.path.join(marc_dir, newfilename), 'wb'))
            marc_xml_array = pymarc.parse_xml_to_array(
                os.path.join(marc_dir, marcfilename))
            for rec in marc_xml_array:
                marc_recs_out.write(rec)
            marc_recs_out.close()
Beispiel #10
0
def file_create(record_type, parametres):
    """Create the output file for `record_type`.

    format_file == 2 produces an XML file opened with the header and the
    namespace declarations from `main.ns`; any other value produces a binary
    iso2709 MARCWriter.  Returns the open handle/writer; the caller closes it.
    """
    # (removed the dead `file = object` placeholder -- both branches assign)
    id_filename = "-".join([parametres["outputID"], record_type])
    if parametres["format_file"] == 2:
        filename = id_filename + ".xml"
        file = open(filename, "w", encoding="utf-8")
        file.write("<?xml version='1.0'?>\n")
        file.write("<mxc:collection ")
        for key in main.ns:
            file.write(' xmlns:' + key + '="' + main.ns[key] + '"')
        file.write(">\n")
    else:
        filename = id_filename + ".iso2709"
        file = mc.MARCWriter(open(filename, "wb"))
    return file
Beispiel #11
0
    def test_write(self):
        """Round-trip one record: write it to disk, read it back, clean up."""
        marc_writer = pymarc.MARCWriter(file('test/writer-test.dat', 'w'))
        rec = pymarc.Record()
        rec.add_field(pymarc.Field('245', ['0', '0'], ['a', 'foo']))
        marc_writer.write(rec)
        marc_writer.close()

        # read it back in
        marc_reader = pymarc.MARCReader(file('test/writer-test.dat'))
        rec = marc_reader.next()

        # remove it
        os.remove('test/writer-test.dat')
Beispiel #12
0
    def test_copy_utf8(self):
        """Copy a record parsed from utf8 XML into a fresh unicode record and write it."""
        marc_writer = pymarc.MARCWriter(open('test/write-utf8-test.dat', 'wb'))
        copied = pymarc.Record(to_unicode=True, force_utf8=True)

        def process_xml(record):
            # mirror the parsed record field-by-field into `copied`
            copied.leader = record.leader
            for field in record.get_fields():
                copied.add_field(field)

        pymarc.map_xml(process_xml, 'test/utf8.xml')

        try:
            marc_writer.write(copied)
            marc_writer.close()
        finally:
            # always clean up the scratch file
            os.remove('test/write-utf8-test.dat')
Beispiel #13
0
    def test_write(self):
        """Write a record off to a file."""
        out_handle = open("test/writer-test.dat", "wb")
        marc_writer = pymarc.MARCWriter(out_handle)
        rec = pymarc.Record()
        rec.add_field(pymarc.Field("245", ["0", "0"], ["a", "foo"]))
        marc_writer.write(rec)
        marc_writer.close()
        self.assertTrue(out_handle.closed,
                        "The file handle should close when the writer closes")

        # read it back in
        marc_reader = pymarc.MARCReader(open("test/writer-test.dat", "rb"))
        next(marc_reader)
        marc_reader.close()

        # remove it
        os.remove("test/writer-test.dat")
Beispiel #14
0
    def test_write(self):
        """Write one record, confirm the writer closes its handle, read it back."""
        out_handle = open('test/writer-test.dat', 'wb')
        marc_writer = pymarc.MARCWriter(out_handle)
        rec = pymarc.Record()
        rec.add_field(pymarc.Field('245', ['0', '0'], ['a', 'foo']))
        marc_writer.write(rec)
        marc_writer.close()
        self.assertTrue(out_handle.closed,
                        'The file handle should close when the writer closes')

        # read it back in
        marc_reader = pymarc.MARCReader(open('test/writer-test.dat', 'rb'))
        next(marc_reader)
        marc_reader.close()

        # remove it
        os.remove('test/writer-test.dat')
Beispiel #15
0
def imslp_tarball_to_marc(tarball, outputfile=None, legacy_mapping=None, max_failures=30):
    """
    Convert an IMSLP tarball to MARC binary output file without extracting it.
    If outputfile is not given, write to a temporary location.

    Returns the location of the resulting MARC file.

    A maximum number of failed conversions can be specified with `max_failures`,
    as of 2018-04-25, there were 30 records w/o title.

    Raises RuntimeError when more than `max_failures` conversions fail.
    """
    if outputfile is None:
        _, outputfile = tempfile.mkstemp(prefix="siskin-")

    stats = collections.Counter()

    with open(outputfile, "wb") as output:
        writer = pymarc.MARCWriter(output)
        with tarfile.open(tarball) as tar:
            for member in tar.getmembers():
                fobj = tar.extractfile(member)
                if fobj is None:
                    # extractfile returns None for directories and special
                    # members; previously this crashed on fobj.read()
                    continue
                try:
                    record = imslp_xml_to_marc(fobj.read(), legacy_mapping=legacy_mapping)
                    writer.write(record)
                except ValueError as exc:
                    logger.warn("conversion failed: %s", exc)
                    stats["failed"] += 1
                finally:
                    fobj.close()
                    stats["processed"] += 1

        writer.close()

        if stats["failed"] > max_failures:
            logger.warn("%d records failed, only %d failures allowed", stats["failed"], max_failures)
            # was RuntimeError("... %d ...", max_failures): the args tuple was
            # never interpolated into the message
            raise RuntimeError("more than %d records failed" % max_failures)

        logger.debug("%d/%d records failed/processed", stats["failed"], stats["processed"])

    return outputfile
Beispiel #16
0
    def run(self):
        """
        Iterate over all zipfiles in reverse, convert and concat binary marc
        into tempfile, then deduplicate.

        Steps: (1) collect deleted ids from the deletions input, (2) convert
        each daily zip's MARCXML to binary MARC via yaz-marcdump into one
        combined temp file (newest first), (3) append the initial dump,
        (4) copy records into the output, skipping ids already seen or
        marked deleted, so the newest version of each record wins.
        """

        # Load all deletions into set.
        deleted = set()

        deldir = os.path.dirname(self.input().get('deletions').path)
        for path in sorted(iterfiles(deldir), reverse=True):
            with open(path) as handle:
                for i, line in enumerate(handle, start=1):
                    line = line.strip()
                    if len(line) > 20:
                        self.logger.warn("suspicious id: %s", line)
                    deleted.add(line)

        # Load updates.
        pattern = re.compile(r'^date-[0-9]{4,4}-[0-9]{2,2}-[0-9]{2,2}.zip$')
        datadir = os.path.dirname(self.input().get('data').path)

        # Combine all binary MARC records in this file.
        _, combined = tempfile.mkstemp(prefix='siskin-')

        for path in sorted(iterfiles(datadir), reverse=True):
            filename = os.path.basename(path)

            if not pattern.match(filename):
                self.logger.warn("ignoring invalid filename: %s", path)
                continue
            if os.stat(path).st_size < 22:
                self.logger.warn("ignoring possibly empty zip file: %s", path)
                continue

            with zipfile.ZipFile(path) as zf:
                for name in zf.namelist():
                    # copy the zip member to a real file so yaz-marcdump can
                    # read it, convert, append to `combined`, then discard
                    with zf.open(name) as handle:
                        with tempfile.NamedTemporaryFile(delete=False) as dst:
                            shutil.copyfileobj(handle, dst)
                        shellout(
                            "yaz-marcdump -i marcxml -o marc {input} >> {output}",
                            input=dst.name,
                            output=combined,
                            ignoremap={5: 'expected error from yaz'})
                        os.remove(dst.name)

        # Finally, concatenate initial dump.
        shellout("cat {input} >> {output}",
                 input=self.input().get('dump').path,
                 output=combined)

        # Already seen identifier.
        seen = set()

        with self.output().open('wb') as output:
            writer = pymarc.MARCWriter(output)

            # Iterate over MARC records (which are newest to oldest, keep track of seen identifiers).
            # NOTE(review): `combined` holds binary MARC but is opened in text
            # mode here -- presumably python2; confirm 'rb' under python3.
            with open(combined) as handle:
                reader = pymarc.MARCReader(handle,
                                           force_utf8=True,
                                           to_unicode=True)
                for record in reader:
                    field = record["001"]
                    if not field:
                        self.logger.debug("missing identifier")
                        continue

                    id = field.value()  # NOTE: shadows the builtin `id`

                    if id in seen:
                        self.logger.debug("skipping duplicate: %s", id)
                        continue
                    if id in deleted:
                        self.logger.debug("skipping deleted: %s", id)
                        continue

                    self.logger.debug("adding %s", id)
                    writer.write(record)
                    seen.add(id)

        self.logger.debug(
            "found %s unique records (deletion list contained %s ids)",
            len(seen), len(deleted))
        os.remove(combined)
Beispiel #17
0
	handles_csv = open(aco_globals.batch_folder+'/handles.csv', 'r')
	aco_globals.handles_lines = handles_csv.readlines()
	handles_csv.close()
except:	handles_csv = ''

# retrieve the CSV file containing the BSNs and source entity (SE) book numbers
try:
	bsn_se_csv = open(aco_globals.batch_folder+'/bsn-se-map.csv', 'r')
	aco_globals.bsn_se_lines = bsn_se_csv.readlines()
	bsn_se_csv.close()
# NOTE(review): bare except treats ANY failure (not just a missing file) as
# "no bsn-se map available"
except:	bsn_se_csv = ''

# OUTPUT FILES
output_folder = aco_globals.batch_folder+'/'+batch_name+'_3'

# NOTE(review): python2-only `file()` builtin, and the binary .mrc output is
# opened in text mode ('w') -- confirm open(..., 'wb') for python3
aco_globals.marcRecsOut_errors_all = pymarc.MARCWriter(file(output_folder+'/'+batch_name+'_3_errors_all.mrc', 'w'))
aco_globals.recs_errors_all_txt = codecs.open(output_folder+'/'+batch_name+'_3_errors_all.txt', 'w', encoding='utf8')
# header block describing what lands in the all-errors report
aco_globals.recs_errors_all_txt.write('ALL Records containing any type of error - batch '+batch_name+'\n')
aco_globals.recs_errors_all_txt.write('--  Each of these records have one or more of the following errors:\n')
aco_globals.recs_errors_all_txt.write('    --  no 880 fields\n')
aco_globals.recs_errors_all_txt.write('    --  missing a key 880 field\n')
aco_globals.recs_errors_all_txt.write('    --  have an unlinked 880 field\n')
aco_globals.recs_errors_all_txt.write('    --  have a series heading error in the 490/800/810/811/830 fields\n')
aco_globals.recs_errors_all_txt.write('    --  have one of the various miscellaneous errors, marked with ERROR-MISC\n')
aco_globals.recs_errors_all_txt.write('Report produced: '+aco_globals.curr_time+'\n')

# per-record analysis report for the whole batch
all_recs_analysis_txt = codecs.open(output_folder+'/'+batch_name+'_3_all_recs_analysis.txt', 'w', encoding='utf8')

# final MARC outputs: the batch subset and the cumulative all-batches file
aco_globals.marcRecsOut_final_subset = pymarc.MARCWriter(file(output_folder+'/'+batch_name+'_3_final_recs.mrc', 'w'))
aco_globals.marcRecsOut_final_all = pymarc.MARCWriter(file(aco_globals.batch_folder+'/'+batch_name+'_4_final_recs.mrc', 'w'))
Beispiel #18
0
import marcx
import pandas
import pymarc

warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

inputfilename = "160_input.csv"
outputfilename = "160_output.mrc"

if len(sys.argv) == 3:
    inputfilename, outputfilename = sys.argv[1:]

outputfile = io.open(outputfilename, "wb")
writer = pymarc.MARCWriter(outputfile)

csv_records = pandas.read_csv(inputfilename, encoding="latin-1", sep=";")
for csv_record in csv_records.iterrows():

    csv_record = csv_record[1]
    marc_record = marcx.Record(force_utf8=True)

    marc_record.leader = "     nam  22        4500"

    f001 = "finc-160-" + str(csv_record["001"])
    marc_record.add("001", data=f001)

    # Zugangsformat
    marc_record.add("007", data="tu")
Beispiel #19
0
import pymarc
from pymarc import Record, Field
from copy import deepcopy

############################################
# Read in a file of MARC records
############################################
my_records_in = pymarc.MARCReader(file('my_marc_recs_in.mrc'), to_unicode=True, force_utf8=True)

############################################
# Write out a file of MARC records
############################################
my_records_out = pymarc.MARCWriter(file('my_marc_recs_out.mrc', 'w'))


############################################
# Iterate through the input file of MARC records
############################################
rec_num = 1
for my_record in my_records_in:		# iterate through each of the records in the file
	print 'Record #: '+str(rec_num)
	my_orig_record = deepcopy(my_record)
	print_record = False
	
	
	############################################
	# DEMO 1 - Get 650 fields from MARC records using record.get_fields() function
# 	my_650s = my_record.get_fields('650')
# 	print 'List of field objects returned: '
# 	print my_650s
# 	
Beispiel #20
0
import xmltodict

#our includes
from getbib import *

#open the file of ISBNs/Standard numbers (see sample.txt)
bibs = open(sys.argv[1], "r").readlines()

#extract file name for writing to output files
file_name = os.path.splitext(os.path.basename(sys.argv[1]))[0]

#strip spaces
bibs = [x.strip() for x in bibs]

#open up a new MARC file
writer = pymarc.MARCWriter(open('output/' + file_name + '.mrc', 'wb'))

#start three lists to create a report at the end
missing_list = []
brief_list = []
retrieved_list = []

#write the full output from the Worldcat endpoint to an xml file
full_xml = open('output/' + file_name + '_full.xml', 'wb')

#iterate through each identifier and retrieve a record from the API
for b in bibs:

    print(b)
    #check for empty lines
    if len(b) == 0:
Beispiel #21
0
#!/usr/bin/python

import os
import errno
import sys
import time
import shutil
import codecs
import pymarc
from pymarc import Record, Field
import aco_globals
import aco_functions

aco_mrc_all = pymarc.MARCWriter(
    file(aco_globals.work_folder + '/' + 'mrc_out_all-3.mrc', 'w'))

# Retrieve individual final MARC files from each mrc_out batch folder
for root, folders, files in os.walk(aco_globals.work_folder):
    for folder in folders:
        if folder == "mrc_out":
            mrc_out_path = os.path.join(root, folder)
            for root, folders, files in os.walk(mrc_out_path):
                for mrc_file in files:
                    mrc_file_path = os.path.join(root, mrc_file)
                    this_mrc_files = pymarc.MARCReader(file(mrc_file_path),
                                                       to_unicode=True,
                                                       force_utf8=True)
                    for this_mrc_file in this_mrc_files:
                        this_mrc_003 = this_mrc_file.get_fields(
                            '003')[0].value()
                        this_mrc_001 = this_mrc_file.get_fields(
Beispiel #22
0
def _mrc(record):
    """Serialize a single MARC record and return the raw transmission format."""
    buffer = StringIO()
    marc_writer = pymarc.MARCWriter(buffer)
    marc_writer.write(record)
    buffer.seek(0)
    return buffer.read()
try:
    bsn_se_csv = open(aco_globals.batch_folder + '/bsn-se-map.csv', 'r')
    aco_globals.bsn_se_lines = bsn_se_csv.readlines()
    bsn_se_csv.close()
# NOTE(review): bare except treats ANY failure (not just a missing file) as
# "no bsn-se map available"
except:
    bsn_se_csv = ''

# OUTPUT FILES
# create the batch output folder, tolerating a pre-existing one
try:
    os.makedirs(aco_globals.batch_folder + '/' + batch_name + '_1/')
except OSError as exception:
    if exception.errno != errno.EEXIST:
        raise

# NOTE(review): python2-only `file()` builtin; the binary .mrc output is
# opened in text mode ('w') -- confirm open(..., 'wb') for python3
marcRecsOut_orig_no_oclc_nums = pymarc.MARCWriter(
    file(
        aco_globals.batch_folder + '/' + batch_name + '_1/' + batch_name +
        '_1_orig_no_oclc_nums.mrc', 'w'))
# companion report of records lacking OCLC numbers (header row below)
orig_no_oclc_nums_txt = codecs.open(aco_globals.batch_folder + '/' +
                                    batch_name + '_1/' + batch_name +
                                    '_1_orig_no_oclc_nums.txt',
                                    'w',
                                    encoding='utf-8')
orig_no_oclc_nums_txt.write('003/Inst,001/BSN,OCLC number(s),245a/Title\n')

marcRecsOut_orig_with_oclc_nums = pymarc.MARCWriter(
    file(
        aco_globals.batch_folder + '/' + batch_name + '_1/' + batch_name +
        '_1_orig_with_oclc_nums.mrc', 'w'))
orig_with_oclc_nums_txt = codecs.open(aco_globals.batch_folder + '/' +
                                      batch_name + '_1/' + batch_name +
                                      '_1_orig_with_oclc_nums.txt',
Beispiel #24
0
# accumulate names of files whose cleaned record failed to write
errorlist = ""
path = os.path.dirname(os.path.abspath(__file__)) + '\\'  # NOTE(review): Windows-only separator
for infile in glob.glob(os.path.join(path, '*.mrc')):
    with open(infile, 'rb') as fh:
        reader = pymarc.MARCReader(fh, force_utf8=True)
        # only the FIRST record of each file is processed
        record = next(reader)
        new_record = pymarc.Record(to_unicode=True, force_utf8=True)
        new_record.leader = record.leader
        for field in record.get_fields():
            new_record.add_field(field)
        for f in new_record.get_fields('599'):
            new_record.remove_field(
                new_record.get_fields('599')[0]
            )  # only grabs first instance of 599 but only rarely will there be more than one
            print("deleted 599 from " + infile)
        for f in new_record.get_fields('910'):
            new_record.remove_field(new_record.get_fields('910')
                                    [0])  # some old records have 910 for stats
            print("deleted 910 from " + infile)

        # NOTE(review): re-opens `infile` for writing (truncating it) while the
        # read handle is still open -- works on POSIX, risky on Windows
        out = pymarc.MARCWriter(open(infile, 'wb'))
        # the MARCWriter part above can be modified to create xml, json, and mnemonic mrk formats instead
        try:
            out.write(new_record)
        except Exception:
            errorlist += infile + '\n'

        out.close()
if errorlist != "":
    with open(path + 'errors.log', 'w+') as fh:
        fh.write(errorlist)
def xml_to_mrc(path_in, path_out):
    """Convert a MARCXML file at `path_in` to binary MARC at `path_out`."""
    writer = pymarc.MARCWriter(open(path_out, 'wb'))
    try:
        # map_xml streams each parsed record straight into writer.write;
        # it returns None, so the old `records = ...` assignment was dead
        pymarc.map_xml(writer.write, path_in)
    finally:
        # close (and flush) the output even if parsing raises
        writer.close()
Beispiel #26
0
# OUTPUT FILES
output_folder = aco_globals.batch_folder + '/' + batch_name + '_3'

# create one error-subfolder per error category, tolerating pre-existing dirs
try:
    os.makedirs(output_folder + '/' + batch_name + '_3_errors_no_880s/')
    os.makedirs(output_folder + '/' + batch_name +
                '_3_errors_missing_key_880s/')
    os.makedirs(output_folder + '/' + batch_name + '_3_errors_unlinked_880s/')
    os.makedirs(output_folder + '/' + batch_name + '_3_errors_series/')
    os.makedirs(output_folder + '/' + batch_name + '_3_errors_misc/')
except OSError as exception:
    if exception.errno != errno.EEXIST:
        raise

# NOTE(review): python2-only `file()` builtin; the binary .mrc output is
# opened in text mode ('w') -- confirm open(..., 'wb') for python3
aco_globals.marcRecsOut_no_880s = pymarc.MARCWriter(
    file(
        output_folder + '/' + batch_name + '_3_errors_no_880s/' + batch_name +
        '_3_no_880s.mrc', 'w'))
# companion text report for records with no 880 script fields
aco_globals.recs_no_880s_txt = codecs.open(output_folder + '/' + batch_name +
                                           '_3_errors_no_880s/' + batch_name +
                                           '_3_no_880s.txt',
                                           'w',
                                           encoding='utf8')
aco_globals.recs_no_880s_txt.write(
    'Records with NO 880 script fields - batch ' + batch_name + '\n')
aco_globals.recs_no_880s_txt.write(
    '--  These records do NOT contain ANY 880 script fields\n')
aco_globals.recs_no_880s_txt.write('Report produced: ' +
                                   aco_globals.curr_time + '\n')

aco_globals.marcRecsOut_missing_key_880s = pymarc.MARCWriter(
    file(