# (Re)Process the records
#
# Step 1: convert every MARCXML file found in <batch_folder>/marcxml_in to raw
# MARC and write all of the records to a single
# <batch_folder>/<batch_name>_0_orig_recs.mrc file.

# Single definition of the combined .mrc path; it is written here and read
# back immediately below.
orig_recs_path = aco_globals.batch_folder + '/' + batch_name + '_0_orig_recs.mrc'
marcxml_dir = aco_globals.batch_folder + '/marcxml_in'

# OUTPUT FILE
# Raw MARC is a binary format, so the file is opened in binary mode ('wb').
# open() replaces the Python-2-only file() builtin.
marcRecsOut_orig_recs = pymarc.MARCWriter(open(orig_recs_path, 'wb'))
try:
    for filename in os.listdir(marcxml_dir):
        file_path = os.path.join(marcxml_dir, filename)
        # Only regular files whose path ends in 'xml' are converted;
        # sub-directories and any other files are ignored.
        if not (os.path.isfile(file_path) and file_path.endswith('xml')):
            continue
        marc_xml_array = pymarc.parse_xml_to_array(file_path)
        for rec in marc_xml_array:
            # NOTE(review): pad_008 presumably normalises the 008 field
            # length -- confirm against aco_functions.
            rec = aco_functions.pad_008(rec)
            # Progress output: the record's first 001 field.  Raises
            # IndexError when a record has no 001 field at all.
            rec_001 = rec.get_fields('001')[0]
            # Single-argument parenthesised form prints identically under
            # the Python 2 print statement and the Python 3 print function.
            print(rec_001)
            marcRecsOut_orig_recs.write(rec)
finally:
    # Always close the writer, even when parsing or writing fails part-way,
    # so the output file handle is not leaked.
    marcRecsOut_orig_recs.close()

# Step 2: extract the OCLC numbers from each record and write records to .mrc
# and .txt files depending on whether the record contains an OCLC number.

# INPUT FILE
# Re-open the combined file written above, again in binary mode.
marcRecsIn_orig_recs = pymarc.MARCReader(
    open(orig_recs_path, 'rb'), to_unicode=True, force_utf8=True)

# OUTPUT FILES
# Create the <batch_name>_1 output directory; an already-existing directory is
# fine (EEXIST), any other OS error is re-raised.
try:
    os.makedirs(aco_globals.batch_folder + '/' + batch_name + '_1/')
except OSError as exception:
    if exception.errno != errno.EEXIST:
        raise
oclc_nums_for_export = codecs.open(aco_globals.batch_folder+'/'+batch_name+'_1/'+batch_name+'_1_oclc_nums_for_export.txt', 'w', encoding='utf-8') all_recs_analysis_txt = codecs.open(aco_globals.batch_folder+'/'+batch_name+'_1/'+batch_name+'_1_all_recs_analysis.txt', 'w', encoding='utf8') marcRecsOut_orig_recs = pymarc.MARCWriter(file(aco_globals.batch_folder+'/'+batch_name+'_0_orig_recs.mrc', 'w')) # Convert the individual marcxml_in files to raw marc and write them all to a single .mrc file marcxml_dir = aco_globals.batch_folder+'/marcxml_in' for filename in os.listdir(marcxml_dir): file_path = os.path.join(marcxml_dir,filename) if os.path.isfile(file_path): if file_path[-3:]=='xml': marc_xml_array = pymarc.parse_xml_to_array(file_path) for orig_rec in marc_xml_array: orig_rec = aco_functions.pad_008(orig_rec) orig_003_value = orig_rec.get_fields('003')[0].value() # the institutional code from the 003 orig_001_value = orig_rec.get_fields('001')[0].value() # the local BSN from the 001 orig_245 = orig_rec.get_fields('245')[0] orig_245a = orig_245.get_subfields('a')[0] # the main title from the 245 subfield a print orig_001_value orig_rec_count_tot +=1 # Extract the OCLC numbers from each record and write records to .mrc and .txt files depending if record contains OCLC number or not rec_oclc_nums = set() # set variable to capture unique list of OCLC numbers for just this record oclc_num_exists = False for oclc_num_field in rec.get_fields('035','079'): # iterate through all the 035/079 fields in the original partner record oclc_num_field_az = oclc_num_field.get_subfields('a','z') # capture the list of all subfields a or z in the 035/079 fields if len(oclc_num_field_az) > 0: # check if subfield a or z exists in the 035/079 fields for this_az in oclc_num_field_az: # iterate through each of the subfields a or z if this_az.startswith('(OCoLC)') or this_az.startswith('o'): # check if the subfield data is an OCLC number
'w', encoding='utf8') marcRecsOut_orig_recs = pymarc.MARCWriter( file(aco_globals.batch_folder + '/' + batch_name + '_0_orig_recs.mrc', 'w')) # Convert the individual marcxml_in files to raw marc and write them all to a single .mrc file marcxml_dir = aco_globals.batch_folder + '/marcxml_in' for filename in os.listdir(marcxml_dir): file_path = os.path.join(marcxml_dir, filename) if os.path.isfile(file_path): if file_path[-3:] == 'xml': marc_xml_array = pymarc.parse_xml_to_array(file_path) for orig_rec in marc_xml_array: orig_rec = aco_functions.pad_008(orig_rec) orig_003_value = orig_rec.get_fields( '003')[0].value() # the institutional code from the 003 orig_001_value = orig_rec.get_fields( '001')[0].value() # the local BSN from the 001 orig_245 = orig_rec.get_fields('245')[0] orig_245a = orig_245.get_subfields('a')[ 0] # the main title from the 245 subfield a print orig_001_value orig_rec_count_tot += 1 # Extract the OCLC numbers from each record and write records to .mrc and .txt files depending if record contains OCLC number or not rec_oclc_nums = set( ) # set variable to capture unique list of OCLC numbers for just this record oclc_num_exists = False for oclc_num_field in rec.get_fields(
# (Re)Process the records # Convert the individual marcxml_in files to raw marc and write them all to a single .mrc file # OUTPUT FILE marcRecsOut_orig_recs = pymarc.MARCWriter( file(aco_globals.batch_folder + '/' + batch_name + '_0_orig_recs.mrc', 'w')) marcxml_dir = aco_globals.batch_folder + '/marcxml_in' for filename in os.listdir(marcxml_dir): file_path = os.path.join(marcxml_dir, filename) if os.path.isfile(file_path): if file_path[-3:] == 'xml': marc_xml_array = pymarc.parse_xml_to_array(file_path) for rec in marc_xml_array: rec = aco_functions.pad_008(rec) rec_001 = rec.get_fields('001')[0] print rec_001 marcRecsOut_orig_recs.write(rec) marcRecsOut_orig_recs.close() # Extract the OCLC numbers from each record and write records to .mrc and .txt files depending if record contains OCLC number or not # INPUT FILE marcRecsIn_orig_recs = pymarc.MARCReader(file(aco_globals.batch_folder + '/' + batch_name + '_0_orig_recs.mrc'), to_unicode=True, force_utf8=True) # OUTPUT FILES try: os.makedirs(aco_globals.batch_folder + '/' + batch_name + '_1/')