def transpose_to_marc21(record):
    """Serialize a dict-shaped record into binary MARC21 via pymarc.

    ``record["_LEADER"]`` supplies the leader.  Keys that look like integers
    become MARC fields: tags below 010 are control fields (value is a string
    or a list of strings); higher tags are data fields whose value is a list
    of ``{indicator_string: [{subfield_code: value_or_list}, ...]}`` dicts.
    Returns the record as MARC21 bytes.
    """
    marc_record = Record(force_utf8=True)
    marc_record.leader = record["_LEADER"]

    for tag in record:
        if not isint(tag):
            # Skip non-numeric keys such as "_LEADER".
            continue

        if int(tag) < 10:
            # Control field: one data string, or a list of them (repeated field).
            value = record[tag]
            if isinstance(value, list):
                for data in value:
                    marc_record.add_field(Field(tag=tag, data=data))
            elif isinstance(value, str):
                marc_record.add_field(Field(tag=tag, data=value))
        else:
            # Data field: unpack indicators and flatten subfields into the
            # alternating [code, value, code, value, ...] list pymarc expects.
            for field_group in record[tag]:
                for ind, entries in field_group.items():
                    subfields = []
                    for entry in entries:
                        for code, value in entry.items():
                            if isinstance(value, str):
                                subfields.extend((code, value))
                            elif isinstance(value, list):
                                # Repeated subfield: one code per value.
                                for repeated_value in value:
                                    subfields.extend((code, repeated_value))
                    marc_record.add_field(
                        Field(tag=str(tag),
                              indicators=list(ind),
                              subfields=subfields))

    return marc_record.as_marc()
def create_record(cls, work, annotator, force_create=False, integration=None):
    """Build a complete MARC record for a given work.

    Reuses the cached serialization stored on the work (in the attribute
    named by ``annotator.marc_cache_field``) unless ``force_create`` is set;
    when building fresh, the serialized bytes are cached back onto the work.
    Fields that must not be cached are added afterwards in both cases.
    Returns None when the work has no usable license pool.
    """
    if callable(annotator):
        annotator = annotator()

    # Materialized works expose their pool directly; otherwise ask the work.
    if isinstance(work, BaseMaterializedWork):
        pool = work.license_pool
    else:
        pool = work.active_license_pool()
    if not pool:
        return None

    edition = pool.presentation_edition
    identifier = pool.identifier
    _db = Session.object_session(work)

    # Prefer the cached record when present and not forcibly rebuilding.
    record = None
    existing_record = getattr(work, annotator.marc_cache_field)
    if existing_record and not force_create:
        record = Record(data=existing_record.encode('utf-8'), force_utf8=True)

    if not record:
        # Build the cacheable portion of the record from scratch.
        record = Record(leader=annotator.leader(work), force_utf8=True)
        annotator.add_control_fields(record, identifier, pool, edition)
        annotator.add_isbn(record, identifier)

        # TODO: The 240 and 130 fields are for translated works, so they can
        # be grouped even though they have different titles. We do not group
        # editions of the same work in different languages, so we can't use
        # those yet.
        annotator.add_title(record, edition)
        annotator.add_contributors(record, edition)
        annotator.add_publisher(record, edition)
        annotator.add_physical_description(record, edition)
        annotator.add_audience(record, work)
        annotator.add_series(record, edition)
        annotator.add_system_details(record)
        annotator.add_ebooks_subject(record)

        # Cache the serialized record on the underlying work object.
        data = record.as_marc()
        if isinstance(work, BaseMaterializedWork):
            setattr(pool.work, annotator.marc_cache_field, data)
        else:
            setattr(work, annotator.marc_cache_field, data)

    # Add additional fields that should not be cached.
    annotator.annotate_work_record(work, pool, edition, identifier, record,
                                   integration)
    return record
# Harvest OSTI metadata for every id listed in ostinos.csv, writing a CSV
# summary (osti_recs.csv) and a MARC21 file (ostimarc.mrc).
f = open('ostinos.csv')
csv_f = csv.reader(f)
out = open('osti_recs.csv', 'w')
data = csv.writer(out)
data.writerow([
    'Title', 'Author', 'Date', 'Subjects', 'Description', 'OstiID', 'DOI',
    'Report Number', 'DOE Number', 'URL', ''
])
# BUG FIX: Record.as_marc() returns bytes on Python 3 (this file uses
# f-strings elsewhere, so it is Python 3); the MARC output file must be
# opened in binary mode, not text mode 'w'.
marcOut = open('ostimarc.mrc', 'wb')
dc = '{http://purl.org/dc/elements/1.1/}'
dcq = '{http://purl.org/dc/terms/}'
for number in csv_f:
    ostiId = number[0]
    marc = Record()  # Create a new record for each loop.
    # NOTE(review): '.xml' is appended to the query-string id — confirm the
    # OSTI endpoint actually expects that suffix.
    tree = etree.parse('http://www.osti.gov/scitech/scitechxml?Identifier=' +
                       ostiId + '.xml')
    for node in tree.iter():
        if node.tag == dc + 'ostiId':
            if node.text == ostiId:
                # Matched record: hand the sibling elements to the
                # CSV and MARC extractors defined elsewhere in this file.
                o = node.getparent()
                osti = o.getchildren()
                getRecs(osti, data)
                getMarc(osti, marc)
                marcOut.write(marc.as_marc())  # Write each new record.
# -------------------------------------------------
# Second copy of the OSTI harvest loop (same inputs/outputs as the block
# above): reads ostinos.csv, writes osti_recs.csv and ostimarc.mrc.
f = open('ostinos.csv')
csv_f = csv.reader(f)
out = open('osti_recs.csv', 'w')
data = csv.writer(out)
data.writerow([
    'Title', 'Author', 'Date', 'Subjects', 'Description', 'OstiID', 'DOI',
    'Report Number', 'DOE Number', 'URL', ''
])
# BUG FIX: as_marc() returns bytes on Python 3 — open the MARC output in
# binary mode ('wb'), not text mode ('w').
marcOut = open('ostimarc.mrc', 'wb')
dc = '{http://purl.org/dc/elements/1.1/}'
dcq = '{http://purl.org/dc/terms/}'
for number in csv_f:
    ostiId = number[0]
    marc = Record()  # Create a new record for each loop.
    tree = etree.parse('http://www.osti.gov/scitech/scitechxml?Identifier=' +
                       ostiId + '.xml')
    for node in tree.iter():
        if node.tag == dc + 'ostiId':
            if node.text == ostiId:
                # Matched record: extract CSV row and MARC fields.
                o = node.getparent()
                osti = o.getchildren()
                getRecs(osti, data)
                getMarc(osti, marc)
                marcOut.write(marc.as_marc())  # Write each new record.
def json_to_marc(infilename, outfilename):
    """Convert one journal/series JSON metadata file into a MARC21 file.

    Reads *infilename* (JSON), builds a pymarc Record, and writes the binary
    MARC21 serialization to *outfilename*.  If the JSON declares subordinate
    resources, one additional "-subN" MARC file is written per resource.
    """
    print('Processing: ' + infilename)  # progress message

    # BUG FIX: the input handle was opened inline (json.load(open(...)))
    # and never closed; a context manager guarantees release.
    with open(infilename, "r") as infile:
        data = json.load(infile)

    record = Record(force_utf8=True)  # create MARC record, enforce Unicode

    # Add fields 006 and 007 with minimal physical information to every record.
    record.add_field(Field(tag='006', data="m"))
    record.add_field(Field(tag='007', data="cr"))

    # The IANA language code from the JSON is mapped to its ISO 639-2
    # equivalent and placed at positions 21-24 of the 008 value.
    # NOTE(review): the internal spacing of this literal looks collapsed in
    # this copy of the source — the positional slicing below needs a value of
    # at least 24 characters; confirm the original string.
    field008val = " o 0eng d"  # DEFAULT ENG
    try:
        if 'languages' in data and data['languages'][0] is not None:
            field008val = field008val[0:21] + lang_map.get(
                data['languages'][0], " ") + field008val[24:]
    except IndexError:
        field008val = field008val[0:21] + " " + field008val[24:]
    record.add_field(Field(tag='008', data=field008val))

    # Extract ISSNs ('generic' and/or 'electronic') into subfields of 022.
    if "identifiers" in data and "issn" in data["identifiers"]:
        field_issn = Field(tag='022', indicators=['0', '#'])
        if "generic" in data["identifiers"]["issn"]:
            field_issn.add_subfield('a',
                                    data["identifiers"]["issn"]["generic"][0])
        if "electronic" in data["identifiers"]["issn"]:
            field_issn.add_subfield(
                'l', data["identifiers"]["issn"]["electronic"][0])
        record.add_field(field_issn)

    # Title of the series or journal.
    if data["is_part_of"] is not None and data["is_part_of"]['title_full']:
        record.add_field(
            Field(tag='245',
                  indicators=['0', '0'],
                  subfields=["a", data["is_part_of"]["title_full"][:9000]]))
    if data["title"]:
        record.add_field(
            Field(tag='246',
                  indicators=['0', '0'],
                  subfields=["a", data["title"][:9000]]))
    if data["year"]:
        record.add_field(
            Field(tag="260",
                  indicators=["#", "#"],
                  subfields=["c", data["year"]]))

    # Add field 506 to all records, as not present in all JSON files.
    record.add_field(
        Field(tag='506', indicators=['0', '#'], subfields=["a",
                                                           "Open access"]))

    # Some JSON files contain a very long description; the maximum length of
    # data in a MARC21 variable field is 9,999 bytes, so only a bounded
    # amount of content is put into the 520 field.
    if data["description"]:
        record.add_field(
            Field(tag='520',
                  indicators=['2', '#'],
                  subfields=["a", data["description"][:9000]]))

    # Keep the journal url, host and domain together as subfields of 856;
    # check that at least one exists before creating the field.
    if data['url'] or (data['is_part_of'] is not None
                       and data['is_part_of']['url']):
        field = Field(tag='856', indicators=['0', '0'])
        if data['domain']:
            field.add_subfield('a', data['domain'])
        if data['is_part_of'] is not None and data['is_part_of']['url']:
            field.add_subfield('d', data['is_part_of']['url'])
        if data['url']:
            field.add_subfield('u', data['url'])
        record.add_field(field)

    if data["volume"]:
        record.add_field(
            Field(tag='866',
                  indicators=['0', '0'],
                  subfields=["a", data["volume"]]))

    # Output the MARC file (context manager instead of manual close()).
    with open(outfilename, 'wb') as out:
        out.write(record.as_marc())

    # Create separate records for subordinate resources, suffixing each
    # output filename with "-subN".
    if data['subordinate_resources'] is not None:
        subordinate_records = create_subordinate_records(
            record, data['subordinate_resources'])
        for counter, subordinate_record in enumerate(subordinate_records):
            subname = outfilename.replace(".marc",
                                          "-sub" + str(counter) + ".marc")
            with open(subname, 'wb') as subfile:
                subfile.write(subordinate_record.as_marc())
# Convert a line-oriented dump stored in a SQLite table into MARC records:
# blank lines separate records; each data line's leading tag selects a field.
# NOTE(review): this chunk is truncated in this copy of the source — the
# final `elif` branch has no body, so the block as captured is incomplete.
notimpl = {}   # collects tags not yet handled (populated elsewhere, presumably)
records = {}   # finished records keyed by their 001 control number
c = conn.cursor()
record = Record(file_encoding="utf-8", force_utf8=True)
record.myextra = {}  # scratch space attached to the pymarc Record
last = None
for l in c.execute('SELECT * FROM DBF_database_library_DAMEDB_dbf'):
    if len(l.strip()) < 1:
        # Blank line marks the end of the current record: flush it if it
        # acquired a title, then start a fresh Record.
        if record.title():
            log("**New record writing old one: ", record['001'], '\n',
                '-' * 20)
            with open('marcout.mrc', 'ab') as o:
                o.write(record.as_marc())
            records[record['001']] = record
            record = Record(file_encoding="utf-8", force_utf8=True)
            record.myextra = {}
        pass
    elif l.startswith("###"):
        # Comment line in the dump — ignored.
        pass
    elif l.startswith('M10'):
        log("Urls have a non number therefore are extracted first")
        record.add_field(
            Field(tag='856', indicators=['4', '2'], subfields=['u', l[4:]]))
    elif l.startswith('B03 '):
        # TODO teacher/student library (Lehrer/Schüler Bibliothek)
        pass
    elif l.startswith('651i'):
        # TODO teacher/student library (Lehrer/Schüler Bibliothek)
        # NOTE(review): source is cut off here — the branch body is missing.
# -------------------------------------------------
# Second harvest pass: reads ostinos2.csv, writes osti_recs2.csv and
# appends matched records to ostimarc.mrc, matching on identifierReport.
f = open('ostinos2.csv')
csv_f = csv.reader(f)
out = open('osti_recs2.csv', 'w')
data = csv.writer(out)
data.writerow([
    'Title', 'Author', 'Date', 'Subjects', 'Description', 'OstiID', 'DOI',
    'Report Number', 'DOE Number', 'URL', ''
])
# BUG FIX: as_marc() returns bytes on Python 3, so appending to the MARC
# file needs binary append mode ('ab'), not text append ('a').
marcOut = open('ostimarc.mrc', 'ab')
dc = '{http://purl.org/dc/elements/1.1/}'
dcq = '{http://purl.org/dc/terms/}'
for number in csv_f:
    ostiId = number[0]
    marc = Record()
    results = requests.get(
        'http://www.osti.gov/scitech/scitechxml?Identifier="' + ostiId + '"')
    tree = etree.fromstring(results.content)
    for node in tree.iter():
        if node.tag == dc + 'identifierReport':
            # (original id check was disabled: if node.text == ostiId)
            o = node.getparent()
            osti = o.getchildren()
            getRecs(osti, data)
            getMarc(osti, marc)
            marcOut.write(marc.as_marc())
def main(arglist):
    """Read a comic-book inventory spreadsheet and emit MARC21 records.

    *arglist* is passed to argparse; the single positional argument is the
    path to the spreadsheet.  One MARC record per data row is written to
    records.mrc in the working directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help='path to spreadsheet')
    # parser.add_argument('output', help='save directory')
    # parser.add_argument('--production', help='production DOIs', action='store_true')
    args = parser.parse_args(arglist)
    input = Path(args.input)

    # Read spreadsheet and resolve every column index by header name.
    book_in = xlrd.open_workbook(str(input))
    sheet = book_in.sheet_by_index(0)  # get first sheet
    col_headers = sheet.row_values(0)
    title_col = col_headers.index('Title')
    subj_col = col_headers.index('Subject')
    genre_col = col_headers.index('Genre')
    pages_col = col_headers.index('Pages')
    date_col = col_headers.index('Date')
    pub_place_col = col_headers.index('Pub_Place')
    publisher_col = col_headers.index('Publisher')
    source_col = col_headers.index('Source')
    writer_col = col_headers.index('Writer')
    penciller_col = col_headers.index('Penciller')
    inker_col = col_headers.index('Inker')
    colorist_col = col_headers.index('Colorist')
    letterer_col = col_headers.index('Letterer')
    cover_artist_col = col_headers.index('Cover Artist')
    editor_col = col_headers.index('Editor')
    hist_note_col = col_headers.index('Historical Note')
    note_col = col_headers.index('Note')
    characters_col = col_headers.index('Characters')
    story_arc_col = col_headers.index('Story Arc')
    toc_col = col_headers.index('Table of Contents')
    series_col = col_headers.index('Is Part of Series')

    outmarc = open('records.mrc', 'wb')

    # Boilerplate fields shared by every record.
    # NOTE(review): leader spacing may have been collapsed in this copy of
    # the source — a MARC leader is 24 characters; confirm the original.
    field_ldr = '00000nam 2200000Ii 4500'
    field_040 = Field(
        tag='040',
        indicators=[' ', ' '],
        subfields=['a', 'VMC', 'b', 'eng', 'e', 'rda', 'c', 'VMC'])
    field_049 = Field(tag='049', indicators=[' ', ' '], subfields=['a', 'VMCM'])
    field_336_text = Field(
        tag='336',
        indicators=[' ', ' '],
        subfields=['a', 'text', 'b', 'txt', '2', 'rdacontent'])
    field_336_image = Field(
        tag='336',
        indicators=[' ', ' '],
        subfields=['a', 'still image', 'b', 'sti', '2', 'rdacontent'])
    field_337 = Field(tag='337', indicators=[' ', ' '],
                      subfields=['a', 'unmediated', 'b', 'n', '2', 'rdamedia'])
    field_338 = Field(tag='338', indicators=[' ', ' '],
                      subfields=['a', 'volume', 'b', 'nc', '2', 'rdacarrier'])
    field_380 = Field(tag='380', indicators=[' ', ' '],
                      subfields=['a', 'Comic books and graphic novels.'])
    field_506 = Field(
        tag='506', indicators=['1', ' '],
        subfields=[
            'a', 'Collection open to research. Researchers must register and agree to copyright and privacy laws before using this collection. Please contact Research Services staff before visiting the James Madison University Special Collections Library to use this collection.'
        ])
    field_542 = Field(tag='542', indicators=[' ', ' '],
                      subfields=[
                          'a', 'Copyright not evaluated', 'u',
                          'http://rightsstatements.org/vocab/CNE/1.0/'
                      ])
    field_588 = Field(
        tag='588', indicators=['0', ' '],
        subfields=[
            'a', 'Description based on indicia and Grand Comics Database.'
        ])
    field_989 = Field(tag='989', indicators=[' ', ' '],
                      subfields=['a', 'PN6728'])

    for row in range(1, sheet.nrows):
        print('Record ' + str(row))
        title = sheet.cell(row, title_col).value
        print(title)
        # parse_title returns 2 parts (title, number) or 3 (title, part, number).
        lower_title = parse_title(lowercase_title(title))
        title = parse_title(sheet.cell(row, title_col).value)
        has_part_title = len(title) == 3

        # Multi-valued cells are semicolon-separated.
        subj = sheet.cell(row, subj_col).value
        subj = [x.strip() for x in subj.split(';')]
        genre = sheet.cell(row, genre_col).value
        genre = [x.strip() for x in genre.split(';')]
        pages = sheet.cell(row, pages_col).value
        date = sheet.cell(row, date_col).value[0:4]  # year only
        pub_place = sheet.cell(row, pub_place_col).value
        publisher = sheet.cell(row, publisher_col).value
        source = sheet.cell(row, source_col).value
        writer = []
        if sheet.cell(row, writer_col).value:
            writer = sheet.cell(row, writer_col).value
            writer = [x.strip() for x in writer.split(';')]
        penciller = []
        if sheet.cell(row, penciller_col).value:
            penciller = sheet.cell(row, penciller_col).value
            penciller = [x.strip() for x in penciller.split(';')]
        inker = []
        if sheet.cell(row, inker_col).value:
            inker = sheet.cell(row, inker_col).value
            inker = [x.strip() for x in inker.split(';')]
        colorist = []
        if sheet.cell(row, colorist_col).value:
            colorist = sheet.cell(row, colorist_col).value
            colorist = [x.strip() for x in colorist.split(';')]
        letterer = []
        if sheet.cell(row, letterer_col).value:
            letterer = sheet.cell(row, letterer_col).value
            letterer = [x.strip() for x in letterer.split(';')]
        cover_artist = []
        if sheet.cell(row, cover_artist_col).value:
            cover_artist = sheet.cell(row, cover_artist_col).value
            cover_artist = [x.strip() for x in cover_artist.split(';')]
        editor = []
        if sheet.cell(row, editor_col).value:
            editor = sheet.cell(row, editor_col).value
            editor = [x.strip() for x in editor.split(';')]
        hist_note = []
        if sheet.cell(row, hist_note_col).value:
            hist_note = sheet.cell(row, hist_note_col).value
        note = []
        if sheet.cell(row, note_col).value:
            note = sheet.cell(row, note_col).value
        characters = []
        if sheet.cell(row, characters_col).value:
            characters = sheet.cell(row, characters_col).value
            characters = [x.strip() for x in characters.split(';')]
        story_arc = []
        if sheet.cell(row, story_arc_col).value:
            story_arc = sheet.cell(row, story_arc_col).value
        toc = []
        if sheet.cell(row, toc_col).value:
            toc = sheet.cell(row, toc_col).value
        series = sheet.cell(row, series_col).value

        record = Record()

        # Add boilerplate fields.
        record.leader = field_ldr
        record.add_ordered_field(field_040)
        record.add_ordered_field(field_049)
        record.add_ordered_field(field_336_text)
        record.add_ordered_field(field_336_image)
        record.add_ordered_field(field_337)
        record.add_ordered_field(field_338)
        record.add_ordered_field(field_380)
        record.add_ordered_field(field_506)
        record.add_ordered_field(field_542)
        record.add_ordered_field(field_588)
        record.add_ordered_field(field_989)

        # Add other fields.
        today = datetime.today().strftime('%y%m%d')
        # NOTE(review): internal spacing of this 008 literal may have been
        # collapsed in this copy of the source — confirm against the original.
        data_008 = today + 't' + date + date + 'xx a 6 000 1 eng d'
        field_008 = Field(tag='008', data=data_008)
        record.add_ordered_field(field_008)

        if has_part_title:
            subfields_099 = ['a', title[0] + ': ' + title[1], 'a', title[2]]
        else:
            subfields_099 = ['a', title[0], 'a', title[1]]
        field_099 = Field(tag='099', indicators=[' ', '9'],
                          subfields=subfields_099)
        record.add_ordered_field(field_099)

        if writer:
            # Add 100 for first writer.
            subfield_content = subfields_from_string_relator(
                writer[0], 'writer')
            field_100 = Field(tag='100', indicators=['1', ' '],
                              subfields=subfield_content)
            record.add_ordered_field(field_100)
            # Add 700s for all writers after the first.
            if len(writer) > 1:
                for i in writer[1:]:
                    subfield_content = subfields_from_string_relator(
                        i, 'writer')
                    field_700 = Field(tag='700', indicators=['1', ' '],
                                      subfields=subfield_content)
                    record.add_ordered_field(field_700)

        # BUG FIX: 245 indicators were ints (1, 0, 4, ...); pymarc indicators
        # are concatenated as strings during serialization, so ints break
        # Field output. Use one-character strings instead.
        f245_ind1 = '1' if writer else '0'
        f245_ind2 = '0'
        if str.startswith(title[0], 'The '):
            f245_ind2 = '4'
        elif str.startswith(title[0], 'An '):
            f245_ind2 = '3'
        elif str.startswith(title[0], 'A '):
            f245_ind2 = '2'

        if has_part_title:
            subfields_245 = [
                'a', title[0] + '.', 'p', title[1] + ',', 'n', title[2]
            ]
        else:
            subfields_245 = ['a', title[0] + ',', 'n', title[1]]
        # If writer exists, add $c; otherwise add 245 ending punctuation.
        if writer:
            subfields_245[-1] = subfields_245[-1] + ' /'
            subfields_245.append('c')
            subfields_245.append(
                name_direct_order(subfields_from_string(writer[0])[1]) +
                ', writer.')
        else:
            subfields_245[-1] = subfields_245[-1] + '.'
        field_245 = Field(tag='245', indicators=[f245_ind1, f245_ind2],
                          subfields=subfields_245)
        record.add_ordered_field(field_245)

        field_264_1 = Field(tag='264', indicators=[' ', '1'],
                            subfields=[
                                'a', pub_place + ' :', 'b', publisher + ',',
                                'c', date + '.'
                            ])
        record.add_ordered_field(field_264_1)
        field_264_4 = Field(tag='264', indicators=[' ', '4'],
                            subfields=['c', '©' + date])
        record.add_ordered_field(field_264_4)
        field_300 = Field(tag='300', indicators=[' ', ' '],
                          subfields=[
                              'a', pages + ' pages :', 'b',
                              'chiefly color illustrations.'
                          ])
        record.add_ordered_field(field_300)

        if has_part_title:
            subfields_490 = [
                'a', lower_title[0] + '. ' + lower_title[1] + ' ;', 'v',
                lower_title[2]
            ]
        else:
            subfields_490 = ['a', lower_title[0] + ' ;', 'v', lower_title[1]]
        field_490 = Field(tag='490', indicators=['1', ' '],
                          subfields=subfields_490)
        record.add_ordered_field(field_490)

        if hist_note:
            field_500_hist = Field(tag='500', indicators=[' ', ' '],
                                   subfields=['a', hist_note + '.'])
            record.add_ordered_field(field_500_hist)
        if note:
            field_500_note = Field(tag='500', indicators=[' ', ' '],
                                   subfields=['a', note + '.'])
            record.add_ordered_field(field_500_note)
        if toc:
            # Terminate the contents note unless it already ends a sentence.
            if not toc.endswith('.') and not toc.endswith(
                    '?') and not toc.endswith('!'):
                toc += '.'
            field_505 = Field(tag='505', indicators=['0', ' '],
                              subfields=['a', toc])
            record.add_ordered_field(field_505)
        if story_arc:
            field_520 = Field(tag='520', indicators=[' ', ' '],
                              subfields=[
                                  'a', '"' + story_arc +
                                  '" -- Grand Comics Database.'
                              ])
            record.add_ordered_field(field_520)

        field_561 = Field(tag='561', indicators=[' ', ' '],
                          subfields=['a', source + '.'])
        record.add_ordered_field(field_561)

        for i in subj:
            if not i.endswith('.') and not i.endswith(')'):
                i += '.'
            field_650 = Field(tag='650', indicators=[' ', '0'],
                              subfields=['a', i])
            record.add_ordered_field(field_650)
        for i in genre:
            if not i.endswith('.') and not i.endswith(')'):
                i += '.'
            field_655 = Field(tag='655', indicators=[' ', '7'],
                              subfields=['a', i, '2', 'lcgft'])
            record.add_ordered_field(field_655)

        if characters:
            # Single 500 note listing all characters.
            subfield_content = 'Characters: '
            for i in characters[:-1]:
                subfield_content += i + '; '
            subfield_content += characters[-1] + '.'
            field_500 = Field(tag='500', indicators=[' ', ' '],
                              subfields=['a', subfield_content])
            record.add_ordered_field(field_500)
            # Create 600 and 650 for "Fictitious character" entries.
            # TODO check for existing 650 and don't add if a duplicate
            if any('Fictitious character' in c for c in characters):
                fic_chars = [
                    c for c in characters if 'Fictitious character' in c
                ]
                for i in fic_chars:
                    fic_char_name = re.sub(
                        r'^(.*?) (\(Fictitious character.*\))$', r'\g<1>', i)
                    fic_char_c = re.sub(
                        r'^(.*?) (\(Fictitious character.*\))$', r'\g<2>', i)
                    field_600 = Field(
                        tag='600', indicators=['0', '0'],
                        subfields=['a', fic_char_name, 'c', fic_char_c])
                    record.add_ordered_field(field_600)
                    field_650 = Field(tag='650', indicators=[' ', '0'],
                                      subfields=['a', i])
                    record.add_ordered_field(field_650)

        # 700 added entries for each remaining creator role.
        if penciller:
            for i in penciller:
                subfield_content = subfields_from_string_relator(
                    i, 'penciller')
                field_700 = Field(tag='700', indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)
        if inker:
            for i in inker:
                subfield_content = subfields_from_string_relator(i, 'inker')
                field_700 = Field(tag='700', indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)
        if colorist:
            for i in colorist:
                subfield_content = subfields_from_string_relator(i, 'colorist')
                field_700 = Field(tag='700', indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)
        if letterer:
            for i in letterer:
                subfield_content = subfields_from_string_relator(i, 'letterer')
                field_700 = Field(tag='700', indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)
        if cover_artist:
            for i in cover_artist:
                subfield_content = subfields_from_string_relator(
                    i, 'cover artist')
                field_700 = Field(tag='700', indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)
        if editor:
            for i in editor:
                subfield_content = subfields_from_string_relator(i, 'editor')
                field_700 = Field(tag='700', indicators=['1', ' '],
                                  subfields=subfield_content)
                record.add_ordered_field(field_700)

        subfields_773 = subfields_from_string(series)
        field_773 = Field(tag='773', indicators=['0', '8'],
                          subfields=subfields_773)
        record.add_ordered_field(field_773)

        if has_part_title:
            subfields_830 = [
                'a', lower_title[0] + '.', 'p', lower_title[1] + ' ;', 'v',
                lower_title[2] + '.'
            ]
        else:
            subfields_830 = [
                'a', lower_title[0] + ' ;', 'v', lower_title[1] + '.'
            ]
        field_830 = Field(tag='830', indicators=[' ', '0'],
                          subfields=subfields_830)
        record.add_ordered_field(field_830)

        outmarc.write(record.as_marc())
        print()
    outmarc.close()
# Writes the CSV summary and one MARC record per input row.
# NOTE(review): csv_f is not defined in this chunk — it must be opened by an
# earlier part of the script; confirm before running this section alone.
out = open('osti_recs.csv', 'w')
data = csv.writer(out)
data.writerow([
    'Title', 'Author', 'Date', 'Subjects', 'Description', 'OstiID', 'DOI',
    'Report Number', 'DOE Number', 'URL', ''
])
# BUG FIX: Record.as_marc() returns bytes on Python 3 — the MARC output
# file must be opened in binary mode ('wb'), not text mode ('w').
marcOut = open('ostimarc.mrc', 'wb')
marc = Record()
# NOTE(review): a single Record instance is shared by every iteration and is
# never passed to getMarc() here (unlike the sibling scripts, which call
# getMarc(osti, marc)); presumably getMarc mutates the module-level `marc` —
# confirm, otherwise each row rewrites the same (empty) record.
dc = '{http://purl.org/dc/elements/1.1/}'
dcq = '{http://purl.org/dc/terms/}'
for number in csv_f:
    ostiId = number[0]
    results = requests.get(
        'http://www.osti.gov/scitech/scitechxml?Identifier=' + ostiId)
    tree = etree.fromstring(results.content)
    for node in tree.iter():
        if node.tag == dc + 'ostiId':
            if node.text == ostiId:
                o = node.getparent()
                osti = o.getchildren()
                getRecs(osti)
                getMarc(osti)
                continue
    marcOut.write(marc.as_marc())
def to_marc(obj):
    """Render an open-access article dict as binary MARC21.

    *obj* is expected to carry 'title', 'z_authors', 'publisher', 'year',
    'journal_name', 'doi_url' and 'best_oa_location' keys (Unpaywall-style —
    confirm against the caller).  Returns the record as MARC21 bytes.
    """
    primary_author = format_author(
        obj['z_authors'][0], reverse=True) if obj['z_authors'] else None

    # Split the title on the module-level title_splitter pattern; with more
    # than two parts, the first two form 245 $a and the rest go in $b.
    split_title = [part.strip() for part in title_splitter.split(obj['title'])]
    if len(split_title) > 2:
        primary_title = f'{split_title[0]} {split_title[1]}'
        remainder_of_title = f'{" ".join(split_title[2:])} /'
    else:
        primary_title = f'{" ".join(split_title)} /'
        remainder_of_title = ''

    record = Record()
    record.leader.type_of_record = 'a'
    record.leader.coding_scheme = 'a'
    # BUG FIX: bibliographic_level and cataloging_form were set on the Record
    # object itself, which merely creates inert attributes; they are Leader
    # properties (like the two assignments above) and must be set on
    # record.leader to affect the serialized output.
    record.leader.bibliographic_level = 'm'
    record.leader.cataloging_form = 'a'

    if primary_author:
        record.add_field(
            Field(tag='100',
                  indicators=['0', ' '],
                  subfields=['a', primary_author]))
    if remainder_of_title:
        record.add_field(
            Field(tag='245',
                  indicators=['0', '0'],
                  subfields=[
                      'a', primary_title, 'b', remainder_of_title, 'c',
                      format_authors(obj['z_authors'])
                  ]))
    else:
        record.add_field(
            Field(tag='245',
                  indicators=['0', '0'],
                  subfields=[
                      'a', f"{obj['title']} /", 'c',
                      format_authors(obj['z_authors'])
                  ]))
    record.add_field(
        Field(tag='260',
              indicators=[' ', ' '],
              subfields=['b', obj['publisher'], 'c', str(obj['year'])]))
    record.add_field(
        Field(tag='500',
              indicators=[' ', ' '],
              subfields=[
                  'a', f"Article from {obj['journal_name']}.",
              ]))
    # 856s: direct PDF link, then the DOI link.
    record.add_field(
        Field(tag='856',
              indicators=['4', '0'],
              subfields=[
                  'u', obj['best_oa_location']['url'], 'y',
                  'View article as PDF'
              ]))
    record.add_field(
        Field(tag='856',
              indicators=[' ', ' '],
              subfields=['u', obj['doi_url'], 'y', 'DOI']))
    return record.as_marc()
def output_to_marc(output_text, doc_year, doc_num, non_filing, doc_title,
                   doc_iss_year, num_pages, welcomes, this_res, date_field_008,
                   time_field_590, field_710, time_field_856, url_entry):
    '''
    Outputs data in MARC format via PyMARC. Field 999 includes a random
    number generator to get around the problem of the LMS needing unique
    item ids.
    '''
    record = Record()

    # Control-style fields first.
    # NOTE(review): tag '000' is normally the record leader, not a field;
    # pymarc will serialize this as a control field — confirm the target LMS
    # expects it this way.
    record.add_field(Field(tag='000', data='im 0c'))
    record.add_field(Field(tag='008', data=f'{date_field_008}n 000 0 eng u'))

    # Remaining fields are ordinary data fields; drive them from a table so
    # the record structure is visible at a glance. Order matches output order.
    field_specs = [
        ('080', [' ', ' '], ['a', f'657 {doc_year}{doc_num}']),
        ('110', ['2', ' '], ['a', 'COMPANY']),
        ('245', ['1', f'{str(non_filing)}'], ['a', f'{doc_title}']),
        ('260', [' ', ' '],
         ['a', 'London : ', 'b', 'COMPANY, ', 'c', f'{doc_iss_year}']),
        ('300', [' ', ' '], ['a', f'{str(num_pages)} pages']),
        ('336', [' ', ' '], ['a', 'text |2rdacontent']),
        ('337', [' ', ' '], ['a', 'unmediated |2rdamedia']),
        ('338', [' ', ' '], ['a', 'volume |2rdacarrier']),
        ('490', [' ', ' '], ['a', 'COMPANY ; ', 'v', f'{doc_num}/{doc_year}']),
        ('500', [' ', ' '], ['a', f'"{welcomes}... {this_res}"']),
        ('590', [' ', ' '], ['a', f'{time_field_590}']),
        ('650', [' ', ' '], ['a', '']),
        ('650', [' ', ' '], ['a', '']),
        ('710', ['2', ' '], ['a', f'{field_710}']),
        ('710', ['2', ' '], ['a', 'COMPANY']),
        ('830', [' ', ' '], ['a', 'COMPANY']),
        ('856', [' ', ' '],
         ['a', f'In {time_field_856} available at : ', 'u', f'{url_entry}']),
        # Two 999 holdings entries; the random item id keeps them unique.
        ('999', [' ', ' '],
         ['a', f'087 {doc_year}{doc_num}', 'w', 'UDC', 'c', '1',
          'i', f'{random.randint(1000000000,9999999999)}', 'l', 'STORE',
          'm', 'COMPANY-LIB', 'r', 'Y', 's', 'Y', 't', 'REFERENCE']),
        ('999', [' ', ' '],
         ['a', f'657 {doc_year}{doc_num}', 'w', 'UDC', 'c', '1',
          'i', f'{random.randint(1000000000,9999999999)}', 'l', 'STORE',
          'm', 'COMPANY-LIB', 'r', 'Y', 's', 'Y', 't', 'LENDING']),
    ]
    for tag, indicators, subfields in field_specs:
        record.add_field(
            Field(tag=tag, indicators=indicators, subfields=subfields))

    # Write MARC file to disk.
    with open(f'.\\marc_files\\marc_{doc_year}-{doc_num}.mrc', 'wb') as out:
        out.write(record.as_marc())
def main(arglist):
    """Backfill DOIs for a bepress collection into Sierra records.

    Reads DOIs from the bepress spreadsheet, queries the Sierra REST API for
    records in *setname* that lack a DOI, matches them by bepress URL, and
    emits: short MARC overlay records (shortrecs.mrc), a list of OCLC search
    keys (searchkeys.txt), and a review spreadsheet (OCLC Changes.xls).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('setname',
                        help='bepress collection setname (e.g., diss201019)')
    parser.add_argument(
        'input',
        help=
        'path to bepress spreadsheet (containing DOIs) in "Excel 97-2003 Workbook (.xls)" format'
    )
    # parser.add_argument('output', help='save directory')
    # parser.add_argument('--production', help='production DOIs', action='store_true')
    args = parser.parse_args(arglist)

    # Read config file and parse setnames into lists by category.
    config = configparser.ConfigParser(allow_no_value=True)
    config.read('local_settings.ini')
    etd_setnames = []
    for i in config.items('ETD'):
        etd_setnames.append(i[0])
    # Add additional categories here
    setname = args.setname
    input = Path(args.input)

    # jsonmerge setup: concatenate 'entries' lists across paged responses.
    schema = {"properties": {"entries": {"mergeStrategy": "append"}}}
    merger = Merger(schema)

    # Timestamp output.
    date_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(date_time)
    print()
    print('------------------------------------------------------------')
    print('------------------------------------------------------------')

    # Read Bepress spreadsheet.
    # TODO check that setname matches spreadsheet?
    book_in = xlrd.open_workbook(str(input))
    sheet1 = book_in.sheet_by_index(0)  # get first sheet
    sheet1_col_headers = sheet1.row_values(0)
    try:
        doi_col_index = sheet1_col_headers.index('doi')
    except ValueError:
        print('DOI field not found in bepress metadata')
    url_col_index = sheet1_col_headers.index('calc_url')

    # Read URLs and DOIs from spreadsheet.
    bepress_data = {}
    for row in range(1, sheet1.nrows):
        bepress_url = sheet1.cell(row, url_col_index).value
        bepress_doi = sheet1.cell(row, doi_col_index).value
        bepress_data[bepress_url] = bepress_doi
    print(bepress_data)

    # Read query criteria from file, inserting setname and starting bib number.
    with open('query_setname_no_doi_bib_limiter.json', 'r') as file:
        data = file.read().replace('SETNAME',
                                   setname).replace('bxxxxxxx', 'b1000000')

    # Authenticate to get token, using Client Credentials Grant
    # https://techdocs.iii.com/sierraapi/Content/zReference/authClient.htm
    key_secret = config.get('Sierra API', 'key') + ':' + config.get(
        'Sierra API', 'secret')
    key_secret_encoded = base64.b64encode(
        key_secret.encode('UTF-8')).decode('UTF-8')
    headers = {
        'Authorization': 'Basic ' + key_secret_encoded,
        'Content-Type': 'application/x-www-form-urlencoded'
    }
    response = requests.request(
        'POST',
        'https://catalog.lib.jmu.edu/iii/sierra-api/v5/token',
        headers=headers)
    j = response.json()
    token = j['access_token']
    auth = 'Bearer ' + token
    headers = {'Accept': 'application/json', 'Authorization': auth}

    # Search Sierra for records with URL+setname and no DOI in 024 field.
    limit = 2000
    response = requests.request(
        'POST',
        'https://catalog.lib.jmu.edu/iii/sierra-api/v5/bibs/query?offset=0&limit='
        + str(limit),
        headers=headers,
        data=data)
    j = response.json()
    records_returned = j['total']
    j_all = j
    if j['total'] == 0:
        print('No ' + setname + ' records in Sierra are missing DOIs')
    else:
        # If limit was reached, repeat until all record IDs are retrieved.
        while j['total'] == limit:
            last_record_id = j['entries'][-1:][0]['link'].replace(
                'https://catalog.lib.jmu.edu/iii/sierra-api/v5/bibs/', '')
            next_record_id = str(int(last_record_id) + 1)
            # Re-read query criteria, resuming from the next bib number.
            with open('query_setname_no_doi_bib_limiter.json', 'r') as file:
                data = file.read().replace('SETNAME', setname).replace(
                    'bxxxxxxx', 'b' + next_record_id)
            response = requests.request(
                'POST',
                'https://catalog.lib.jmu.edu/iii/sierra-api/v5/bibs/query?offset=0&limit='
                + str(limit),
                headers=headers,
                data=data)
            j = response.json()
            records_returned += j['total']
            # BUG FIX: records_returned is an int; concatenating it to a str
            # raised TypeError whenever this pagination branch executed.
            print('Found ' + str(records_returned) + ' ' + setname +
                  ' Sierra records that are missing DOIs')
            # Add new response to previous ones.
            j_all = merger.merge(j_all, j)
            j_all['total'] = records_returned

    # Put bib IDs in list.
    bib_id_list = []
    for i in j_all['entries']:
        bib_id = i['link'].replace(
            'https://catalog.lib.jmu.edu/iii/sierra-api/v5/bibs/', '')
        bib_id_list.append(bib_id)

    # Get bib varField info for all records, 500 bib IDs at a time.
    fields = 'varFields'
    j_data_all = {}
    records_returned_data = 0
    chunk_size = 499
    for i in range(0, len(bib_id_list), chunk_size):
        bib_id_list_partial = bib_id_list[i:i + chunk_size]
        querystring = {
            'id': ','.join(bib_id_list_partial),
            'fields': fields,
            'limit': limit
        }
        response = requests.request(
            'GET',
            'https://catalog.lib.jmu.edu/iii/sierra-api/v5/bibs/',
            headers=headers,
            params=querystring)
        j_data = response.json()
        records_returned_data += j_data['total']
        j_data_all = merger.merge(j_data_all, j_data)
        j_data_all['total'] = records_returned_data

    # Parse varField data for OCLC number and bepress URL(s).
    sierra_data = {}
    for i in j_data_all['entries']:
        id = i['id']
        var_fields = i['varFields']
        sierra_url = ''
        for v in var_fields:
            if 'marcTag' in v:
                if '001' in v['marcTag']:
                    oclc_num = v['content']
                if '856' in v['marcTag']:
                    for s in v['subfields']:
                        if 'u' in s['tag']:
                            if 'commons.lib.jmu.edu' in s['content']:
                                # Multiple matching URLs are ';'-joined.
                                if sierra_url:
                                    sierra_url += ';'
                                sierra_url += s['content']
        # Turn bib id into bib number (Sierra mod-11 check digit).
        bib_reversed = id[::-1]
        total = 0
        for i, digit, in enumerate(bib_reversed):
            prod = (i + 2) * int(digit)
            total += prod
        checkdigit = total % 11
        if checkdigit == 10:
            checkdigit = 'x'
        bib_num = 'b' + id + str(checkdigit)
        sierra_data[bib_num] = (oclc_num, sierra_url)
    print(sierra_data)

    # Create short MARC records with bib number and DOI fields, and create
    # spreadsheet with OCLC numbers and DOI fields.
    outmarc = open('shortrecs.mrc', 'wb')
    outtext = open('searchkeys.txt', 'w')
    outbook = xlwt.Workbook()
    outsheet = outbook.add_sheet('Sheet 1')
    col_headers = ['OCLC Number', 'Bib Number', '024', '856']
    for x, y in enumerate(col_headers, 0):
        outsheet.write(0, x, y)
    outbook.save('OCLC Changes.xls')
    for i, j in enumerate(sierra_data, 1):
        print(i)
        print(j)
        # Get DOI from spreadsheet data.
        doi_url = bepress_data[sierra_data[j][1]]
        print(doi_url)
        doi = doi_url.replace('https://doi.org/', '')
        # NOTE(review): the literal backslash before '$' looks unintended
        # ('7$a' is the usual display form) — confirm downstream consumer.
        spreadsheet_024 = '7\$a' + doi + '$2doi'
        # spreadsheet_856 = '40$zFull-text of dissertation on the Internet$u' + doi_url
        # Get OCLC number.
        oclcnum = sierra_data[j][0]
        field_907 = Field(tag='907', indicators=[' ', ' '],
                          subfields=['a', '.' + j])
        field_024 = Field(tag='024', indicators=['7', ' '],
                          subfields=['a', doi, '2', 'doi'])
        field_856 = Field(tag='856', indicators=['4', '0'],
                          subfields=[
                              'z',
                              'Full-text of dissertation on the Internet',
                              'u', doi_url
                          ])
        field_506_1 = Field(
            tag='506', indicators=['0', ' '],
            subfields=[
                'a',
                'James Madison University Libraries is providing a metadata record and hyperlink to this full-text resource.',
                'f', 'Unrestricted online access', '2', 'star'
            ])
        field_506_2 = Field(tag='506', indicators=['0', ' '],
                            subfields=[
                                'a', 'Open access content.', 'f',
                                'Open access content', '2', 'star'
                            ])
        field_540 = Field(
            tag='540', indicators=[' ', ' '],
            subfields=[
                'a',
                'This work is licensed under a Creative Commons Attribution-NonCommercial-No Derivative Works 4.0 License.',
                'u',
                'https://creativecommons.org/licenses/by-nc-nd/4.0/legalcode'
            ])
        record = Record()
        record.add_ordered_field(field_907)
        record.add_ordered_field(field_024)
        record.add_ordered_field(field_856)
        record.add_ordered_field(field_506_1)
        record.add_ordered_field(field_506_2)
        record.add_ordered_field(field_540)
        outmarc.write(record.as_marc())
        outtext.write(oclcnum + '\n')
        outsheet.write(i, 0, oclcnum)
        outsheet.write(i, 1, j)
        outsheet.write(i, 2, spreadsheet_024)
        outsheet.write(i, 3, doi_url)
        outbook.save('OCLC Changes.xls')
    outmarc.close()
    outtext.close()
def main(arglist):
    """Convert a comic-book inventory spreadsheet into MARC21 records.

    Reads the spreadsheet named on the command line (xlrd), maps each data
    row to a pymarc Record, and appends every record to 'records.mrc'.

    :param arglist: argv-style list of command-line arguments
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('input', help='path to spreadsheet')
    # parser.add_argument('output', help='save directory')
    args = parser.parse_args(arglist)
    input = Path(args.input)  # NOTE(review): shadows the `input` builtin
    # Read spreadsheet
    book_in = xlrd.open_workbook(str(input))
    sheet = book_in.sheet_by_index(0)  # get first sheet
    # Locate every column by its header text (row 0); .index() raises
    # ValueError if a header is missing or renamed.
    col_headers = sheet.row_values(0)
    title_col = col_headers.index('Title')
    subj_person_col = col_headers.index('Subject_Person')
    subj_topical_col = col_headers.index('Subject_Topical')
    subj_place_col = col_headers.index('Subject_Place')
    subj_corp_col = col_headers.index('Subject_Jurisdictional')
    genre_col = col_headers.index('Genre')
    pages_col = col_headers.index('Pages')
    pub_date_col = col_headers.index('Date')  # previously Publication Date
    copy_date_col = col_headers.index('Copyright Date')
    pub_place_col = col_headers.index('Pub_Place')
    publisher_col = col_headers.index('Publisher')
    edition_col = col_headers.index('Edition')
    source_col = col_headers.index('Source')
    # source_acq_col = col_headers.index('Source of Acquisition')
    writer_col = col_headers.index('Writer')
    penciller_col = col_headers.index('Penciller')
    inker_col = col_headers.index('Inker')
    colorist_col = col_headers.index('Colorist')
    letterer_col = col_headers.index('Letterer')
    cover_artist_col = col_headers.index('Cover Artist')
    editor_col = col_headers.index('Editor')
    # hist_note_col = col_headers.index('Historical Note')
    notes_col = col_headers.index('Note')
    characters_col = col_headers.index('Characters')
    synopsis_col = col_headers.index('Story Arc')
    toc_col = col_headers.index('Table of Contents')
    in_series_col = col_headers.index('Is Part of Series')
    black_creators_col = col_headers.index('Black Creators (MARC 590)')
    black_chars_col = col_headers.index('Black Characters (MARC 590)')
    isbn_col = col_headers.index('ISBN')
    color_col = col_headers.index('Color?')
    series_note_col = col_headers.index('Series Note')
    copyright_holder_col = col_headers.index('Copyright holder')
    gcd_uri_col = col_headers.index('Grand Comics Database')
    outmarc = open('records.mrc', 'wb')
    # Boilerplate fields
    # (shared Field objects reused on every record written below)
    field_ldr = '00000nam a2200000Ii 4500'
    field_040 = Field(tag='040', indicators=[' ', ' '], subfields=[
        'a', 'VMC',
        'b', 'eng',
        'e', 'rda',
        'c', 'VMC'])
    field_049 = Field(tag='049', indicators=[' ', ' '], subfields=[
        'a', 'VMCS'])
    field_336_text = Field(tag='336', indicators=[' ', ' '], subfields=[
        'a', 'text',
        'b', 'txt',
        '2', 'rdacontent'])
    field_336_image = Field(tag='336', indicators=[' ', ' '], subfields=[
        'a', 'still image',
        'b', 'sti',
        '2', 'rdacontent'])
    field_337 = Field(tag='337', indicators=[' ', ' '], subfields=[
        'a', 'unmediated',
        'b', 'n',
        '2', 'rdamedia'])
    field_338 = Field(tag='338', indicators=[' ', ' '], subfields=[
        'a', 'volume',
        'b', 'nc',
        '2', 'rdacarrier'])
    field_380 = Field(tag='380', indicators=[' ', ' '], subfields=[
        'a', 'Comic books and graphic novels.'])
    field_506 = Field(tag='506', indicators=['1', ' '], subfields=[
        'a', 'Collection open to research. Researchers must register and agree to copyright and privacy laws before using this collection. Please contact Research Services staff before visiting the James Madison University Special Collections Library to use this collection.'])
    field_542 = Field(tag='542', indicators=[' ', ' '], subfields=[
        'a', 'Copyright not evaluated',
        'u', 'http://rightsstatements.org/vocab/CNE/1.0/'])
    field_588 = Field(tag='588', indicators=['0', ' '], subfields=[
        'a', 'Description based on indicia and Grand Comics Database.'])
    field_989 = Field(tag='989', indicators=[' ', ' '], subfields=[
        'a', 'PN6728'])
    # One MARC record per data row.
    for row in range(1, sheet.nrows):
        print('Record ' + str(row))
        title = sheet.cell(row, title_col).value
        print(title)
        # Repeatable cells hold ';'-separated lists.
        subj_person = sheet.cell(row, subj_person_col).value
        if subj_person:
            subj_person = [x.strip() for x in subj_person.split(';')]
        subj_topical = sheet.cell(row, subj_topical_col).value
        if subj_topical:
            subj_topical = [x.strip() for x in subj_topical.split(';')]
        subj_place = sheet.cell(row, subj_place_col).value
        if subj_place:
            subj_place = [x.strip() for x in subj_place.split(';')]
        subj_corp = sheet.cell(row, subj_corp_col).value
        if subj_corp:
            subj_corp = [x.strip() for x in subj_corp.split(';')]
        genre = sheet.cell(row, genre_col).value
        genre = [x.strip() for x in genre.split(';')]
        pages = str(sheet.cell(row, pages_col).value)
        pub_date = str(sheet.cell(row, pub_date_col).value)
        pub_date_str = date_from_string(pub_date)
        pub_date_year = year_from_date(pub_date_str)
        copy_date = ''
        copy_date = str(sheet.cell(row, copy_date_col).value)
        copy_date_str = date_from_string(copy_date)
        copy_date_year = year_from_date(copy_date_str)
        pub_place = sheet.cell(row, pub_place_col).value
        publisher = sheet.cell(row, publisher_col).value
        edition = sheet.cell(row, edition_col).value
        source = sheet.cell(row, source_col).value
        # source_acq = sheet.cell(row, source_acq_col).value
        characters = sheet.cell(row, characters_col).value
        black_creators = sheet.cell(row, black_creators_col).value
        if black_creators:
            black_creators = [x.strip() for x in black_creators.split(';')]
        black_chars = sheet.cell(row, black_chars_col).value
        if black_chars:
            black_chars = [x.strip() for x in black_chars.split(';')]
        isbn = str(sheet.cell(row, isbn_col).value)
        color = sheet.cell(row, color_col).value
        series_note = sheet.cell(row, series_note_col).value
        gcd_uri = sheet.cell(row, gcd_uri_col).value
        country_code = country_code_from_pub_place(pub_place)
        copyright_holder = []
        if sheet.cell(row, copyright_holder_col).value:
            copyright_holder = sheet.cell(row, copyright_holder_col).value
            copyright_holder = [x.strip() for x in copyright_holder.split(';')]
        writer = []
        if sheet.cell(row, writer_col).value:
            writer = sheet.cell(row, writer_col).value
            writer = [x.strip() for x in writer.split(';')]
        penciller = []
        if sheet.cell(row, penciller_col).value:
            penciller = sheet.cell(row, penciller_col).value
            penciller = [x.strip() for x in penciller.split(';')]
        inker = []
        if sheet.cell(row, inker_col).value:
            inker = sheet.cell(row, inker_col).value
            inker = [x.strip() for x in inker.split(';')]
        colorist = []
        if sheet.cell(row, colorist_col).value:
            colorist = sheet.cell(row, colorist_col).value
            colorist = [x.strip() for x in colorist.split(';')]
        letterer = []
        if sheet.cell(row, letterer_col).value:
            letterer = sheet.cell(row, letterer_col).value
            letterer = [x.strip() for x in letterer.split(';')]
        cover_artist = []
        if sheet.cell(row, cover_artist_col).value:
            cover_artist = sheet.cell(row, cover_artist_col).value
            cover_artist = [x.strip() for x in cover_artist.split(';')]
        editor = []
        if sheet.cell(row, editor_col).value:
            editor = sheet.cell(row, editor_col).value
            editor = [x.strip() for x in editor.split(';')]
        # hist_note = []
        # if sheet.cell(row, hist_note_col).value:
        #     hist_note = sheet.cell(row, hist_note_col).value
        notes = []
        if sheet.cell(row, notes_col).value:
            notes = sheet.cell(row, notes_col).value
        synopsis = []
        if sheet.cell(row, synopsis_col).value:
            synopsis = sheet.cell(row, synopsis_col).value
        toc = []
        if sheet.cell(row, toc_col).value:
            toc = sheet.cell(row, toc_col).value
        in_series = sheet.cell(row, in_series_col).value
        # Build {contributor name: [role, ...]} for the 1XX/7XX fields.
        contribs = {}
        if copyright_holder:
            for i in copyright_holder:
                contribs.update({i: ['copyright holder']})
        else:
            # NOTE(review): grouping of the creator-role blocks under this
            # `else` is inferred; original line breaks were lost -- confirm.
            if writer:
                for i in writer:
                    contribs.update({i: ['writer']})
            if penciller:
                for i in penciller:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('penciller')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['penciller']})
            if inker:
                for i in inker:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('inker')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['inker']})
            if colorist:
                for i in colorist:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('colorist')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['colorist']})
            if letterer:
                for i in letterer:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('letterer')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['letterer']})
            if cover_artist:
                for i in cover_artist:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('cover artist')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['cover artist']})
            if editor:
                for i in editor:
                    if i in contribs:
                        role_list = contribs[i]
                        role_list.append('editor')
                        contribs.update({i: role_list})
                    else:
                        contribs.update({i: ['editor']})
        record = Record()
        # Add boilerplate fields
        record.leader = field_ldr
        record.add_ordered_field(field_040)
        record.add_ordered_field(field_049)
        record.add_ordered_field(field_336_text)
        record.add_ordered_field(field_336_image)
        record.add_ordered_field(field_337)
        record.add_ordered_field(field_338)
        record.add_ordered_field(field_380)
        record.add_ordered_field(field_506)
        record.add_ordered_field(field_542)
        record.add_ordered_field(field_588)
        record.add_ordered_field(field_989)
        # Add other fields
        # 008: 't' when a distinct copyright date exists, else 's'.
        today = datetime.today().strftime('%y%m%d')
        if copy_date:
            data_008 = today + 't' + pub_date_year + copy_date_year + country_code + 'a 6 000 1 eng d'
        else:
            data_008 = today + 's' + pub_date_year + ' ' + country_code + 'a 6 000 1 eng d'
        field_008 = Field(tag='008', data=data_008)
        record.add_ordered_field(field_008)
        if isbn:
            field_020 = Field(tag='020', indicators=[' ', ' '], subfields=[
                'a', isbn])
            record.add_ordered_field(field_020)
        # Local call number (099) derived from the parsed title subfields.
        subfields_099 = subfields_from_string(title)
        if 'b' in subfields_099:
            subfields_099.pop(3)
            subfields_099.pop(2)
        if 'n' in subfields_099:
            subfields_099[subfields_099.index('n')] = 'a'
        if subfields_099[1].endswith(',') or subfields_099[1].endswith(':'):
            subfields_099[1] = subfields_099[1][:-1]
        field_099 = Field(tag='099', indicators=[' ', '9'], subfields=subfields_099)
        record.add_ordered_field(field_099)
        for i in contribs:
            if i == list(contribs.keys())[0] and 'copyright holder' in contribs[i]:
                # first contributor is copyright holder
                subfield_content = subfields_from_string_relator(i, contribs[i])
                field_110 = Field(tag='110', indicators=['2', ' '], subfields=subfield_content)
                record.add_ordered_field(field_110)
            elif i == list(contribs.keys())[0] and 'writer' in contribs[i]:
                # first contributor is a writer
                subfield_content = subfields_from_string_relator(i, contribs[i])
                field_100 = Field(tag='100', indicators=['1', ' '], subfields=subfield_content)
                record.add_ordered_field(field_100)
            else:
                subfield_content = subfields_from_string_relator(i, contribs[i])
                # A comma in $a implies an inverted personal name -> 700;
                # otherwise treat the heading as corporate -> 710.
                if ',' not in subfield_content[1]:
                    field_710 = Field(tag='710', indicators=['2', ' '], subfields=subfield_content)
                    record.add_ordered_field(field_710)
                else:
                    field_700 = Field(tag='700', indicators=['1', ' '], subfields=subfield_content)
                    record.add_ordered_field(field_700)
        # 245 indicator 1: title added entry when a 1XX was generated above.
        if contribs and ('writer' in contribs[list(contribs.keys())[0]] or 'copyright holder' in contribs[list(contribs.keys())[0]]):
            f245_ind1 = 1
        else:
            f245_ind1 = 0
        # 245 indicator 2: nonfiling characters for a leading article.
        f245_ind2 = 0
        if str.startswith(title, 'The '):
            f245_ind2 = 4
        elif str.startswith(title, 'An '):
            f245_ind2 = 3
        elif str.startswith(title, 'A '):
            f245_ind2 = 2
        subfields_245 = subfields_from_string(title)
        # If writer exists, add $c for first writer
        if writer:
            subfields_245[-1] = subfields_245[-1] + ' /'
            subfields_245.append('c')
            subfields_245.append(name_direct_order(subfields_from_string(writer[0])[1]) + ', writer.')
        else:
            # If no writer, add 245 ending punctuation
            subfields_245[-1] = subfields_245[-1] + '.'
        # NOTE(review): these indicators are ints, unlike the str indicators
        # used everywhere else -- confirm pymarc serializes them as intended.
        field_245 = Field(tag='245', indicators=[f245_ind1, f245_ind2], subfields=subfields_245)
        record.add_ordered_field(field_245)
        if edition:
            if not edition.endswith('.'):
                edition += '.'
            field_250 = Field(tag='250', indicators=[' ', ' '], subfields=[
                'a', edition])
            record.add_ordered_field(field_250)
        field_264_1 = Field(tag='264', indicators=[' ', '1'], subfields=[
            'a', pub_place + ' :',
            'b', publisher + ',',
            'c', pub_date_str + '.'])
        record.add_ordered_field(field_264_1)
        if copy_date:
            field_264_4 = Field(tag='264', indicators=[' ', '4'], subfields=[
                'c', '©' + copy_date_str])
            record.add_ordered_field(field_264_4)
        # NOTE(review): if 'Color?' is neither 'yes' nor 'no', subfields_300
        # is unbound (or stale from a previous row).
        if color == 'yes':
            subfields_300 = [
                'a', pages + ' pages :',
                'b', 'chiefly color illustrations.']
        elif color == 'no':
            subfields_300 = [
                'a', pages + ' pages :',
                'b', 'black and white illustrations.']
        field_300 = Field(tag='300', indicators=[' ', ' '], subfields=subfields_300)
        record.add_ordered_field(field_300)
        if title_to_series(title):
            subfields_490 = title_to_series(title)
            field_490 = Field(tag='490', indicators=['1', ' '], subfields=subfields_490)
            record.add_ordered_field(field_490)
        if series_note:
            if not series_note.endswith('.'):
                series_note += '.'
            field_490_series_note = Field(tag='490', indicators=['1', ' '], subfields=['a', series_note])
            record.add_ordered_field(field_490_series_note)
        # if hist_note:
        #     field_500_hist = Field(tag='500',
        #                            indicators=[' ', ' '],
        #                            subfields=[
        #                                'a', hist_note + '.'])
        #     record.add_ordered_field(field_500_hist)
        if notes:
            field_500_notes = Field(tag='500', indicators=[' ', ' '], subfields=[
                'a', notes + '.'])
            record.add_ordered_field(field_500_notes)
        if toc:
            if not toc.endswith('.') and not toc.endswith('?') and not toc.endswith('!'):
                toc += '.'
            field_505 = Field(tag='505', indicators=['0', ' '], subfields=[
                'a', toc])
            record.add_ordered_field(field_505)
        if synopsis:
            field_520 = Field(tag='520', indicators=[' ', ' '], subfields=[
                'a', synopsis])
            record.add_ordered_field(field_520)
        if black_creators:
            for i in black_creators:
                if not i.endswith('.'):
                    i += '.'
                field_590_creators = Field(tag='590', indicators=[' ', ' '], subfields=[
                    'a', i])
                record.add_ordered_field(field_590_creators)
        if black_chars:
            for i in black_chars:
                if not i.endswith('.'):
                    i += '.'
                field_590_chars = Field(tag='590', indicators=[' ', ' '], subfields=[
                    'a', i])
                record.add_ordered_field(field_590_chars)
        if source:
            field_541_source = Field(tag='541', indicators=[' ', ' '], subfields=[
                'a', source + '.'])
            record.add_ordered_field(field_541_source)
        # if source_acq:
        #     field_541_source_acq = Field(tag='541',
        #                                  indicators=[' ', ' '],
        #                                  subfields=[
        #                                      'a', source_acq + '.'])
        #     record.add_ordered_field(field_541_source_acq)
        if subj_person:
            for i in subj_person:
                i_subfields = subfields_from_string(i)
                # Set first indicator based on presence of comma in $a
                # NOTE(review): if $a is absent, field_600_ind1 is unbound
                # (or stale from a previous heading).
                if 'a' in i_subfields:
                    if ',' in i_subfields[i_subfields.index('a') + 1]:
                        field_600_ind1 = '1'
                    else:
                        field_600_ind1 = '0'
                # Terminal punctuation goes on the last subfield before $1.
                if '1' in i_subfields:
                    last_except_subf1 = i_subfields.index('1') - 1
                else:
                    last_except_subf1 = len(i_subfields) - 1
                if i_subfields[last_except_subf1].endswith(','):
                    i_subfields[last_except_subf1] = re.sub(r'^(.*),$', r'\g<1>.', i_subfields[last_except_subf1])
                if not i_subfields[last_except_subf1].endswith('.') and not i_subfields[last_except_subf1].endswith(')') and not i_subfields[last_except_subf1].endswith('?') and not i_subfields[last_except_subf1].endswith('-'):
                    i_subfields[last_except_subf1] += '.'
                field_600 = Field(tag='600', indicators=[field_600_ind1, '0'], subfields=i_subfields)
                record.add_ordered_field(field_600)
        if subj_topical:
            for i in subj_topical:
                i_subfields = subfields_from_string(i)
                if not i_subfields[-1].endswith('.') and not i_subfields[-1].endswith(')'):
                    i_subfields[-1] += '.'
                field_650 = Field(tag='650', indicators=[' ', '0'], subfields=i_subfields)
                record.add_ordered_field(field_650)
        if subj_place:
            for i in subj_place:
                i_subfields = subfields_from_string(i)
                if not i_subfields[-1].endswith('.') and not i_subfields[-1].endswith(')'):
                    i_subfields[-1] += '.'
                field_651 = Field(tag='651', indicators=[' ', '0'], subfields=i_subfields)
                record.add_ordered_field(field_651)
        if subj_corp:
            for i in subj_corp:
                i_subfields = subfields_from_string(i)
                if not i_subfields[-1].endswith('.') and not i_subfields[-1].endswith(')'):
                    i_subfields[-1] += '.'
                field_610 = Field(tag='610', indicators=['1', '0'], subfields=i_subfields)
                record.add_ordered_field(field_610)
        if genre:
            for i in genre:
                if not i.endswith('.') and not i.endswith(')'):
                    i += '.'
                field_655 = Field(tag='655', indicators=[' ', '7'], subfields=[
                    'a', i,
                    '2', 'lcgft'])
                record.add_ordered_field(field_655)
        if characters:
            field_500_chars = Field(tag='500', indicators=[' ', ' '], subfields=[
                'a', characters])
            record.add_ordered_field(field_500_chars)
        if gcd_uri:
            # 758: link to the Grand Comics Database work entity.
            title_758 = subfields_from_string(title)[1]
            if title_758.endswith(',') or title_758.endswith(':'):
                title_758 = title_758[:-1]
            field_758 = Field(tag='758', indicators=[' ', ' '], subfields=[
                '4', 'http://rdaregistry.info/Elements/m/P30135',
                'i', 'Has work manifested:',
                'a', title_758,
                '1', gcd_uri])
            record.add_ordered_field(field_758)
        if in_series:
            subfields_773 = subfields_from_string(in_series)
            field_773 = Field(tag='773', indicators=['0', '8'], subfields=subfields_773)
            record.add_ordered_field(field_773)
        # Holdings (852): location boilerplate plus call number from the 099.
        subfields_852 = [
            'b', 'CARRIER',
            'c', 'carrspec']
        if len(subfields_099) == 4:
            subfields_852.append('h')
            subfields_852.append(subfields_099[1])
            subfields_852.append('i')
            subfields_852.append(subfields_099[3])
        if len(subfields_099) == 2:
            subfields_852.append('h')
            subfields_852.append(subfields_099[1])
        if edition:
            if edition.endswith('.'):
                edition = edition[:-1]
            subfields_852.append('z')
            subfields_852.append(edition)
        field_852 = Field(tag='852', indicators=['8', ' '], subfields=subfields_852)
        record.add_ordered_field(field_852)
        outmarc.write(record.as_marc())
        print()
    outmarc.close()
def book_to_mark21_file(book, owner, xml=False):
    """Build a MARC21 record for *book* and store it as a file.

    :param book: Book model instance being exported
    :param owner: user who will own the generated file
    :param xml: when True emit MARCXML instead of binary MARC21
    :return: primary key of the stored file object
    """
    # New record
    record = Record()
    # Number and value explanation : http://www.loc.gov/marc/bibliographic/bdleader.html
    # Adding Leader tags
    leader_chars = list(record.leader)
    leader_chars[5] = 'n'  # New
    leader_chars[6] = 'a'  # For manuscript file use 't'
    leader_chars[7] = 'm'  # Monograph
    leader_chars[9] = 'a'
    leader_chars[19] = '#'
    record.leader = "".join(leader_chars)

    # Category of material - Text
    record.add_field(record_control_field('007', 't'))

    # Languages
    languages = book.languages.all()
    if languages:
        for lang in languages:
            record.add_field(record_control_field('008', lang.code))
    else:
        record.add_field(record_control_field('008', 'eng'))

    # ISBN - International Standard Book Number
    isbn = models.Identifier.objects.filter(book=book).exclude(identifier='pub_id').exclude(identifier='urn').exclude(identifier='doi')
    for identifier in isbn:
        if book.book_type:
            record.add_field(record_field('020', ['#', '#'], ['a', str(identifier.value) + ' ' + book.book_type]))
        else:
            record.add_field(record_field('020', ['#', '#'], ['a', str(identifier.value)]))

    # Source of acquisition
    try:
        base_url = models.Setting.objects.get(group__name='general', name='base_url').value
    except Exception:  # narrowed from a bare except; fall back to a dev default
        base_url = 'localhost:8000'
    book_url = 'http://%s/editor/submission/%s/' % (base_url, book.id)
    record.add_field(record_field('030', ['#', '#'], ['b', book_url]))

    # Main entry - Personal name
    authors = book.author.all()
    author_names = ''
    for author in authors:
        # BUG FIX: this previously assigned to a misspelled variable
        # ('auhtor_names'), so author_names stayed empty and the 245 $c
        # statement of responsibility was always blank.
        author_names = author_names + author.full_name() + ' '
        name = author.last_name + ', ' + author.first_name
        if author.middle_name:
            name = name + ' ' + author.middle_name[:1] + '.'
        record.add_field(record_field('100', ['1', '#'], ['a', name]))

    # Title statement; second indicator skips a leading article
    title_words = (book.title).split(' ')
    first_word = title_words[0]
    if first_word.lower() == 'the':
        record.add_field(record_field('245', ['1', '4'], ['a', book.title, 'c', author_names]))
    else:
        record.add_field(record_field('245', ['1', '0'], ['a', book.title, 'c', author_names]))

    # Publication
    try:
        press_name = models.Setting.objects.get(group__name='general', name='press_name').value
    except Exception:
        press_name = None
    try:
        city = models.Setting.objects.get(group__name='general', name='city').value
    except Exception:
        city = None
    publication_info = []
    if book.publication_date:
        # Press' city
        if city:
            publication_info.append('a')
            publication_info.append(str(city))
        # Press' name
        if press_name:
            publication_info.append('b')
            publication_info.append(str(press_name))
        # Date of Publication
        publication_info.append('c')
        publication_info.append(str(book.publication_date))
        record.add_field(record_field('260', ['#', '#'], publication_info))

    # Physical details
    if book.pages:
        record.add_field(record_field('300', ['#', '#'], ['a', str(book.pages) + ' pages']))

    # Content type
    record.add_field(record_field('336', ['#', '#'], ['a', 'text', '2', 'rdacontent']))
    # Media type
    record.add_field(record_field('337', ['#', '#'], ['a', 'unmediated', '2', 'rdamedia']))
    # Carrier type
    record.add_field(record_field('338', ['#', '#'], ['a', 'volume', '2', 'rdacarrier']))

    # Language note
    if languages:
        for lang in languages:
            record.add_field(record_field('546', ['#', '#'], ['a', lang.display]))
    else:
        record.add_field(record_field('546', ['#', '#'], ['a', 'In English']))

    # Press editors
    press_editors = book.press_editors.all()
    for editor in press_editors:
        record.add_field(record_field('700', ['1', '#'], ['a', '%s, %s' % (editor.last_name, editor.first_name), 'e', 'Press editor']))

    # Series
    if book.series:
        record.add_field(record_field('830', ['#', '0'], ['a', book.series.name]))
        if book.series.editor:
            record.add_field(record_field('700', ['1', '#'], ['a', '%s, %s' % (book.series.editor.last_name, book.series.editor.first_name), 'e', 'Series editor']))

    # Add record to file
    title = book.title
    safe_title = re.sub(r'[^a-zA-Z0-9\n\.]', '', title.lower())
    if not xml:
        filename = 'book_' + str(book.id) + '_' + safe_title + '_marc21.dat'
        file = handle_marc21_file(record.as_marc(), filename, book, owner)
    else:
        filename = 'book_' + str(book.id) + '_' + safe_title + '_marc21.xml'
        content = record_to_xml(record, quiet=False, namespace=False)
        file = handle_marc21_file(content, filename, book, owner)
    return file.pk
def write(self, record: Record) -> None:
    """Write *record*: base-class bookkeeping first, then raw MARC21 bytes."""
    Writer.write(self, record)
    serialized = record.as_marc()
    self.file_handle.write(serialized)
rec_505 = Field(tag='505', indicators=['0', ' '], subfields=['a', toc]) new_marc_rec.add_ordered_field(rec_505) #-------------------------------------------- # Create 520 field for the Abstract abstract = fields[16].strip() if not abstract == '': rec_520 = Field(tag='520', indicators=['3', ' '], subfields=['a', abstract]) new_marc_rec.add_ordered_field(rec_520) #-------------------------------------------- # Create 655 field for the Genre/Form term # NOTE: if multiple terms are used in the field, you may want to try to break them up if they're separated by commas or other consistent punctuation # Marked field as $2local since it may not be authorized term genre = fields[17].strip() if not genre == '': rec_655 = Field(tag='655', indicators=[' ', '7'], subfields=['a', genre, '2', 'local']) new_marc_rec.add_ordered_field(rec_655) print 'After:' print new_marc_rec marc_recs_out.write(new_marc_rec.as_marc()) rec_cnt += 1 marc_recs_out.close()