# Default input and output; both can be overridden from the command line.
inputfilename, outputfilename = "172_input.tsv", "172_output.mrc"

if len(sys.argv) == 3:
    inputfilename, outputfilename = sys.argv[1:]

inputfile = open(inputfilename, "r")
outputfile = open(outputfilename, "wb")

records = inputfile.readlines()

# The first line of the input is skipped (presumably a header row).
for line in records[1:]:

    # NOTE(review): the input is named *.tsv but the line is split on a
    # single space -- confirm the intended delimiter.
    fields = line.split(" ")

    marcrecord = marcx.Record(force_utf8=True)
    marcrecord.strict = False

    # Leader
    marcrecord.leader = " naa 22 4500"

    # Identifier, taken from the first column.
    f001 = fields[0]
    marcrecord.add("001", data="finc-172-" + f001)

    # 007
    marcrecord.add("007", data="cr")

    # Language, taken from the eleventh column.
    f041a = fields[10]
    marcrecord.add("041", a=f041a)
def test_add_indicator(self):
    """Indicators passed to ``add`` are exposed on the resulting field."""
    record = marcx.Record()
    record.add('980', a='81723', indicators=['0', ' '])

    field = record['980']
    self.assertEqual(['0', ' '], field.indicators)
    self.assertEqual('0', field.indicator1)
    self.assertEqual(' ', field.indicator2)
# Default input and output.
inputfilename, outputfilename = "14_input.mrc", "14_output.mrc"

if len(sys.argv) == 3:
    inputfilename, outputfilename = sys.argv[1:]

inputfile = io.open(inputfilename, "rb")
outputfile = io.open(outputfilename, "wb")

reader = pymarc.MARCReader(inputfile)

for oldrecord in reader:

    newrecord = marcx.Record()

    # Skip records that have no title field.
    f245 = oldrecord["245"]
    if not f245:
        continue

    # Skip records without an 856 field, i.e. records that are not
    # digitized copies.
    f856 = oldrecord["856"]
    if not f856:
        continue

    # Keep the 856 field only when its content contains a link.
    # `"http" in field` would test for a subfield whose *code* is "http"
    # (pymarc Field membership), which never matches; the textual value of
    # the field has to be inspected instead.
    # NOTE(review): the last 856 field wins here -- confirm that this is
    # intended when a record carries several 856 fields.
    for field in oldrecord.get_fields("856"):
        f856 = field if "http" in field.value() else ""

    # leader
def test_superclass(self):
    """A ``marcx.Record`` is also a ``pymarc.Record``."""
    obj = marcx.Record()
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)), which only reports "False is not true".
    self.assertIsInstance(obj, pymarc.Record)
def test_constructor_passes_data(self):
    """Constructor arguments are passed through; the record round-trips."""
    # data='', to_unicode=False, force_utf8=False,
    # hide_utf8_warnings=False, utf8_handling='strict'
    obj = marcx.Record(data=MARCREC, to_unicode=True, force_utf8=True)
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(obj.as_marc(), MARCREC)
def test_init(self):
    """A record can be created without any arguments."""
    self.assertIsNotNone(marcx.Record())
def test_constructor(self):
    """The default constructor yields a record object."""
    record = marcx.Record()
    self.assertIsNotNone(record)
def test_add_fails_on_non_string_non_iterables(self):
    """Adding an integer subfield value raises ``ValueError``."""
    record = marcx.Record()
    self.assertRaises(ValueError, record.add, '020', a=1243)
def test_has(self):
    """``has`` answers for both a tag and a ``tag.subfield`` spec."""
    record = marcx.Record()
    record.add('020', a='1243')

    # Present: the field itself and its "a" subfield.
    for spec in ('020', '020.a'):
        self.assertTrue(record.has(spec))

    # Absent: a subfield that was never added.
    self.assertFalse(record.has('020.b'))
def test_add_digits_subfields_with_underscore(self):
    """A keyword such as ``_9`` adds the numeric subfield ``9``."""
    obj = marcx.Record()
    obj.add('020', a='978123123', _9='Hello')
    field = obj.get_fields('020')[0]
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(field.get_subfields('9'), ['Hello'])
def test_add_repeated_subfields(self):
    """A tuple of values for one subfield still yields a single field."""
    obj = marcx.Record()
    obj.add('020', a=('9783334444333', '1234'))
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(len(obj.get_fields()), 1)
def test_get_first_many_values(self):
    """``firstvalue`` returns the value that was added first."""
    obj = marcx.Record()
    for i in range(100):
        obj.add('020', a='isbn-no-%s' % i)
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(obj.firstvalue('020.a'), 'isbn-no-0')
def test_get_first_value(self):
    """``firstvalue`` reads existing values and honours ``default``."""
    obj = marcx.Record(data=MARCREC, to_unicode=True, force_utf8=True)
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(obj.firstvalue('041.a'), 'ger')
    self.assertEqual(obj.firstvalue('260.a'), 'Linz :')
    # A missing value yields None unless a default is supplied.
    self.assertIsNone(obj.firstvalue('999.9'))
    self.assertEqual(obj.firstvalue('999.9', default='X'), 'X')
def test_remove_single(self):
    """``remove`` deletes the field with the given tag."""
    obj = marcx.Record()
    obj.add('001', data='123')
    # assertEquals is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(1, len(obj.get_fields('001')))
    obj.remove('001')
    self.assertEqual(0, len(obj.get_fields('001')))
def test_does_not_ignore_invalid_indicator_strings(self):
    """An indicator string of the wrong shape raises ``ValueError``."""
    record = marcx.Record()
    self.assertRaises(
        ValueError, record.add, '980', a='81723', indicators='Welcome')
def imslp_xml_to_marc(s, legacy_mapping=None):
    """
    Convert a string containing a single IMSLP XML record to a pymarc MARC record.

    Optionally take a legacy mapping, associating IMSLP Names with VIAF
    identifiers.

    Blueprint: https://git.io/vpQPd, with one difference: We allow records with
    no subjects.

    A record w/o title is an error. We check for them, when we add the field.

    Args:
        s: XML string holding a single record below a ``document`` element.
        legacy_mapping: optional mapping from record identifier to a dict
            that may carry the keys ``"viaf"`` and ``"title"``.

    Returns:
        The populated ``marcx.Record``.

    Raises:
        ValueError: if the record has no title, or if a ``subject`` entry is
            present with a number of subjects other than one or two.
        KeyError: if the identifier, creator or url entries are missing
            (they are accessed by direct indexing).
    """
    dd = xmltodict.parse(s, force_list={"subject", "languages"})

    if legacy_mapping is None:
        legacy_mapping = collections.defaultdict(lambda: collections.defaultdict(str))

    record = marcx.Record(force_utf8=True)
    record.strict = False

    doc = dd["document"]

    record.leader = " ncs 22 450 "

    identifier = doc["identifier"]["#text"]
    # The identifier is base64 encoded with the padding stripped.
    encoded_id = base64.b64encode(six.b(identifier)).rstrip(b"=")
    record.add("001", data="finc-15-{}".format(encoded_id.decode("utf-8")))

    record.add("007", data="cr")

    if doc.get("languages", []):
        # "unbekannt" (German for "unknown") is not a usable language value.
        langs = [lang for lang in doc["languages"] if lang != "unbekannt"]
        if langs:
            record.add("008", data="130227uu20uuuuuuxx uuup%s c" % langs[0])
            for lang in langs:
                record.add("041", a=lang)

    # Look up the legacy entry once; .get() does not trigger a defaultdict
    # factory, so unknown identifiers simply yield an empty dict.
    legacy = legacy_mapping.get(identifier, {})

    creator = doc["creator"]["mainForm"]
    record.add("100", a=creator, e="cmp", _0=legacy.get("viaf", ""))

    record.add("240", a=legacy.get("title", ""))

    # Only the lookup can signal a missing title, so only the lookup is
    # guarded. No "raise ... from" here: the block stays Python 2 compatible.
    try:
        title = doc["title"]
    except KeyError:
        raise ValueError("cannot find title: %s ..." % s[:300])
    record.add("245", a=html_unescape(title))

    record.add("246", a=html_unescape(doc.get("additionalTitle", "")))

    date = doc.get("date", "")
    record.add("260", c=date)
    record.add("650", y=date)
    record.add("500", a=doc.get("abstract", ""))

    arrangement_of = doc.get("music_arrangement_of", "")
    for689 = []

    if "subject" in doc:
        subjects = doc["subject"]
        if len(subjects) == 1:
            for689.append(subjects[0]["mainForm"])
        elif len(subjects) == 2:
            # With two subjects, the second one is used.
            for689.append(subjects[1]["mainForm"])
        else:
            # Interpolate the count; passing it as a second exception
            # argument would leave the "%d" placeholder unformatted.
            raise ValueError("cannot handle %d subjects" % len(subjects))

        record.add("590", a=for689[0].title(), b=arrangement_of.title())

    for689.append(arrangement_of)

    # De-duplicate while keeping insertion order, so that the order of the
    # 689 fields is the same on every run (iterating a set of strings is
    # not stable across interpreter runs).
    seen = set()
    for subject in for689:
        if subject in seen:
            continue
        seen.add(subject)
        record.add("689", a=subject.title())

    record.add("700", a=doc.get("contributor", {}).get("mainForm", ""), e="ctb")
    record.add("856", q="text/html", _3="Petrucci Musikbibliothek", u=doc["url"]["#text"])
    record.add("970", c="PN")
    record.add("980", a=identifier, b="15", c="Petrucci Musikbibliothek")
    return record