Exemple #1
0
# Default input and output file names.
inputfilename = "172_input.tsv"
outputfilename = "172_output.mrc"

# Both defaults are replaced when exactly two command line arguments are given.
if len(sys.argv) == 3:
    inputfilename, outputfilename = sys.argv[1:]

# NOTE(review): neither handle is closed in the visible part of this script.
inputfile = open(inputfilename, "r")
outputfile = open(outputfilename, "wb")

# All input lines are read into memory at once.
records = inputfile.readlines()

# The first line is skipped -- presumably a header row; verify against the input.
for line in records[1:]:

    # NOTE(review): the separator is three spaces although the input is named
    # *.tsv -- confirm whether a tab character was intended here.
    fields = line.split("   ")

    marcrecord = marcx.Record(force_utf8=True)
    marcrecord.strict = False

    # Leader
    marcrecord.leader = "     naa  22        4500"

    # ID: the first column, prefixed with "finc-172-"
    f001 = fields[0]
    marcrecord.add("001", data="finc-172-" + f001)

    # 007
    marcrecord.add("007", data="cr")

    # Language: taken from the eleventh column
    f041a = fields[10]
    marcrecord.add("041", a=f041a)
Exemple #2
0
 def test_add_indicator(self):
     """Indicators passed to ``add`` are readable from the created field."""
     record = marcx.Record()
     record.add('980', a='81723', indicators=['0', ' '])
     field = record['980']
     self.assertEqual(['0', ' '], field.indicators)
     self.assertEqual('0', field.indicator1)
     self.assertEqual(' ', field.indicator2)
Exemple #3
0

# Default input and output.
inputfilename, outputfilename = "14_input.mrc", "14_output.mrc"

# Both defaults are replaced when exactly two command line arguments are given.
if len(sys.argv) == 3:
    inputfilename, outputfilename = sys.argv[1:]

# NOTE(review): neither handle is closed in the visible part of this script.
inputfile = io.open(inputfilename, "rb")
outputfile = io.open(outputfilename, "wb")

reader = pymarc.MARCReader(inputfile)

for oldrecord in reader:

    newrecord = marcx.Record()

    # Check whether a title is present; records without a 245 field are skipped.
    f245 = oldrecord["245"]
    if not f245:
        continue

    # Check whether the record is a digitized item; records without an 856
    # field are skipped.
    f856 = oldrecord["856"]
    if not f856:
        continue

    # f856 is reassigned on every iteration, so only the last 856 field decides
    # its final value.
    # NOTE(review): `"http" in field` is a membership test on the field object
    # itself, not on a URL string -- confirm this matches what is intended.
    for field in oldrecord.get_fields("856"):
        f856 = field if "http" in field else ""

    # leader
Exemple #4
0
 def test_superclass(self):
     """A ``marcx.Record`` is also a ``pymarc.Record``."""
     record = marcx.Record()
     self.assertIsInstance(record, pymarc.Record)
Exemple #5
0
 def test_constructor_passes_data(self):
     """A record built from MARCREC serializes back to MARCREC."""
     # Constructor keywords of the superclass, for reference:
     # data='', to_unicode=False, force_utf8=False,
     # hide_utf8_warnings=False, utf8_handling='strict'
     obj = marcx.Record(data=MARCREC, to_unicode=True, force_utf8=True)
     # assertEqual instead of the deprecated assertEquals alias, which was
     # removed in Python 3.12.
     self.assertEqual(obj.as_marc(), MARCREC)
Exemple #6
0
 def test_init(self):
     """Constructing a record without arguments yields an object."""
     record = marcx.Record()
     self.assertIsNotNone(record)
Exemple #7
0
 def test_constructor(self):
     """The no-argument constructor returns something other than ``None``."""
     created = marcx.Record()
     self.assertIsNotNone(created)
Exemple #8
0
 def test_add_fails_on_non_string_non_iterables(self):
     """Adding an integer subfield value raises ``ValueError``."""
     record = marcx.Record()
     self.assertRaises(ValueError, record.add, '020', a=1243)
Exemple #9
0
 def test_has(self):
     """``has`` reports an added tag and subfield, and not an absent subfield."""
     record = marcx.Record()
     record.add('020', a='1243')
     for present in ('020', '020.a'):
         self.assertTrue(record.has(present))
     self.assertFalse(record.has('020.b'))
Exemple #10
0
 def test_add_digits_subfields_with_underscore(self):
     """A keyword such as ``_9`` is stored under the subfield code ``9``."""
     obj = marcx.Record()
     obj.add('020', a='978123123', _9='Hello')
     # assertEqual instead of the deprecated assertEquals alias, which was
     # removed in Python 3.12.
     self.assertEqual(
         obj.get_fields('020')[0].get_subfields('9'), ['Hello'])
Exemple #11
0
 def test_add_repeated_subfields(self):
     """A tuple of values for one subfield code still creates a single field."""
     obj = marcx.Record()
     obj.add('020', a=('9783334444333', '1234'))
     # assertEqual instead of the deprecated assertEquals alias, which was
     # removed in Python 3.12.
     self.assertEqual(len(obj.get_fields()), 1)
Exemple #12
0
 def test_get_first_many_values(self):
     """With many 020 fields, ``firstvalue`` returns the one added first."""
     obj = marcx.Record()
     for i in range(100):
         obj.add('020', a='isbn-no-%s' % i)
     # assertEqual instead of the deprecated assertEquals alias, which was
     # removed in Python 3.12.
     self.assertEqual(obj.firstvalue('020.a'), 'isbn-no-0')
Exemple #13
0
 def test_get_first_value(self):
     """``firstvalue`` returns a match, else ``None`` or the given default."""
     obj = marcx.Record(data=MARCREC, to_unicode=True, force_utf8=True)
     # assertEqual / assertIsNone instead of the deprecated assertEquals
     # alias, which was removed in Python 3.12.
     self.assertEqual(obj.firstvalue('041.a'), 'ger')
     self.assertEqual(obj.firstvalue('260.a'), 'Linz :')
     self.assertIsNone(obj.firstvalue('999.9'))
     self.assertEqual(obj.firstvalue('999.9', default='X'), 'X')
Exemple #14
0
 def test_remove_single(self):
     """``remove`` deletes a previously added control field."""
     obj = marcx.Record()
     obj.add('001', data='123')
     # assertEqual instead of the deprecated assertEquals alias, which was
     # removed in Python 3.12.
     self.assertEqual(1, len(obj.get_fields('001')))
     obj.remove('001')
     self.assertEqual(0, len(obj.get_fields('001')))
Exemple #15
0
 def test_does_not_ignore_invalid_indicator_strings(self):
     """Passing the string 'Welcome' as indicators raises ``ValueError``."""
     record = marcx.Record()
     self.assertRaises(
         ValueError, record.add, '980', a='81723', indicators='Welcome')
Exemple #16
0
def imslp_xml_to_marc(s, legacy_mapping=None):
    """
    Convert a string containing a single IMSLP XML record to a pymarc MARC record.

    Optionally take a legacy mapping, associating IMSLP Names with VIAF
    identifiers. Blueprint: https://git.io/vpQPd, with one difference: We
    allow records with no subjects.

    Args:
        s: XML text of one record with a ``document`` root element.
        legacy_mapping: optional mapping from the record identifier to a
            dict-like object that may carry ``viaf`` and ``title`` entries.
            Only ``.get`` is called on it. Defaults to an empty mapping.

    Returns:
        The populated ``marcx.Record``.

    Raises:
        ValueError: if the record has no title, or if a ``subject`` entry is
            present with a count other than one or two.
        KeyError: if ``document``, ``identifier``, ``creator`` or ``url`` is
            missing from the parsed XML.
    """
    dd = xmltodict.parse(s, force_list={"subject", "languages"})

    if legacy_mapping is None:
        legacy_mapping = {}

    record = marcx.Record(force_utf8=True)
    # Many optional values below default to "". Presumably marcx drops empty
    # subfield values in non-strict mode -- TODO confirm against marcx.
    record.strict = False

    doc = dd["document"]

    record.leader = "     ncs  22        450 "

    # The record id is the base64 form of the IMSLP identifier, without padding.
    identifier = doc["identifier"]["#text"]
    encoded_id = base64.b64encode(six.b(identifier)).rstrip(b"=")
    record.add("001", data="finc-15-{}".format(encoded_id.decode("utf-8")))

    record.add("007", data="cr")

    # "unbekannt" (German for "unknown") entries are not written as languages.
    langs = [lang for lang in (doc.get("languages") or []) if lang != "unbekannt"]
    if langs:
        record.add("008", data="130227uu20uuuuuuxx uuup%s  c" % langs[0])
        for lang in langs:
            record.add("041", a=lang)

    creator = doc["creator"]["mainForm"]
    legacy = legacy_mapping.get(identifier, {})
    record.add("100", a=creator, e="cmp", _0=legacy.get("viaf", ""))

    record.add("240", a=legacy.get("title", ""))

    # A record w/o title is an error. Only the lookup is guarded, so a
    # KeyError raised elsewhere is not misreported as a missing title.
    try:
        title = doc["title"]
    except KeyError:
        raise ValueError("cannot find title: %s ..." % s[:300])
    record.add("245", a=html_unescape(title))

    record.add("246", a=html_unescape(doc.get("additionalTitle", "")))

    date = doc.get("date", "")
    record.add("260", c=date)
    record.add("650", y=date)
    record.add("500", a=doc.get("abstract", ""))

    if "subject" in doc:
        subjects = doc["subject"]
        # With two subjects the second one is used; with one, that one.
        if len(subjects) == 1:
            main_subject = subjects[0]["mainForm"]
        elif len(subjects) == 2:
            main_subject = subjects[1]["mainForm"]
        else:
            # Interpolate the count; passing it as a second argument would
            # leave the "%d" placeholder unformatted in the message.
            raise ValueError("cannot handle %d subjects" % len(subjects))

        arrangement_of = doc.get("music_arrangement_of", "")
        record.add("590", a=main_subject.title(), b=arrangement_of.title())

        # One 689 per distinct value, in a fixed order (subject first) so the
        # output does not depend on set iteration order.
        record.add("689", a=main_subject.title())
        if arrangement_of != main_subject:
            record.add("689", a=arrangement_of.title())

    record.add("700", a=doc.get("contributor", {}).get("mainForm", ""), e="ctb")
    record.add("856", q="text/html", _3="Petrucci Musikbibliothek", u=doc["url"]["#text"])
    record.add("970", c="PN")
    record.add("980", a=identifier, b="15", c="Petrucci Musikbibliothek")
    return record