Example #1
0
 def test_DOC_03692895X(self):
     """Check subfield shortcuts and ISBN extraction on DOC_03692895X.

     The fixture is expected to carry values for 245a and 100a, and
     nothing for 245b, 700a or ISBNs.
     """
     em = marcx.marcdoc(DOC_03692895X)
     self.assertIsNotNone(em)
     # assertEqual is used instead of the assertEquals alias, which was
     # deprecated for years and removed from unittest in Python 3.12.
     self.assertEqual(em.x245a, [u'De hydrophobia nonnulla /'])
     self.assertEqual(em.x245b, [])
     self.assertEqual(em.x100a, [u'Nahmer, Friedrich Wilhelm V. D.'])
     self.assertEqual(em.x700a, [])
     # isbns() is wrapped in list() so any iterable return compares equal.
     self.assertEqual(list(em.isbns()), [])
Example #2
0
 def test_DOC_03692895X(self):
     """Verify field shortcuts and ISBNs for the DOC_03692895X fixture.

     Populated subfields (245a, 100a) must come back as one-element
     lists; absent ones (245b, 700a) and the ISBN list must be empty.
     """
     em = marcx.marcdoc(DOC_03692895X)
     self.assertIsNotNone(em)

     # Populated subfields. The non-deprecated assertEqual is used because
     # the assertEquals alias no longer exists in Python 3.12+.
     self.assertEqual(em.x245a, [u'De hydrophobia nonnulla /'])
     self.assertEqual(em.x100a, [u'Nahmer, Friedrich Wilhelm V. D.'])

     # Absent subfields yield an empty list.
     self.assertEqual(em.x245b, [])
     self.assertEqual(em.x700a, [])
     self.assertEqual(list(em.isbns()), [])
Example #3
0
 def test_flattened_689(self):
     """Check how repeated subfields of field 936 are exposed.

     Note: despite the method name, only field 936 is exercised here.
     ``x936k`` must list all five ``k`` values, and ``x936`` must expose
     the field with ``k`` as a list and the indicators as plain keys.
     """
     em = marcx.marcdoc(DOC_004867815)
     # assertEqual replaces the assertEquals alias (removed in Python 3.12).
     self.assertEqual(5, len(em.x936k))
     self.assertEqual(em.x936, [[{u'a': u'NH 6000', u'k': [
         u'Geschichte',
         u'Griechisch-ro\u0308mische Geschichte',
         u'Griechische Geschichte',
         u'Griechische Geschichte (500 - 338)',
         u'Peloponnesischer Krieg und Niedergang der Polis (431 - 360)'],
         u'b': u'Peloponnesischer Krieg und Niedergang der Polis (431 - 360)',
         u'ind1': u'r', u'ind2': u'v'}]])
Example #4
0
    def test_DOC_091849799(self):
        """Check ISBN extraction and attribute shortcuts on DOC_091849799.

        Also asserts that the shortcut's leading character ('x', 'y' or
        '_') does not change the result, and that an unknown field (999)
        yields an empty list.
        """
        em = marcx.marcdoc(DOC_091849799)
        self.assertIsNotNone(em)
        # assertEqual replaces the assertEquals alias (removed in Python 3.12).
        self.assertEqual(list(em.isbns()), [
            u'0262032937', u'0070131511', u'0262531968', u'9780262032933',
            u'0-262-03293-7', u'0-07-013151-1', u'0-262-53196-8',
            u'978-0-262-03293-3'
        ])

        self.assertEqual(em.x700a, [u'Cormen, Thomas H.'])
        self.assertEqual(em.x935b, [u'druck'])
        self.assertEqual(em.x650x, [u'Programming'])
        # NOTE(review): identical to the assertion above; presumably checks
        # that repeated attribute access is stable - confirm or drop.
        self.assertEqual(em.x650x, [u'Programming'])
        # Different leading characters must resolve to the same subfield.
        self.assertEqual(em.y650x, [u'Programming'])
        self.assertEqual(em._650x, [u'Programming'])
        self.assertEqual(em.x999, [])
        self.assertEqual(em.x999yyyyy, [])
Example #5
0
 def run(self):
     """Scan Elasticsearch for ``10.NNNN/...`` strings and write them as TSV.

     Every hit whose ``_all`` field matches the coarse regexp query is
     dumped to JSON and searched with a stricter pattern (presumably
     DOIs). For each distinct match in a hit one row is written:
     index, document id, match, 260.c, 245.a, 245.b. Missing field
     values are replaced by ``self.na``.
     """
     # Compiled once; the pattern does not depend on the hit.
     candidate_pattern = re.compile(r"10\.[0-9]{4,}/[^ \t\"]{3,}")

     es = elasticsearch.Elasticsearch(timeout=self.timeout)
     hits = eshelpers.scan(es, {'query': {"regexp": {"_all": "10\\.[0-9]{4,}"}}}, scroll=self.scroll)
     with self.output().open('w') as output:
         for hit in hits:
             doc_id, index = hit.get('_id'), hit.get('_index')
             content = hit.get('_source').get('content')
             # set() collapses repeated occurrences within one document.
             candidates = set(candidate_pattern.findall(json.dumps(content)))
             if not candidates:
                 # Nothing to report; skip building the marcdoc, just as
                 # the per-candidate construction did before.
                 continue

             # These depend only on the hit, so they are computed once per
             # hit instead of once per candidate.
             doc = marcx.marcdoc(hit)
             title = (doc.values('245.a') or [self.na])[0]
             subtitle = (doc.values('245.b') or [self.na])[0]
             pubyear = (doc.values('260.c') or [self.na])[0]

             for candidate in candidates:
                 output.write_tsv(index,
                                  doc_id,
                                  candidate.encode(self.encoding),
                                  pubyear.encode(self.encoding),
                                  title.encode(self.encoding),
                                  subtitle.encode(self.encoding))
Example #6
0
 def test_flattened_689(self):
     """Verify the structure returned for field 936 of DOC_004867815.

     Note: the method name mentions 689, but only field 936 is checked.
     The repeated ``k`` subfield must be collected into one list of five
     values, next to the single-valued subfields and both indicators.
     """
     em = marcx.marcdoc(DOC_004867815)
     # assertEquals was removed in Python 3.12; assertEqual is the
     # supported spelling and behaves the same.
     self.assertEqual(5, len(em.x936k))
     self.assertEqual(em.x936, [[{
         u'a':
         u'NH 6000',
         u'k': [
             u'Geschichte', u'Griechisch-ro\u0308mische Geschichte',
             u'Griechische Geschichte',
             u'Griechische Geschichte (500 - 338)',
             u'Peloponnesischer Krieg und Niedergang der Polis (431 - 360)'
         ],
         u'b':
         u'Peloponnesischer Krieg und Niedergang der Polis (431 - 360)',
         u'ind1':
         u'r',
         u'ind2':
         u'v'
     }]])
Example #7
0
    def test_DOC_091849799(self):
        """Verify ISBNs and attribute-style subfield access on DOC_091849799.

        Covers plain and hyphenated ISBN forms, several subfield
        shortcuts, alternative leading characters for the same shortcut,
        and empty results for a field (999) with no values.
        """
        em = marcx.marcdoc(DOC_091849799)
        self.assertIsNotNone(em)
        # assertEqual is used throughout: the assertEquals alias was
        # removed from unittest in Python 3.12.
        self.assertEqual(list(em.isbns()), [u'0262032937',
                                            u'0070131511',
                                            u'0262531968',
                                            u'9780262032933',
                                            u'0-262-03293-7',
                                            u'0-07-013151-1',
                                            u'0-262-53196-8',
                                            u'978-0-262-03293-3'])

        self.assertEqual(em.x700a, [u'Cormen, Thomas H.'])
        self.assertEqual(em.x935b, [u'druck'])
        self.assertEqual(em.x650x, [u'Programming'])
        # NOTE(review): same assertion as the line above; possibly guards
        # against state changing between accesses - confirm intent.
        self.assertEqual(em.x650x, [u'Programming'])
        # The first character of the shortcut is expected not to matter.
        self.assertEqual(em.y650x, [u'Programming'])
        self.assertEqual(em._650x, [u'Programming'])
        self.assertEqual(em.x999, [])
        self.assertEqual(em.x999yyyyy, [])
Example #8
0
    def test_values(self):
        """Check ``values()`` with both spec spellings and several specs.

        '936k' and '936.k' must return the same five values, and passing
        several specs must return their values in one flat list.
        """
        em = marcx.marcdoc(DOC_004867815)
        # assertEqual replaces the assertEquals alias (removed in Python 3.12).
        self.assertEqual(5, len(em.values('936k')))
        self.assertEqual(5, len(em.values('936.k')))

        # Named once so both spellings are checked against the same list.
        expected_936k = [
            u'Geschichte', u'Griechisch-ro\u0308mische Geschichte',
            u'Griechische Geschichte', u'Griechische Geschichte (500 - 338)',
            u'Peloponnesischer Krieg und Niedergang der Polis (431 - 360)'
        ]
        self.assertEqual(expected_936k, em.values('936k'))
        self.assertEqual(expected_936k, em.values('936.k'))

        self.assertEqual(3, len(em.values('260.a', '260.b', '260.c')))
        self.assertEqual([
            u'Athe\u0300nes [u.a.] :',
            u"E\u0301cole Franc\u0327. d'Athe\u0300nes,", u'1976'
        ], em.values('260.a', '260.b', '260.c'))
Example #9
0
    def test_values(self):
        """Verify ``values()`` for dotted and undotted specs and multi-spec calls.

        The five 936 ``k`` values must be returned for both '936k' and
        '936.k'; a call with three 260 specs must return three values.
        """
        em = marcx.marcdoc(DOC_004867815)

        # Shared expectation for both spellings of the 936 k spec.
        subjects = [u'Geschichte',
                    u'Griechisch-ro\u0308mische Geschichte',
                    u'Griechische Geschichte',
                    u'Griechische Geschichte (500 - 338)',
                    u'Peloponnesischer Krieg und Niedergang der Polis (431 - 360)']

        # assertEqual is the supported name; assertEquals is gone in 3.12.
        self.assertEqual(5, len(em.values('936k')))
        self.assertEqual(5, len(em.values('936.k')))
        self.assertEqual(subjects, em.values('936k'))
        self.assertEqual(subjects, em.values('936.k'))

        imprint = em.values('260.a', '260.b', '260.c')
        self.assertEqual(3, len(imprint))
        self.assertEqual([u'Athe\u0300nes [u.a.] :',
                          u"E\u0301cole Franc\u0327. d'Athe\u0300nes,",
                          u'1976'],
                         em.values('260.a', '260.b', '260.c'))
Example #10
0
 def test_dict_functionality(self):
     """Check that a marcdoc supports dict-style ``get`` on the raw hit."""
     em = marcx.marcdoc(DOC_091849799)
     # assertEqual replaces the assertEquals alias (removed in Python 3.12).
     self.assertEqual('bsz', em.get('_index'))
     self.assertEqual('091849799',
                      em.get('_source').get('content').get('001'))
Example #11
0
 def test_dict_functionality(self):
     """Verify plain ``get`` lookups work on a marcdoc.

     Reads the top-level ``_index`` key and the nested control field
     ``001`` under ``_source`` / ``content``.
     """
     em = marcx.marcdoc(DOC_091849799)
     # The assertEquals alias was removed in Python 3.12; use assertEqual.
     self.assertEqual('bsz', em.get('_index'))
     control_number = em.get('_source').get('content').get('001')
     self.assertEqual('091849799', control_number)