def test_DOC_03692895X(self):
    """Check attribute-style field access on the DOC_03692895X fixture.

    Fields 245a and 100a are expected to hold one value each, while
    245b, 700a and the ISBN iterator are expected to be empty.
    """
    em = marcx.marcdoc(DOC_03692895X)
    self.assertIsNotNone(em)

    # `assertEqual` replaces the deprecated `assertEquals` alias, which
    # was removed from unittest in Python 3.12.
    self.assertEqual(em.x245a, [u'De hydrophobia nonnulla /'])
    self.assertEqual(em.x245b, [])
    self.assertEqual(em.x100a, [u'Nahmer, Friedrich Wilhelm V. D.'])
    self.assertEqual(em.x700a, [])
    self.assertEqual(list(em.isbns()), [])
def test_flattened_689(self):
    """Check subfield and whole-field access on field 936 of DOC_004867815.

    NOTE: despite the "689" in the method name, every assertion here
    targets field 936.
    """
    em = marcx.marcdoc(DOC_004867815)

    # `assertEqual` replaces the deprecated `assertEquals` alias, which
    # was removed from unittest in Python 3.12.
    self.assertEqual(5, len(em.x936k))

    # Accessing the field without a subfield code is expected to return
    # the field dictionaries themselves, wrapped in a nested list.
    expected_field = {
        u'a': u'NH 6000',
        u'k': [
            u'Geschichte',
            u'Griechisch-ro\u0308mische Geschichte',
            u'Griechische Geschichte',
            u'Griechische Geschichte (500 - 338)',
            u'Peloponnesischer Krieg und Niedergang der Polis (431 - 360)',
        ],
        u'b': u'Peloponnesischer Krieg und Niedergang der Polis (431 - 360)',
        u'ind1': u'r',
        u'ind2': u'v',
    }
    self.assertEqual(em.x936, [[expected_field]])
def test_DOC_091849799(self):
    """Check ISBN iteration and attribute-style access on DOC_091849799.

    The `x`, `y` and `_` prefixed spellings of `650x` are all expected
    to yield the same values, and both `x999` and `x999yyyyy` are
    expected to yield an empty list.
    """
    em = marcx.marcdoc(DOC_091849799)
    self.assertIsNotNone(em)

    # `assertEqual` replaces the deprecated `assertEquals` alias, which
    # was removed from unittest in Python 3.12.
    self.assertEqual(list(em.isbns()), [
        u'0262032937',
        u'0070131511',
        u'0262531968',
        u'9780262032933',
        u'0-262-03293-7',
        u'0-07-013151-1',
        u'0-262-53196-8',
        u'978-0-262-03293-3',
    ])
    self.assertEqual(em.x700a, [u'Cormen, Thomas H.'])
    self.assertEqual(em.x935b, [u'druck'])

    # The original asserted `em.x650x` twice in a row; one check suffices.
    self.assertEqual(em.x650x, [u'Programming'])
    self.assertEqual(em.y650x, [u'Programming'])
    self.assertEqual(em._650x, [u'Programming'])

    self.assertEqual(em.x999, [])
    self.assertEqual(em.x999yyyyy, [])
def run(self):
    """Scan Elasticsearch for `10.NNNN/...` strings and write them as TSV.

    Every hit matching the regexp query on `_all` has its
    `_source.content` serialized to JSON and searched for strings of the
    form `10.<4+ digits>/<3+ chars>` (presumably DOIs). For each distinct
    match one row is written to the task output with the columns:
    index, document id, match, 260.c, 245.a, 245.b. Missing field values
    fall back to `self.na`.
    """
    es = elasticsearch.Elasticsearch(timeout=self.timeout)
    query = {'query': {"regexp": {"_all": "10\\.[0-9]{4,}"}}}
    hits = eshelpers.scan(es, query, scroll=self.scroll)

    # Compiled once instead of being looked up again for every hit.
    pattern = re.compile(r"10\.[0-9]{4,}/[^ \t\"]{3,}")

    with self.output().open('w') as output:
        for hit in hits:
            content = hit.get('_source').get('content')
            candidates = set(pattern.findall(json.dumps(content)))
            if not candidates:
                # Nothing to report, so skip building the MARC wrapper.
                continue

            doc_id, index = hit.get('_id'), hit.get('_index')

            # These values depend only on the hit, not on the candidate,
            # so compute them once per hit rather than once per match.
            doc = marcx.marcdoc(hit)
            title = (doc.values('245.a') or [self.na])[0]
            subtitle = (doc.values('245.b') or [self.na])[0]
            pubyear = (doc.values('260.c') or [self.na])[0]

            encoded_pubyear = pubyear.encode(self.encoding)
            encoded_title = title.encode(self.encoding)
            encoded_subtitle = subtitle.encode(self.encoding)

            # Sorted so the row order within a hit is reproducible and
            # does not depend on set iteration order.
            for candidate in sorted(candidates):
                output.write_tsv(index, doc_id,
                                 candidate.encode(self.encoding),
                                 encoded_pubyear,
                                 encoded_title,
                                 encoded_subtitle)
def test_flattened_689(self):
    """Check subfield and whole-field access on field 936 of DOC_004867815.

    NOTE: despite the "689" in the method name, every assertion here
    targets field 936.
    """
    em = marcx.marcdoc(DOC_004867815)

    # `assertEqual` replaces the deprecated `assertEquals` alias, which
    # was removed from unittest in Python 3.12.
    self.assertEqual(5, len(em.x936k))

    # The subfield-less accessor is expected to return the complete
    # field dictionaries, including indicators, inside a nested list.
    self.assertEqual(em.x936, [[{
        u'a': u'NH 6000',
        u'k': [
            u'Geschichte',
            u'Griechisch-ro\u0308mische Geschichte',
            u'Griechische Geschichte',
            u'Griechische Geschichte (500 - 338)',
            u'Peloponnesischer Krieg und Niedergang der Polis (431 - 360)',
        ],
        u'b': u'Peloponnesischer Krieg und Niedergang der Polis (431 - 360)',
        u'ind1': u'r',
        u'ind2': u'v',
    }]])
def test_DOC_091849799(self):
    """Check ISBN iteration and attribute-style access on DOC_091849799.

    The `x`, `y` and `_` prefixed spellings of `650x` are all expected
    to yield the same values, and both `x999` and `x999yyyyy` are
    expected to yield an empty list.
    """
    em = marcx.marcdoc(DOC_091849799)
    self.assertIsNotNone(em)

    expected_isbns = [
        u'0262032937',
        u'0070131511',
        u'0262531968',
        u'9780262032933',
        u'0-262-03293-7',
        u'0-07-013151-1',
        u'0-262-53196-8',
        u'978-0-262-03293-3',
    ]
    # `assertEqual` replaces the deprecated `assertEquals` alias, which
    # was removed from unittest in Python 3.12.
    self.assertEqual(list(em.isbns()), expected_isbns)
    self.assertEqual(em.x700a, [u'Cormen, Thomas H.'])
    self.assertEqual(em.x935b, [u'druck'])

    # The original asserted `em.x650x` twice in a row; one check suffices.
    self.assertEqual(em.x650x, [u'Programming'])
    self.assertEqual(em.y650x, [u'Programming'])
    self.assertEqual(em._650x, [u'Programming'])

    self.assertEqual(em.x999, [])
    self.assertEqual(em.x999yyyyy, [])
def test_values(self):
    """Check `values()` lookups on the DOC_004867815 fixture.

    Both the `'936k'` and the dotted `'936.k'` spelling are expected to
    return the same five values, and passing several specs at once is
    expected to return their values in one flat list.
    """
    em = marcx.marcdoc(DOC_004867815)

    # Shared expectation for both spellings of the 936 k lookup.
    expected_936k = [
        u'Geschichte',
        u'Griechisch-ro\u0308mische Geschichte',
        u'Griechische Geschichte',
        u'Griechische Geschichte (500 - 338)',
        u'Peloponnesischer Krieg und Niedergang der Polis (431 - 360)',
    ]

    # `assertEqual` replaces the deprecated `assertEquals` alias, which
    # was removed from unittest in Python 3.12.
    self.assertEqual(5, len(em.values('936k')))
    self.assertEqual(5, len(em.values('936.k')))
    self.assertEqual(expected_936k, em.values('936k'))
    self.assertEqual(expected_936k, em.values('936.k'))

    self.assertEqual(3, len(em.values('260.a', '260.b', '260.c')))
    self.assertEqual([
        u'Athe\u0300nes [u.a.] :',
        u"E\u0301cole Franc\u0327. d'Athe\u0300nes,",
        u'1976',
    ], em.values('260.a', '260.b', '260.c'))
def test_values(self):
    """Check `values()` lookups on the DOC_004867815 fixture.

    The `'936k'` and `'936.k'` specs are expected to be interchangeable,
    and a call with three 260 specs is expected to return three values.
    """
    em = marcx.marcdoc(DOC_004867815)

    expected_936k = [
        u'Geschichte',
        u'Griechisch-ro\u0308mische Geschichte',
        u'Griechische Geschichte',
        u'Griechische Geschichte (500 - 338)',
        u'Peloponnesischer Krieg und Niedergang der Polis (431 - 360)',
    ]
    expected_260 = [
        u'Athe\u0300nes [u.a.] :',
        u"E\u0301cole Franc\u0327. d'Athe\u0300nes,",
        u'1976',
    ]

    # `assertEqual` replaces the deprecated `assertEquals` alias, which
    # was removed from unittest in Python 3.12.
    self.assertEqual(5, len(em.values('936k')))
    self.assertEqual(5, len(em.values('936.k')))
    self.assertEqual(expected_936k, em.values('936k'))
    self.assertEqual(expected_936k, em.values('936.k'))
    self.assertEqual(3, len(em.values('260.a', '260.b', '260.c')))
    self.assertEqual(expected_260, em.values('260.a', '260.b', '260.c'))
def test_dict_functionality(self):
    """Check that a marcdoc still supports dict-style `get` access.

    The top-level `_index` key and the nested `_source.content.001`
    value of the DOC_091849799 fixture are read through plain `get`
    calls.
    """
    em = marcx.marcdoc(DOC_091849799)

    # `assertEqual` replaces the deprecated `assertEquals` alias, which
    # was removed from unittest in Python 3.12.
    self.assertEqual('bsz', em.get('_index'))

    content = em.get('_source').get('content')
    self.assertEqual('091849799', content.get('001'))