Esempio n. 1
0
 def test_check_required_fields(self):
     '''Step a single doc through each check made by has_required_fields.

     The same dict is mutated between assertions: every step adds the
     field named in the previous expected error, then asserts on the next
     outcome (another OMITTED error, or a True return value).
     '''
     doc = {'id': 'sid', '_id': 'hid'}
     with self.assertRaisesRegexp(
             MissingSourceResource,
             '---- OMITTED: Doc:hid has no sourceResource.'):
         has_required_fields(doc)

     doc['sourceResource'] = {}
     with self.assertRaisesRegexp(MissingTitle,
                                  '---- OMITTED: Doc:hid has no title.'):
         has_required_fields(doc)

     doc['sourceResource']['title'] = 'test-title'
     with self.assertRaisesRegexp(MissingRights,
                                  '---- OMITTED: Doc:hid has no rights.'):
         has_required_fields(doc)

     doc['sourceResource']['rights'] = 'hasRights'
     with self.assertRaisesRegexp(
             MissingIsShownAt,
             '---- OMITTED: Doc:hid has no isShownAt.'):
         has_required_fields(doc)

     # isShownAt values that are expected to be refused as "not a URL".
     # NOTE(review): the two adjacent literals join without a space
     # ("...to bea URL: ..."); presumably this mirrors the message built in
     # has_required_fields -- confirm before "fixing" it here.
     for rejected in ('y', 'http://', 'http://netloc'):
         doc['isShownAt'] = rejected
         with self.assertRaisesRegexp(
                 isShownAtNotURL,
                 '---- OMITTED: Doc:hid isShownAt doesn\'t appear to be'
                 'a URL: ' + rejected):
             has_required_fields(doc)

     # isShownAt values that are expected to pass.
     for accepted in ('http://netloc/path',
                      'http://netloc/;params',
                      'http://netloc/?query'):
         doc['isShownAt'] = accepted
         self.assertEqual(has_required_fields(doc), True)

     # Typing the doc as an image makes the 'object' key mandatory too.
     doc['sourceResource']['type'] = 'image'
     with self.assertRaisesRegexp(
             MissingImage,
             '---- OMITTED: Doc:hid is image type with no harvested image.'):
         has_required_fields(doc)

     doc['object'] = 'has object'
     self.assertEqual(has_required_fields(doc), True)
Esempio n. 2
0
 def test_check_required_fields(self):
     '''Exercise has_required_fields on a doc that is completed bit by bit.

     Starts from a doc holding only ids and adds one field at a time,
     asserting after every change whether the doc is refused (with the
     expected exception class and message) or accepted (returns True).
     '''
     doc = {'id': 'sid', '_id': 'hid'}

     def refused(exc_class, pattern):
         # The current state of doc must raise exc_class matching pattern.
         self.assertRaisesRegexp(exc_class, pattern,
                                 has_required_fields, doc)

     def accepted():
         # The current state of doc must be reported as complete.
         self.assertEqual(has_required_fields(doc), True)

     refused(MissingSourceResource,
             '---- OMITTED: Doc:hid has no sourceResource.')

     doc['sourceResource'] = {}
     refused(MissingTitle, '---- OMITTED: Doc:hid has no title.')

     doc['sourceResource']['title'] = 'test-title'
     refused(MissingRights, '---- OMITTED: Doc:hid has no rights.')

     doc['sourceResource']['rights'] = 'hasRights'
     refused(MissingIsShownAt, '---- OMITTED: Doc:hid has no isShownAt.')

     # NOTE(review): the adjacent literals below join without a space
     # ("...to bea URL: ..."); presumably matches the production message --
     # confirm against has_required_fields.
     doc['isShownAt'] = 'y'
     refused(isShownAtNotURL,
             '---- OMITTED: Doc:hid isShownAt doesn\'t appear to be'
             'a URL: y')

     doc['isShownAt'] = 'http://'
     refused(isShownAtNotURL,
             '---- OMITTED: Doc:hid isShownAt doesn\'t appear to be'
             'a URL: http://')

     doc['isShownAt'] = 'http://netloc'
     refused(isShownAtNotURL,
             '---- OMITTED: Doc:hid isShownAt doesn\'t appear to be'
             'a URL: http://netloc')

     doc['isShownAt'] = 'http://netloc/path'
     accepted()

     doc['isShownAt'] = 'http://netloc/;params'
     accepted()

     doc['isShownAt'] = 'http://netloc/?query'
     accepted()

     # An image-typed doc is refused until it also carries 'object'.
     doc['sourceResource']['type'] = 'image'
     refused(MissingImage,
             '---- OMITTED: Doc:hid is image type with no harvested image.')

     doc['object'] = 'has object'
     accepted()
def main(collection_key):
    '''Push the usable CouchDB docs of one collection to Solr.

    Iterates the rows produced by CouchDBCollectionFilter for
    collection_key. Each row's doc goes through fill_in_title and
    has_required_fields; when either raises KeyError the error message is
    printed and the row is skipped. Every other doc is converted with
    map_couch_to_solr_doc, remembered in a local list and handed to
    push_doc_to_solr together with the Solr(URL_SOLR) connection.

    collection_key -- passed straight to CouchDBCollectionFilter.

    Returns None.
    '''
    v = CouchDBCollectionFilter(couchdb_obj=get_couchdb(),
                                collection_key=collection_key)
    solr_db = Solr(URL_SOLR)
    # NOTE(review): results is filled but never returned or read here.
    results = []
    for r in v:
        # NOTE(review): dt_start / dt_end are recorded but never used in
        # this function -- presumably leftover timing instrumentation.
        dt_start = dt_end = datetime.datetime.now()
        try:
            # NOTE(review): the return value is bound to doc but r.doc is
            # what gets validated and mapped; this is only equivalent if
            # fill_in_title updates r.doc in place -- confirm.
            doc = fill_in_title(r.doc)
            has_required_fields(r.doc)
        except KeyError as e:
            # "except KeyError, e" is Python-2-only syntax; the "as" form
            # is accepted by Python 2.6+ and Python 3 alike.
            if hasattr(e, 'message'):
                message = e.message
            else:
                # Python 3 exceptions have no .message attribute; fall
                # back to the single constructor argument, which is what
                # Python 2 exposed as .message ('' otherwise).
                message = e.args[0] if len(e.args) == 1 else ''
            print(message)
            continue
        solr_doc = map_couch_to_solr_doc(r.doc)
        results.append(solr_doc)
        solr_doc = push_doc_to_solr(solr_doc, solr_db=solr_db)
        dt_end = datetime.datetime.now()
def main(collection_key):
    '''Send every valid doc of a CouchDB collection to Solr.

    For each row yielded by CouchDBCollectionFilter(collection_key) the
    row's doc is run through fill_in_title and has_required_fields. A
    KeyError from either call is reported with print and the row is
    skipped; otherwise the doc is mapped by map_couch_to_solr_doc, appended
    to a local list and pushed via push_doc_to_solr using a Solr(URL_SOLR)
    connection.

    collection_key -- forwarded unchanged to CouchDBCollectionFilter.

    Returns None.
    '''
    v = CouchDBCollectionFilter(
        couchdb_obj=get_couchdb(), collection_key=collection_key)
    solr_db = Solr(URL_SOLR)
    # NOTE(review): results is appended to but never returned or read.
    results = []
    for r in v:
        # NOTE(review): the timestamps below are never read in this
        # function -- presumably unfinished timing code.
        dt_start = dt_end = datetime.datetime.now()
        try:
            # NOTE(review): doc is assigned but r.doc is used afterwards;
            # equivalent only if fill_in_title mutates its argument --
            # confirm against its definition.
            doc = fill_in_title(r.doc)
            has_required_fields(r.doc)
        except KeyError as e:
            # The comma form "except KeyError, e" does not compile on
            # Python 3; "as" is valid from Python 2.6 onwards.
            # Prefer .message when the exception has one (Python 2, or a
            # class that defines it); Python 3 dropped the attribute, so
            # otherwise use the lone constructor argument, or '' if there
            # is not exactly one.
            if hasattr(e, 'message'):
                text = e.message
            elif len(e.args) == 1:
                text = e.args[0]
            else:
                text = ''
            print(text)
            continue
        solr_doc = map_couch_to_solr_doc(r.doc)
        results.append(solr_doc)
        solr_doc = push_doc_to_solr(solr_doc, solr_db=solr_db)
        dt_end = datetime.datetime.now()
Esempio n. 5
0
 def test_check_required_fields(self):
     """Check which doc shapes has_required_fields refuses or accepts.

     Each case builds a fresh doc; incomplete docs are expected to raise
     KeyError and complete ones to return True.
     """
     # Only an id, no sourceResource at all.
     with self.assertRaises(KeyError):
         has_required_fields({"id": "test-id"})

     # Empty sourceResource.
     with self.assertRaises(KeyError):
         has_required_fields({"id": "test-id", "sourceResource": {}})

     # NOTE(review): "_id" sits inside sourceResource here rather than at
     # the top level as in the cases below -- possibly unintended; confirm
     # which missing key this case is meant to trigger.
     with self.assertRaises(KeyError):
         has_required_fields({
             "id": "test-id",
             "sourceResource": {"title": "test-title", "_id": "x"},
         })

     # Title, _id and isShownAt present: accepted.
     complete = {
         "id": "test-id",
         "sourceResource": {"title": "test-title"},
         "_id": "x",
         "isShownAt": "y",
     }
     self.assertEqual(has_required_fields(complete), True)

     # Image-typed doc without the other top-level keys: refused.
     with self.assertRaises(KeyError):
         has_required_fields({
             "id": "test-id",
             "sourceResource": {"title": "test-title", "type": "image"},
         })

     # Image-typed doc with object, _id and isShownAt: accepted.
     complete_image = {
         "id": "test-id",
         "object": "hasobject",
         "_id": "x",
         "isShownAt": "y",
         "sourceResource": {"title": "test-title", "type": "image"},
     }
     self.assertEqual(has_required_fields(complete_image), True)