Example #1
0
 def index_articles(self, titles=None, start=0, num=1000000):
     """Post articles, and the sources they cite, to the docstore.

     Every title at position `start` or later is fetched through Proxy.
     A page that is published (or any page at all, when
     settings.MEDIAWIKI_SHOW_UNPUBLISHED is truthy) has each of its sources
     posted as a 'sources' document and is then posted itself as an
     'articles' document.  Other pages are collected and returned.

     @param titles: list of url_titles to retrieve; None means no titles.
     @param start: int Index of list at which to start (inclusive).
     @param num: int Maximum number of articles to post, beginning at start.
     @returns: (int num posted, list of pages that could not be posted)
     """
     if titles is None:
         titles = []
     posted = 0
     could_not_post = []
     total = len(titles)
     for n, title in enumerate(titles):
         if posted >= num:
             # Quota reached: nothing further would be posted, so stop early.
             break
         if n < start:
             # `start` is inclusive, so only indexes strictly below it are skipped.
             continue
         logging.debug('%s/%s %s', n, total, title)
         page = Proxy().page(title)
         if not (page.published or settings.MEDIAWIKI_SHOW_UNPUBLISHED):
             could_not_post.append(page)
             continue
         # Collect the IDs first; they replace the full source records on the
         # page once those records have been posted as their own documents.
         page_sources = [source['encyclopedia_id'] for source in page.sources]
         for source in page.sources:
             logging.debug('     %s', source['encyclopedia_id'])
             docstore.post(
                 settings.DOCSTORE_HOSTS, settings.DOCSTORE_INDEX, 'sources',
                 source['encyclopedia_id'], source
             )
         page.sources = page_sources
         docstore.post(
             settings.DOCSTORE_HOSTS, settings.DOCSTORE_INDEX, 'articles',
             title, page.__dict__
         )
         posted += 1
         logging.debug('posted %s', posted)
     if could_not_post:
         logging.debug('Could not post these: %s', could_not_post)
     return posted, could_not_post
Example #2
0
 def index_authors(self, titles=None):
     """Post each author page to the docstore as an 'authors' document.

     Each title is fetched through Proxy and the resulting page's __dict__
     is posted under that title.

     @param titles: list of url_titles to retrieve; None means no titles.
     """
     if titles is None:
         titles = []
     total = len(titles)
     for n, title in enumerate(titles):
         logging.debug('%s/%s %s', n, total, title)
         page = Proxy().page(title)
         docstore.post(
             settings.DOCSTORE_HOSTS, settings.DOCSTORE_INDEX, 'authors',
             title, page.__dict__
         )
Example #3
0
 def index_topics(self, json_text=None, url=settings.DDR_TOPICS_SRC_URL):
     """Upload topics.json; used for Encyc->DDR links on article pages.

     If json_text is not supplied it is downloaded from url.  Pass the URL
     by keyword, because the first positional argument is json_text:

     url = 'http://partner.densho.org/vocab/api/0.2/topics.json'
     models.Elasticsearch().index_topics(url=url)

     @param json_text: unicode Raw topics.json file text.
     @param url: URL of topics.json; only fetched when json_text is empty.
     @raises ValueError: if no JSON text was supplied and none could be
         downloaded, or if the text is not valid JSON.
     """
     if url and not json_text:
         r = requests.get(url)
         if r.status_code != 200:
             # Fail here with the real cause rather than letting
             # json.loads(None) raise an unrelated-looking TypeError below.
             raise ValueError(
                 'Could not download topics from %s (HTTP %s)' % (url, r.status_code)
             )
         json_text = r.text
     if json_text is None:
         raise ValueError('No topics JSON: neither json_text nor url was given')
     docstore.post(
         settings.DOCSTORE_HOSTS, settings.DOCSTORE_INDEX, 'vocab',
         'topics', json.loads(json_text),
     )