def select_search_url(self, entity):
        search = entity.get_search_terms(entity.names[0])
        results = wpapi.full_text_search(search)
        extra = ""
        if entity.type == 'politician':
            md = entity.politician_metadata.all()[0]
            extra += md.state + ", " + md.party

        print
        print '----------------------', search, "(%s)" % entity.type, extra
        print
        for i,result in enumerate(results):
            so_wide = []
            for j in range(0, len(result['content']), 80):
                so_wide.append(result['content'][j:j + 80])
            content = "\n  ".join(so_wide)
            print i, result['title']
            print " ", wpapi.article_url(result['title'])
            print " ", content
            print

        while True:
            selection = raw_input(search + " ?> ")
            if selection == 'n':
                return None
            try:
                i = int(selection)
                return wpapi.article_url(results[i]['title'])
            except ValueError:
                pass

            if selection.startswith("http"):
                return selection
Esempio n. 2
0
    def select_search_url(self, entity):
        search = entity.get_search_terms(entity.names[0])
        results = wpapi.full_text_search(search)
        extra = ""
        if entity.type == 'politician':
            md = entity.politician_metadata.all()[0]
            extra += md.state + ", " + md.party

        print
        print '----------------------', search, "(%s)" % entity.type, extra
        print
        for i, result in enumerate(results):
            so_wide = []
            for j in range(0, len(result['content']), 80):
                so_wide.append(result['content'][j:j + 80])
            content = "\n  ".join(so_wide)
            print i, result['title']
            print " ", wpapi.article_url(result['title'])
            print " ", content
            print

        while True:
            selection = raw_input(search + " ?> ")
            if selection == 'n':
                return None
            try:
                i = int(selection)
                return wpapi.article_url(results[i]['title'])
            except ValueError:
                pass

            if selection.startswith("http"):
                return selection
Esempio n. 3
0
def find_wikipedia_url(entity):
    """
    Returns a tuple of (url, article excerpt, image url) for the first search
    result that passes the matching rules for the given entity.

    If no result matches, or the entity type is 'individual' or 'industry'
    (for which no search is attempted), a list of three empty strings is
    returned instead -- not None -- so the result can always be unpacked into
    three values.
    """
    empty_result = ['', '', '']
    if entity.type in ('individual', 'industry'):
        return empty_result

    for ename in entity.names:
        # Search for exact title matches with redirects.  Use for comparing
        # titles later -- we might exactly match a redirect title, but not
        # match the destination page at all.  Full text search returns only
        # the destination pages, not the redirections.
        redirects = wpapi.title_search_redirects(ename.search_string())

        # Full text search!
        results = wpapi.full_text_search(entity.get_search_terms(ename))
        for result in results:
            article = wpapi.WikipediaArticle(result['title'])
            # Exclude special namespaced articles (e.g. User:, Template:,
            # etc.), "List of ..." pages and disambiguation pages.
            if (article.namespace or article.title.startswith("List of")
                    or article.is_disambiguation_page()):
                continue

            # Name to compare against: when the article title has an entry
            # in the redirects mapping, use the name of the article that
            # entry points at; otherwise the article's own name.
            if article.title in redirects:
                name_comp = wpapi.WikipediaArticle(redirects.get(
                    article.title)).name
            else:
                name_comp = article.name

            if entity.type == 'politician':
                if ename != PersonName(name_comp):
                    continue
                if not article.is_politician():
                    continue
                if not article.is_american():
                    continue
                # Fetch the subject once and reuse it for both the presence
                # check and the name comparison.
                subject = article.get_subject()
                if subject and ename != PersonName(subject):
                    continue

            elif entity.type == 'organization':
                if ename.is_politician():
                    # Organization whose name is a politician's: compare on
                    # the embedded person name and apply the politician
                    # checks to the article.
                    if ename.pname != PersonName(name_comp):
                        continue
                    if not article.is_politician():
                        continue
                    if not article.is_american():
                        continue
                else:
                    if ename != OrganizationName(name_comp):
                        continue
                    if article.is_person():
                        continue
                    if ename.is_company() and not article.is_company():
                        continue

            # First article that survives every check wins.
            wikipedia_url = wpapi.article_url(article.title)
            wikipedia_excerpt, image_url = wpapi.get_article_excerpt_and_image_url(
                article.title)
            return (wikipedia_url, wikipedia_excerpt, image_url)
    return empty_result