15: "Social Sciences , Religion and Literature",
               16: "Europe",
               17: "Military and War",
               18: "Africa, India and Middle East",
               19: "Asia",
               20: "Great Britain",
               21: "Technology and Science"}


# Pre-trained resources: the gensim dictionary and LDA model used for topic
# inference, and the GeoLite2 database used to map IP addresses to countries.
DATADIR = 'models/'
id2word = corpora.Dictionary.load_from_text(
    os.path.join(DATADIR, 'wiki_wordids.txt.bz2'))
lda_model = LdaModel.load(os.path.join(DATADIR, 'lda_model'))
geoip_reader = geoip2.database.Reader(
    os.path.join(DATADIR, 'GeoLite2-Country.mmdb'))
wikipedia.set_user_agent(
    'EditsGeoVisualization/1.0 (http://yasermartinez.com; [email protected])')

logging.info("Loaded models")
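# Usage sketch (not part of the original source): with the objects loaded
# above, a tokenized document can be mapped through `id2word` and scored by
# `lda_model`, and the resulting topic ids looked up in the topic-name mapping
# at the top of this module. The token list below is a hypothetical example.
#
#   tokens = "the regiment advanced across the river at dawn".split()
#   bow = id2word.doc2bow(tokens)
#   for topic_id, prob in lda_model.get_document_topics(bow):
#       print(topic_id, prob)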


#--------------------------------------------------------#
#                       Helpers                          #
#--------------------------------------------------------#
def wiki_bow(title):
    """Download the text of a Wikipedia article and convert it to a bag of words.

    Parameters
    ----------
    title: string
        The title of the article
    """
    # NOTE: the body of this helper is truncated in the source; the lines below
    # are a minimal reconstruction, assuming the article text is tokenized and
    # mapped through the `id2word` dictionary loaded above.
    content = wikipedia.page(title).content
    return id2word.doc2bow(content.lower().split())
Example #3
# Third-party imports needed by the code below
import requests
import wikipedia
from pymongo import MongoClient

from .utils import lazyinit
from .utils.decorators import try_again_dec

# Wrap the main wikipedia functions with try_again_dec so transient request errors are retried
for name in ['geosearch', 'languages', 'page', 'search', 'suggest', 'summary']:
    setattr(
        wikipedia, name,
        try_again_dec(wikipedia.exceptions.HTTPTimeoutError,
                      wikipedia.exceptions.RedirectError,
                      requests.exceptions.RequestException,
                      retry=3)(getattr(wikipedia, name)))
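# Illustration (not part of the original source): the implementation of
# try_again_dec lives in .utils.decorators and is not shown here. A retry
# decorator with this call signature (exception classes, plus a `retry`
# attempt count) might look roughly like the hypothetical sketch below:
#
#   import functools
#
#   def retry_sketch(*exceptions, retry=3):
#       def wrap(func):
#           @functools.wraps(func)
#           def inner(*args, **kwargs):
#               for attempt in range(retry):
#                   try:
#                       return func(*args, **kwargs)
#                   except exceptions:
#                       if attempt == retry - 1:
#                           raise
#           return inner
#       return wrap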

# wikipedia configuration
wikipedia.set_lang('en')
wikipedia.set_rate_limiting(True)
wikipedia.set_user_agent(
    'Newsparser NE comparison (http://newsparser704.pythonanywhere.com/)')


class WikiData:

    # names of the MongoDB collections used by this class
    pages = 'pages'
    ne_mapping = 'ne_mapping'

    # shared NLP pipeline taken from the module-level `nlp` object
    nlp = nlp

    def __init__(self):
        self.mongo_client = MongoClient()
        self.db = self.mongo_client.wiki

    def __del__(self):
        self.mongo_client.close()
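
    # Usage sketch (not part of the original source): WikiData opens a local
    # MongoDB connection on construction and closes it when the instance is
    # garbage collected.
    #
    #   wd = WikiData()
    #   print(wd.db.name)  # -> 'wiki'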