15: "Social Sciences , Religion and Literature", 16: "Europe", 17: "Military and War", 18: "Africa, India and Middle East", 19: "Asia", 20: "Great Britain", 21: "Technology and Science"} DATADIR = 'models/' id2word = corpora.Dictionary.load_from_text( os.path.join(DATADIR, 'wiki_wordids.txt.bz2')) lda_model = LdaModel.load(os.path.join(DATADIR, 'lda_model')) geoip_reader = geoip2.database.Reader( os.path.join(DATADIR, 'GeoLite2-Country.mmdb')) wikipedia.set_user_agent( 'EditsGeoVisualization/1.0 (http://yasermartinez.com; [email protected])') logging.info("Loaded models") #--------------------------------------------------------# # Helpers # #--------------------------------------------------------# def wiki_bow(title): """This function downloads text from Wikipedia Parameters ---------- title: string The title of the article
def _with_retries(func):
    """Return *func* wrapped by ``try_again_dec``.

    The decorator is configured with the wikipedia timeout and redirect
    errors, the generic ``requests`` exception, and ``retry=3``.
    NOTE(review): presumably this retries the call up to three times on
    those errors -- confirm against ``try_again_dec``.
    """
    decorate = try_again_dec(
        wikipedia.exceptions.HTTPTimeoutError,
        wikipedia.exceptions.RedirectError,
        requests.exceptions.RequestException,
        retry=3,
    )
    return decorate(func)


# Replace the main wikipedia entry points with their wrapped versions.
for name in ('geosearch', 'languages', 'page',
             'search', 'suggest', 'summary'):
    setattr(wikipedia, name, _with_retries(getattr(wikipedia, name)))

# wikipedia configuration
wikipedia.set_lang('en')
wikipedia.set_rate_limiting(True)
wikipedia.set_user_agent(
    'Newsparser NE comparison (http://newsparser704.pythonanywhere.com/)'
)


class WikiData:
    """Holds a MongoDB client and a handle to its ``wiki`` database."""

    # NOTE(review): these look like collection names inside the ``wiki``
    # database -- confirm against the code that uses them.
    pages = 'pages'
    ne_mapping = 'ne_mapping'
    # Expose the module-level ``nlp`` object as a class attribute.
    nlp = nlp

    def __init__(self):
        """Create a ``MongoClient`` with default arguments and select ``wiki``."""
        client = MongoClient()
        self.mongo_client = client
        self.db = client.wiki
from .utils import lazyinit
from .utils.decorators import try_again_dec

# Use try_again_dec with the main methods of wikipedia
# NOTE(review): presumably this retries each call up to three times on the
# listed errors -- confirm against try_again_dec.
for name in ['geosearch', 'languages', 'page', 'search', 'suggest',
             'summary']:
    setattr(
        wikipedia,
        name,
        try_again_dec(
            wikipedia.exceptions.HTTPTimeoutError,
            wikipedia.exceptions.RedirectError,
            requests.exceptions.RequestException,
            retry=3,
        )(getattr(wikipedia, name)),
    )

# wikipedia configuration
wikipedia.set_lang('en')
wikipedia.set_rate_limiting(True)
wikipedia.set_user_agent(
    'Newsparser NE comparison (http://newsparser704.pythonanywhere.com/)')


class WikiData:
    """Holds a MongoDB client and a handle to its ``wiki`` database.

    The client is created in ``__init__`` and closed when the instance is
    finalized.
    """

    # NOTE(review): these look like collection names inside the ``wiki``
    # database -- confirm against the code that uses them.
    pages = 'pages'
    ne_mapping = 'ne_mapping'
    # Expose the module-level ``nlp`` object as a class attribute.
    nlp = nlp

    def __init__(self):
        """Create a ``MongoClient`` with default arguments and select ``wiki``."""
        self.mongo_client = MongoClient()
        self.db = self.mongo_client.wiki

    def __del__(self):
        """Close the MongoDB client if one was created.

        The finalizer also runs for instances whose ``__init__`` raised
        before ``mongo_client`` was assigned, so the attribute is looked up
        defensively instead of raising ``AttributeError`` here.
        """
        client = getattr(self, 'mongo_client', None)
        if client is not None:
            client.close()