def get_context(self):
    """Enrich each stored submission with article data and country mentions.

    For every value in ``self._submissions`` an ``Article`` is built from
    ``value["submission.url"]``, then downloaded, parsed and run through
    ``nlp()``. The article's authors, text, top image, summary and keywords
    are written back into the submission dict under ``"article.*"`` keys,
    together with the list of distinct countries the geoparser predicted for
    the article text (``"article.countries"``).

    ``self._countries`` accumulates, per country, the sum of
    ``value["calculated_score"]`` over all submissions mentioning it. Each
    country is counted at most once per article.

    Processing is best-effort: any exception raised while handling one
    submission is logged with its traceback and the loop moves on. A
    submission that fails part-way keeps whatever keys were already written.
    """
    logging.info("Ongoing...")

    # Built on first use and then shared by all submissions; constructing a
    # new parser for every article repeats the same setup work each time.
    # Creation stays inside the ``try`` so a construction failure is still
    # logged per submission rather than aborting the whole run.
    geo = None

    for value in self._submissions.values():
        try:
            article = Article(value["submission.url"])
            article.download()
            article.parse()
            article.nlp()
            value["article.authors"] = article.authors
            value["article.text"] = article.text

            if geo is None:
                geo = Geoparser()
            places = geo.geoparse(article.text)

            # De-duplicate so an article scores each country only once.
            countries_in_article = {
                place["country_predicted"] for place in places
            }

            for country in countries_in_article:
                key = str(country)
                if key not in self._countries:
                    self._countries[key] = value["calculated_score"]
                else:
                    self._countries[key] += value["calculated_score"]

            value["article.top_image"] = article.top_image
            value["article.summary"] = article.summary
            value["article.keywords"] = article.keywords
            value["article.countries"] = list(countries_in_article)
        except Exception as e:
            # Deliberately broad: one bad URL or parse failure must not stop
            # the remaining submissions. Log with traceback so it is visible.
            logging.exception("Error: %s", e)
class VCGeotagger:
    """Turn free text or ``"City, ST"`` strings into ``(city, state)`` pairs.

    Missing components are reported as the string ``"None"`` (not the
    ``None`` object), so every public method returns a 2-tuple of strings.
    """

    def __init__(self):
        """Create the geoparser and the abbreviation -> state-name lookup.

        The lookup is built from the module-level ``states`` string, which is
        split on newlines and then on ``'-'``; element 0 of each entry is
        used as the long name and element 1 as the abbreviation.
        """
        self.geo = Geoparser()
        long_short_state = [ent.split('-') for ent in states.split('\n')]
        # NOTE(review): hard-coded row index; presumably overrides the Georgia
        # entry so it is not confused with the country - confirm against
        # the ``states`` table.
        long_short_state[9] = ['Georgia United States', 'GA']
        self.state_dict = {a[1]: a[0] for a in long_short_state}

    def remove_non_ascii(self, text):
        """Return ``text`` with every character of code point >= 128 dropped."""
        return ''.join(ch for ch in text if ord(ch) < 128)

    def geotag(self, text):
        """Geoparse ``text`` and return ``(city, state)`` for the first US hit.

        Only results whose ``country_predicted`` is ``'USA'`` and that carry a
        ``'geo'`` entry are considered. When the matched place name equals its
        ``admin1`` value the city is reported as ``"None"``. If there are no
        results, or none qualifies, ``("None", "None")`` is returned.
        """
        text = self.remove_non_ascii(text)
        result = self.geo.geoparse(text)
        for r in result or ():
            if r['country_predicted'] == 'USA' and 'geo' in r:
                state = r['geo']['admin1']
                city = r['geo']['place_name']
                if state != city:
                    return city, state
                return "None", state
        # No qualifying hit: still return a pair so callers can always unpack.
        return "None", "None"

    def placetag(self, text):
        """Split a ``"City, ST"`` string into ``(city, state)``.

        The part after ``', '`` is expanded through ``self.state_dict`` when
        it is a known abbreviation and passed through unchanged otherwise.
        Input that does not split into exactly two parts yields
        ``('None', 'None')``.
        """
        parts = text.split(', ')
        if len(parts) != 2:
            return 'None', 'None'
        city, abbreviation = parts
        return city, self.state_dict.get(abbreviation, abbreviation)
def search_geolocation(message):
    """Report a paste as containing geolocation when coordinates are found.

    ``message`` is used to build a ``Paste`` and is also the text searched
    for ``'lat': '<number>',`` and ``'lon': '<number>',`` fragments. When both
    are present the coordinates are printed and a warning naming the paste is
    published. When either is missing the function returns without printing
    or publishing anything.
    """
    paste = Paste.Paste(message)
    # NOTE(review): the paste content and the geoparser output below are
    # fetched but never used; the regexes run on ``message`` itself. The calls
    # are kept because their side effects are unknown here - confirm
    # which text the coordinate search is actually meant to inspect.
    content = paste.get_p_content()

    # Load Geoparser
    geo = Geoparser()
    geolocation = geo.geoparse(message)

    # regex to find latitude and longitude
    reg_lat = re.compile(r'(\'lat\': \'([-\d.]+)\',)')
    reg_lon = re.compile(r'(\'lon\': \'([-\d.]+)\',)')

    lat_match = reg_lat.search(message)
    lon_match = reg_lon.search(message)
    if lat_match is None or lon_match is None:
        # No coordinate pair in the text: nothing to report. Calling
        # ``.group`` on a failed search would raise AttributeError.
        return

    lat = lat_match.group(2)
    lon = lon_match.group(2)

    print('latitude: {}'.format(lat))
    print('longitude: {}'.format(lon))

    print('{} text geolocation'.format(paste.p_name))
    publisher.warning('{} contains geolocation'.format(paste.p_name))
# Batch step: geoparse every not-yet-processed file in ../processed_files that
# the ``xmlfile`` pattern accepts, and write one result per line to
# ../geoparser_output/<name>_output.txt.

# Matches (via ``.match``) any word containing at least one capital letter.
# The earlier pattern used ``[A-Z]*``, which also matches zero capitals and
# therefore let every word through.
capital = re.compile(r'.*[A-Z]')

# Created on first use and shared across files, so nothing is loaded when
# there is no work to do and the parser is not rebuilt for every file.
geo = None

for inputfile in os.listdir("../processed_files"):
    name, extension = os.path.splitext(inputfile)
    outfilename = name + "_output.txt"
    inputfile = "../processed_files/" + inputfile
    print("Outfile name: " + outfilename)

    # Only process XML files
    if not xmlfile.match(inputfile):
        continue

    outfilename = "../geoparser_output/" + outfilename
    if os.path.exists(outfilename):
        # Output already present from an earlier run: skip.
        continue

    if geo is None:
        geo = Geoparser()

    with open(inputfile, "r") as infile:
        print("Processing data from " + inputfile + "...")
        # Pass the real text to the parser; ``str()`` of a list of lines would
        # add brackets, quotes and escaped newlines to the input.
        data = infile.read()

    output = geo.geoparse(data)

    with open(outfilename, "a") as outfile:
        for word in output:
            # Filter out place names that don't contain any capital letter
            # (remove this condition to disable the filter).
            if capital.match(word['word']):
                outfile.write(str(word))
                outfile.write("\n")
# Minimal demo: build a geoparser, run it on one sample sentence and print
# whatever it returns.
from mordecai import Geoparser

geo = Geoparser()
parsed = geo.geoparse("Retencions a la B-23, Barcelona.")
print(parsed)
from functools import reduce import json geo = Geoparser() ALL_Location = [] run_function = lambda x, y: x if y in x else x + [y] with open('data/covid_19.csv', errors="ignore") as f: Reader = csv.DictReader(f) for row in Reader: a = row["abstract"] t = row["title"] j = row["journal"] url = row["url"] try: geoINF = geo.geoparse(a) except: continue for i in range(len(geoINF)): location = { 'word': '', 'place_name': '', 'country': '', 'lat': '', 'lon': '', 'title': t, 'journal': j, 'url': url } try: a = geoINF[i]['geo']