def __init__(self):
    """Parse the command line options and run the requested steps.

    Depending on the options the constructor downloads/updates the OSM
    data, reads the Wikipedia tags from the filtered OSM file, organizes
    the Wikipedia categories in themes and regions, merges the OSM status
    into them, updates the statistics and, with ``-w``, creates the web
    pages for every requested locale.

    The process is terminated through ``sys.exit(1)`` when no option is
    given (after printing the help), when no analysis is requested and
    after ``-p`` without ``--category_info``.
    """
    # Options
    text = (
        "Starting from a list of Wikipedia categories written by the user "
        "in 'config.cfg' file, the script: "
        "- downloads/updates a national OSM data file "
        "- downloads from (from Quick Intersection) Wikipedia data "
        "regarding the selected categories (subcategories and articles "
        "names) "
        "- creates webpages for showing which articles are already tagged "
        "and which ones are not."
    )
    parser = argparse.ArgumentParser(description=text)
    group = parser.add_mutually_exclusive_group()

    # Manage OSM data
    parser.add_argument(
        "-d", "--download_osm",
        help="Download OSM data of the country (from Geofabrik)",
        action="store_true")
    parser.add_argument(
        "-u", "--update_osm",
        help="Update downloaded OSM data of the country (through "
             "osmupdate)",
        action="store_true")

    # Analyze data from Wikipedia and OSM
    parser.add_argument(
        "-a", "--analyze",
        help="Analyze Wikipedia data (categories' sub-categories and "
             "articles) ed OSM data (existing Wikipedia tags)",
        action="store_true")
    parser.add_argument(
        "--category_info",
        help="Analyze data and print informations regarding a specific "
             "category",
        action="store")
    parser.add_argument(
        "-t", "--show_missing_templates",
        help="Mark on web pages the articles that miss geo template "
             "(Coord)",
        action="store_true")
    parser.add_argument(
        "-c", "--show_link_to_wikipedia_coordinates",
        help="If a non-tagged article have the coordinates on Wikipedia, "
             "show on the web pages a link to zoom on its position with "
             "JOSM/iD",
        action="store_true")
    parser.add_argument(
        "-o", "--show_coordinates_from_osm",
        help="Calculate OSM coordinates of articles (point for nodes, "
             "centroids for ways and relations)",
        action="store_true")
    parser.add_argument(
        "-n", "--infer_coordinates_from_wikipedia",
        help="Use Nuts4Nuts to calculate the coordinates of a non tagged "
             "article whithout coordinates on Wikipedia",
        action="store_true")
    group.add_argument(
        "-p", "--print_categories_list",
        help="Analyze data and print project's categories.",
        action="store_true")

    # Create webpages
    group.add_argument(
        "-w", "--create_webpages",
        help="Analyze data and create web pages",
        action="store_true")
    parser.add_argument(
        "-s", "--save_stats",
        help="If web pages have been created, store the updated number of "
             "tagged articles (default: ask to user).",
        action="store_true")
    parser.add_argument(
        "--browser",
        help="Open the web pages with the system browser after creation.",
        action="store_true")
    parser.add_argument(
        "--copy",
        help="Copy html folder to the directory configured on "
             "`config.cfg` (eg. dropbox dir).",
        action="store_true")
    parser.add_argument(
        "--locale", nargs='+', dest='locales', metavar='LANG',
        help="Generate pages in the specified locales. Default: use the "
             "system locale. ")

    self.args = parser.parse_args()

    # Each of these options needs the analysis step, so they imply -a.
    if (self.args.category_info
            or self.args.create_webpages
            or self.args.print_categories_list
            or self.args.show_missing_templates
            or self.args.show_coordinates_from_osm):
        self.args.analyze = True

    # Default value for locale: the language code of the system locale
    sys_locale_langcode = locale.getdefaultlocale()[0]
    if not self.args.locales:
        self.args.locales = [sys_locale_langcode]

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    # Work from the directory of the script. abspath() is required:
    # dirname() returns "" when the script is launched from its own
    # directory and os.chdir("") raises OSError.
    os.chdir(os.path.dirname(os.path.abspath(sys.argv[0])))

    # Configurations
    themesAndCatsNames = self.read_config()

    ### Manage OpenStreetMap data ##########################################
    # Analyse national OSM data file and create lists of already
    # tagged Wikipedia articles.

    # Download/update OSM data
    if self.args.download_osm:
        OSM.download_osm_data(self)
    if self.args.update_osm:
        status = OSM.update_osm_data(self)
    # `status` is only evaluated when -u was given, hence always bound here
    if self.args.download_osm or (self.args.update_osm and status):
        OSM.filter_wikipedia_data_in_osm_file(self)
    if self.args.update_osm and not status:
        print("OSM data where already uptodate or osmupdate has been "
              "interrupted. "
              "To repeat the updating process, launch the script again "
              "with the `-u` option.")

    if not self.args.analyze:
        # "There's nothing left for me to tell you"
        sys.exit(1)

    if not os.path.isfile(self.wOSMFile):
        OSM.filter_wikipedia_data_in_osm_file(self)

    # Extract Wikipedia articles tagged in OSM with preferred language.
    # If an article is tagged in a foreign language, ask to Wikipedia
    # what is the corresponding article of the preferred language, so
    # that we can flag it as tagged as well.
    print("\n- Read from the OSM file the articles already tagged")
    parseOSMData = ParseOSMData(self)
    # list of Wikipedia tags in OSM
    self.tagsInOSM = parseOSMData.allTags
    self.tagsData = parseOSMData.tagsData
    # list of tagged Wikipedia articles
    self.taggedTitles = parseOSMData.titles
    # tags with errors
    self.wrongTags = parseOSMData.wrongTags
    # ugly tags (with url, language capitalized...), not errors
    self.badTags = parseOSMData.badTags
    # add articles manually flagged as tagged in data/workaround/tagged.csv
    # in case the parser misses them (strange tags)
    self.add_tagged_articles()

    if self.args.show_coordinates_from_osm:
        print("\n--- Add OSM coordinates to the articles")
        parseOSMData.get_centroids()

    ### Manage Wikipedia data ##############################################
    # Read from 'non-mappable' file the categories and articles that
    # aren't mappable e.g. "Paintings in the X museum",
    # self.nonMappable = {mainCategory.name : {"articles" : [], "subcategories" : []}}
    self.nonMappable = self.read_non_mappable_items()

    # Check if we have Wikipedia data from Quick Intersection of all the
    # categories in the project (config.cfg file)
    themesAndCatsNames = wikipedia_downloader.check_catscan_data(
        self, themesAndCatsNames)

    # Organize Wikipedia data.
    # self.themes = [Theme(), ...]
    #   Theme().categories = [Category(), ...]
    #     Category().subcategories = [Category(), ...]
    #     Category().articles = [Article(), ...]
    # categories without Quick Intersection data
    self.categoriesWithoutData = []
    allThemes = Themes(self, themesAndCatsNames)
    self.themes = allThemes.themesList

    # Organize data in regions, for a different visualization
    # self.regions = [Region()]
    #   Region().categories = [Category(), ... ]
    self.regions = []
    if self.regionsNames != []:
        self.regions = Regions(self).regionsList

    # Print names of all categories
    if self.args.print_categories_list:
        self.display_categories_names()
        if not self.args.category_info:
            # "There's nothing left for me to tell you"
            sys.exit(1)

    ### Merge OSM info into Wikipedia data #################################
    # Add to Wikipedia categories and articles instances info about
    # their status in OSM: (tagged/not tagged), osm ids and counters
    print("\n- Check which articles are already tagged in the country's "
          "OSM file")
    for theme in self.themes:
        for category in theme.categories:
            category.check_articles_in_osm()
    self.titlesInOSM, self.titlesNotInOSM = \
        allThemes.lists_of_titles_in_osm_or_not()

    # Ask to Wikipedia which articles have/have not Coord template.
    # Articles with article.hasTemplate == False will be marked on web pages.
    if self.args.show_missing_templates:
        print("\n- Check which articles miss geo template (Coord) in "
              "Wikipedia")
        self.templatesStatus = \
            wikipedia_downloader.read_old_templates_status(self)
        wikipedia_downloader.update_templates_status(self)
        # Set hasTemplate = False to articles without Coord template
        for theme in self.themes:
            for category in theme.categories:
                category.set_has_template_in_articles()

    # If an article is not already tagged in OSM but Wikipedia knows its
    # position, it is possible to add a link to zoom to that position
    # with JOSM.
    if self.args.show_link_to_wikipedia_coordinates:
        print("\n- Check the non tagged articles whose position is known "
              "by Wikipedia")
        wikipedia_downloader.add_wikipedia_coordinates(self)
        # Save GeoJSON file with titles and coordinates known by Wikipedia
        self.save_titles_with_coords_geojson()

    if self.args.infer_coordinates_from_wikipedia:
        print("\n- Use Nuts4Nuts to infer coordinates of non tagged "
              "articles, whose position is unknown by Wikipedia")
        nuts4nuts_infer.infer_coordinates_with_nuts4nuts(self)

    # For debugging
    # print info about a specific category
    if self.args.category_info:
        self.print_category_info(self.args.category_info.replace(" ", "_"))
        # NOTE(review): the pause is assumed to apply only after the
        # category info has been printed - confirm.
        if self.args.create_webpages:
            raw_input("\nContinue?[Press any key]")

    # write categories trees to text files (uncomment lines)
    if self.print_categories_to_text_files == "true":
        for theme in self.themes:
            for category in theme.categories:
                category.print_category_tree_to_file()

    # Read and update stats with the number of tagged articles
    self.dates, self.days = self.read_past_stats()
    download_other_countries = False
    self.todayDate, today = self.read_new_stats(download_other_countries)
    self.days.append(today)
    self.dates.append(self.todayDate)
    if len(self.dates) > 1 and self.todayDate == self.dates[-2]:
        # This is the second analysis of today.
        # Overwrite the previous statistics
        del self.dates[-2]
        del self.days[-2]
        print("\n This is the second time that data ara analyzed today. "
              "The number of tagged articles will replace that of the "
              "lust run in the tags' numbers table.")

    # Count tags added by each user
    self.users = Users(self).users

    # Create a json file with the data (needed by non_mappable.html)
    tree = {"mappable": True,
            "name": "Main",
            "size": 1,
            "children": []}
    for theme in self.themes:
        for category in theme.categories:
            tree["children"].append(category.build_json_tree())
    # `with` closes the file even when serializing or writing fails
    with open(os.path.join(self.HTMLDIR, "json", "main.json"), "w") as ifile:
        ifile.write(json.dumps(tree))

    # Create webpages
    if self.args.create_webpages:
        # Restrict to the supported locales
        requested_locales = frozenset(self.args.locales)
        supported_locales = frozenset(self.SUPPORTED_LOCALES)
        self.locales = supported_locales.intersection(requested_locales)

        for locale_langcode in requested_locales - supported_locales:
            print('Warning: dropping unsupported locale: {0}'.format(
                locale_langcode))

        # if no supported locale is chosen fallback to en_US
        if not self.locales:
            self.locales = frozenset(['en_US'])

        for locale_langcode in self.locales:
            self.translations = Translations.load("locale",
                                                  [locale_langcode])
            self._ = self.translations.ugettext
            print("\n- Create web pages with locale:  {0}".format(
                locale_langcode))
            Creator(self, locale_langcode)

            if self.args.browser:
                url = os.path.join('html', locale_langcode, 'index.html')
                # using .get() suppress stdout output from browser, won't
                # suppress stderr
                webbrowser.get().open_new(url)

        # Create the index.html in the main HTMLDIR to redirect to one
        # locales directory
        for lang in self.locales:
            if self.WIKIPEDIALANG in lang:
                Redirect(self, lang)
                break

        # Save stats
        # NOTE(review): assumed to run only when web pages have been
        # created, as the -s help text says - confirm.
        if self.args.save_stats:
            self.save_stats_to_csv()
            print("\nNew stats have been saved.")
        else:
            print("\nNo stats saved.")

    # Copy files from html dir to outdir (for example a Dropbox directory)
    if self.args.copy:
        self.copy_html_files_to_outdir()

    print("\nDone.")
def __init__(self):
    """Parse the command line options and run the requested steps.

    Depending on the options the constructor downloads/updates the OSM
    data, reads the Wikipedia tags from the filtered OSM file, organizes
    the Wikipedia categories in themes and regions, merges the OSM status
    into them, updates the statistics and, with ``-w``, creates the web
    pages.

    The process is terminated through ``sys.exit(1)`` when no option is
    given (after printing the help), when no analysis is requested and
    after ``-p`` without ``--category_info``.
    """
    # Options
    text = (
        "A partire da una lista di categorie inserite dall'utente nel file "
        "'config', lo script: "
        "scarica/aggiorna i dati OSM nazionali, scarica da Wikipedia i dati "
        "sulle categorie (gli articoli che le compongono) "
        "e crea delle pagine HTML indicando gli articoli già taggati e da "
        "taggare in OSM."
    )
    parser = argparse.ArgumentParser(description=text)
    group = parser.add_mutually_exclusive_group()

    # Manage OSM data
    parser.add_argument(
        "-d", "--download_osm",
        help="Scarica i dati OSM nazionali (da Geofabrik)",
        action="store_true",
    )
    parser.add_argument(
        "-u", "--update_osm",
        help="Aggiorna i dati OSM nazionali scaricati (tramite osmupdate)",
        action="store_true",
    )

    # Analyze data from Wikipedia and OSM
    parser.add_argument(
        "-a", "--analyze",
        help="Analizza i dati: Wikipedia (sottocategorie ed articoli delle "
             "categorie) ed OSM (tag Wikipedia presenti)",
        action="store_true",
    )
    parser.add_argument(
        "--category_info",
        help="Analizza i dati e stampa le informazioni su una specifica "
             "categoria",
        action="store",
    )
    parser.add_argument(
        "-t", "--show_missing_templates",
        help="Segnala gli articoli senza template Coord",
        action="store_true",
    )
    parser.add_argument(
        "-c", "--show_link_to_wikipedia_coordinates",
        help="Se un articolo non taggato ha delle coordinate su Wikipedia, "
             "mostra un link per zoomare sulla sua posizione con JOSM",
        action="store_true",
    )
    parser.add_argument(
        "-o", "--show_coordinates_from_osm",
        help="Calcola le coordinate del punto (per i nodi) o del centroide "
             "(per way e relations) dell'oggetto",
        action="store_true",
    )
    parser.add_argument(
        "-n", "--infer_coordinates_from_wikipedia",
        help="Usa Nuts4Nuts per cercare le coordinate di un articolo non "
             "taggato e senza coordinate su Wikipedia",
        action="store_true",
    )
    parser.add_argument(
        "-r", "--update_redirects",
        help="Controlla gli articoli delle liste che sono redirects e crea "
             "un file con la loro lista per poterli aggiungere manualmente "
             "tra i non mappabili, perché non supportati da WIWOSM",
        action="store_true",
    )
    group.add_argument(
        "-p", "--print_categories_list",
        help="Analizza i dati e stampa la lista delle categorie nel "
             "progetto.",
        action="store_true",
    )

    # Create webpages
    group.add_argument(
        "-w", "--create_webpages",
        help="Analizza i dati ed aggiorna le pagine web",
        action="store_true",
    )
    parser.add_argument(
        "-s", "--save_stats",
        help="Se sono state aggiornate le pagine web, salva il conteggio "
             "aggiornato con il numero di articoli taggati (default: chiedi "
             "cosa fare).",
        action="store_true",
    )
    parser.add_argument(
        "--nofx",
        help="Non aprire le pagine web in Firefox dopo averle aggiornate.",
        action="store_true",
    )
    parser.add_argument(
        "--copy",
        help="Copia la cartella html nella directory descritta nel file "
             "config (es. dir dropbox).",
        action="store_true",
    )
    parser.add_argument(
        "--bitly",
        help="Use bitly links, to count visits to homepage.",
        action="store_true",
    )

    self.args = parser.parse_args()

    # Each of these options needs the analysis step, so they imply -a.
    if (
        self.args.category_info
        or self.args.create_webpages
        or self.args.print_categories_list
        or self.args.update_redirects
        or self.args.show_missing_templates
        or self.args.show_coordinates_from_osm
    ):
        self.args.analyze = True

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    # Work from the directory of the script. abspath() is required:
    # dirname() returns "" when the script is launched from its own
    # directory and os.chdir("") raises OSError.
    os.chdir(os.path.dirname(os.path.abspath(sys.argv[0])))

    # Configurations
    themesAndCatsNames = self.read_config()

    ### Manage OpenStreetMap data ##########################################
    # Analyse national OSM data file and create lists of already
    # tagged Wikipedia articles.

    # Download/update OSM data
    if self.args.download_osm:
        OSM.download_osm_data(self)
    if self.args.update_osm:
        status = OSM.update_osm_data(self)
    # `status` is only evaluated when -u was given, hence always bound here
    if self.args.download_osm or (self.args.update_osm and status):
        OSM.filter_wikipedia_data_in_osm_file(self)
    if self.args.update_osm and not status:
        print(
            "I dati OSM erano già aggiornati all'ultimo minuto, o "
            "l'aggiornamento con osmupdate è stato interrotto. "
            "Per ripetere l'aggiornamento, lanciare nuovamente lo script "
            "con l'opzione -u."
        )

    if not self.args.analyze:
        # "There's nothing left for me to tell you"
        sys.exit(1)

    if not os.path.isfile(self.wOSMFile):
        OSM.filter_wikipedia_data_in_osm_file(self)

    # Extract Wikipedia articles tagged in OSM with preferred language.
    # If an article is tagged in a foreign language, ask to Wikipedia
    # what is the corresponding article of the preferred language, so
    # that we can flag it as tagged as well.
    print("\n- Estrai dal file OSM gli articoli già taggati")
    parseOSMData = ParseOSMData(self)
    # list of Wikipedia tags in OSM
    self.tagsInOSM = parseOSMData.allTags
    self.tagsData = parseOSMData.tagsData
    # list of tagged Wikipedia articles
    self.taggedTitles = parseOSMData.titles
    # tags with errors
    self.wrongTags = parseOSMData.wrongTags
    # ugly tags (with url, language capitalized...), not errors
    self.badTags = parseOSMData.badTags
    # add articles manually flagged as tagged in data/workaround/tagged.csv
    # in case the parser misses them (strange tags)
    self.add_tagged_articles()

    if self.args.show_coordinates_from_osm:
        print("\n--- Aggiungi le coordinate calcolare da OSM")
        parseOSMData.get_centroids()

    ### Manage Wikipedia data ##############################################
    # Read from 'non-mappable' file the categories and articles that
    # aren't mappable e.g. "Paintings in the X museum",
    # self.nonMappable = {mainCategory.name : {"articles" : [], "subcategories" : []}}
    self.nonMappable = self.read_non_mappable_items()

    # Check if we have Wikipedia data from CatScan of all the
    # categories in the project (config file)
    themesAndCatsNames = wikipedia_downloader.check_catscan_data(
        self, themesAndCatsNames
    )

    # Organize Wikipedia data.
    # self.themes = [Theme(), ...]
    #   Theme().categories = [Category(), ...]
    #     Category().subcategories = [Category(), ...]
    #     Category().articles = [Article(), ...]
    # categories without catscan data
    self.categoriesWithoutData = []
    allThemes = Themes(self, themesAndCatsNames)
    self.themes = allThemes.themesList

    # Organize data in regions, for a different visualization
    # self.regions = [Region()]
    #   Region().categories = [Category(), ... ]
    self.regions = Regions(self).regionsList

    # Print names of all categories
    if self.args.print_categories_list:
        self.display_categories_names()
        if not self.args.category_info:
            # "There's nothing left for me to tell you"
            sys.exit(1)

    ### Merge OSM info into Wikipedia data #################################
    # Add to Wikipedia categories and articles instances info about
    # their status in OSM: (tagged/not tagged), osm ids and counters
    print(
        "\n- Controlla quali articoli nelle liste sono già taggati nel "
        "file OSM"
    )
    for theme in self.themes:
        for category in theme.categories:
            category.check_articles_in_osm()
    (
        self.titlesInOSM,
        self.titlesNotInOSM,
    ) = allThemes.lists_of_titles_in_osm_or_not()

    # Ask to Wikipedia which articles have/have not Coord template.
    # Articles with article.hasTemplate == False will be marked on web pages.
    if self.args.show_missing_templates:
        print(
            "\n- Controlla quali articoli non hanno il template Coord in "
            "Wikipedia"
        )
        self.templatesStatus = (
            wikipedia_downloader.read_old_templates_status(self)
        )
        wikipedia_downloader.update_templates_status(self)
        # Set hasTemplate = False to articles without Coord template
        for theme in self.themes:
            for category in theme.categories:
                category.set_has_template_in_articles()

    # If an article is not already tagged in OSM but Wikipedia knows its
    # position, it is possible to add a link to zoom to that position
    # with JOSM.
    if self.args.show_link_to_wikipedia_coordinates:
        print(
            "\n- Controlla di quali articoli non taggati Wikipedia conosce "
            "già la posizione"
        )
        wikipedia_downloader.add_wikipedia_coordinates(self)
        # Save GeoJSON file with titles and coordinates known by Wikipedia
        self.save_titles_with_coords_geojson()

    if self.args.infer_coordinates_from_wikipedia:
        print(
            "\n- Usa Nuts4Nuts per inferire la posizione di alcuni articoli"
        )
        nuts4nuts_infer.infer_coordinates_with_nuts4nuts(self)

    # Download from Wikipedia the lists of redirects and create a file
    # so that they can then be manually copied to non_mappable file
    if self.args.update_redirects:
        wikipedia_downloader.find_redirects(self)

    # For debugging
    # print info about a specific category
    if self.args.category_info:
        self.print_category_info(self.args.category_info.replace(" ", "_"))
        # NOTE(review): the pause is assumed to apply only after the
        # category info has been printed - confirm.
        if self.args.create_webpages:
            raw_input("\nContinue?[Press any key]")

    # write categories trees to text files (uncomment lines)
    if self.print_categories_to_text_files == "true":
        for theme in self.themes:
            for category in theme.categories:
                category.print_category_tree_to_file()

    # Read and update stats with the number of tagged articles
    self.dates, self.days = self.read_past_stats()
    download_other_countries = False
    self.todayDate, today = self.read_new_stats(download_other_countries)
    self.days.append(today)
    self.dates.append(self.todayDate)
    if len(self.dates) > 1 and self.todayDate == self.dates[-2]:
        # This is the second analysis of today.
        # Overwrite the previous statistics
        del self.dates[-2]
        del self.days[-2]
        print(
            "\n Questa è la seconda volta che i dati vengono analizzati "
            "oggi. "
            "Il numero di articoli taggati sostituisce quelli precedenti "
            "nella tabella dei conteggi."
        )

    # Count tags added by each user
    self.users = Users(self).users

    # Create a json file with the data (needed by non_mappable.html)
    tree = {"mappable": True, "name": "Main", "size": 1, "children": []}
    for theme in self.themes:
        for category in theme.categories:
            tree["children"].append(category.build_json_tree())
    # `with` closes the file even when serializing or writing fails
    with open(os.path.join(self.HTMLDIR, "json", "main.json"), "w") as ifile:
        ifile.write(json.dumps(tree))

    # Create webpages
    if self.args.create_webpages:
        print("\n- Crea pagine web")
        Creator(self)

    # Save stats: -w together with -s saves without asking, every other
    # combination asks the user.
    # NOTE(review): assumed to run whether or not web pages were created,
    # since the condition tests create_webpages itself - confirm.
    if self.args.create_webpages and self.args.save_stats:
        answer = "y"
    else:
        answer = raw_input(
            "\n- Salvo il numero di articoli mappati/da mappare in "
            "'./data/stats/stats.csv'?\n [y/N]\n"
        )
    if answer in ("y", "Y"):
        self.save_stats_to_csv()
    else:
        print("\nI nuovi conteggi non vengono salvati.")

    # Copy files from html dir to outdir (for example a Dropbox directory)
    if self.args.copy:
        self.copy_html_files_to_outdir()

    print("\nDone.")
def __init__(self):
    """Parse the command line options and run the requested steps.

    Depending on the options the constructor downloads/updates the OSM
    data, reads the Wikipedia tags from the filtered OSM file, organizes
    the Wikipedia categories in themes and regions, merges the OSM status
    into them, updates the statistics and, with ``-w``, creates the web
    pages.

    The process is terminated through ``sys.exit(1)`` when no option is
    given (after printing the help), when no analysis is requested and
    after ``-p`` without ``--category_info``.
    """
    #Options
    text = ("A partire da una lista di categorie inserite dall'utente nel "
            "file 'config', lo script: "
            "scarica/aggiorna i dati OSM nazionali, scarica da Wikipedia i "
            "dati sulle categorie (gli articoli che le compongono) "
            "e crea delle pagine HTML indicando gli articoli già taggati e "
            "da taggare in OSM.")
    parser = argparse.ArgumentParser(description=text)
    group = parser.add_mutually_exclusive_group()

    #Manage OSM data
    parser.add_argument("-d",
                        "--download_osm",
                        help="Scarica i dati OSM nazionali (da Geofabrik)",
                        action="store_true")
    parser.add_argument("-u",
                        "--update_osm",
                        help="Aggiorna i dati OSM nazionali scaricati "
                        "(tramite osmupdate)",
                        action="store_true")

    #Analyze data from Wikipedia and OSM
    parser.add_argument("-a",
                        "--analyze",
                        help="Analizza i dati: Wikipedia (sottocategorie ed "
                        "articoli delle categorie) ed OSM (tag Wikipedia "
                        "presenti)",
                        action="store_true")
    parser.add_argument("--category_info",
                        help="Analizza i dati e stampa le informazioni su "
                        "una specifica categoria",
                        action="store")
    parser.add_argument("-t",
                        "--show_missing_templates",
                        help="Segnala gli articoli senza template Coord",
                        action="store_true")
    parser.add_argument("-c",
                        "--show_link_to_wikipedia_coordinates",
                        help="Se un articolo non taggato ha delle coordinate "
                        "su Wikipedia, mostra un link per zoomare sulla sua "
                        "posizione con JOSM",
                        action="store_true")
    parser.add_argument("-o",
                        "--show_coordinates_from_osm",
                        help="Calcola le coordinate del punto (per i nodi) o "
                        "del centroide (per way e relations) dell'oggetto",
                        action="store_true")
    parser.add_argument("-n",
                        "--infer_coordinates_from_wikipedia",
                        help="Usa Nuts4Nuts per cercare le coordinate di un "
                        "articolo non taggato e senza coordinate su "
                        "Wikipedia",
                        action="store_true")
    parser.add_argument("-r",
                        "--update_redirects",
                        help="Controlla gli articoli delle liste che sono "
                        "redirects e crea un file con la loro lista per "
                        "poterli aggiungere manualmente tra i non mappabili, "
                        "perché non supportati da WIWOSM",
                        action="store_true")
    group.add_argument("-p",
                       "--print_categories_list",
                       help="Analizza i dati e stampa la lista delle "
                       "categorie nel progetto.",
                       action="store_true")

    #Create webpages
    group.add_argument("-w",
                       "--create_webpages",
                       help="Analizza i dati ed aggiorna le pagine web",
                       action="store_true")
    parser.add_argument("-s",
                        "--save_stats",
                        help="Se sono state aggiornate le pagine web, salva "
                        "il conteggio aggiornato con il numero di articoli "
                        "taggati (default: chiedi cosa fare).",
                        action="store_true")
    parser.add_argument("--nofx",
                        help="Non aprire le pagine web in Firefox dopo "
                        "averle aggiornate.",
                        action="store_true")
    parser.add_argument("--copy",
                        help="Copia la cartella html nella directory "
                        "descritta nel file config (es. dir dropbox).",
                        action="store_true")
    parser.add_argument("--bitly",
                        help="Use bitly links, to count visits to homepage.",
                        action="store_true")

    self.args = parser.parse_args()

    #Each of these options needs the analysis step, so they imply -a.
    needs_analysis = (self.args.category_info,
                      self.args.create_webpages,
                      self.args.print_categories_list,
                      self.args.update_redirects,
                      self.args.show_missing_templates,
                      self.args.show_coordinates_from_osm)
    if any(needs_analysis):
        self.args.analyze = True

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)

    #Work from the directory of the script. abspath() is required:
    #dirname() returns "" when the script is launched from its own
    #directory and os.chdir("") raises OSError.
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    os.chdir(script_dir)

    #Configurations
    themesAndCatsNames = self.read_config()

    ### Manage OpenStreetMap data ##########################################
    #Analyse national OSM data file and create lists of already
    #tagged Wikipedia articles.

    #Download/update OSM data
    if self.args.download_osm:
        OSM.download_osm_data(self)
    if self.args.update_osm:
        status = OSM.update_osm_data(self)
    #`status` is only evaluated when -u was given, hence always bound here
    if self.args.download_osm or (self.args.update_osm and status):
        OSM.filter_wikipedia_data_in_osm_file(self)
    if self.args.update_osm and not status:
        print("I dati OSM erano già aggiornati all'ultimo minuto, o "
              "l'aggiornamento con osmupdate è stato interrotto. "
              "Per ripetere l'aggiornamento, lanciare nuovamente lo script "
              "con l'opzione -u.")

    if not self.args.analyze:
        #"There's nothing left for me to tell you"
        sys.exit(1)

    if not os.path.isfile(self.wOSMFile):
        OSM.filter_wikipedia_data_in_osm_file(self)

    #Extract Wikipedia articles tagged in OSM with preferred language.
    #If an article is tagged in a foreign language, ask to Wikipedia
    #what is the corresponding article of the preferred language, so
    #that we can flag it as tagged as well.
    print("\n- Estrai dal file OSM gli articoli già taggati")
    parseOSMData = ParseOSMData(self)
    #list of Wikipedia tags in OSM
    self.tagsInOSM = parseOSMData.allTags
    self.tagsData = parseOSMData.tagsData
    #list of tagged Wikipedia articles
    self.taggedTitles = parseOSMData.titles
    #tags with errors
    self.wrongTags = parseOSMData.wrongTags
    #ugly tags (with url, language capitalized...), not errors
    self.badTags = parseOSMData.badTags
    #add articles manually flagged as tagged in data/workaround/tagged.csv
    #in case the parser misses them (strange tags)
    self.add_tagged_articles()

    if self.args.show_coordinates_from_osm:
        print("\n--- Aggiungi le coordinate calcolare da OSM")
        parseOSMData.get_centroids()

    ### Manage Wikipedia data ##############################################
    #Read from 'non-mappable' file the categories and articles that
    #aren't mappable e.g. "Paintings in the X museum",
    #self.nonMappable = {mainCategory.name : {"articles" : [], "subcategories" : []}}
    self.nonMappable = self.read_non_mappable_items()

    #Check if we have Wikipedia data from CatScan of all the
    #categories in the project (config file)
    themesAndCatsNames = wikipedia_downloader.check_catscan_data(
        self, themesAndCatsNames)

    #Organize Wikipedia data.
    #self.themes = [Theme(), ...]
    #  Theme().categories = [Category(), ...]
    #    Category().subcategories = [Category(), ...]
    #    Category().articles = [Article(), ...]
    #categories without catscan data
    self.categoriesWithoutData = []
    allThemes = Themes(self, themesAndCatsNames)
    self.themes = allThemes.themesList

    #Organize data in regions, for a different visualization
    #self.regions = [Region()]
    #  Region().categories = [Category(), ... ]
    self.regions = Regions(self).regionsList

    #Print names of all categories
    if self.args.print_categories_list:
        self.display_categories_names()
        if not self.args.category_info:
            #"There's nothing left for me to tell you"
            sys.exit(1)

    ### Merge OSM info into Wikipedia data #################################
    #Add to Wikipedia categories and articles instances info about
    #their status in OSM: (tagged/not tagged), osm ids and counters
    print("\n- Controlla quali articoli nelle liste sono già taggati nel "
          "file OSM")
    for theme in self.themes:
        for category in theme.categories:
            category.check_articles_in_osm()
    titles = allThemes.lists_of_titles_in_osm_or_not()
    self.titlesInOSM, self.titlesNotInOSM = titles

    #Ask to Wikipedia which articles have/have not Coord template.
    #Articles with article.hasTemplate == False will be marked on web pages.
    if self.args.show_missing_templates:
        print("\n- Controlla quali articoli non hanno il template Coord in "
              "Wikipedia")
        self.templatesStatus = wikipedia_downloader.read_old_templates_status(
            self)
        wikipedia_downloader.update_templates_status(self)
        #Set hasTemplate = False to articles without Coord template
        for theme in self.themes:
            for category in theme.categories:
                category.set_has_template_in_articles()

    #If an article is not already tagged in OSM but Wikipedia knows its
    #position, it is possible to add a link to zoom to that position
    #with JOSM.
    if self.args.show_link_to_wikipedia_coordinates:
        print("\n- Controlla di quali articoli non taggati Wikipedia "
              "conosce già la posizione")
        wikipedia_downloader.add_wikipedia_coordinates(self)
        #Save GeoJSON file with titles and coordinates known by Wikipedia
        self.save_titles_with_coords_geojson()

    if self.args.infer_coordinates_from_wikipedia:
        print("\n- Usa Nuts4Nuts per inferire la posizione di alcuni "
              "articoli")
        nuts4nuts_infer.infer_coordinates_with_nuts4nuts(self)

    #Download from Wikipedia the lists of redirects and create a file
    #so that they can then be manually copied to non_mappable file
    if self.args.update_redirects:
        wikipedia_downloader.find_redirects(self)

    #For debugging
    # print info about a specific category
    if self.args.category_info:
        self.print_category_info(self.args.category_info.replace(" ", "_"))
        #NOTE(review): the pause is assumed to apply only after the
        #category info has been printed - confirm.
        if self.args.create_webpages:
            raw_input("\nContinue?[Press any key]")

    # write categories trees to text files (uncomment lines)
    if self.print_categories_to_text_files == "true":
        for theme in self.themes:
            for category in theme.categories:
                category.print_category_tree_to_file()

    #Read and update stats with the number of tagged articles
    self.dates, self.days = self.read_past_stats()
    download_other_countries = False
    self.todayDate, today = self.read_new_stats(download_other_countries)
    self.days.append(today)
    self.dates.append(self.todayDate)
    if len(self.dates) > 1 and self.todayDate == self.dates[-2]:
        #This is the second analysis of today.
        #Overwrite the previous statistics
        del self.dates[-2]
        del self.days[-2]
        print("\n Questa è la seconda volta che i dati vengono analizzati "
              "oggi. "
              "Il numero di articoli taggati sostituisce quelli precedenti "
              "nella tabella dei conteggi.")

    #Count tags added by each user
    self.users = Users(self).users

    #Create a json file with the data (needed by non_mappable.html)
    tree = {"mappable": True, "name": "Main", "size": 1, "children": []}
    for theme in self.themes:
        for category in theme.categories:
            tree["children"].append(category.build_json_tree())
    json_path = os.path.join(self.HTMLDIR, "json", "main.json")
    #`with` closes the file even when serializing or writing fails
    with open(json_path, "w") as ifile:
        ifile.write(json.dumps(tree))

    #Create webpages
    if self.args.create_webpages:
        print("\n- Crea pagine web")
        Creator(self)

    #Save stats: -w together with -s saves without asking, every other
    #combination asks the user.
    #NOTE(review): assumed to run whether or not web pages were created,
    #since the condition tests create_webpages itself - confirm.
    if self.args.create_webpages and self.args.save_stats:
        answer = "y"
    else:
        answer = raw_input(
            "\n- Salvo il numero di articoli mappati/da mappare in "
            "'./data/stats/stats.csv'?\n [y/N]\n")
    if answer in ("y", "Y"):
        self.save_stats_to_csv()
    else:
        print("\nI nuovi conteggi non vengono salvati.")

    #Copy files from html dir to outdir (for example a Dropbox directory)
    if self.args.copy:
        self.copy_html_files_to_outdir()

    print("\nDone.")