def create_locations():
    """Index every known city as a `location` document in ElasticSearch."""
    bulk_payload = []
    for city in geocoding.get_cities():
        # Flatten the geocoding record into the ES document shape.
        source = {
            'city_name': city['name'],
            'location': {
                'lat': city['coords']['lat'],
                'lon': city['coords']['lon'],
            },
            'population': city['population'],
            'slug': city['slug'],
            'zipcode': city['zipcode'],
        }
        bulk_payload.append({
            '_op_type': 'index',
            '_index': settings.ES_INDEX,
            '_type': es.LOCATION_TYPE,
            '_source': source,
        })
    bulk_actions(bulk_payload)
def test_get_cities(self):
    """Sanity check: the geocoding dataset must contain Paris."""
    # any() short-circuits on the first match, like the original loop+break.
    paris_present = any(
        city['name'] == "Paris" for city in geocoding.get_cities()
    )
    self.assertTrue(paris_present)
def generate_city_choices():
    """
    Return [(name, zipcode), weight] pairs for the 2000 most populated
    cities, where the weight is log10 of the city's population.
    """
    ranked = sorted(
        geocoding.get_cities(),
        key=itemgetter('population'),
        reverse=True,
    )
    return [
        [(city['name'], city['zipcode']), math.log10(city['population'])]
        for city in ranked[:2000]
    ]
def test_paris4eme_is_correctly_found(self):
    """Zipcode 75004 must resolve to exactly one city: Paris 4eme."""
    matching_zipcodes = [
        city['zipcode']
        for city in geocoding.get_cities()
        if city['zipcode'] == "75004"
    ]
    self.assertEqual(len(matching_zipcodes), 1)
    zipcode = matching_zipcodes[0]
    self.assertEqual(zipcode, "75004")
    # NOTE(review): the "saint-denis" slug hint here looks swapped with the
    # "paris-4eme" hint in the Montigny test — presumably the slug only acts
    # as a tie-breaker for ambiguous zipcodes, so the lookup still succeeds;
    # confirm the intent against get_city_by_zipcode().
    city = geocoding.get_city_by_zipcode(zipcode, "saint-denis")
    self.assertEqual(city['coords']['lat'], 48.8544006347656)
    self.assertEqual(city['coords']['lon'], 2.36240005493164)
def sitemap():
    """
    Generate web/static/sitemap.xml.

    To rebuild the sitemap, simply run "make create_sitemap" and then commit
    the new sitemap.xml file. Currently you don't need to run it more than
    once as its content is pretty much static.
    """
    pages = []
    # The timestamp carries a trailing "Z" (UTC designator), so it must be
    # built from UTC — datetime.now() (local time) labelled "Z" was wrong.
    now_str = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
    # Keep only cities whose zipcode ends in '00' (department-level codes).
    cities = [
        city for city in geocoding.get_cities()
        if city['zipcode'].endswith('00')
    ]
    top_cities = [(city['slug'], city['zipcode']) for city in sorted(
        cities, key=operator.itemgetter('population'), reverse=True)[:94]]
    rome_descriptions = list(settings.ROME_DESCRIPTIONS.values())
    for rome in rome_descriptions:
        occupation = slugify(rome)
        for city, zipcode in top_cities:
            url = "https://labonneboite.pole-emploi.fr/entreprises/%s-%s/%s" % (
                city, zipcode, occupation)
            pages.append((url, now_str))

    # Handle max URLs in a sitemap
    # See https://en.wikipedia.org/wiki/Sitemaps#Sitemap_limits
    initial_count = len(pages)
    if initial_count > MAX_URLS:
        line_start = '\n * SKIPPED: '
        # BUGFIX: the dropped URLs were sliced with a hard-coded 50000
        # instead of MAX_URLS, so the warning could disagree with the
        # actual truncation below.
        print(
            'Warning: sitemap should have at most 50K URLs\nDrop these URLs, they will not be indexed in sitemap.xml',
            line_start,
            line_start.join(page[0] for page in pages[MAX_URLS:]))
        pages = pages[:MAX_URLS]

    # Write the sitemap to file
    sitemap_xml = render_template('sitemap.xml', pages=pages)
    sitemap_filename = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "../web/static/sitemap.xml")
    with open(sitemap_filename, "w") as f:
        f.write(sitemap_xml)

    # Print summary
    print(
        "Generated sitemap.xml using %s pages. Dropped %s pages\nTotal: %s cities x %s rome_descriptions = %s pages" % (
            len(pages),
            initial_count - len(pages),
            len(top_cities),
            len(rome_descriptions),
            initial_count,
        ))
def test_montigny_les_metz_is_correctly_found(self):
    """Montigny-les-Metz must exist exactly once, with zipcode 57950."""
    # Match on name prefix/suffix to sidestep the accented middle part.
    montigny_zipcodes = [
        city['zipcode']
        for city in geocoding.get_cities()
        if city['name'].startswith('Montigny-l') and city['name'].endswith('s-Metz')
    ]
    self.assertEqual(len(montigny_zipcodes), 1)
    zipcode = montigny_zipcodes[0]
    self.assertEqual(zipcode, "57950")
    # NOTE(review): the "paris-4eme" slug hint looks swapped with the
    # "saint-denis" hint in the Paris test — presumably the slug is only a
    # tie-breaker for ambiguous zipcodes; confirm intent.
    city = geocoding.get_city_by_zipcode(zipcode, "paris-4eme")
    self.assertEqual(city['coords']['lat'], 49.09692140157696)
    self.assertEqual(city['coords']['lon'], 6.1549924040022725)
def sitemap():
    """
    Generate web/static/sitemap.xml.

    To rebuild the sitemap, simply run "make create_sitemap" and then commit
    the new sitemap.xml file. Currently you don't need to run it more than
    once as its content is pretty much static.

    Raises:
        Exception: if more than 50,000 URLs would be generated (the
            per-file limit of the sitemap protocol).
    """
    pages = []
    # The timestamp carries a trailing "Z" (UTC designator), so it must be
    # built from UTC — datetime.now() (local time) labelled "Z" was wrong.
    now_str = datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ")
    # Keep only cities whose zipcode ends in '00' (department-level codes).
    cities = [
        city for city in geocoding.get_cities()
        if city['zipcode'].endswith('00')
    ]
    top_cities = [(city['slug'], city['zipcode']) for city in sorted(
        cities, key=operator.itemgetter('population'), reverse=True)[:94]]
    rome_descriptions = list(settings.ROME_DESCRIPTIONS.values())
    for rome in rome_descriptions:
        occupation = slugify(rome)
        for city, zipcode in top_cities:
            url = "https://labonneboite.pole-emploi.fr/entreprises/%s-%s/%s" % (
                city, zipcode, occupation)
            pages.append((url, now_str))

    # A sitemap should have at most 50K URLs.
    # See https://en.wikipedia.org/wiki/Sitemaps#Sitemap_limits
    # BUGFIX: the previous `>= 50000` check rejected exactly 50,000 URLs,
    # which the sitemap protocol allows ("at most 50K").
    if len(pages) > 50000:
        raise Exception("sitemap should have at most 50K URLs")

    sitemap_xml = render_template('sitemap.xml', pages=pages)
    sitemap_filename = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "../web/static/sitemap.xml")
    with open(sitemap_filename, "w") as f:
        f.write(sitemap_xml)

    print(
        "generated sitemap.xml using %s pages (%s cities x %s rome_descriptions)"
        % (len(pages), len(top_cities), len(rome_descriptions)))
def generate_city_choices():
    """
    Return [(name, zipcode), weight] pairs for the 2000 most populated
    cities, where the weight is log10 of the city's population.
    """
    cities_by_population = sorted(
        geocoding.get_cities(), key=itemgetter('population'), reverse=True)
    city_choices = []
    for city in cities_by_population[:2000]:
        city_choices.append(
            [
                (city['name'], city['zipcode']),
                math.log10(city['population'])
            ]
        )
    return city_choices


# Precomputed at import time: weighted city choices, all commune ids, and
# SIRET choices for the random generators below.
CITY_CHOICES = generate_city_choices()
COMMUNE_CHOICES = [city_['commune_id'] for city_ in geocoding.get_cities()]
SIRET_CHOICES = generate_siret_choices()


def weighted_choice(choices):
    """
    Pick one item at random from `choices` — a sequence of (item, weight)
    pairs — with probability proportional to its weight.

    Raises:
        ValueError: if `choices` is empty or its total weight is zero.
            (Previously this path hit `assert False`, which is silently
            stripped under `python -O`.)
    """
    total = sum(weight for _item, weight in choices)
    threshold = random.uniform(0, total)
    cumulative = 0
    for item, weight in choices:
        # The cumulative sum reaches `total` on the last pair, so for any
        # positive total this loop always returns.
        if cumulative + weight >= threshold:
            return item
        cumulative += weight
    raise ValueError("weighted_choice: total weight must be positive")