Example #1
0
def create_locations():
    """
    Index every known city as a `location` document in ElasticSearch.

    One `index` action is built per city returned by `geocoding.get_cities()`
    and the whole batch is then handed to `bulk_actions` in a single call.
    """
    bulk_actions([
        {
            '_op_type': 'index',
            '_index': settings.ES_INDEX,
            '_type': es.LOCATION_TYPE,
            # The indexed document: a flat copy of the city fields, with the
            # coordinates grouped under a `location` object holding lat/lon.
            '_source': {
                'city_name': city['name'],
                'location': {
                    'lat': city['coords']['lat'],
                    'lon': city['coords']['lon'],
                },
                'population': city['population'],
                'slug': city['slug'],
                'zipcode': city['zipcode'],
            },
        }
        for city in geocoding.get_cities()
    ])
 def test_get_cities(self):
     """Check that a city named "Paris" is returned by `geocoding.get_cities()`."""
     paris_is_listed = any(
         city['name'] == "Paris" for city in geocoding.get_cities()
     )
     self.assertTrue(paris_is_listed)
Example #3
0
def generate_city_choices():
    """
    Build the weighted choices for the 2000 most populated cities.

    Returns a list of `[(name, zipcode), weight]` entries, most populated city
    first, where `weight` is the base-10 logarithm of the city's population.
    """
    ranked_cities = sorted(geocoding.get_cities(), key=itemgetter('population'), reverse=True)
    return [
        [(city['name'], city['zipcode']), math.log10(city['population'])]
        for city in ranked_cities[:2000]
    ]
 def test_paris4eme_is_correctly_found(self):
     """
     Check that exactly one city has zipcode 75004 and that looking it up
     by zipcode yields the expected coordinates.
     """
     name_zipcode_pairs = [
         (city['name'], city['zipcode']) for city in geocoding.get_cities()
     ]
     matches = [code for _name, code in name_zipcode_pairs if code == "75004"]
     self.assertEqual(len(matches), 1)

     zipcode = matches[0]
     self.assertEqual(zipcode, "75004")

     # NOTE(review): the slug passed here ("saint-denis") does not look related
     # to zipcode 75004 - confirm how get_city_by_zipcode uses its second argument.
     coords = geocoding.get_city_by_zipcode(zipcode, "saint-denis")['coords']
     self.assertEqual(coords['lat'], 48.8544006347656)
     self.assertEqual(coords['lon'], 2.36240005493164)
Example #5
0
def sitemap():
    """
    Generate the static sitemap.xml file.

    To rebuild the sitemap,
    simply run "make create_sitemap" and then commit the new sitemap.xml file.
    Currently you don't need to run it more than once as its content is pretty much static.

    One URL is produced for every (city, rome description) pair, using the 94
    most populated cities whose zipcode ends with '00'. If more than MAX_URLS
    URLs are produced, the extra ones are reported on stdout and dropped. The
    result is rendered with the 'sitemap.xml' template and written to
    ../web/static/sitemap.xml (relative to this file). A summary is printed.
    """
    # Imported locally so this function does not rely on the file-level imports.
    from datetime import timezone

    # The trailing "Z" in the format means UTC, so the time must be taken in
    # UTC rather than in the machine's local timezone.
    now_str = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    cities = [
        city for city in geocoding.get_cities()
        if city['zipcode'].endswith('00')
    ]
    most_populated = sorted(
        cities, key=operator.itemgetter('population'), reverse=True)[:94]
    top_cities = [(city['slug'], city['zipcode']) for city in most_populated]

    rome_descriptions = list(settings.ROME_DESCRIPTIONS.values())

    pages = []
    for rome in rome_descriptions:
        occupation = slugify(rome)
        for city, zipcode in top_cities:
            url = "https://labonneboite.pole-emploi.fr/entreprises/%s-%s/%s" % (
                city, zipcode, occupation)
            pages.append((url, now_str))

    # Handle max URLs in a sitemap
    # See https://en.wikipedia.org/wiki/Sitemaps#Sitemap_limits
    initial_count = len(pages)
    if initial_count > MAX_URLS:
        line_start = '\n * SKIPPED: '
        # Report exactly the URLs that are dropped below: slice with MAX_URLS,
        # the same bound used for the truncation.
        skipped_urls = [url for url, _ in pages[MAX_URLS:]]
        print(
            'Warning: sitemap should have at most 50K URLs\nDrop these URLs, they will not be indexed in sitemap.xml',
            line_start, line_start.join(skipped_urls))
        pages = pages[:MAX_URLS]

    # Write the sitemap to file
    sitemap_xml = render_template('sitemap.xml', pages=pages)
    sitemap_filename = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "../web/static/sitemap.xml")
    with open(sitemap_filename, "w") as f:
        f.write(sitemap_xml)

    # Print summary
    print(
        "Generated sitemap.xml using %s pages. Dropped %s pages\nTotal: %s cities x %s rome_descriptions = %s pages"
        % (
            len(pages),
            initial_count - len(pages),
            len(top_cities),
            len(rome_descriptions),
            initial_count,
        ))
 def test_montigny_les_metz_is_correctly_found(self):
     """
     Check that exactly one city name starts with 'Montigny-l' and ends with
     's-Metz', that its zipcode is 57950, and that looking it up by zipcode
     yields the expected coordinates.
     """
     name_zipcode_pairs = [
         (city['name'], city['zipcode']) for city in geocoding.get_cities()
     ]
     # Match on prefix and suffix only, leaving the middle of the name free.
     matches = [
         code for name, code in name_zipcode_pairs
         if name.startswith('Montigny-l') and name.endswith('s-Metz')
     ]
     self.assertEqual(len(matches), 1)

     zipcode = matches[0]
     self.assertEqual(zipcode, "57950")

     # NOTE(review): the slug passed here ("paris-4eme") does not look related
     # to zipcode 57950 - confirm how get_city_by_zipcode uses its second argument.
     coords = geocoding.get_city_by_zipcode(zipcode, "paris-4eme")['coords']
     self.assertEqual(coords['lat'], 49.09692140157696)
     self.assertEqual(coords['lon'], 6.1549924040022725)
def sitemap():
    """
    Generate the static sitemap.xml file.

    To rebuild the sitemap,
    simply run "make create_sitemap" and then commit the new sitemap.xml file.
    Currently you don't need to run it more than once as its content is pretty much static.

    One URL is produced for every (city, rome description) pair, using the 94
    most populated cities whose zipcode ends with '00'. The result is rendered
    with the 'sitemap.xml' template and written to ../web/static/sitemap.xml
    (relative to this file). A one-line summary is printed.

    Raises:
        Exception: if more than 50000 URLs would be written.
    """
    # Imported locally so this function does not rely on the file-level imports.
    from datetime import timezone

    max_urls = 50000

    # The trailing "Z" in the format means UTC, so the time must be taken in
    # UTC rather than in the machine's local timezone.
    now_str = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

    cities = [
        city for city in geocoding.get_cities()
        if city['zipcode'].endswith('00')
    ]
    most_populated = sorted(
        cities, key=operator.itemgetter('population'), reverse=True)[:94]
    top_cities = [(city['slug'], city['zipcode']) for city in most_populated]

    rome_descriptions = list(settings.ROME_DESCRIPTIONS.values())

    pages = []
    for rome in rome_descriptions:
        occupation = slugify(rome)
        for city, zipcode in top_cities:
            url = "https://labonneboite.pole-emploi.fr/entreprises/%s-%s/%s" % (
                city, zipcode, occupation)
            pages.append((url, now_str))

    # A sitemap should have at most 50K URLs.
    # See https://en.wikipedia.org/wiki/Sitemaps#Sitemap_limits
    # Exactly 50000 URLs is still within the limit, hence a strict comparison.
    if len(pages) > max_urls:
        raise Exception("sitemap should have at most 50K URLs")

    sitemap_xml = render_template('sitemap.xml', pages=pages)
    sitemap_filename = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "../web/static/sitemap.xml")
    with open(sitemap_filename, "w") as f:
        f.write(sitemap_xml)

    print(
        "generated sitemap.xml using %s pages (%s cities x %s rome_descriptions)"
        % (len(pages), len(top_cities), len(rome_descriptions)))
Example #8
0
def generate_city_choices():
    """
    Return weighted choices for the 2000 most populated cities.

    Each entry is a two-item list `[(name, zipcode), weight]`; the weight is
    `log10(population)` and entries are ordered by decreasing population.
    """
    by_population = itemgetter('population')
    largest_cities = sorted(geocoding.get_cities(), key=by_population, reverse=True)[:2000]
    return [
        [(city['name'], city['zipcode']), math.log10(city['population'])]
        for city in largest_cities
    ]


# Weighted city choices, computed once when these statements run:
# a list of [(name, zipcode), log10(population)] entries.
CITY_CHOICES = generate_city_choices()


# The `commune_id` of every city returned by geocoding.get_cities().
COMMUNE_CHOICES = [city_['commune_id'] for city_ in geocoding.get_cities()]


# Result of generate_siret_choices(), which is defined outside this part of
# the file; presumably a collection of SIRET values - TODO confirm.
SIRET_CHOICES = generate_siret_choices()


def weighted_choice(choices):
    """
    Pick one item at random, with a probability proportional to its weight.

    `choices` is an iterable of `(item, weight)` pairs; any iterable is
    accepted, including generators. Weights are expected to be non-negative
    numbers. A threshold is drawn uniformly between 0 and the sum of the
    weights, and the first item whose cumulative weight reaches that
    threshold is returned.

    Raises:
        AssertionError: if no item could be selected, e.g. `choices` is empty.
    """
    # The pairs are walked twice (sum, then selection), so materialize them
    # once: a one-shot iterator would otherwise be exhausted by the sum.
    choices = tuple(choices)
    total = sum(weight for _, weight in choices)
    threshold = random.uniform(0, total)
    cumulative = 0
    for item, weight in choices:
        if cumulative + weight >= threshold:
            return item
        cumulative += weight
    # Raised explicitly rather than via `assert False`: assert statements are
    # removed under `python -O`, which would make this silently return None.
    raise AssertionError("Shouldn't get here")