Ejemplo n.º 1
0
 def handle(self, *args, **options):
     """Rebuild the ``Loc`` table and its 'full' aliases from ``DB_URL``.

     Downloads the zip archive at ``DB_URL``, deletes every ``Loc`` row and
     every ``Geoalias`` of kind 'full', then reads the first member of the
     archive as tab-separated records with 19 columns.  Records whose
     population is below 25000 are skipped; every other record is saved as
     a ``Loc`` plus one 'full' ``Geoalias`` holding the lower-cased ASCII
     name stripped of everything but a-z.

     A record whose conversion or save raises is reported on stdout and
     skipped; the import then continues with the next record.
     """
     url = urllib.urlopen(DB_URL)
     try:
         archive = ZipFile(StringIO(url.read()))
     finally:
         # The payload is fully buffered at this point, so the connection
         # can be released before the (long) database work starts.
         url.close()
     Loc.objects.all().delete()
     Geoalias.objects.filter(kind='full').delete()
     # Iterate the member directly instead of building a list of all lines.
     for line in archive.open(archive.namelist()[0]):
         line = line.rstrip('\n')
         (geonameid, name, asciiname, alternatenames, latitude, longitude,
          feature_class, feature_code, country_code, cc2, admin1_code,
          admin2_code, admin3_code, admin4_code, population, elevation, dem,
          timezone, modification_date) = line.split('\t')
         normalized_name = re.sub(r'[^a-z]', '', asciiname.lower())
         try:
             if int(population) < 25000:
                 continue
             st_point = 'POINT(%s %s)' % (longitude, latitude)
             # Loc keeps the original spelling; the normalized form is only
             # stored on the alias below.
             loc = Loc(
                 id=geonameid,
                 name=name,
                 country=country_code.lower(),
                 region=admin1_code,
                 pop=population,
                 lat=latitude,
                 lon=longitude,
                 point=st_point,
                 granularity='I',  ## CITY level
                 count=0)
             loc.save()
             alias = Geoalias(loc=loc, word=normalized_name, kind='full',
                              count=0)
             alias.save()
         except Exception as exc:
             # Exception (not a bare except) so that Ctrl-C and SystemExit
             # still stop the import; the cause is printed with the record.
             print("better errorhandling dude! %s" % (line))
             print("  -> %s: %s" % (type(exc).__name__, exc))
Ejemplo n.º 2
0
def add_geoalias(loc, word, country):
    """Create and save a ``Geoalias`` of kind 'locode'.

    The alias links *word* to *loc*, records *country* and starts with a
    count of 0.  Nothing is returned.
    """
    alias_fields = dict(
        loc=loc,
        word=word,
        kind='locode',
        country=country,
        count=0,
    )
    Geoalias(**alias_fields).save()
Ejemplo n.º 3
0
 def handle(self, *args, **options):
     """Re-import alternate-name and IATA aliases for the stored ``Loc`` rows.

     Downloads the zip archive at ``DB_URL``, deletes the ``Geoalias`` rows
     of kind ``GEOALIAS_TYPE`` and 'iata', then reads the member
     ``ARCHIVE_NAME`` as tab-separated records (id, geoname id, language,
     alternate name, ...):

     * geoname id known and language in ``lang_set``: the normalized
       alternate name is saved as a ``GEOALIAS_TYPE`` alias when it differs
       from the (non-empty) ``Loc`` name;
     * otherwise, language 'iata' and geoname id known: the lower-cased
       name is handed to ``import_iata``;
     * anything else is only counted.

     Afterwards every entry of ``iata_manual_imports`` that has no 'iata'
     alias yet is imported as well.  Progress and totals go to stdout.
     """
     url = urllib.urlopen(DB_URL)
     try:
         archive = ZipFile(StringIO(url.read()))
     finally:
         # Everything is buffered in memory now; release the connection.
         url.close()
     loc_dict = dict((o.id, o.name) for o in Loc.objects.all())
     ## flush before reimporting
     Geoalias.objects.filter(kind__exact=GEOALIAS_TYPE).delete()
     Geoalias.objects.filter(kind__exact='iata').delete()
     not_count = 0
     iata_count = 0
     count = 0
     # Iterate the member directly instead of building a list of all lines.
     for line in archive.open(ARCHIVE_NAME):
         fields = line.rstrip('\n').split('\t')
         if len(fields) < 4:
             # Too short to carry id, language and name: count as unmatched
             # rather than aborting after the old aliases were deleted.
             not_count += 1
             continue
         # Only columns 2-4 are used, so extra trailing columns are
         # tolerated instead of breaking a fixed-width unpack.
         geonameid = int(fields[1])
         lang = fields[2]
         altname = fields[3]
         if geonameid in loc_dict and lang in lang_set:
             norm_altname = openipmap.utils.normalize_name(altname)
             loc_name = loc_dict[geonameid]
             if loc_name != norm_altname and len(loc_name) > 0:
                 g = Geoalias(loc_id=geonameid,
                              word=norm_altname,
                              kind=GEOALIAS_TYPE,
                              lang=lang,
                              count=0)
                 g.save()
             ## maybe compare to normal, and only insert if altname differs
             count += 1
         elif lang == 'iata' and geonameid in loc_dict:
             ## get IATA codes for geonames we have in our database
             import_iata(geonameid, altname.lower())
             iata_count += 1
         else:
             not_count += 1
     for iata_code, geonameid in iata_manual_imports.iteritems():
         # exists() asks the database for presence only, not a full count.
         already_known = Geoalias.objects.filter(kind__exact='iata').filter(
             word__exact=iata_code).exists()
         if not already_known:
             import_iata(geonameid, iata_code)
             iata_count += 1
             print("didn't exist: %s -> %s" % (iata_code, geonameid))
         else:
             print("already exists: %s -> %s" % (iata_code, geonameid))
     print("yes:%s  no:%s iata:%s" % (count, not_count, iata_count))
Ejemplo n.º 4
0
def import_iata(geonameid, iatacode):
    """Save *iatacode*, lower-cased, as an 'iata' ``Geoalias``.

    The alias points at the location id *geonameid* and starts with a
    count of 0.  Nothing is returned.
    """
    Geoalias(
        loc_id=geonameid,
        word=iatacode.lower(),
        kind='iata',
        count=0,
    ).save()
Ejemplo n.º 5
0
   def handle(self, *args, **options):
      """Rebuild the 'clli' aliases from ``./openipmap/data/cllis.txt``.

      Deletes every ``Geoalias`` of kind 'clli', then reads the tab-separated
      file (two header lines, then clli / city / region / country rows).
      Codes are lower-cased, stripped to a-z and kept only when exactly six
      characters long.  For each code the most frequent city|region|country
      combination is chosen, mapped through ``ccmap`` / ``citymap`` and
      matched against the stored locations:

      1. exactly one ``Loc`` with a 'full' alias equal to the normalized city;
      2. several such ``Loc`` rows: exactly one of them in the same country;
      3. none: exactly one ``Loc`` in the country whose name contains the
         normalized city.

      A match is saved as a 'clli' ``Geoalias``; anything else is reported
      on stdout.  The number of saved aliases is printed at the end.
      """
      Geoalias.objects.filter(kind__exact='clli').delete()
      count = 0
      cllis = {}
      with open('./openipmap/data/cllis.txt', 'rb') as f:
         f.readline() ## remove header
         f.readline() ## remove header
         for line in f:
            # sample row: CLEVOH   Cleveland   OH US
            fields = line.rstrip('\n').split('\t')
            if len(fields) != 4:
               # Blank or malformed row: skip it instead of aborting the
               # run after the old aliases have already been deleted.
               continue
            (clli, city, region, country) = fields
            clli = re.sub('[^a-z]', '', clli.lower())
            if len(clli) != 6:
               continue
            loc_str = '|'.join([ city.lower() , region.lower() , country.lower() ])
            # Tally how often each location string is seen for this code.
            votes = cllis.setdefault(clli, {})
            votes[loc_str] = votes.get(loc_str, 0) + 1
      for clli in cllis:
         print("%s" % ( cllis[clli].items() ))
         locs_sorted = sorted( cllis[clli].items() , key=lambda x:x[1], reverse=True )
         print("%s" % ( locs_sorted ))
         most_common_loc = locs_sorted[0][0]
         (city, region, country) = most_common_loc.split('|')
         if country in ccmap: country = ccmap[country]
         if city in citymap: city = citymap[city]
         city_norm = re.sub('[^a-z]', '', city)
         geoalias_list = Geoalias.objects.filter(word__exact=city_norm, kind__exact='full')
         loc_id_list = [obj.loc_id for obj in geoalias_list]
         loc_list = Loc.objects.filter(id__in=loc_id_list)
         # The three lookups differ only in how the location is found, so
         # they share the single alias creation at the end of the loop body.
         match = None
         exact_hits = len( loc_list )
         if exact_hits == 1:
            match = loc_list[0]
         elif exact_hits > 1:
            f_loc_list = loc_list.filter( country__exact=country )
            if len( f_loc_list ) == 1:
               match = f_loc_list[0]
            else:
               print("no match (after tie-down to country) %s: %s, %s, %s" % ( clli, city_norm, country, loc_list ))
         else:
            # No exact alias at all: fall back to a substring match on the
            # location name, restricted to the country.
            approx_list = Loc.objects.filter( name__contains=city_norm, country__exact=country )
            if len( approx_list ) == 1:
               match = approx_list[0]
            else:
               # Report the approximate candidates; loc_list is always
               # empty in this branch and would carry no information.
               print("no match (after approx+country) %s: %s, %s, %s" % ( clli, city_norm, country, approx_list ))
         if match is not None:
            g = Geoalias(
               loc=match,
               word=clli,
               kind='clli',
               count=0
            )
            g.save()
            count += 1
      print("total count: %s" % ( count ))