def handle(self, *args, **options):
    """Rebuild the Loc table and the 'full' Geoalias entries from DB_URL.

    Downloads the zip archive at DB_URL, deletes every existing Loc and
    every Geoalias of kind 'full', then reads the first archive member as
    tab-separated records of 19 columns (the column names match the
    GeoNames dump layout -- TODO confirm against DB_URL).

    Records whose population is below 25000 are skipped.  Every remaining
    record becomes one Loc (granularity 'I', i.e. city level) plus one
    'full' Geoalias whose word is the lower-cased ASCII name with every
    character outside a-z removed.

    A record that cannot be parsed or saved is reported on stdout together
    with the error and skipped; the import continues with the next record.
    """
    url = urllib.urlopen(DB_URL)
    try:
        archive = ZipFile(StringIO(url.read()))
    finally:
        # Release the connection whether or not the download succeeded.
        url.close()

    Loc.objects.all().delete()
    Geoalias.objects.filter(kind='full').delete()

    # Iterate the archive member lazily instead of loading every line.
    for line in archive.open(archive.namelist()[0]):
        line = line.rstrip('\n')
        try:
            # Parsing happens inside the guarded region so that one
            # malformed line cannot abort the import after the tables
            # have already been emptied.
            (geonameid, name, asciiname, alternatenames, latitude,
             longitude, feature_class, feature_code, country_code, cc2,
             admin1_code, admin2_code, admin3_code, admin4_code,
             population, elevation, dem, timezone,
             modification_date) = line.split('\t')
            if int(population) < 25000:
                continue
            normalized_name = re.sub(r'[^a-z]', '', asciiname.lower())
            st_point = 'POINT(%s %s)' % (longitude, latitude)
            loc = Loc(
                id=geonameid,
                name=name,
                country=country_code.lower(),
                region=admin1_code,
                pop=population,
                lat=latitude,
                lon=longitude,
                point=st_point,
                granularity='I',  # CITY level
                count=0)
            loc.save()
            alias = Geoalias(loc=loc, word=normalized_name, kind='full',
                             count=0)
            alias.save()
        except Exception as err:
            # Best-effort import: report the failing record and the reason
            # instead of swallowing the error (and KeyboardInterrupt).
            print("better errorhandling dude! %s [%s: %s]"
                  % (line, type(err).__name__, err))
def handle(self, *args, **options):
    """Rebuild the Loc table and the 'full' Geoalias entries from DB_URL.

    Downloads the zip archive at DB_URL, deletes every existing Loc and
    every Geoalias of kind 'full', then reads the first archive member as
    tab-separated records of 19 columns (the column names match the
    GeoNames dump layout -- TODO confirm against DB_URL).

    Records whose population is below 25000 are skipped.  Every remaining
    record becomes one Loc (granularity 'I', i.e. city level) plus one
    'full' Geoalias whose word is the lower-cased ASCII name with every
    character outside a-z removed.

    A record that cannot be parsed or saved is reported on stdout together
    with the error and skipped; the import continues with the next record.
    """
    url = urllib.urlopen(DB_URL)
    try:
        archive = ZipFile(StringIO(url.read()))
    finally:
        # Release the connection whether or not the download succeeded.
        url.close()

    Loc.objects.all().delete()
    Geoalias.objects.filter(kind='full').delete()

    # Iterate the archive member lazily instead of loading every line.
    for line in archive.open(archive.namelist()[0]):
        line = line.rstrip('\n')
        try:
            # Parsing happens inside the guarded region so that one
            # malformed line cannot abort the import after the tables
            # have already been emptied.
            (geonameid, name, asciiname, alternatenames, latitude,
             longitude, feature_class, feature_code, country_code, cc2,
             admin1_code, admin2_code, admin3_code, admin4_code,
             population, elevation, dem, timezone,
             modification_date) = line.split('\t')
            if int(population) < 25000:
                continue
            normalized_name = re.sub(r'[^a-z]', '', asciiname.lower())
            st_point = 'POINT(%s %s)' % (longitude, latitude)
            loc = Loc(
                id=geonameid,
                name=name,
                country=country_code.lower(),
                region=admin1_code,
                pop=population,
                lat=latitude,
                lon=longitude,
                point=st_point,
                granularity='I',  # CITY level
                count=0)
            loc.save()
            alias = Geoalias(loc=loc, word=normalized_name, kind='full',
                             count=0)
            alias.save()
        except Exception as err:
            # Best-effort import: report the failing record and the reason
            # instead of swallowing the error (and KeyboardInterrupt).
            print("better errorhandling dude! %s [%s: %s]"
                  % (line, type(err).__name__, err))
def import_iata(geonameid, iatacode):
    """Save *iatacode*, lower-cased, as an 'iata' Geoalias of Loc *geonameid*.

    The alias is attached through ``loc_id`` and starts with a count of 0.
    """
    alias_fields = {
        'loc_id': geonameid,
        'word': iatacode.lower(),
        'kind': 'iata',
        'count': 0,
    }
    Geoalias(**alias_fields).save()
def add_geoalias(loc, word, country):
    """Save *word* as a 'locode' Geoalias of *loc* for the given *country*.

    The alias starts with a count of 0.
    """
    alias_fields = {
        'loc': loc,
        'word': word,
        'kind': 'locode',
        'country': country,
        'count': 0,
    }
    Geoalias(**alias_fields).save()
def handle(self, *args, **options):
    """Re-import alternate-name and IATA aliases for the Locs in the database.

    Downloads the zip archive at DB_URL, deletes every Geoalias of kind
    GEOALIAS_TYPE and of kind 'iata', then reads the archive member
    ARCHIVE_NAME as tab-separated records of 8 columns.  For each record:

    * if its geonameid is a known Loc and its language is in ``lang_set``,
      the normalised alternate name is stored as a GEOALIAS_TYPE alias,
      but only when it differs from the Loc name and the Loc name is not
      empty;
    * otherwise, if its language is 'iata' and the geonameid is a known
      Loc, the name is stored as an 'iata' alias;
    * otherwise it is counted as not imported.

    Afterwards every entry of ``iata_manual_imports`` (code -> geonameid)
    whose code has no 'iata' alias yet is added.  A summary of the three
    counters is printed at the end.
    """
    url = urllib.urlopen(DB_URL)
    try:
        archive = ZipFile(StringIO(url.read()))
    finally:
        # Release the connection whether or not the download succeeded.
        url.close()

    # Only id and name are needed, so fetch just those two columns
    # instead of instantiating every Loc.
    loc_dict = dict(Loc.objects.values_list('id', 'name'))

    # flush before reimporting
    Geoalias.objects.filter(kind__exact=GEOALIAS_TYPE).delete()
    Geoalias.objects.filter(kind__exact='iata').delete()

    not_count = 0
    iata_count = 0
    count = 0
    # Iterate the archive member lazily instead of loading every line.
    for line in archive.open(ARCHIVE_NAME):
        line = line.rstrip('\n')
        (altid, geonameid, lang, altname, is_pref, is_short, is_colloq,
         is_historic) = line.split('\t')
        geonameid = int(geonameid)
        if geonameid in loc_dict and lang in lang_set:
            norm_altname = openipmap.utils.normalize_name(altname)
            loc_name = loc_dict[geonameid]
            if loc_name != norm_altname and len(loc_name) > 0:
                alias = Geoalias(loc_id=geonameid, word=norm_altname,
                                 kind=GEOALIAS_TYPE, lang=lang, count=0)
                alias.save()
                count += 1
        elif lang == 'iata' and geonameid in loc_dict:
            # get IATA codes for geonames we have in our database
            import_iata(geonameid, altname.lower())
            iata_count += 1
        else:
            not_count += 1

    for iata_code, geonameid in iata_manual_imports.iteritems():
        already_known = Geoalias.objects.filter(
            kind__exact='iata', word__exact=iata_code).exists()
        if not already_known:
            import_iata(geonameid, iata_code)
            iata_count += 1
            print("didn't exist: %s -> %s" % (iata_code, geonameid))
        else:
            print("already exists: %s -> %s" % (iata_code, geonameid))

    print("yes:%s no:%s iata:%s" % (count, not_count, iata_count))
def handle(self, *args, **options):
    """Re-import alternate-name and IATA aliases for the Locs in the database.

    Downloads the zip archive at DB_URL, deletes every Geoalias of kind
    GEOALIAS_TYPE and of kind 'iata', then reads the archive member
    ARCHIVE_NAME as tab-separated records of 8 columns.  For each record:

    * if its geonameid is a known Loc and its language is in ``lang_set``,
      the normalised alternate name is stored as a GEOALIAS_TYPE alias,
      but only when it differs from the Loc name and the Loc name is not
      empty;
    * otherwise, if its language is 'iata' and the geonameid is a known
      Loc, the name is stored as an 'iata' alias;
    * otherwise it is counted as not imported.

    Afterwards every entry of ``iata_manual_imports`` (code -> geonameid)
    whose code has no 'iata' alias yet is added.  A summary of the three
    counters is printed at the end.
    """
    url = urllib.urlopen(DB_URL)
    try:
        archive = ZipFile(StringIO(url.read()))
    finally:
        # Release the connection whether or not the download succeeded.
        url.close()

    # Only id and name are needed, so fetch just those two columns
    # instead of instantiating every Loc.
    loc_dict = dict(Loc.objects.values_list('id', 'name'))

    # flush before reimporting
    Geoalias.objects.filter(kind__exact=GEOALIAS_TYPE).delete()
    Geoalias.objects.filter(kind__exact='iata').delete()

    not_count = 0
    iata_count = 0
    count = 0
    # Iterate the archive member lazily instead of loading every line.
    for line in archive.open(ARCHIVE_NAME):
        line = line.rstrip('\n')
        (altid, geonameid, lang, altname, is_pref, is_short, is_colloq,
         is_historic) = line.split('\t')
        geonameid = int(geonameid)
        if geonameid in loc_dict and lang in lang_set:
            norm_altname = openipmap.utils.normalize_name(altname)
            loc_name = loc_dict[geonameid]
            if loc_name != norm_altname and len(loc_name) > 0:
                alias = Geoalias(loc_id=geonameid, word=norm_altname,
                                 kind=GEOALIAS_TYPE, lang=lang, count=0)
                alias.save()
                count += 1
        elif lang == 'iata' and geonameid in loc_dict:
            # get IATA codes for geonames we have in our database
            import_iata(geonameid, altname.lower())
            iata_count += 1
        else:
            not_count += 1

    for iata_code, geonameid in iata_manual_imports.iteritems():
        already_known = Geoalias.objects.filter(
            kind__exact='iata', word__exact=iata_code).exists()
        if not already_known:
            import_iata(geonameid, iata_code)
            iata_count += 1
            print("didn't exist: %s -> %s" % (iata_code, geonameid))
        else:
            print("already exists: %s -> %s" % (iata_code, geonameid))

    print("yes:%s no:%s iata:%s" % (count, not_count, iata_count))
def import_iata(geonameid, iatacode):
    """Record an IATA code as a Geoalias of kind 'iata'.

    The code is lower-cased before it is stored; the alias points at the
    Loc whose id is *geonameid* and starts with a count of 0.
    """
    code = iatacode.lower()
    Geoalias(loc_id=geonameid, word=code, kind='iata', count=0).save()
def handle(self, *args, **options):
    """Rebuild the 'clli' Geoalias entries from ./openipmap/data/cllis.txt.

    Deletes every Geoalias of kind 'clli', then reads the data file: two
    header lines followed by tab-separated records of the form
    ``clli, city, region, country`` (e.g. CLEVOH / Cleveland / OH / US).
    Codes are lower-cased and stripped to a-z; only 6-letter codes are
    kept.  Blank or malformed lines are skipped.

    For each code the most frequent (city, region, country) combination is
    chosen, country and city are translated through ``ccmap`` / ``citymap``
    when present there, and a Loc is looked up:

    1. Locs having a 'full' alias equal to the normalised city name;
    2. if several match, those are narrowed down by country;
    3. if none match, Locs whose name contains the normalised city name
       within the country are tried.

    An alias is stored only when exactly one Loc remains; otherwise a
    "no match" line is printed.  The number of stored aliases is printed
    at the end.
    """
    Geoalias.objects.filter(kind__exact='clli').delete()
    count = 0
    cllis = {}
    with open('./openipmap/data/cllis.txt', 'rb') as f:
        f.readline()  # remove header
        f.readline()  # remove header
        for line in f:
            # Strip '\r' too, so CRLF files do not leave it in the country.
            fields = line.rstrip('\r\n').split('\t')
            if len(fields) != 4:
                # Blank or malformed line: skip it rather than abort after
                # the existing aliases have already been deleted.
                continue
            clli, city, region, country = fields
            clli = re.sub('[^a-z]', '', clli.lower())
            if len(clli) != 6:
                continue
            loc_str = '|'.join([city.lower(), region.lower(),
                                country.lower()])
            loc_counts = cllis.setdefault(clli, {})
            loc_counts[loc_str] = loc_counts.get(loc_str, 0) + 1

    for clli in cllis:
        loc_items = cllis[clli].items()
        print("%s" % (loc_items,))
        locs_sorted = sorted(loc_items, key=lambda x: x[1], reverse=True)
        print("%s" % (locs_sorted,))
        most_common_loc = locs_sorted[0][0]
        city, _region, country = most_common_loc.split('|')
        if country in ccmap:
            country = ccmap[country]
        if city in citymap:
            city = citymap[city]
        city_norm = re.sub('[^a-z]', '', city)

        geoalias_list = Geoalias.objects.filter(word__exact=city_norm,
                                                kind__exact='full')
        loc_id_list = [obj.loc_id for obj in geoalias_list]
        loc_list = Loc.objects.filter(id__in=loc_id_list)

        # len() is 1, >1 or 0, so three branches cover every case.
        exact_matches = len(loc_list)
        if exact_matches == 1:
            match = loc_list[0]
            failure = None
        elif exact_matches > 1:
            f_loc_list = loc_list.filter(country__exact=country)
            match = f_loc_list[0] if len(f_loc_list) == 1 else None
            failure = "no match (after tie-down to country) %s: %s, %s, %s"
        else:
            approx_list = Loc.objects.filter(name__contains=city_norm,
                                             country__exact=country)
            match = approx_list[0] if len(approx_list) == 1 else None
            failure = "no match (after approx+country) %s: %s, %s, %s"

        if match is not None:
            Geoalias(loc=match, word=clli, kind='clli', count=0).save()
            count += 1
        else:
            print(failure % (clli, city_norm, country, loc_list))

    print("total count: %s" % (count))