def verify(db, number_of_tiles, files_hashes, continent, north, south, west, east):
    # For every tile, verify the top left coordinate.
    for file in files_hashes:
        # Strip .hgt.zip extension:
        file = file[1][0:-8]
        [lat, lon] = util.getLatLonFromFileName(file)
        # Only a smaller part of Australia (see below):
        if util.inBoundingBox(lat, lon, north, south, west, east):
            print "Verify " + file + "..."
            # Get top left altitude from file:
            coordinate_file = loadTile(continent, file)[1][0]
            # Get top left altitude from database:
            coordinate_db = db.fetchTopLeftAltitude(lat, lon)
            if coordinate_db != coordinate_file:
                print "Mismatch tile " + file
                exit()
    # Check the total number of points in the database:
    print "Check the total number of points in the database..."
    total = db.query("SELECT count(pos) FROM altitude")[0][0]
    if total != number_of_tiles * 1200 * 1200:
        print "Not all tiles have been (completely) inserted!"
        exit()
    print "All tiles seem to have made it into the database! Enjoy."
    exit()
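# Note: util.inBoundingBox is used throughout these scripts but its source is
# not included here. The sketch below only illustrates the behaviour implied by
# the call sites (accept everything when no bounding box is given, otherwise
# require the tile's lat/lon to fall inside it); the real implementation in
# data/util.py may differ.
def inBoundingBox(lat, lon, north, south, west, east):
    # Without a bounding box, every tile is accepted.
    if north is None or south is None or west is None or east is None:
        return True
    # Otherwise the tile's south-west corner must lie inside the box.
    return south <= lat <= north and west <= lon <= east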
def testInsertTileIntoDatabase(self):
    # Load example tile
    fulltile = loadTile('Australia', 'S37E145')
    tile = []
    for row in fulltile[0:11]:
        tile.append(row[0:11])
    # Get lat and lon from filename
    [lat, lon] = getLatLonFromFileName("S37E145")
    # Make the tile smaller, so this will be faster:
    # 11x11 tile: because the top row and right column are dropped,
    # only the bottom-left 10x10 tile will be stored in the
    # database.
    # Insert tile into our Cassandra ColumnFamily
    self.db_cas.insertTile(tile, lat, lon)
    # Check if the tile is indeed in the ColumnFamily
    tile_back = self.db_cas.readTile(lat, lon)
    for i in range(len(tile) - 1):
        for j in range(len(tile) - 1):
            self.assert_(tile_back[i][j] == tile[i+1][j])
def testInsertTileIntoDatabase(self):
    # Create table
    self.assert_(self.db_psycopg2.createTableAltitude())
    # Load example tile
    fulltile = loadTile('Australia', 'S37E145')
    tile = []
    for row in fulltile[0:11]:
        tile.append(row[0:11])
    # Get lat and lon from filename
    [lat, lon] = getLatLonFromFileName("S37E145")
    # Make the tile smaller, so this will be faster:
    # 11x11 tile: because the top row and right column are dropped,
    # only the bottom-left 10x10 tile will be stored in the
    # database.
    # Insert tile into database.
    # We use psycopg2 for the connection in this case.
    self.db_psycopg2.insertTile(tile, lat, lon)
    # Check if the tile is indeed in the database
    tile_back = self.db_psycopg2.readTile(lat, lon)
    for i in range(len(tile) - 1):
        for j in range(len(tile) - 1):
            self.assert_(tile_back[i][j] == tile[i+1][j])
def testInsertTileWithNull(self):
    # Create table
    self.assert_(self.db_psycopg2.createTableAltitude())
    # Some tiles contain the value -32768, which means NULL (not implemented yet).
    # Tile S27E123 has several -32768 values, for example tile[1086][462].
    fulltile = loadTile('Australia', 'S27E123')
    self.assertEqual(fulltile[1086][462], -32768)
    # Take part of the tile around that area
    tile = []
    for row in fulltile[1080:1091]:
        tile.append(row[460:471])
    # Get lat and lon from filename
    [lat, lon] = getLatLonFromFileName("S27E123")
    # Insert tile into database
    self.db_psycopg2.insertTile(tile, lat, lon)
    # Check if the tile is indeed in the database
    tile_back = self.db_psycopg2.readTile(lat, lon)
    for i in range(len(tile) - 1):
        for j in range(len(tile) - 1):
            self.assert_(tile_back[i][j] == tile[i+1][j])
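# The test above documents that the SRTM void value -32768 is not yet mapped to
# NULL. One possible approach, sketched here as a hypothetical psycopg2-based
# helper (the altitude(pos, alt) layout and the pos numbering are assumptions
# taken from the queries and the index formula used elsewhere in these
# scripts), is to translate -32768 to None before executing the INSERT:
def insertRowWithNulls(cursor, begin, row, values):
    data = []
    for col, alt in enumerate(values):
        pos = begin + (row - 1) * 1200 + col
        # psycopg2 converts None into SQL NULL.
        data.append((pos, None if alt == -32768 else alt))
    cursor.executemany("INSERT INTO altitude (pos, alt) VALUES (%s, %s)", data)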
def main():
    # First we make a list of all files that need to be downloaded. This depends
    # on the arguments given to the program.
    # The first argument should be the continent:
    # * Africa
    # * Australia
    # * Eurasia
    # * Islands
    # * North_America
    # * South_America
    try:
        continent = "_".join(map(lambda s: s.capitalize(), re.split("[ _]", sys.argv[1])))
    except IndexError:
        continent = ""
    if continent not in "Africa Australia Eurasia Islands North_America South_America".split():
        usage()
        sys.exit(1)

    # First we get the list of files through an HTTP connection.
    http = httplib.HTTPConnection("dds.cr.usgs.gov")
    http.request("GET", "/srtm/version2_1/SRTM3/" + continent + "/")
    # Now list all tiles of that continent.
    # See http://dds.cr.usgs.gov/srtm/version2_1/SRTM3/[continent]/
    response = http.getresponse()
    html = response.read()
    files = re.findall(r'<a href="([^"]+)"', html)
    files.pop(0)  # remove "Parent Directory" link
    # And close connection.
    http.close()

    # Now download all files using urllib.urlretrieve.
    # Do we have a bounding box?
    [north, south, west, east] = util.getBoundingBox(sys.argv, 2)
    for i in range(len(files)):
        if not os.path.exists("data/" + continent + "/" + files[i]):
            [lat, lon] = util.getLatLonFromFileName(files[i])
            if util.inBoundingBox(lat, lon, north, south, west, east):
                print "Downloading " + files[i] + " (lat = " + str(lat) + ", lon = " + str(lon) + ")... (" + str(i + 1) + " of " + str(len(files)) + ")"
                (f, tmp) = tempfile.mkstemp()
                try:
                    urllib.urlretrieve("http://dds.cr.usgs.gov/srtm/version2_1/SRTM3/" + continent + "/" + files[i], tmp)
                    os.close(f)
                    shutil.move(tmp, "data/" + continent + "/" + files[i])
                except Exception, msg:
                    sys.stderr.write(str(msg) + "\n")
                    os.remove(tmp)
def verify(db, number_of_tiles, files_hashes, continent, north, south, west, east):
    # For every tile, verify the top left coordinate.
    for file in files_hashes:
        # Strip .hgt.zip extension:
        file = file[1][0:-8]
        [lat, lon] = util.getLatLonFromFileName(file)
        # Only a smaller part of Australia (see below):
        if util.inBoundingBox(lat, lon, north, south, west, east):
            print "Verify " + file + "..."
            # Get top left altitude from file:
            coordinate_file = loadTile(continent, file)[1][0]
            # Get top left altitude from database:
            coordinate_db = db.fetchTopLeftAltitude(lat, lon)
            if coordinate_db != coordinate_file:
                print "Mismatch tile " + file
                exit()
    # Check the total number of points in the database:
    print "Check the total number of points in the database..."
    sql = db.query("SELECT count(pos) FROM altitude")
    total = int(sql.getresult()[0][0])
    if total != number_of_tiles * 1200 * 1200:
        print "Not all tiles have been (completely) inserted!"
        exit()
    print "All tiles seem to have made it into the database! Enjoy."
    exit()
# python test/verify_download.py Australia
# python test/verify_download.py Eurasia 54 47 0 16
#
# I could not find the 'official' MD5 checksums for the data, so I created them
# myself. If you get an error message, that could also mean the MD5
# checksum in this script is wrong. In that case: please let me know.

import hashlib
import sys
import os
import re

sys.path += [os.path.abspath('.')]
from data import util, files

continent = '_'.join(map(lambda s: s.capitalize(), re.split('[ _]', sys.argv[1])))
[north, south, west, east] = util.getBoundingBox(sys.argv, 2)
files_hashes = util.getFilesHashes(continent)

if __name__ == '__main__':
    for file_hash in files_hashes:
        [lat, lon] = util.getLatLonFromFileName(file_hash[1])
        if util.inBoundingBox(lat, lon, north, south, west, east):
            # Read the zip file in binary mode and compare its MD5 digest.
            f = open("data/" + continent + "/" + file_hash[1], "rb")
            if hashlib.md5(f.read()).hexdigest() != file_hash[0]:
                print "Error in file " + file_hash[1]
                exit()
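# util.getFilesHashes is not shown here; judging from the indexing above
# (file_hash[0] is the MD5 hex digest, file_hash[1] the file name), it
# presumably returns a list of [md5, filename] pairs. A minimal sketch of such
# a helper, assuming a hypothetical md5sum-style checksum file per continent:
def getFilesHashes(continent):
    hashes = []
    with open("data/" + continent + ".md5") as f:  # file location is an assumption
        for line in f:
            parts = line.split()
            if len(parts) == 2:
                hashes.append([parts[0], parts[1]])
    return hashes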
def main():
    # First we make a list of all files that need to be downloaded. This depends
    # on the arguments given to the program.
    # The first argument should be the continent:
    # * Africa
    # * Australia
    # * Eurasia
    # * Islands
    # * North_America
    # * South_America
    if len(sys.argv) > 1:
        continent = sys.argv[1]
        util.verifyIsContinent(continent)
    else:
        print "Please provide arguments.\n", \
            "First argument should be Africa, Australia, Eurasia, Islands, North_America or South_America.\n", \
            "Second argument (optional) specifies from which tile to resume. Use the full file name, e.g.\n", \
            "'N36W004.hgt.zip'. Set to 0 to start at the first file.\n", \
            "Arguments 3-6 optionally specify a bounding box: north, south, west, east."
        exit()

    # First we get the list of files through an FTP connection.
    ftp = FTP('e0srp01u.ecs.nasa.gov')
    ftp.login()
    ftp.cwd("srtm/version2/SRTM3/" + continent)
    # Now list all tiles of that continent.
    # See ftp://e0srp01u.ecs.nasa.gov/srtm/version2/SRTM3/[continent]/
    files = ftp.nlst()
    # And close connection.
    ftp.close()

    # Now download all files using urllib.urlretrieve.
    # Determine if we need to resume at a certain point.
    if len(sys.argv) > 2:
        resume = sys.argv[2]
        if resume != "0":
            skip = True
            print "Resume from " + resume + "..."
        else:
            skip = False
    else:
        skip = False

    # Do we have a bounding box?
    [north, south, west, east] = util.getBoundingBox(sys.argv, 3)
    for i in range(len(files)):
        if skip:
            if files[i] == resume:
                skip = False
        if not skip:
            [lat, lon] = util.getLatLonFromFileName(files[i])
            if util.inBoundingBox(lat, lon, north, south, west, east):
                print "Downloading " + files[i] + " (lat = " + str(lat) + ", lon = " + str(lon) + ")... (" + str(i + 1) + " of " + str(len(files)) + ")"
                urllib.urlretrieve("ftp://e0srp01u.ecs.nasa.gov/srtm/version2/SRTM3/" + continent + "/" + files[i], "data/" + continent + "/" + files[i])
def main():
    # First we make a list of all files that need to be downloaded. This depends
    # on the arguments given to the program.
    # The first argument should be the continent:
    # * Africa
    # * Australia
    # * Eurasia
    # * Islands
    # * North_America
    # * South_America
    if len(sys.argv) > 1:
        continent = sys.argv[1]
        util.verifyIsContinent(continent)
    else:
        print "Please provide arguments.\n", \
            "First argument should be Africa, Australia, Eurasia, Islands, North_America or South_America.\n", \
            "Second argument (optional) specifies from which tile to resume. Use the full file name, e.g.\n", \
            "'N36W004.hgt.zip'. Set to 0 to start at the first file.\n", \
            "Arguments 3-6 optionally specify a bounding box: north, south, west, east."
        exit()

    # First we get the list of files through an HTTP connection.
    http = httplib.HTTPConnection('dds.cr.usgs.gov')
    http.request("GET", "/srtm/version2_1/SRTM3/" + continent + "/")
    # Now list all tiles of that continent.
    # See http://dds.cr.usgs.gov/srtm/version2_1/SRTM3/[continent]/
    response = http.getresponse()
    html = response.read()
    files = re.findall(r'<a href="([^"]+)"', html)
    files.pop(0)  # remove "Parent Directory" link
    # And close connection.
    http.close()

    # Now download all files using urllib.urlretrieve.
    # Determine if we need to resume at a certain point.
    if len(sys.argv) > 2:
        resume = sys.argv[2]
        if resume != "0":
            skip = True
            print "Resume from " + resume + "..."
        else:
            skip = False
    else:
        skip = False

    # Do we have a bounding box?
    [north, south, west, east] = util.getBoundingBox(sys.argv, 3)
    for i in range(len(files)):
        if skip:
            if files[i] == resume:
                skip = False
        if not skip:
            [lat, lon] = util.getLatLonFromFileName(files[i])
            if util.inBoundingBox(lat, lon, north, south, west, east):
                print "Downloading " + files[i] + " (lat = " + str(lat) + ", lon = " + str(lon) + ")... (" + str(i + 1) + " of " + str(len(files)) + ")"
                urllib.urlretrieve("http://dds.cr.usgs.gov/srtm/version2_1/SRTM3/" + continent + "/" + files[i], "data/" + continent + "/" + files[i])
def testGetLatLonFromFileName(self):
    self.assertEqual([-11, 119], getLatLonFromFileName("S11E119"))
    self.assertEqual([11, 119], getLatLonFromFileName("N11E119"))
    self.assertEqual([11, -119], getLatLonFromFileName("N11W119"))
    self.assertEqual([-11, -119], getLatLonFromFileName("S11W119"))
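# The assertions above pin down the expected behaviour of getLatLonFromFileName:
# an 'S' or 'W' prefix makes the latitude or longitude negative. A minimal
# sketch consistent with exactly these cases (the actual implementation in
# data/util.py may handle more formats):
def getLatLonFromFileName(name):
    lat = int(name[1:3])
    lon = int(name[4:7])
    if name[0] == "S":
        lat = -lat
    if name[3] == "W":
        lon = -lon
    return [lat, lon]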
        for col in range(left_col, right_col + 1):
            f.write(str(begin + (row - 1) * 1200 + col) + ", " + str(tile[row][col]) + "\n")
    f.close()


if __name__ == '__main__':
    # We will only upload 1 tile to the Google App Engine. This will take quite
    # a bit of time. For the offline data store, we will only "upload" the city
    # of Heidelberg; the offline data store is very slow.
    # For this we need tile N49E008.
    name = "N49E008"
    tile = loadTile("Eurasia", name)
    [lat, lon] = util.getLatLonFromFileName(name)
    if not ("online" in sys.argv or "offline" in sys.argv):
        print "Online or offline?"
        exit()
    if sys.argv[1] == "offline":
        # If we are offline, we'll only look at the center of Heidelberg:
        #   49.39 --- 49.42
        #    8.67 ---  8.71
        # That corresponds to:
        row_top = int((1.0 - 0.42) * 1200.)
        row_bottom = int((1.0 - 0.39) * 1200.)
        col_left = int(0.67 * 1200.)
        col_right = int(0.71 * 1200.)
        # That makes 1813 records.
def main():
    db_pg = DatabasePg(database_pg.db, database_pg.db_user, database_pg.db_pass)
    db_psycopg2 = DatabasePsycopg2(database_pg.db, database_pg.db_user, database_pg.db_pass)
    try:
        continent = sys.argv[1]
    except IndexError:
        print "Please specify the continent: Africa, Australia, Eurasia, Islands, North_America or South_America."
        exit()

    # Does the user want to empty the database?
    if 'empty' in sys.argv:
        print "Deleting tables from database..."
        db_pg.dropAllTables()
        print "Done..."
        exit()

    [north, south, west, east] = util.getBoundingBox(sys.argv, 3)
    files_hashes = util.getFilesHashes(continent)
    number_of_tiles = util.numberOfFiles(files_hashes, north, south, west, east)

    # Verify result?
    if 'verify' in sys.argv:
        verify(db_pg, number_of_tiles, files_hashes, continent, north, south, west, east)

    # If a tile name is given as the second argument, resume from there.
    p = re.compile(r'[NSEW]\d*')
    resume_from = ""
    try:
        if p.match(sys.argv[2]):
            resume_from = sys.argv[2]
    except IndexError:
        pass

    db_pg.createTableAltitude()
    i = 0
    for file in files_hashes:
        # Strip .hgt.zip extension:
        file = file[1][0:-8]
        # Get latitude and longitude from the file name.
        [lat, lon] = util.getLatLonFromFileName(file)
        if util.inBoundingBox(lat, lon, north, south, west, east):
            i = i + 1
            # Are we resuming?
            if resume_from == file:
                resume_from = ""
            if resume_from == "":
                # Load tile from file
                tile = loadTile(continent, file)
                # First check if the tile is not already in the database:
                try:
                    db_pg.fetchTopLeftAltitude(lat, lon)
                    print("Skipping tile " + file + " (" + str(i) + " / " + str(number_of_tiles) + ") ...")
                except:
                    print("Insert data for tile " + file + " (" + str(i) + " / " + str(number_of_tiles) + ") ...")
                    db_psycopg2.insertTile(tile, lat, lon)
    print("All tiles inserted. Please verify the result with: python read_data.py verify")
def main():
    db_cas = ColumnFamilyCass(database_cas.keyspace, database_cas.cf_name, database_cas.nodelist)

    # Does the user want to empty the column family?
    if 'empty' in sys.argv:
        print "Purging data."
        db_cas.purge()
        print "Done..."
        exit()

    try:
        continent = '_'.join(map(lambda s: s.capitalize(), re.split('[ _]', sys.argv[1])))
    except IndexError:
        print "Please specify the continent: Africa, Australia, Eurasia, Islands, North_America or South_America."
        sys.exit(1)

    [north, south, west, east] = util.getBoundingBox(sys.argv, 3)
    files_hashes = util.getFilesHashes(continent)
    number_of_tiles = util.numberOfFiles(files_hashes, north, south, west, east)

    # Verify result?
    if 'verify' in sys.argv:
        verify(db_cas, number_of_tiles, files_hashes, continent, north, south, west, east)

    # @todo How does this work?
    # If a tile name is given as the second argument, resume from there.
    p = re.compile(r'[NSEW]\d*')
    resume_from = ""
    try:
        if p.match(sys.argv[2]):
            resume_from = sys.argv[2]
    except IndexError:
        pass

    i = 0
    for file in files_hashes:
        # Strip .hgt.zip extension:
        file = file[1][0:-8]
        # Get latitude and longitude from the file name.
        [lat, lon] = util.getLatLonFromFileName(file)
        if util.inBoundingBox(lat, lon, north, south, west, east):
            i = i + 1
            # Are we resuming?
            if resume_from == file:
                resume_from = ""
            if resume_from == "":
                # First check if the tile is not already in the database:
                try:
                    db_cas.fetchTopLeftAltitude(lat, lon)
                    print("Skipping tile " + file + " (" + str(i) + " / " + str(number_of_tiles) + ") ...")
                except IndexError:
                    print("Insert data for tile " + file + " (" + str(i) + " / " + str(number_of_tiles) + ") ...")
                    # Load tile from file
                    tile = loadTile(continent, file)
                    db_cas.insertTile(tile, lat, lon)
    print("All tiles inserted. You will want to run a nodetool repair.")
    print("Import is done. Please verify the result with: python read_data_cas.py verify")