def import_zipfile(self, zip_filename):
    with _ZipFile(zip_filename, 'r') as zipfile:
        inner_filename = zip_filename.split('/')[-1][5:].replace('_XBB_txt.zip', '-XBB_TXT.txt')
        with zipfile.open(inner_filename) as inner_file:
            for line in inner_file:
                line = line.decode('iso-8859-1')
                for region_type, uid_range in RecordDb.RegionTypeToUidRangeInTextLine.items():
                    uid = line[uid_range[0]:uid_range[1]]
                    # translate superseded dissemination block/area UIDs to their
                    # current equivalents before looking up the region
                    if region_type == 'DisseminationBlock':
                        if uid in self.correspondences.distribution_blocks:
                            uid = self.correspondences.distribution_blocks[uid]
                    elif region_type == 'DisseminationArea':
                        if uid in self.correspondences.distribution_areas:
                            uid = self.correspondences.distribution_areas[uid]
                    region_id = self._region_id_finder.get_id_for_type_and_uid(region_type, uid)
                    if region_id:
                        region_statistics = self._region_statistics[region_id]
                        if line[47] == 'T':
                            # flag in column 47 marks records whose counts are not complete
                            if region_type in ('Subdivision', 'Tract', 'DisseminationArea', 'DisseminationBlock'):
                                region_statistics.note = 'counts for Indian reserves and settlements are not complete'
                        else:
                            population = int(line[10:18])
                            n_dwellings = int(line[18:26])
                            n_occupied_dwellings = int(line[26:34])
                            region_statistics.population += population
                            region_statistics.n_dwellings += n_dwellings
                            region_statistics.n_occupied_dwellings += n_occupied_dwellings
def _region_types_and_zipfiles(self):
    for region_type, key in FileLoader.RegionTypeFilenames:
        filename = FILENAME_PATTERN % key
        path = os.path.join(os.path.dirname(__file__), '..', 'db', 'statistics',
                            'region-profiles', filename)
        with _ZipFile(path) as zipfile:
            yield region_type, zipfile
def import_zipfile(self, zip_filename):
    with _ZipFile(zip_filename, 'r') as zipfile:
        inner_filename = zip_filename.split('/')[-1].replace('_txt.zip', '_TXT.txt')
        with zipfile.open(inner_filename) as inner_file:
            for line in inner_file:
                line = line.decode('iso-8859-1')
                for region_type, uid_range in RecordDb.RegionTypeToUidRangeInTextLine.items():
                    uid = line[uid_range[0]:uid_range[1]]
                    region_id = self._region_id_finder.get_id_for_type_and_uid(region_type, uid)
                    if region_id:
                        region_statistics = self._region_statistics[region_id]
                        if line[47] == 'T':
                            if region_type in ('Subdivision', 'Tract', 'DisseminationArea', 'DisseminationBlock'):
                                region_statistics.note = 'counts for Indian reserves and settlements are not complete'
                        else:
                            population = int(line[10:18])
                            n_dwellings = int(line[18:26])
                            n_occupied_dwellings = int(line[26:34])
                            region_statistics.population += population
                            region_statistics.n_dwellings += n_dwellings
                            region_statistics.n_occupied_dwellings += n_occupied_dwellings
def import_zipfile(self, zip_filename, file_type):
    with _ZipFile(zip_filename, 'r') as zipfile:
        inner_filename = zip_filename.split('/')[-1].split('.')[0]
        data_filename = 'Generic_%s.xml' % (inner_filename,)
        with zipfile.open(data_filename) as data_file:
            self._load_data(data_file, file_type)
def ClickerRegister(turningPointFileName):
    # extract TTSession.xml from the TurningPoint session archive
    tpFile = _ZipFile(turningPointFileName)
    tpFile.extract("TTSession.xml")
    # read TTSession.xml
    ttFile = open("TTSession.xml")
    xmldoc = _minidom.parseString(ttFile.readline())
    # collect the clicker device IDs of all participants
    devices = []
    iParticipants = 0
    for child in xmldoc.getElementsByTagName("participant"):
        deviceID = child.childNodes[0].childNodes[0].nodeValue
        devices.append(deviceID)
        iParticipants += 1
    return devices
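# A minimal usage sketch for ClickerRegister above, assuming _ZipFile is
# zipfile.ZipFile, _minidom is xml.dom.minidom, and "session.tpzx" is a
# hypothetical TurningPoint session file containing a TTSession.xml entry.
from zipfile import ZipFile as _ZipFile
from xml.dom import minidom as _minidom

device_ids = ClickerRegister("session.tpzx")
print("Found %d clicker devices" % len(device_ids))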
def zipfile_to_txtlines(filename):
    with _ZipFile(filename) as zipfile:
        for zipinfo in zipfile.infolist():
            if zipinfo.filename.lower().endswith('.txt'):
                with zipfile.open(zipinfo) as binfile:
                    with io.TextIOWrapper(binfile, 'iso-8859-1') as txtfile:
                        for line in txtfile:
                            yield line
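# A minimal sketch of how the generator above might be consumed, assuming
# _ZipFile is zipfile.ZipFile and 'data.zip' is a hypothetical archive
# containing one or more Latin-1 encoded .txt members.
import io
from zipfile import ZipFile as _ZipFile

for line in zipfile_to_txtlines('data.zip'):
    print(line.rstrip('\n'))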
def _download_dataset(url_str, extract=True, force=False, output_dir="."):
    """Download a remote dataset and extract the contents.

    Parameters
    ----------
    url_str : string
        The URL to download from.

    extract : bool
        If true, tries to extract compressed file (zip/gz/bz2).

    force : bool
        If true, retries the download even if the downloaded file already exists.

    output_dir : string
        The directory to dump the file. Defaults to the current directory.
    """
    fname = output_dir + "/" + url_str.split("/")[-1]
    # download the file from the web unless it is already present
    if not _os.path.isfile(fname) or force:
        print "Downloading file from: ", url_str
        _urllib.urlretrieve(url_str, fname)
        if extract and fname.endswith(".zip"):
            print "Decompressing zip archive", fname
            _ZipFile(fname).extractall(output_dir)
        elif extract and fname.endswith(".tar.gz"):
            print "Decompressing tar.gz archive", fname
            _tarfile.open(fname).extractall(output_dir)
        elif extract and fname.endswith(".tar.bz2"):
            print "Decompressing tar.bz2 archive", fname
            _tarfile.open(fname).extractall(output_dir)
        elif extract and fname.endswith(".bz2"):
            print "Decompressing bz2 archive: ", fname
            outfile = open(fname.split(".bz2")[0], "w")
            print "Output file: ", outfile
            for line in _bz2.BZ2File(fname, "r"):
                outfile.write(line)
            outfile.close()
    else:
        print "File is already downloaded."
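# A minimal usage sketch for _download_dataset above (Python 2, matching the
# helper's print statements and _urllib.urlretrieve). The underscore-aliased
# imports and the URL below are assumptions for illustration only.
import os as _os
import urllib as _urllib
import tarfile as _tarfile
import bz2 as _bz2
from zipfile import ZipFile as _ZipFile

_download_dataset("https://example.com/datasets/sample.zip", output_dir=".")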
def _zip_to_dict(self, zip_filename, rel_column):
    ret = {}
    with _ZipFile(zip_filename, 'r') as zipfile:
        inner_filename = self._zip_filename_to_inner_txt_filename(zip_filename)
        with zipfile.open(inner_filename) as inner_file:
            for line in inner_file:
                # zipfile.open() yields bytes; decode so the UIDs match the
                # str keys used elsewhere in this loader
                line = line.decode('iso-8859-1')
                columns = line.split(',')
                old_uid = columns[0]
                new_uid = columns[1]
                relation = columns[rel_column]
                # relation codes '1' and '2' map an old UID to a new UID;
                # anything else marks the old UID as having no successor
                if relation == '1' or relation == '2':
                    ret[old_uid] = new_uid
                else:
                    ret[old_uid] = None
    return ret
def get_gis(url="https://www.eia.gov/maps/map_data/CrudeOil_Pipelines_US_EIA.zip"):
    """
    Returns a SpatialPointsDataFrame from a shapefile URL. Examples with EIA and
    Government of Alberta.

    US Energy Information Agency:
        EIA crude pipelines : https://www.eia.gov/maps/map_data/CrudeOil_Pipelines_US_EIA.zip
        EIA Refinery Map : https://www.eia.gov/maps/map_data/Petroleum_Refineries_US_EIA.zip
        EIA Products Pipelines : https://www.eia.gov/maps/map_data/PetroleumProduct_Pipelines_US_EIA.zip
        EIA Products Terminals : https://www.eia.gov/maps/map_data/PetroleumProduct_Terminals_US_EIA.zip
        EIA NG Pipelines : https://www.eia.gov/maps/map_data/NaturalGas_InterIntrastate_Pipelines_US_EIA.zip
        EIA NG Storage : https://www.eia.gov/maps/map_data/PetroleumProduct_Terminals_US_EIA.zip
        EIA NG Hubs : https://www.eia.gov/maps/map_data/NaturalGas_TradingHubs_US_EIA.zip
        EIA LNG Terminals : https://www.eia.gov/maps/map_data/Lng_ImportExportTerminals_US_EIA.zip

    Alberta:
        Oil Sands, Petroleum and Natural Gas AB : https://gis.energy.gov.ab.ca/GeoviewData/OS_Agreements_Shape.zip

    Parameters
    ----------
    url : str
        URL of the zipped shapefile

    Returns
    -------
    geopandas GeoDataFrame

    Examples
    --------
    >>> import risktools as rt
    >>> df = rt.data.get_gis("https://www.eia.gov/maps/map_data/CrudeOil_Pipelines_US_EIA.zip")
    """
    try:
        import geopandas as _geopandas
    except ImportError:
        raise ImportError("Geopandas not installed. Please install before running")

    try:
        from fiona.io import ZipMemoryFile as _ZMF
    except ImportError:
        raise ImportError("Fiona not installed. Please install before running")

    fn = _requests.get(url)

    # the archive may contain multiple directories or files; take the first shapefile
    for ff in _ZipFile(_BytesIO(fn.content)).namelist():
        if ff.endswith(".shp"):
            shp_file = ff
            break

    zf = _ZMF(fn.content)
    shp = zf.open(shp_file)

    return _geopandas.GeoDataFrame.from_features(shp, crs=shp.crs)