def test_write_all(self):
    """writeToDisk() with no arguments must write every header back to file.

    Relies on loadFromDisk() being correct (covered by test_load_from_disk).
    """
    hfo = HarFileObj.loadFromDisk(TestHarFileObj._dd + "test.har")
    hfo.writeToDisk("temp.har")  # By default, writes all headers
    try:
        self.assertTrue(os.path.isfile("temp.har"))
        hfo = HarFileObj.loadFromDisk("temp.har")
        header_names = hfo.getHeaderArrayNames()
        test_hn = ['XXCD', 'XXCR', 'XXCP', 'XXHS', 'CHST', 'INTA', 'SIMP',
                   'SIM2', 'NH01', 'ARR7']
        # assertEqual (unlike the old zip()-based all()) also fails when the
        # lists differ in length, i.e. when headers are missing or extra.
        self.assertEqual(header_names, test_hn)
    finally:
        # Clean up even if an assertion fails, so later runs start fresh.
        if os.path.isfile("temp.har"):
            os.remove("temp.har")
def test_load_from_disk(self):
    """loadFromDisk() must return all headers of the sample file, in order."""
    hfo = HarFileObj.loadFromDisk(TestHarFileObj._dd + "test.har")
    header_names = hfo.getHeaderArrayNames()
    test_hn = ['XXCD', 'XXCR', 'XXCP', 'XXHS', 'CHST', 'INTA', 'SIMP',
               'SIM2', 'NH01', 'ARR7']
    # assertEqual (unlike the old zip()-based all()) also fails when the
    # lists differ in length, i.e. when headers are missing or extra.
    self.assertEqual(header_names, test_hn)
def test_get_real_headerarrays(self):
    """getRealHeaderArrayNames() must list only the real header arrays."""
    shutil.copy2(TestHarFileObj._dd + "test.har",
                 "test_get_real_headerarrays.har")
    try:
        hfo = HarFileObj.loadFromDisk("test_get_real_headerarrays.har")
        hn = hfo.getRealHeaderArrayNames()
        # assertEqual also catches missing/extra names (zip() would not).
        self.assertEqual(hn, ['NH01', 'ARR7'])
    finally:
        # Remove the working copy even if an assertion fails.
        os.remove("test_get_real_headerarrays.har")
def test_overwrite_header(self):
    """Writing a modified header back to the same file must persist the change.

    Relies on loadFromDisk() being correct (covered by test_load_from_disk).
    """
    shutil.copy2(TestHarFileObj._dd + "test.har", "test_overwrite_header.har")
    try:
        hfo = HarFileObj.loadFromDisk("test_overwrite_header.har")
        hao = hfo.getHeaderArrayObjs(["ARR7"])[0]
        hao["array"][0, 0, 0, 0, 0, 0, 0] = 42.0
        hfo.writeToDisk("test_overwrite_header.har")
        # Re-load from disk: the edit must survive a write/read round trip.
        hfo = HarFileObj.loadFromDisk("test_overwrite_header.har")
        header_names = hfo.getHeaderArrayNames()
        test_hn = ['XXCD', 'XXCR', 'XXCP', 'XXHS', 'CHST', 'INTA', 'SIMP',
                   'SIM2', 'NH01', 'ARR7']
        # assertEqual also catches missing/extra headers (zip() would not).
        self.assertEqual(header_names, test_hn)
        hao = hfo.getHeaderArrayObjs(["ARR7"])[0]
        # isclose: value went through a float32 file round trip.
        self.assertTrue(np.isclose(hao["array"][0, 0, 0, 0, 0, 0, 0], 42.0))
    finally:
        # Remove the working copy even if an assertion fails.
        os.remove("test_overwrite_header.har")
def test_attributes_style(self):
    """head_arrs attribute: getter/setter round trip and type validation."""
    hfo = HarFileObj.loadFromDisk(TestHarFileObj._dd + "test.har")
    # Getter must mirror getHeaderArrayObjs().
    self.assertTrue(hfo.head_arrs == hfo.getHeaderArrayObjs())
    # Setter round trip leaves the attribute consistent with the getter.
    hfo.head_arrs = hfo.getHeaderArrayObjs()
    self.assertTrue(hfo.head_arrs == hfo.getHeaderArrayObjs())
    # Assigning a non-list, or a list of non-header objects, must raise.
    for bad_value in ({}, ["a"]):
        with self.assertRaises(TypeError):
            hfo.head_arrs = bad_value
def test_addremove_header_array_obj(self):
    """Removing a header drops it from the name list; re-adding appends it."""
    shutil.copy2(TestHarFileObj._dd + "test.har", "test_remove_header_array.har")
    try:
        hfo = HarFileObj.loadFromDisk("test_remove_header_array.har")
        hao = hfo.removeHeaderArrayObjs("INTA")
        base_hn = ['XXCD', 'XXCR', 'XXCP', 'XXHS', 'CHST', 'SIMP', 'SIM2',
                   'NH01', 'ARR7']
        # assertEqual also catches missing/extra names (zip() would not).
        self.assertEqual(hfo.getHeaderArrayNames(), base_hn)
        hfo.addHeaderArrayObjs(hao)
        # The removed header comes back at the end of the list.
        self.assertEqual(hfo.getHeaderArrayNames(), base_hn + ['INTA'])
    finally:
        # Remove the working copy even if an assertion fails.
        os.remove("test_remove_header_array.har")
def data2har(data, allDims):
    """Convert pandas/NumPy data into a HarFileObj (GEMPACK ViewHAR format).

    Requires the harpy module to be installed.
    See https://github.com/GEMPACKsoftware/HARPY for info.

    Parameters
    ----------
    data : dict
        Keyed by coefficient name. Each value is either
        * a list of set elements (written as a SET header), or
        * a tuple ``(array, header_name, long_name, dimensions)`` where
          ``array`` may be a scalar, an np.ndarray, or a pandas object
          (anything exposing ``.values``), and ``dimensions`` is a list
          of dimension names (may be empty or None for scalars).
    allDims : dict
        Maps each dimension name used in ``data`` to the list of its
        elements, e.g. ``{"COM": ["COM1", "COM2", "COM3"]}``.

    Returns
    -------
    HarFileObj
        In-memory HAR file with one header per entry of ``data``.

    Raises
    ------
    ValueError
        If a dimension named in ``data`` is missing from ``allDims``.
    """
    har_file = HarFileObj()
    for key, value in data.items():
        # A bare list defines a SET, not numeric data.
        if isinstance(value, list):
            har_file.addHeaderArrayObj(
                HAO.HeaderArrayFromData(name=key, array=np.array(value)))
            continue

        # Tuple with numeric data; the coefficient name is the dict key.
        array, name, long_name, dimensions = value

        # Coerce everything to float32.  isinstance (rather than type ==)
        # also accepts numpy scalar subtypes such as np.float32/np.int64,
        # which the old checks would have sent down the `.values` branch.
        if isinstance(array, (int, float, np.floating, np.integer)):
            array = np.array(array, dtype=np.float32)
        elif isinstance(array, np.ndarray):
            array = array.astype(np.float32)
        else:
            # pandas objects expose their data through .values.
            array = np.array(array.values, dtype=np.float32)

        sets = []
        for dim in dimensions or []:
            if dim not in allDims:
                # Single formatted message instead of the old multi-arg
                # ValueError, which rendered as an unreadable tuple.
                raise ValueError(
                    f"Dimension {dim!r} not specified! "
                    "Check the dimension dictionary.")
            sets.append({"name": dim,
                         "dim_desc": allDims[dim],
                         "dim_type": "Set"})

        har_file.addHeaderArrayObj(
            HAO.HeaderArrayFromData(name=name, array=array,
                                    coeff_name=key, long_name=long_name,
                                    sets=sets))
    return har_file
def test_get_header_array_obj(self):
    """getHeaderArrayObjs() must hand back HeaderArrayObj instances.

    Relies on loadFromDisk() being correct (covered by test_load_from_disk).
    """
    har_file = HarFileObj.loadFromDisk(TestHarFileObj._dd + "test.har")
    first_match = har_file.getHeaderArrayObjs(["ARR7"])[0]
    self.assertTrue(isinstance(first_match, HeaderArrayObj))
def test_is_valid(self):
    """Every header loaded from the sample file must pass is_valid().

    Relies on external functions operating correctly.
    """
    # NOTE(review): this uses TestHeaderArray._dd, not TestHarFileObj._dd,
    # for the data directory — presumably both point at the same place;
    # confirm against the class definitions.
    har_file = HarFileObj.loadFromDisk(TestHeaderArray._dd + "test.har")
    for header in har_file.getHeaderArrayObjs():
        self.assertTrue(header.is_valid())
        }, **{
            # Merge the aggregated industry codes into the rename map.
            "I_681+68209+683": "I_68",
            "I_68201_68202": "I_68A"
        }
    }, inplace=True)
    regionalData[k].reset_index(drop=True, inplace=True)

#%%
# Working copies of the two regional tables used below.
regOutput = regionalData["Output and employment by region"].copy()
regHH = regionalData["Households' transactions by region"].copy()

#%%
# Load the aggregated base data and pull out its set headers.
regBaseData = HarFileObj.loadFromDisk(harFolder + "/basedata30.har")

regInd = regBaseData.getHeaderArrayObj("IND")["array"].tolist()
regCom = regBaseData.getHeaderArrayObj("COM")["array"].tolist()
regSrc = regBaseData.getHeaderArrayObj("SRC")["array"].tolist()
regMar = regBaseData.getHeaderArrayObj("MAR")["array"].tolist()

# Set elements can carry trailing spaces from the HAR file; strip them.
regInd = [i.strip(' ') for i in regInd]
regCom = [c.strip(' ') for c in regCom]
regSrc = [s.strip(' ') for s in regSrc]
regMar = [m.strip(' ') for m in regMar]

#%%
# Check that industries in the aggregated basedata match with the regional data:
# NOTE: bare expression — its result is only shown in an interactive/notebook run.
set(regOutput.Industry) == set(regInd)
import dataGetterFunction as dgf
import harWriterFunction as hwf
import mapperFunction as imf
import checkerFunctions as cfs

#%%
# Choose base year for data:
baseYear = 2014
# Raw data folder:
rawFolder = "rawdata"
# Folder for output HAR-files:
harFolder = "hardata"

#%%
# Read data from previous steps:
baseData = HarFileObj.loadFromDisk(harFolder + "/basedataNEW.har")

#%% [markdown]
# #### Read data entries from previous steps:

#%%
# Sets:
COM = baseData.getHeaderArrayObj("COM")["array"].tolist()
IND = baseData.getHeaderArrayObj("IND")["array"].tolist()
SRC = baseData.getHeaderArrayObj("SRC")["array"].tolist()
OCC = baseData.getHeaderArrayObj("OCC")["array"].tolist()
MAR = baseData.getHeaderArrayObj("MAR")["array"].tolist()

# HarFileObj leaves some trailing whitespaces to some entries (this may have
# changed in more recent versions). Remove them with:
COM = [c.strip(' ') for c in COM]
IND = [i.strip(' ') for i in IND]