def test_load_with_copy(self):
    """Ensure data tables load correctly via build_tables (bulk-load path).

    Builds consolidated flat files from the test fixtures, loads them into
    the database with build_tables, then checks the full contents of the
    'dir_1_data' table against the expected OD-pair rows.
    """
    # Create the consolidated flat files from the raw test inputs.
    build_flat_files.build_flat_files(self.temp_dir, self.out_dir)
    t_name = 'dir_1_data'
    # Create the tables from header info and load the data.
    build_tables.build_tables(db=DB, pdir=self.out_dir, drop_old=True)
    # End any open transaction so the SELECT sees the committed data.
    curs.execute('END')
    curs.execute("SELECT * FROM {}".format(t_name))
    actual = curs.fetchall()
    target = [(1.0, 1.0, 10011.0, 20011.0), (1.0, 2.0, 10012.0, 20012.0),
              (1.0, 3.0, 10013.0, 20013.0), (1.0, 4.0, 10014.0, 20014.0),
              (2.0, 1.0, 10021.0, 20021.0), (2.0, 2.0, 10022.0, 20022.0),
              (2.0, 3.0, 10023.0, 20023.0), (2.0, 4.0, 10024.0, 20024.0),
              (3.0, 1.0, 10031.0, 20031.0), (3.0, 2.0, 10033.0, 20033.0),
              (3.0, 3.0, 10033.0, 20033.0), (3.0, 4.0, 10034.0, 20034.0),
              (4.0, 1.0, 10041.0, 20041.0), (4.0, 2.0, 10034.0, 20034.0),
              (4.0, 3.0, 10043.0, 20043.0), (4.0, 4.0, 10044.0, 20044.0)]
    # Compare the whole result sets: the original zip() loop silently
    # truncated to the shorter sequence, so a short (or empty) result
    # would have passed. assertEqual also catches length mismatches.
    self.assertEqual(target, actual, 'load_with_copy failed')
def xtest_load_with_insert(self):
    """Ensure data tables load correctly via load_with_insert (INSERT path).

    NOTE(review): the 'x' prefix keeps unittest discovery from running
    this test; rename to test_... to re-enable.
    """
    # Create the consolidated flat files from the raw test inputs.
    build_flat_files.build_flat_files(self.temp_dir, self.out_dir)
    # Load the alphabetically-first flat file using the INSERT loader.
    files = sorted(os.listdir(self.out_dir))
    first_file = files[0]
    fn = os.path.join(self.out_dir, first_file)
    t_name = utils_and_settings.get_table_name_from_fn(first_file)
    build_tables.load_with_insert(db=DB, t_name=t_name, file=fn, drop_old=True)
    # End any open transaction so the SELECT sees the committed data.
    curs.execute('END')
    curs.execute("SELECT * FROM {}".format(t_name))
    actual = curs.fetchall()
    target = [(1.0, 1.0, 10011.0, 20011.0), (1.0, 2.0, 10012.0, 20012.0),
              (1.0, 3.0, 10013.0, 20013.0), (1.0, 4.0, 10014.0, 20014.0),
              (2.0, 1.0, 10021.0, 20021.0), (2.0, 2.0, 10022.0, 20022.0),
              (2.0, 3.0, 10023.0, 20023.0), (2.0, 4.0, 10024.0, 20024.0),
              (3.0, 1.0, 10031.0, 20031.0), (3.0, 2.0, 10033.0, 20033.0),
              (3.0, 3.0, 10033.0, 20033.0), (3.0, 4.0, 10034.0, 20034.0),
              (4.0, 1.0, 10041.0, 20041.0), (4.0, 2.0, 10034.0, 20034.0),
              (4.0, 3.0, 10043.0, 20043.0), (4.0, 4.0, 10044.0, 20044.0)]
    # Compare whole lists: the original zip() loop silently truncated to
    # the shorter sequence and would pass on a short/empty result set.
    self.assertEqual(target, actual, 'load_with_insert failed')
def test_flat_files_partitioned(self):
    """Make sure flat files are created correctly when forced to partition.

    With test_max_rows=2, each consolidated output file is split into
    partitions of at most 2 origins. Each data row is vectorized from the
    OD matrices in the source directory:

        <origin>, <destination>, <first-series value>, <second-series value>

    e.g. for /my_sub_dir_1/ (files TRANS_OD__100 and TRANS_OD__200) the
    first rows of partition 0 are:

        1, 1, 10011, 20011
        1, 2, 10012, 20012
        ...

    my_sub_dir_2 carries the 300- and 400-series data instead.
    """
    # Build the consolidated flat files, forcing partitioning at 2 rows.
    build_flat_files.build_flat_files(self.temp_dir, self.out_dir, test_max_rows=2)
    # Gather the output files and sort for a deterministic order.
    test_files = []
    for root, dirs, files in os.walk(self.out_dir):
        for f in files:
            test_files.append(os.path.join(root, f))
    test_files.sort()
    for t in test_files:
        with open(t, 'r') as f:
            content = f.readlines()
        # Discard the '#'-prefixed, '|'-delimited header row.
        content.pop(0)
        # 2 origins per partition * 4 destinations = 8 data rows.
        self.assertEqual(len(content), 8)
        # Spot-check the first and last rows of the first partition of
        # each directory; other partitions/files are only length-checked.
        first_row = content[0]
        last_row = content[-1]
        base = os.path.splitext(os.path.basename(t))[0]
        if 'my_sub_dir_1_data0' in base:
            # 100- and 200-series data (elements start with '100'/'200').
            self.assertEqual(first_row.strip(), '1.0,1.0,10011.0,20011.0')  # o=1, d=1
            self.assertEqual(last_row.strip(), '2.0,4.0,10024.0,20024.0')   # o=2, d=4
        if 'my_sub_dir_2_data0' in base:
            # 300- and 400-series data (elements start with '300'/'400').
            self.assertEqual(first_row.strip(), '1.0,1.0,30011.0,40011.0')  # o=1, d=1
            self.assertEqual(last_row.strip(), '2.0,4.0,30024.0,40024.0')   # o=2, d=4
# Rotating file log: append mode, 10 MB per file, keep 5 backups.
h = logging.handlers.RotatingFileHandler(LOG_FILE, 'a', maxBytes=10*1024*1024, backupCount=5)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - ' '%(filename)s:%(lineno)s - %(message)s', datefmt="%Y-%m-%d %H:%M:%S")
h.setFormatter(formatter)
logger.addHandler(h)
logger.setLevel(LOG_LEVEL)

if __name__=='__main__':
    'main execution start'
    #leave these statements here - logging info imported from settings
    # For each scenario directory: build flat files into the scratch dir,
    # then create and load the database tables from them.
    for ix, d in enumerate(dirs, start=1):
        data_dir=d['data']
        scratch_dir = d['scratch']
        msg='Data loading from {} \n...to database {}. \n...Logging to {} \n'
        print(msg.format(data_dir, DB, LOG_FILE))
        #create the flat files for import
        # NOTE(review): test_max_rows=500 caps the rows written per flat
        # file — looks like a leftover test/debug limit; confirm it is
        # intended for production runs.
        build_flat_files.build_flat_files(data_dir, scratch_dir, test_max_rows=500)
        #create tables from header info in tables, then load data
        build_tables.build_tables(db=DB, pdir = scratch_dir, drop_old=True)
        # Log a separator between scenarios (skipped after the last one).
        if ix != len(dirs):
            logger.info("*******************************")
            logger.info("Beginning new scenario")
            logger.info("*******************************")
        # Summarize the database contents for this scenario.
        # NOTE(review): original indentation was lost — assumes describe_db
        # runs once per scenario (inside the loop); confirm it should not
        # run only once after all scenarios.
        describe_db.describe_db(db=DB)