Example no. 1
0
 def test_load_with_copy(self):
     """Ensure data tables load correctly (COPY path) by building and
     loading the generated flat files, then reading the rows back."""
     build_flat_files.build_flat_files(self.temp_dir, self.out_dir)
     # build the tables and load the data from the flat-file directory
     t_name = 'dir_1_data'
     table = build_tables.build_tables(db=DB, pdir=self.out_dir, drop_old=True)

     # close any open transaction, then read back what was loaded
     curs.execute('END')
     curs.execute("SELECT * FROM {}".format(t_name))
     actual = curs.fetchall()

     # expected rows: origin, destination, 100-series value, 200-series value
     target = [(1.0, 1.0, 10011.0, 20011.0),
               (1.0, 2.0, 10012.0, 20012.0),
               (1.0, 3.0, 10013.0, 20013.0),
               (1.0, 4.0, 10014.0, 20014.0),
               (2.0, 1.0, 10021.0, 20021.0),
               (2.0, 2.0, 10022.0, 20022.0),
               (2.0, 3.0, 10023.0, 20023.0),
               (2.0, 4.0, 10024.0, 20024.0),
               (3.0, 1.0, 10031.0, 20031.0),
               (3.0, 2.0, 10033.0, 20033.0),
               (3.0, 3.0, 10033.0, 20033.0),
               (3.0, 4.0, 10034.0, 20034.0),
               (4.0, 1.0, 10041.0, 20041.0),
               (4.0, 2.0, 10034.0, 20034.0),
               (4.0, 3.0, 10043.0, 20043.0),
               (4.0, 4.0, 10044.0, 20044.0)]

     # zip() truncates silently, so a short/empty result set would
     # otherwise pass — pin the row count first
     self.assertEqual(len(actual), len(target), 'load_with_copy row count mismatch')
     for t, a in zip(target, actual):
         self.assertEqual(t, a, 'load_with_copy failed')
Example no. 2
0
    def xtest_load_with_insert(self):
        """Ensure data tables load correctly (INSERT path) by loading the
        first generated flat file, then reading the rows back.

        NOTE: the 'x' prefix keeps the test runner from collecting this
        test — presumably disabled on purpose; confirm before renaming.
        """
        build_flat_files.build_flat_files(self.temp_dir, self.out_dir)
        # pick the first flat file; sorting makes the choice deterministic
        first_file = sorted(os.listdir(self.out_dir))[0]
        fn = os.path.join(self.out_dir, first_file)
        t_name = utils_and_settings.get_table_name_from_fn(first_file)
        table = build_tables.load_with_insert(db=DB, t_name=t_name, file=fn, drop_old=True)

        # close any open transaction, then read back what was loaded
        curs.execute('END')
        curs.execute("SELECT * FROM {}".format(t_name))
        actual = curs.fetchall()

        # expected rows: origin, destination, 100-series value, 200-series value
        target = [(1.0, 1.0, 10011.0, 20011.0),
                  (1.0, 2.0, 10012.0, 20012.0),
                  (1.0, 3.0, 10013.0, 20013.0),
                  (1.0, 4.0, 10014.0, 20014.0),
                  (2.0, 1.0, 10021.0, 20021.0),
                  (2.0, 2.0, 10022.0, 20022.0),
                  (2.0, 3.0, 10023.0, 20023.0),
                  (2.0, 4.0, 10024.0, 20024.0),
                  (3.0, 1.0, 10031.0, 20031.0),
                  (3.0, 2.0, 10033.0, 20033.0),
                  (3.0, 3.0, 10033.0, 20033.0),
                  (3.0, 4.0, 10034.0, 20034.0),
                  (4.0, 1.0, 10041.0, 20041.0),
                  (4.0, 2.0, 10034.0, 20034.0),
                  (4.0, 3.0, 10043.0, 20043.0),
                  (4.0, 4.0, 10044.0, 20044.0)]

        # zip() truncates silently, so a short/empty result set would
        # otherwise pass — pin the row count first
        self.assertEqual(len(actual), len(target), 'load_with_insert row count mismatch')
        for t, a in zip(target, actual):
            self.assertEqual(t, a, 'load_with_insert failed')
Example no. 3
0
    def test_flat_files_partitioned(self):
        """Make sure the flat files are created correctly, even when
        forced to be partitioned (test_max_rows=2).

        What do we expect? Directory /my_sub_dir_1/ has two files called
        TRANS_OD__100 and TRANS_OD__200. Each file has a header row and a
        header column; the other elements are OD pairs (row=O, col=D).

        From these files we are creating a new file with a header row:
            #header  origin, destination, <file 100 header>, <file 200 header>
        ...designating origin, destination, and vectorized versions of file
        100 and file 200 like this:
            <origin> <destination> <file 100 series><row><col> <file 200 series><row><col>

        The first few rows for my_sub_dir_1.csv would be:
            1, 1, 10011, 20011
            1, 2, 10012, 20012
            1, 3, 10013, 20013

        ...the first data column's values start with '100', designating the
        file TRANS_OD_100, and the second column's values start with '200',
        designating the file TRANS_OD_table_200.
        """
        # create consolidated flat files from the test files, forcing
        # partitioning by capping each output file at 2 rows per source table
        build_flat_files.build_flat_files(self.temp_dir, self.out_dir, test_max_rows=2)

        # gather and sort the output files so checks run in a stable order
        test_files = []
        for root, dirs, files in os.walk(self.out_dir):
            for f in files:
                test_files.append(os.path.join(root, f))
        test_files.sort()

        for t in test_files:
            with open(t, 'r') as f:
                content = f.readlines()

                # pop the header off so only data rows remain, and clean it up
                header = content.pop(0)
                header = header.replace('#', '').strip().split('|')

                self.assertEqual(len(content), 8)  # rows*cols per partition

                # spot-check the first and last data rows
                first_row = content[0]
                last_row = content[-1]
                base = os.path.splitext(os.path.basename(t))[0]

                if 'my_sub_dir_1_data0' in base:
                    # 100- and 200-series data (values start with '100'/'200')
                    self.assertEqual(first_row.strip(), '1.0,1.0,10011.0,20011.0')  # o=1, d=1
                    self.assertEqual(last_row.strip(), '2.0,4.0,10024.0,20024.0')   # o=2, d=4
                if 'my_sub_dir_2_data0' in base:
                    # 300- and 400-series data (values start with '300'/'400')
                    self.assertEqual(first_row.strip(), '1.0,1.0,30011.0,40011.0')  # o=1, d=1
                    self.assertEqual(last_row.strip(), '2.0,4.0,30024.0,40024.0')   # o=2, d=4
Example no. 4
0
# Configure rotating file logging: up to 5 backups of 10 MiB each,
# appending to LOG_FILE (both LOG_FILE and LOG_LEVEL come from settings).
h = logging.handlers.RotatingFileHandler(LOG_FILE,
                                         'a',
                                         maxBytes=10*1024*1024,
                                         backupCount=5)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - '
                              '%(filename)s:%(lineno)s - %(message)s',
                              datefmt="%Y-%m-%d %H:%M:%S")
h.setFormatter(formatter)
logger.addHandler(h)
logger.setLevel(LOG_LEVEL)

if __name__=='__main__':
    # main execution start
    # leave these statements here - logging info imported from settings
    for ix, d in enumerate(dirs, start=1):
        data_dir=d['data']
        scratch_dir = d['scratch']
        msg='Data loading from {} \n...to database {}. \n...Logging to {} \n'
        print(msg.format(data_dir, DB, LOG_FILE))
        #create the flat files for import
        # NOTE(review): test_max_rows=500 caps every flat file at 500 rows —
        # looks like a leftover test/debug limit; confirm before shipping
        build_flat_files.build_flat_files(data_dir, scratch_dir, test_max_rows=500)
        #create tables from header info in tables, then load data
        build_tables.build_tables(db=DB, pdir = scratch_dir, drop_old=True)
        # separator is logged between scenarios only (skipped after the last)
        if ix != len(dirs):
            logger.info("*******************************")
            logger.info("Beginning new scenario")
            logger.info("*******************************")

    # summarize the resulting database contents
    describe_db.describe_db(db=DB)