def test_extract_dpae_two_files_diff(self):
     """Import two DPAE files in sequence and check the cumulative hiring count.

     Per the fixture description, the second file contains duplicated
     records, one record from the future and only 2 really new valid
     records, so the total must grow from 6 to 6 + 2.
     """
     # Resolve both fixture paths up front, before any job runs.
     fixture_paths = [
         self.get_data_file_path(fixture_name)
         for fixture_name in (FIRST_DPAE_FILE_NAME, SECOND_DPAE_FILE_NAME)
     ]
     # Expected number of Hiring rows after each successive import.
     expected_totals = (6, 6 + 2)

     for fixture_path, expected_total in zip(fixture_paths, expected_totals):
         job = extract_dpae.DpaeExtractJob(fixture_path)
         job.run()
         self.assertEqual(Hiring.query.count(), expected_total)
 def test_extract_dpae_two_files_diff(self):
     """Run the DPAE extract job on two files in a row and check the hiring totals.

     The second file contains one record from the future.
     """
     path_month_1 = self.get_data_file_path(FIRST_DPAE_FILE_NAME)
     path_month_2 = self.get_data_file_path(SECOND_DPAE_FILE_NAME)

     first_job = extract_dpae.DpaeExtractJob(path_month_1)
     first_job.run()
     hirings_after_first = Hiring.query.count()
     self.assertEqual(hirings_after_first, 6)

     second_job = extract_dpae.DpaeExtractJob(path_month_2)
     second_job.run()
     hirings_after_second = Hiring.query.count()
     # Expectation was changed from 6+5 to 6+2: only 2 DPAE records in
     # SECOND_DPAE_FILE_NAME fall between 10/11/2016 and 10/12/2016.
     self.assertEqual(hirings_after_second, 6 + 2)
Exemple #3
0
 def test_extract_dpae_two_files_diff(self):
     """Import two DPAE exports in sequence and check the cumulative Dpae count.

     Per the fixture description, the updated (second) file contains
     duplicated records, one record from the future and only 2 really new
     valid records, so the total must grow from 6 to 6 + 2.
     """
     filename_first_month = self.get_data_file_path("LBB_XDPDPA_DPAE_20151010_20161110_20161110_174915.csv")
     filename_second_month = self.get_data_file_path("LBB_XDPDPA_DPAE_20151110_20161210_20161210_094110.csv")
     # Set on the class (not an instance), so it applies to both jobs below.
     extract_dpae.DpaeExtractJob.backup_first = False
     task = extract_dpae.DpaeExtractJob(filename_first_month)
     task.run()
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest.TestCase in Python 3.12.
     self.assertEqual(Dpae.query.count(), 6)
     task = extract_dpae.DpaeExtractJob(filename_second_month)
     task.run()
     self.assertEqual(Dpae.query.count(), 6 + 2)
 def test_verify_right_number_dpae(self):
     """Check the hiring count after an import whose registry entries were removed.

     Imports the first DPAE file, deletes its ImportTask entry and the most
     recent DpaeStatistics row, then asserts the Hiring count is still 6.
     """
     self.assertEqual(Hiring.query.count(), 0)

     dpae_path = self.get_data_file_path(FIRST_DPAE_FILE_NAME)
     job = extract_dpae.DpaeExtractJob(dpae_path)
     job.run()
     self.assertEqual(Hiring.query.count(), 6)

     # Delete the file from the registry to simulate the importer job
     # crashing before registering the file.
     registry_entries = ImportTask.query.filter(
         ImportTask.filename == FIRST_DPAE_FILE_NAME)
     registry_entries.delete()
     latest_statistics = DpaeStatistics.query.order_by(
         DpaeStatistics.most_recent_data_date.desc()).first()
     latest_statistics.delete()

     # NOTE(review): this second job is instantiated but never run, so the
     # assertion below re-checks the same count as above. Confirm whether a
     # `run()` call is missing here.
     job = extract_dpae.DpaeExtractJob(dpae_path)
     self.assertEqual(Hiring.query.count(), 6)
Exemple #5
0
 def test_extract_dpae(self):
     """Import the first DPAE file and check 6 hirings exist, all with date_insertion set."""
     self.assertEqual(Hiring.query.count(), 0)

     dpae_path = self.get_data_file_path(FIRST_DPAE_FILE_NAME)
     job = extract_dpae.DpaeExtractJob(dpae_path)
     job.run()
     self.assertEqual(Hiring.query.count(), 6)

     # Check that date_insertion is filled on every row.
     # NOTE(review): `== None` is presumably a deliberate ORM column
     # comparison (not a Python identity test); do not rewrite it as `is None`.
     missing_insertion_date = Hiring.query.filter(Hiring.date_insertion == None)
     self.assertEqual(missing_insertion_date.count(), 0)
 def test_extract_bz2_format(self):
     """Run the extract job on the ".bz2" variant of the first DPAE file and expect 6 hirings."""
     bz2_name = FIRST_DPAE_FILE_NAME + ".bz2"
     bz2_path = self.get_data_file_path(bz2_name)

     job = extract_dpae.DpaeExtractJob(bz2_path)
     job.run()

     hiring_total = Hiring.query.count()
     self.assertEqual(hiring_total, 6)
Exemple #7
0
 def test_extract_bz2_format(self):
     """Run the extract job on a ".csv.bz2" DPAE export and expect 6 Dpae rows."""
     filename = self.get_data_file_path("LBB_XDPDPA_DPAE_20151010_20161110_20161110_174915.csv.bz2")
     # Set on the class (not an instance) before the job is created.
     extract_dpae.DpaeExtractJob.backup_first = False
     task = extract_dpae.DpaeExtractJob(filename)
     task.run()
     # assertEqual replaces the deprecated assertEquals alias, which was
     # removed from unittest.TestCase in Python 3.12.
     self.assertEqual(Dpae.query.count(), 6)