Ejemplo n.º 1
0
 def test_alternate(self):
     '''Check that the alternate manifest (no location or skipheader) parses.

     Any ordinary exception raised by ``parse`` is turned into a test
     failure whose message includes the original error, so the cause is
     visible in the test report.
     '''
     try:
         parse(self.manifest_alternate, self.data)
     # Catch Exception rather than using a bare except, so that
     # KeyboardInterrupt and SystemExit are not reported as parse failures.
     except Exception as err:
         self.fail(
             "Couldn't parse from alternate manifest. ({!r})".format(err))
Ejemplo n.º 2
0
 def test_no_file(self):
     '''Check the errors raised for a nonexistent file and a bad filetype.

     A path that does not exist must raise ``MissingResourceError``; the
     ``manifest.xlsx`` file under ``self.path`` must raise
     ``UnsupportedFiletypeError``.
     '''
     self.assertRaises(
         MissingResourceError, parse, self.manifest, 'foobar.xlsx')
     self.assertRaises(
         UnsupportedFiletypeError,
         parse,
         self.manifest,
         os.path.join(self.path, 'manifest.xlsx'),
     )
Ejemplo n.º 3
0
 def test_data_obj_matches_sample(self):
     '''Check each parsed JSON sample against its ground truth records.

     Every entry of ``self.data`` is indexed positionally: item 0 is the
     resource handed to ``parse`` and item 2 is the expected list of records.
     '''
     for sample in self.data:
         resource = sample[0]
         result = parse(self.manifest, resource)
         records = result['data'].to_dict(orient='records')
         self.assertEqual(sample[2], records)
Ejemplo n.º 4
0
 def test_data_df_matches_sample(self):
     '''Check each parsed JSON sample against its ground truth data frame.

     Every entry of ``self.data`` is indexed positionally: item 0 is the
     resource handed to ``parse`` and item 1 is the expected frame. The
     parsed frame's columns are sorted by label before the comparison.
     '''
     for sample in self.data:
         frame = parse(self.manifest, sample[0])['data']
         column_sorted = frame.sort_index(axis=1)
         self.assertTrue(column_sorted.equals(sample[1]))
Ejemplo n.º 5
0
 def __init__(self, language_config):
     '''Initialise the instance from a language configuration.

     Reads ``language_config['config']`` and ``language_config['data']``.
     Each entry of the latter must provide ``'manifest'`` and ``'resource'``
     keys, which are passed to ``parse``. The parsed data is then cleaned,
     transduced, sorted, joined into ``self._df`` and indexed, in that order.

     NOTE(review): the visible body ends right after ``dupes`` is computed;
     whatever consumes it is not shown here.
     '''
     self.config = language_config['config']
     # The instance name is the slugified value of the config's 'L1' entry.
     self.name = slugify(self.config['L1'])
     # parse: one parsed object per entry in language_config['data']
     self.data_objs = [
         parse(d['manifest'], d['resource'])
         for d in language_config['data']
     ]
     # validate: turn empty strings into nan so they count as missing values
     # (do['data'] is presumably a pandas DataFrame - TODO confirm)
     for do in self.data_objs:
         do['data'] = do['data'].replace('', nan)
         # return_null is an external helper; presumably truthy when the
         # data contains null values - verify against its definition
         if return_null(do['data']):
             logger.warning('Removing null rows')
             # Only a missing 'word' value causes a row to be dropped.
             do['data'] = do['data'].dropna(subset=['word'], how='all')
     # transduce
     self.transduce()
     # sort
     self.sort()
     # join: the combined result is kept on the instance as self._df
     self._df = self.joined()
     # index
     self.index_key_to_column()
     # validate ID: when validate_id rejects the frame, fall back to the
     # index (converted to strings) as the 'entryID' column
     if not self.validate_id(self._df):
         logger.warning(
             "No value for 'entryID' was found in your data. Using index instead. Note, this will not be consistent across builds."
         )
         self._df['entryID'] = self._df.index.astype(str)
     # log dupes
     # NOTE(review): 'dupes' is not used within the visible lines; the
     # logging presumably follows in code outside this view - confirm
     dupes = return_dupes(self._df)
Ejemplo n.º 6
0
 def test_data_obj_matches_sample(self):
     '''Check parsed pkl data, as a list of records, equals SAMPLE_DATA_OBJ.
     '''
     frame = parse(self.manifest, self.data)['data']
     records = frame.to_dict(orient='records')
     self.assertEqual(SAMPLE_DATA_OBJ, records)
Ejemplo n.º 7
0
 def test_data_df_matches_sample(self):
     '''Check parsed pkl data, with columns sorted, equals SAMPLE_DATA_DF.
     '''
     frame = parse(self.manifest, self.data)['data']
     column_sorted = frame.sort_index(axis=1)
     self.assertTrue(column_sorted.equals(SAMPLE_DATA_DF))
Ejemplo n.º 8
0
 def test_missing_sheet(self):
     '''Check MissingResourceError is raised for a missing sheet in 'location'
     '''
     self.assertRaises(
         MissingResourceError, parse, self.manifest, self.missing_sheet_data)
Ejemplo n.º 9
0
 def test_data_df_matches_sample(self):
     '''Check parsed XLSX data equals the ground truth SAMPLE_DATA_DF_BLANK
     '''
     frame = parse(self.manifest, self.data)['data']
     matches = frame.equals(SAMPLE_DATA_DF_BLANK)
     self.assertTrue(matches)
 def test_404(self):
     '''Check a RequestException is raised when the remote resource is a 404
     '''
     missing_url = 'https://www.google.com/foobar1'
     self.assertRaises(
         RequestException, parse, self.json_manifest, missing_url)
 def test_no_connection(self):
     '''Check MissingResourceError is raised when the site can't be reached
     '''
     unreachable_url = 'https://foo.bar'
     self.assertRaises(
         MissingResourceError, parse, self.json_manifest, unreachable_url)