def test_alternate(self):
    '''Check alternate manifest with no location or skipheader

    Parses ``self.data`` with ``self.manifest_alternate`` and fails the
    test if that call raises an ordinary exception.
    '''
    try:
        parse(self.manifest_alternate, self.data)
    except Exception:
        # Deliberately not a bare ``except:``: KeyboardInterrupt and
        # SystemExit must reach the test runner instead of being reported
        # as a parse failure. The original exception stays attached to the
        # AssertionError as its context.
        self.fail("Couldn't parse from alternate manifest.")
def test_no_file(self):
    '''Check the errors raised for unusable resource paths.

    ``parse`` must raise MissingResourceError for 'foobar.xlsx' and
    UnsupportedFiletypeError for 'manifest.xlsx' located under
    ``self.path``.
    '''
    missing_resource = 'foobar.xlsx'
    self.assertRaises(
        MissingResourceError, parse, self.manifest, missing_resource)

    unsupported_resource = os.path.join(self.path, 'manifest.xlsx')
    self.assertRaises(
        UnsupportedFiletypeError, parse, self.manifest,
        unsupported_resource)
def test_data_obj_matches_sample(self):
    '''Check each JSON sample parses to records equal to its ground truth.

    For every entry of ``self.data``, item 0 is handed to ``parse`` and
    item 2 is the expected result of ``to_dict(orient='records')``.
    '''
    for sample in self.data:
        frame = parse(self.manifest, sample[0])['data']
        records = frame.to_dict(orient='records')
        self.assertEqual(sample[2], records)
def test_data_df_matches_sample(self):
    '''Check each JSON sample parses to its ground truth data frame.

    For every entry of ``self.data``, item 0 is handed to ``parse``; the
    parsed data, with its columns sorted, must equal item 1.
    '''
    for sample in self.data:
        frame = parse(self.manifest, sample[0])['data']
        column_sorted = frame.sort_index(axis=1)
        self.assertTrue(column_sorted.equals(sample[1]))
def __init__(self, language_config):
    '''Build the object from a language configuration.

    Reads ``language_config['config']`` (which must contain an 'L1' key)
    and ``language_config['data']``; every item of the latter must provide
    'manifest' and 'resource' keys, which are passed to ``parse``.

    NOTE(review): in the visible code the result of ``return_dupes`` is
    assigned to ``dupes`` but never used afterwards - confirm whether the
    duplicate logging continues outside this view.
    '''
    self.config = language_config['config']
    self.name = slugify(self.config['L1'])
    # parse: one parsed data object per entry of language_config['data']
    self.data_objs = [
        parse(d['manifest'], d['resource'])
        for d in language_config['data']
    ]
    # validate
    for do in self.data_objs:
        # Turn empty strings into nan so they count as missing values.
        do['data'] = do['data'].replace('', nan)
        if return_null(do['data']):
            logger.warning('Removing null rows')
            # Presumably do['data'] is a pandas DataFrame, in which case
            # this drops the rows whose 'word' value is missing - TODO confirm.
            do['data'] = do['data'].dropna(subset=['word'], how='all')
    # transduce
    self.transduce()
    # sort
    self.sort()
    # join: the result of self.joined() is kept as self._df
    self._df = self.joined()
    # index
    self.index_key_to_column()
    # validate ID: fall back to the index (as strings) when validate_id
    # rejects self._df
    if not self.validate_id(self._df):
        logger.warning(
            "No value for 'entryID' was found in your data. Using index instead. Note, this will not be consistent across builds."
        )
        self._df['entryID'] = self._df.index.astype(str)
    # log dupes
    dupes = return_dupes(self._df)
def test_data_obj_matches_sample(self):
    '''Check parsed pkl test data, as records, equals SAMPLE_DATA_OBJ.
    '''
    frame = parse(self.manifest, self.data)['data']
    records = frame.to_dict(orient='records')
    self.assertEqual(SAMPLE_DATA_OBJ, records)
def test_data_df_matches_sample(self):
    '''Check parsed pkl test data equals SAMPLE_DATA_DF.

    The parsed data has its columns sorted before the comparison.
    '''
    frame = parse(self.manifest, self.data)['data']
    column_sorted = frame.sort_index(axis=1)
    self.assertTrue(column_sorted.equals(SAMPLE_DATA_DF))
def test_missing_sheet(self):
    '''Check MissingResourceError is raised for a missing sheet in 'location'.
    '''
    self.assertRaises(
        MissingResourceError, parse, self.manifest, self.missing_sheet_data)
def test_data_df_matches_sample(self):
    '''Check parsed XLSX test data equals SAMPLE_DATA_DF_BLANK.
    '''
    frame = parse(self.manifest, self.data)['data']
    matches_sample = frame.equals(SAMPLE_DATA_DF_BLANK)
    self.assertTrue(matches_sample)
def test_404(self):
    '''Check a RequestException is raised when the URL answers with a 404.
    '''
    not_found_url = 'https://www.google.com/foobar1'
    self.assertRaises(
        RequestException, parse, self.json_manifest, not_found_url)
def test_no_connection(self):
    '''Check MissingResourceError is raised when the site can't be reached.
    '''
    unreachable_url = 'https://foo.bar'
    self.assertRaises(
        MissingResourceError, parse, self.json_manifest, unreachable_url)