def test_metadata_correct_case2(self):
    '''
    Extract metadata from the integer matrix test file and compare it
    against observation and feature sets built by parsing that same
    file directly.

    Typically, the metadata is collected following a successful
    validation; here extract_metadata is called directly.
    '''
    m = IntegerMatrix()
    resource_path = os.path.join(TESTDIR, 'test_integer_matrix.tsv')
    metadata = m.extract_metadata(resource_path)

    # Parse the test file to ensure we extracted the right content.
    # A single pass inside a context manager guarantees the file handle
    # is closed: the first line is the header, and every following line
    # contributes its first tab-delimited field as a row (gene) name.
    with open(resource_path) as handle:
        header = next(handle, '')
        gene_list = [line.split('\t')[0] for line in handle]

    # The first header field labels the row-name column, so skip it.
    samplenames = header.strip().split('\t')[1:]
    obs_list = [Observation(x) for x in samplenames]
    feature_list = [Feature(x) for x in gene_list]

    obs_set = ObservationSetSerializer(ObservationSet(obs_list)).data
    feature_set = FeatureSetSerializer(FeatureSet(feature_list)).data

    self.assertEqual(obs_set, metadata[OBSERVATION_SET_KEY])
    self.assertEqual(feature_set, metadata[FEATURE_SET_KEY])
    self.assertIsNone(metadata[PARENT_OP_KEY])
def test_fails_with_float_table(self):
    '''
    An IntegerMatrix must reject the general matrix test file
    (test_matrix.tsv), so validation is expected to fail.
    '''
    table_path = os.path.join(TESTDIR, 'test_matrix.tsv')
    matrix = IntegerMatrix()
    outcome = matrix.validate_type(table_path)
    # validate_type returns a (is_valid, error) pair; only the flag
    # is checked in this test.
    valid_flag, _ = outcome
    self.assertFalse(valid_flag)
def test_excel_parses_correctly(self):
    '''
    An excel spreadsheet validates successfully provided
    the data lives in the first sheet.
    '''
    workbook_path = os.path.join(TESTDIR, 'test_integer_matrix.xlsx')
    matrix = IntegerMatrix()
    valid_flag, _ = matrix.validate_type(workbook_path)
    self.assertTrue(valid_flag)
def test_table_without_rownames(self):
    '''
    A table lacking row names is rejected, and the reported
    error is NUMBERED_ROW_NAMES_ERROR.
    '''
    table_path = os.path.join(
        TESTDIR, 'test_integer_matrix.no_rownames.tsv')
    matrix = IntegerMatrix()
    valid_flag, message = matrix.validate_type(table_path)
    self.assertFalse(valid_flag)
    self.assertEqual(message, NUMBERED_ROW_NAMES_ERROR)
def test_reads_table_without_gene_label(self):
    '''
    A blank name for the first column is acceptable: the table
    validates and no error is reported.
    '''
    table_path = os.path.join(
        TESTDIR, 'test_integer_matrix.no_gene_label.tsv')
    matrix = IntegerMatrix()
    valid_flag, message = matrix.validate_type(table_path)
    self.assertTrue(valid_flag)
    self.assertIsNone(message)
def test_table_without_header(self):
    '''
    A table lacking a header row is rejected, and the reported
    error is NUMBERED_COLUMN_NAMES_ERROR.
    '''
    table_path = os.path.join(
        TESTDIR, 'test_integer_matrix.no_header.tsv')
    matrix = IntegerMatrix()
    valid_flag, message = matrix.validate_type(table_path)
    self.assertFalse(valid_flag)
    self.assertEqual(message, NUMBERED_COLUMN_NAMES_ERROR)
def test_excel_fails_if_not_in_first_sheet(self):
    '''
    When the data sits on a sheet other than "the first", the
    parsed table is empty and validation fails with
    EMPTY_TABLE_ERROR.

    If the first sheet did contain data, there is really nothing
    we could do to correct that.
    '''
    workbook_path = os.path.join(
        TESTDIR, 'test_integer_matrix.second_sheet.xlsx')
    matrix = IntegerMatrix()
    valid_flag, message = matrix.validate_type(workbook_path)
    self.assertFalse(valid_flag)
    self.assertEqual(message, EMPTY_TABLE_ERROR)
def test_fails_parsing_int_table_with_na(self):
    '''
    A NaN value on its own is normally handled gracefully (see
    test_reads_int_table_with_na). In this file a non-integer
    shares the column with the NaN, which checks that the special
    case handling still rejects the table.
    '''
    table_path = os.path.join(
        TESTDIR, 'test_integer_matrix.with_na_and_float.csv')
    matrix = IntegerMatrix()
    valid_flag, _ = matrix.validate_type(table_path)
    self.assertFalse(valid_flag)
def test_reads_int_table_with_na(self):
    '''
    An integer table with missing data still validates.

    This needs special handling since a NaN forces its column to be
    parsed as float, even when every other value in that column is
    an integer.
    '''
    table_path = os.path.join(
        TESTDIR, 'test_integer_matrix.with_na.csv')
    matrix = IntegerMatrix()
    valid_flag, message = matrix.validate_type(table_path)
    self.assertTrue(valid_flag)
    self.assertIsNone(message)
def test_fails_if_filetype_incorrect_case2(self):
    '''
    A file labeled as TSV that is, in fact, a CSV fails validation.

    Without looking at the table we cannot tell that the cause was
    an incorrect file extension, but the file is rejected all the
    same.
    '''
    mislabeled_path = os.path.join(
        TESTDIR, 'test_csv_integer_matrix_labeled_as_tsv.tsv')
    matrix = IntegerMatrix()
    valid_flag, _ = matrix.validate_type(mislabeled_path)
    self.assertFalse(valid_flag)
def test_fails_parsing_int_table_with_na_and_float(self):
    '''
    A NaN value on its own is normally handled gracefully (see
    test_reads_int_table_with_na). In this file a non-integer is
    placed in a different column (5), which checks that the special
    case handling rejects the table and names the offending column
    in the error message.
    '''
    table_path = os.path.join(
        TESTDIR, 'test_integer_matrix.with_multiple_na_and_float.csv')
    # The error is expected to identify the column holding the float.
    expected_message = NON_INTEGER_ERROR.format(
        cols='SW5_Treated (column 5)')

    matrix = IntegerMatrix()
    valid_flag, message = matrix.validate_type(table_path)
    self.assertFalse(valid_flag)
    self.assertEqual(message, expected_message)
def test_metadata_correct_case1(self):
    '''
    Validate the integer matrix test file, then extract its metadata
    and compare the observation set against sample names parsed
    directly from the file header.

    Typically, the metadata is collected following a successful
    validation. Do that here.
    '''
    m = IntegerMatrix()
    resource_path = os.path.join(TESTDIR, 'test_integer_matrix.tsv')
    is_valid, err = m.validate_type(resource_path, 'tsv')
    self.assertTrue(is_valid)
    self.assertIsNone(err)

    # OK, the validation worked. Get metadata
    metadata = m.extract_metadata(resource_path, 'tsv')

    # Parse the test file to ensure we extracted the right content.
    # Only the header is needed; the context manager makes sure the
    # file handle is closed rather than left for garbage collection.
    with open(resource_path) as handle:
        header = handle.readline()

    # The first header field labels the row-name column, so skip it.
    samplenames = header.strip().split('\t')[1:]
    obs_list = [Observation(x) for x in samplenames]
    obs_set = ObservationSetSerializer(ObservationSet(obs_list)).data
    self.assertEqual(obs_set, metadata[OBSERVATION_SET_KEY])

    # The feature metadata was removed because it was causing database
    # issues due to the size of the json object, so no expected
    # feature set is built here; the key is simply expected to be None.
    self.assertIsNone(metadata[FEATURE_SET_KEY])
    self.assertIsNone(metadata[PARENT_OP_KEY])
def test_reads_integer_table(self):
    '''
    Integer tables pass validation, for both the TSV and
    the CSV version of the test file.
    '''
    # A fresh IntegerMatrix is created for each file, TSV first.
    for filename in ('test_integer_matrix.tsv', 'test_integer_matrix.csv'):
        matrix = IntegerMatrix()
        valid_flag, message = matrix.validate_type(
            os.path.join(TESTDIR, filename))
        self.assertTrue(valid_flag)
        self.assertIsNone(message)
def test_duplicate_rownames_fails(self):
    '''
    Validating the duplicate-rownames test file fails, and the
    reported error is NONUNIQUE_ROW_NAMES_ERROR.
    '''
    table_path = os.path.join(
        TESTDIR, 'test_matrix.duplicate_rownames.tsv')
    matrix = IntegerMatrix()
    valid_flag, message = matrix.validate_type(table_path)
    self.assertFalse(valid_flag)
    self.assertEqual(message, NONUNIQUE_ROW_NAMES_ERROR)