def test_metadata_correct_case2(self):
    '''
    Typically, the metadata is collected following a successful
    validation. However, here we don't validate. Check that it
    goes and collects the table in the process
    '''
    m = Matrix()
    resource_path = os.path.join(TESTDIR, 'test_matrix.tsv')
    metadata = m.extract_metadata(resource_path)

    # Parse the test file ourselves so we can verify the extracted
    # metadata against the actual file content. Read the file once,
    # inside a context manager (the original left two file handles
    # unclosed, which raises ResourceWarning under test runners).
    with open(resource_path) as fh:
        lines = fh.readlines()

    # First row: tab-delimited header; everything after the first
    # column is a sample name.
    samplenames = lines[0].strip().split('\t')[1:]
    obs_list = [Observation(x) for x in samplenames]

    # Remaining rows: the first column holds the gene identifier.
    gene_list = [line.split('\t')[0] for line in lines[1:]]
    feature_list = [Feature(x) for x in gene_list]

    obs_set = ObservationSetSerializer(ObservationSet(obs_list)).data
    feature_set = FeatureSetSerializer(FeatureSet(feature_list)).data
    self.assertEqual(obs_set, metadata[OBSERVATION_SET_KEY])
    self.assertEqual(feature_set, metadata[FEATURE_SET_KEY])
    self.assertIsNone(metadata[PARENT_OP_KEY])
def test_metadata_correct_case1(self):
    '''
    Typically, the metadata is collected following a successful
    validation. Do that here
    '''
    m = Matrix()
    resource_path = os.path.join(TESTDIR, 'test_matrix.tsv')
    is_valid, err = m.validate_type(resource_path)
    self.assertTrue(is_valid)
    self.assertIsNone(err)

    # OK, the validation worked. Get metadata
    metadata = m.extract_metadata(resource_path)

    # Parse the test file ourselves so we can verify the extracted
    # metadata against the actual file content. Read the file once,
    # inside a context manager (the original left two file handles
    # unclosed, which raises ResourceWarning under test runners).
    with open(resource_path) as fh:
        lines = fh.readlines()

    # First row: tab-delimited header; everything after the first
    # column is a sample name.
    samplenames = lines[0].strip().split('\t')[1:]
    obs_list = [Observation(x) for x in samplenames]

    # Remaining rows: the first column holds the gene identifier.
    gene_list = [line.split('\t')[0] for line in lines[1:]]
    feature_list = [Feature(x) for x in gene_list]

    obs_set = ObservationSetSerializer(ObservationSet(obs_list)).data
    feature_set = FeatureSetSerializer(FeatureSet(feature_list)).data
    self.assertEqual(obs_set, metadata[OBSERVATION_SET_KEY])
    self.assertEqual(feature_set, metadata[FEATURE_SET_KEY])
    self.assertIsNone(metadata[PARENT_OP_KEY])
def test_table_without_rownames(self):
    '''
    A table lacking row names should fail validation with the
    appropriate error message.
    '''
    resource_path = os.path.join(
        TESTDIR, 'test_integer_matrix.no_rownames.tsv')
    is_valid, err = Matrix().validate_type(resource_path)
    self.assertFalse(is_valid)
    self.assertEqual(err, NUMBERED_ROW_NAMES_ERROR)
def test_table_without_header(self):
    '''
    A table lacking a header row should fail validation with the
    appropriate error message.
    '''
    resource_path = os.path.join(
        TESTDIR, 'test_integer_matrix.no_header.tsv')
    is_valid, err = Matrix().validate_type(resource_path)
    self.assertFalse(is_valid)
    self.assertEqual(err, NUMBERED_COLUMN_NAMES_ERROR)
def test_reads_table_without_gene_label(self):
    '''
    A table whose first column name is blank should still pass
    validation.
    '''
    resource_path = os.path.join(
        TESTDIR, 'test_integer_matrix.no_gene_label.tsv')
    is_valid, err = Matrix().validate_type(resource_path)
    self.assertTrue(is_valid)
    self.assertIsNone(err)
def test_reads_integer_table(self):
    '''
    A table of all-integer values should pass validation.
    '''
    resource_path = os.path.join(TESTDIR, 'test_integer_matrix.tsv')
    is_valid, err = Matrix().validate_type(resource_path)
    self.assertTrue(is_valid)
    self.assertIsNone(err)
def test_reads_float_table(self):
    '''
    A table mixing numeric types (ints and floats) should pass
    validation.
    '''
    resource_path = os.path.join(TESTDIR, 'test_matrix.tsv')
    is_valid, err = Matrix().validate_type(resource_path)
    self.assertTrue(is_valid)
    self.assertIsNone(err)
def test_reads_float_table_with_na(self):
    '''
    A numeric table containing missing values should still pass
    validation.
    '''
    resource_path = os.path.join(TESTDIR, 'test_matrix.with_na.tsv')
    is_valid, err = Matrix().validate_type(resource_path)
    self.assertTrue(is_valid)
    self.assertIsNone(err)
def test_incorrect_table(self):
    '''
    A table containing a non-numeric (string) entry should fail
    validation, naming the offending column in the error message.
    '''
    resource_path = os.path.join(TESTDIR, 'test_incorrect_matrix.tsv')
    is_valid, err = Matrix().validate_type(resource_path)
    self.assertFalse(is_valid)

    # The error should identify the column holding the bad value.
    bad_col_str = 'SW2_Control (column 2)'
    self.assertEqual(err, NON_NUMERIC_ERROR.format(cols=bad_col_str))
def test_with_real_files(self):
    '''
    Runs the test with real files rather than providing mocked
    metadata. Not a true unit test, but whatever.
    '''
    # Find a workspace owned by the test user that carries at least
    # two active resources; keep scanning so the last qualifying
    # workspace wins, matching the existing behavior.
    workspaces = Workspace.objects.filter(owner=self.regular_user_1)
    if len(workspaces) == 0:
        raise ImproperlyConfigured('Need at least one workspace.')

    workspace = None
    for candidate in workspaces:
        if len(candidate.resources.filter(is_active = True)) >= 2:
            workspace = candidate

    if workspace is None:
        raise ImproperlyConfigured('Need at least two resources that'
            ' are in a workspace. Modify the test database'
        )

    # we will attach the metadata to two resources:
    active_resources = [
        r for r in workspace.resources.all() if r.is_active
    ]

    test_dir = os.path.join(
        os.path.dirname(__file__), 'resource_validation_test_files')

    # First resource: metadata extracted from a feature table.
    ft_path = os.path.join(test_dir, 'deseq_results_example_concat.tsv')
    self.assertTrue(os.path.exists(ft_path))
    metadata0 = FeatureTable().extract_metadata(ft_path, 'tsv')
    add_metadata_to_resource(active_resources[0], metadata0)

    # Second resource: metadata extracted from a matrix.
    mtx_path = os.path.join(test_dir, 'test_matrix.tsv')
    self.assertTrue(os.path.exists(mtx_path))
    metadata1 = Matrix().extract_metadata(mtx_path, 'tsv')
    add_metadata_to_resource(active_resources[1], metadata1)

    # Query the workspace-level observation metadata endpoint.
    url = reverse(
        'workspace-observations-metadata',
        kwargs={'workspace_pk':workspace.pk}
    )
    response = self.authenticated_regular_client.get(url)
    response_json = response.json()
def test_duplicate_rownames_fails(self):
    '''
    A table with duplicated row names should fail validation with
    the appropriate error message.
    '''
    resource_path = os.path.join(
        TESTDIR, 'test_matrix.duplicate_rownames.tsv')
    is_valid, err = Matrix().validate_type(resource_path)
    self.assertFalse(is_valid)
    self.assertEqual(err, NONUNIQUE_ROW_NAMES_ERROR)