def setUp(self):
    """Prepare the Observation fixtures used by the ObservationSet serializer tests."""
    # Two distinct Observations, plus one that reuses the 'sampleA'
    # identifier so tests can exercise the addition of duplicate elements.
    wild_type = Observation('sampleA', {'phenotype': StringAttribute('WT')})
    wild_type_serializer = ObservationSerializer(wild_type)
    knockout = Observation('sampleB', {'phenotype': StringAttribute('KO')})
    knockout_serializer = ObservationSerializer(knockout)
    repeated = Observation('sampleA', {})
    repeated_serializer = ObservationSerializer(repeated)

    self.el1 = wild_type
    self.el1_serializer = wild_type_serializer
    self.el2 = knockout
    self.el2_serializer = knockout_serializer
    self.duplicate_element = repeated
    self.dup_element_serializer = repeated_serializer

    # The serialized form expected for a set holding el1 and el2.
    serialized_elements = [
        serializer.data
        for serializer in (wild_type_serializer, knockout_serializer)
    ]
    self.expected_element_set_data = {
        'multiple': True,
        'elements': serialized_elements
    }

    # A correctly formed ObservationSet built from the same two elements.
    self.element_set = ObservationSet([wild_type, knockout])

    # The helper object that actually runs the shared test logic.
    self.tester_class = ElementSetSerializerTester(ObservationSetSerializer)
def setUp(self):
    """Prepare the Observation fixtures used by the ObservationSet tests."""
    wild_type = Observation('sampleA', {'phenotype': StringAttribute('WT')})
    knockout = Observation('sampleB', {'phenotype': StringAttribute('KO')})
    # Shares the 'sampleA' identifier with the first element above.
    repeated = Observation('sampleA', {})

    self.el1, self.el2 = wild_type, knockout
    self.duplicate_element = repeated

    # The helper object that actually runs the shared test logic.
    self.tester_class = ElementSetTester(ObservationSet)
def setUp(self):
    """Prepare the Feature fixtures used by the FeatureSet tests."""
    oncogene_yes = Feature('geneA', {'oncogene': StringAttribute('Y')})
    oncogene_no = Feature('sampleB', {'oncogene': StringAttribute('N')})
    # Shares the 'geneA' identifier with the first element above.
    repeated = Feature('geneA', {})

    self.el1, self.el2 = oncogene_yes, oncogene_no
    self.duplicate_element = repeated

    # The helper object that actually runs the shared test logic.
    self.tester_class = ElementSetTester(FeatureSet)
def test_string_attribute(self):
    """Check StringAttribute against accepted and rejected inputs.

    This overlaps with other coverage, which is harmless.
    """
    # (raw input, value expected to be stored on the attribute)
    accepted = (
        ('abc', 'abc'),
        ('a string with space', 'a_string_with_space'),
    )
    for raw, stored in accepted:
        self.assertEqual(StringAttribute(raw).value, stored)

    # Inputs that must be refused with an AttributeValueError.
    for rejected in ('-9abc', 3.4):
        with self.assertRaises(AttributeValueError):
            StringAttribute(rejected)
def create_feature_set():
    """Return the serialized representation of a two-element feature set.

    Returns:
        A dict with 'multiple' set to True and 'elements' holding the
        serialized data of two Features ('featureA' and 'sampleB').
    """
    serializers = [
        FeatureSerializer(Feature(identifier, {'pathway': StringAttribute(pathway)}))
        for identifier, pathway in (('featureA', 'foo'), ('sampleB', 'bar'))
    ]
    return {
        'multiple': True,
        'elements': [serializer.data for serializer in serializers]
    }
def create_observation_set():
    """Return the serialized representation of a two-element observation set.

    Returns:
        A dict with 'multiple' set to True and 'elements' holding the
        serialized data of two Observations ('sampleA' and 'sampleB').
    """
    serializers = [
        ObservationSerializer(
            Observation(identifier, {'phenotype': StringAttribute(phenotype)})
        )
        for identifier, phenotype in (('sampleA', 'WT'), ('sampleB', 'KO'))
    ]
    return {
        'multiple': True,
        'elements': [serializer.data for serializer in serializers]
    }
def test_metadata_correct(self):
    """Check that FeatureTable metadata matches a FeatureSet built by hand.

    The expected FeatureSet is reconstructed by parsing the test TSV
    directly: the first column is the gene name and each remaining column
    becomes an attribute (integer when the cell parses as an int, string
    otherwise).
    """
    resource_path = os.path.join(TESTDIR, 'gene_annotations.tsv')
    t = FeatureTable()
    column_dict = {}
    feature_list = []
    # Use a context manager so the file handle is closed deterministically
    # rather than being left for the garbage collector.
    with open(resource_path) as fh:
        for i, line in enumerate(fh):
            contents = line.strip().split('\t')
            if i == 0:
                # header row: map positional index -> column name
                column_dict = dict(enumerate(contents[1:]))
                continue
            gene_name = contents[0]
            attr_dict = {}
            for j, v in enumerate(contents[1:]):
                try:
                    v = int(v)
                    attr = IntegerAttribute(v)
                except ValueError:
                    attr = StringAttribute(v)
                attr_dict[column_dict[j]] = attr
            feature_list.append(Feature(gene_name, attr_dict))
    expected_feature_set = FeatureSetSerializer(FeatureSet(feature_list)).data
    metadata = t.extract_metadata(resource_path)
    self.assertEqual(metadata[FEATURE_SET_KEY], expected_feature_set)
    self.assertIsNone(metadata[OBSERVATION_SET_KEY])
    self.assertIsNone(metadata[PARENT_OP_KEY])
def test_metadata_correct(self):
    """Check the metadata extracted from a FeatureTable resource.

    A FeatureSet is still rebuilt by hand from the test TSV (first column
    is the gene name, remaining columns become integer or string
    attributes), but it is no longer compared against the extracted
    metadata; see the note near the assertions.
    """
    resource_path = os.path.join(TESTDIR, 'gene_annotations.tsv')
    t = FeatureTable()
    column_dict = {}
    feature_list = []
    # Use a context manager so the file handle is closed deterministically
    # rather than being left for the garbage collector.
    with open(resource_path) as fh:
        for i, line in enumerate(fh):
            contents = line.strip().split('\t')
            if i == 0:
                # header row: map positional index -> column name
                column_dict = dict(enumerate(contents[1:]))
                continue
            gene_name = contents[0]
            attr_dict = {}
            for j, v in enumerate(contents[1:]):
                try:
                    v = int(v)
                    attr = IntegerAttribute(v)
                except ValueError:
                    attr = StringAttribute(v)
                attr_dict[column_dict[j]] = attr
            feature_list.append(Feature(gene_name, attr_dict))
    # Retained so the disabled comparison below can be restored easily.
    expected_feature_set = FeatureSetSerializer(
        FeatureSet(feature_list)).data
    metadata = t.extract_metadata(resource_path, 'tsv')
    # The comparison against expected_feature_set was disabled when the
    # automatic creation of Feature metadata for FeatureTable resource
    # types was removed. For large files, it was causing issues with
    # exceptionally large JSON failing to store in the db table.
    self.assertIsNone(metadata[FEATURE_SET_KEY])
    self.assertIsNone(metadata[OBSERVATION_SET_KEY])
    self.assertIsNone(metadata[PARENT_OP_KEY])
def test_merge_of_different_types_fails(self):
    '''
    We cannot merge two different types (e.g. an Obs Set and a Feat. Set).
    Test that it raises an exception.
    '''
    element_list1 = [self.el1, self.el2]
    some_feature = Feature('geneA', {'oncogene': StringAttribute('Y')})
    element_list2 = [
        some_feature,
    ]
    # Build both sets outside the assertRaises block so that only the
    # merge call itself is allowed to satisfy the expectation.
    obs_set = ObservationSet(element_list1)
    feature_set = FeatureSet(element_list2)
    with self.assertRaises(Exception):
        # The return value is irrelevant: the call is expected to raise.
        merge_element_set([obs_set, feature_set])
def test_adding_new_attribute(self, testcase):
    '''
    Test adding a new attribute

    `testcase` is the object whose assertion methods are used to
    report results.
    '''
    float_attr = FloatAttribute(0.01)
    int_attr = IntegerAttribute(3)
    element = self.element_class('some_identifier', {
        'keyA': float_attr,
        'keyB': int_attr
    })
    element.add_attribute('keyC', {
        'attribute_type': 'String',
        'value': 'abc'
    })
    expected_attr = StringAttribute('abc')
    testcase.assertEqual(element.attributes['keyC'], expected_attr)
    expected_keys = {'keyA', 'keyB', 'keyC'}
    existing_keys = set(element.attributes.keys())
    # assertEqual (rather than assertTrue on an == expression) reports
    # the differing keys when the check fails.
    testcase.assertEqual(expected_keys, existing_keys)
def test_metadata_correct(self):
    """Check that AnnotationTable metadata matches an ObservationSet built by hand.

    The expected ObservationSet is reconstructed by parsing the test TSV
    directly: the first column is the sample name and each remaining
    column becomes a string attribute keyed by its header.
    """
    resource_path = os.path.join(TESTDIR, 'three_column_annotation.tsv')
    t = AnnotationTable()
    column_dict = {}
    obs_list = []
    # Use a context manager so the file handle is closed deterministically
    # rather than being left for the garbage collector.
    with open(resource_path) as fh:
        for i, line in enumerate(fh):
            contents = line.strip().split('\t')
            if i == 0:
                # header row: map positional index -> column name
                column_dict = dict(enumerate(contents[1:]))
                continue
            samplename = contents[0]
            attr_dict = {}
            for j, v in enumerate(contents[1:]):
                attr_dict[column_dict[j]] = StringAttribute(v)
            obs_list.append(Observation(samplename, attr_dict))
    expected_obs_set = ObservationSetSerializer(ObservationSet(obs_list)).data
    metadata = t.extract_metadata(resource_path)
    self.assertEqual(metadata[OBSERVATION_SET_KEY], expected_obs_set)
    self.assertIsNone(metadata[FEATURE_SET_KEY])
    self.assertIsNone(metadata[PARENT_OP_KEY])
def convert(self, input_key, user_input, op_dir, staging_dir):
    """Wrap the user input in a StringAttribute and return its value.

    Args:
        input_key: key under which the converted value is returned.
        user_input: raw value passed to the StringAttribute constructor.
        op_dir: not used by this converter.
        staging_dir: not used by this converter.

    Returns:
        A single-entry dict mapping `input_key` to the attribute's value.
    """
    return {input_key: StringAttribute(user_input).value}
def setUp(self):
    """Create resources, workspaces and resource metadata for the tests.

    Three Resources and two Workspaces are created for regular_user_1.
    Metadata (an observation set and a feature set each) is saved for
    the first two Resources only; the third Resource gets none.
    """
    self.establish_clients()

    # Resources are created in this fixed order: foo, bar, baz.
    self.new_resource1, self.new_resource2, self.new_resource3 = (
        Resource.objects.create(
            name=filename,
            owner=self.regular_user_1,
            is_active=True
        )
        for filename in ('foo.txt', 'bar.txt', 'baz.txt')
    )

    # a workspace to which we will eventually add resources, and an
    # additional one stored as empty_workspace
    self.workspace = Workspace.objects.create(owner=self.regular_user_1)
    self.empty_workspace = Workspace.objects.create(
        owner=self.regular_user_1)

    # serializers for the Observations used with the different Resources
    observation_serializers = [
        ObservationSerializer(
            Observation(sample, {'phenotype': StringAttribute(phenotype)})
        )
        for sample, phenotype in (
            ('sampleA', 'WT'),
            ('sampleB', 'KO'),
            ('sampleC', 'KO'),
        )
    ]

    # serializers for the Features used with the different Resources
    feature_serializers = [
        FeatureSerializer(
            Feature(identifier, {'pathway': StringAttribute(pathway)})
        )
        for identifier, pathway in (
            ('featureA', 'foo'),
            ('featureB', 'bar'),
            ('featureC', 'bar3'),
            ('featureD', 'bar'),
        )
    ]

    def as_element_set(serializers):
        # serialized payload shared by observation sets and feature sets
        return {
            'multiple': True,
            'elements': [serializer.data for serializer in serializers]
        }

    # resource1: sampleA + sampleB, featureA + featureB
    observation_set_data1 = as_element_set(observation_serializers[:2])
    # resource2: sampleC only, featureC + featureD
    observation_set_data2 = as_element_set(observation_serializers[2:])
    feature_set_data1 = as_element_set(feature_serializers[:2])
    feature_set_data2 = as_element_set(feature_serializers[2:])

    metadata1 = {
        RESOURCE_KEY: self.new_resource1.pk,
        OBSERVATION_SET_KEY: observation_set_data1,
        FEATURE_SET_KEY: feature_set_data1,
        PARENT_OP_KEY: None
    }
    metadata2 = {
        RESOURCE_KEY: self.new_resource2.pk,
        OBSERVATION_SET_KEY: observation_set_data2,
        FEATURE_SET_KEY: feature_set_data2,
        PARENT_OP_KEY: None
    }

    # validate and save each payload in turn; invalid data raises
    for payload in (metadata1, metadata2):
        metadata_serializer = ResourceMetadataSerializer(data=payload)
        if metadata_serializer.is_valid(raise_exception=True):
            metadata_serializer.save()