def setUp(self):
    # create a couple of Observations to use, along with corresponding serializers
    self.el1 = Observation('sampleA', {'phenotype': StringAttribute('WT')})
    self.el1_serializer = ObservationSerializer(self.el1)

    self.el2 = Observation('sampleB', {'phenotype': StringAttribute('KO')})
    self.el2_serializer = ObservationSerializer(self.el2)

    # a duplicate of el1 above, for testing addition of duplicate elements:
    self.duplicate_element = Observation('sampleA', {})
    self.dup_element_serializer = ObservationSerializer(
        self.duplicate_element)

    # the correct serialized representation of an ElementSet instance
    self.expected_element_set_data = {
        'multiple': True,
        'elements': [self.el1_serializer.data, self.el2_serializer.data]
    }

    # a correctly formed instance of an ObservationSet
    self.element_set = ObservationSet([self.el1, self.el2])

    # the class that will execute the tests
    self.tester_class = ElementSetSerializerTester(
        ObservationSetSerializer)

def setUp(self):
    # create a couple of Observations to use
    self.el1 = Observation('sampleA', {'phenotype': StringAttribute('WT')})
    self.el2 = Observation('sampleB', {'phenotype': StringAttribute('KO')})

    # a duplicate of the element above:
    self.duplicate_element = Observation('sampleA', {})

    # instantiate the class that will actually execute the tests
    self.tester_class = ElementSetTester(ObservationSet)

def test_observation_set_csv_converter(self):
    obs1 = Observation('foo')
    obs2 = Observation('bar')
    obs_set = ObservationSet([obs1, obs2])
    d = obs_set.to_dict()
    c = ObservationSetCsvConverter()
    # order doesn't matter, so we need to check both orders:
    converted_input = c.convert('xyz', d, '', '')
    self.assertTrue(
        ({'xyz': 'foo,bar'} == converted_input)
        or ({'xyz': 'bar,foo'} == converted_input)
    )

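# The test above implies the converter contract: convert() takes the input
# name, the serialized set dict, and two further arguments (their purpose is
# not shown here; the names op_dir/staging_dir below are assumptions), and
# returns a dict mapping the name to a comma-joined string of element ids.
# A minimal sketch of a converter satisfying that contract, assuming the
# serialized dict carries its elements under an 'elements' key whose entries
# have 'id' fields. This is a hypothetical illustration, not the project's
# actual implementation:

class SketchObservationSetCsvConverter:
    def convert(self, input_name, data, op_dir, staging_dir):
        # join the element identifiers into a single CSV string;
        # set iteration order is not guaranteed, as the test acknowledges
        ids = [el['id'] for el in data['elements']]
        return {input_name: ','.join(ids)}
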
def create_observation_set():
    # create a couple of Observations and their corresponding serializers
    el1 = Observation('sampleA', {'phenotype': StringAttribute('WT')})
    el1_serializer = ObservationSerializer(el1)

    el2 = Observation('sampleB', {'phenotype': StringAttribute('KO')})
    el2_serializer = ObservationSerializer(el2)

    # the correct serialized representation of an ElementSet instance
    observation_set_data = {
        'multiple': True,
        'elements': [el1_serializer.data, el2_serializer.data]
    }
    return observation_set_data

def create(self, validated_data):
    '''
    Returns an Observation instance from the validated data.
    '''
    attr_dict = self._gather_attributes(validated_data)
    return Observation(validated_data['id'], attr_dict)

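# If ObservationSerializer follows the standard Django REST Framework
# serializer lifecycle (an assumption; the surrounding tests only exercise
# instance-to-data serialization), create() above is reached through the
# usual is_valid()/save() flow. A minimal usage sketch; the 'String'
# attribute_type label is assumed by analogy with the 'Float'/'Integer'
# labels seen in the fixtures elsewhere in this suite:

payload = {
    'id': 'sampleA',
    'attributes': {
        'phenotype': {'attribute_type': 'String', 'value': 'WT'}
    }
}
serializer = ObservationSerializer(data=payload)
serializer.is_valid(raise_exception=True)
observation = serializer.save()  # DRF dispatches to create(validated_data)
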
def test_metadata_correct_case2(self):
    '''
    Typically, the metadata is collected following a successful
    validation. Do that here.
    '''
    m = IntegerMatrix()
    resource_path = os.path.join(TESTDIR, 'test_integer_matrix.tsv')
    metadata = m.extract_metadata(resource_path)

    # Parse the test file to ensure we extracted the right content.
    line = open(resource_path).readline()
    contents = line.strip().split('\t')
    samplenames = contents[1:]
    obs_list = [Observation(x) for x in samplenames]

    gene_list = []
    for i, line in enumerate(open(resource_path)):
        if i > 0:
            g = line.split('\t')[0]
            gene_list.append(g)
    feature_list = [Feature(x) for x in gene_list]

    obs_set = ObservationSetSerializer(ObservationSet(obs_list)).data
    feature_set = FeatureSetSerializer(FeatureSet(feature_list)).data
    self.assertEqual(obs_set, metadata[OBSERVATION_SET_KEY])
    self.assertEqual(feature_set, metadata[FEATURE_SET_KEY])
    self.assertIsNone(metadata[PARENT_OP_KEY])

def test_metadata_correct(self):
    resource_path = os.path.join(TESTDIR, 'three_column_annotation.tsv')
    t = AnnotationTable()
    column_dict = {}
    obs_list = []
    for i, line in enumerate(open(resource_path)):
        if i == 0:
            contents = line.strip().split('\t')
            for j, c in enumerate(contents[1:]):
                column_dict[j] = c
        else:
            contents = line.strip().split('\t')
            samplename = contents[0]
            attr_dict = {}
            for j, v in enumerate(contents[1:]):
                attr = UnrestrictedStringAttribute(v)
                attr_dict[column_dict[j]] = attr
            obs = Observation(samplename, attr_dict)
            obs_list.append(obs)

    expected_obs_set = ObservationSetSerializer(
        ObservationSet(obs_list)).data
    metadata = t.extract_metadata(resource_path, 'tsv')
    self.assertEqual(metadata[OBSERVATION_SET_KEY], expected_obs_set)
    self.assertIsNone(metadata[FEATURE_SET_KEY])
    self.assertIsNone(metadata[PARENT_OP_KEY])

def test_metadata_correct_case2(self):
    '''
    Typically, the metadata is collected following a successful
    validation. However, here we don't validate. Check that the
    method still parses the table in the process.
    '''
    m = Matrix()
    resource_path = os.path.join(TESTDIR, 'test_matrix.tsv')
    metadata = m.extract_metadata(resource_path, 'tsv')

    # Parse the test file to ensure we extracted the right content.
    line = open(resource_path).readline()
    contents = line.strip().split('\t')
    samplenames = contents[1:]
    obs_list = [Observation(x) for x in samplenames]

    gene_list = []
    for i, line in enumerate(open(resource_path)):
        if i > 0:
            g = line.split('\t')[0]
            gene_list.append(g)
    feature_list = [Feature(x) for x in gene_list]

    obs_set = ObservationSetSerializer(ObservationSet(obs_list)).data
    feature_set = FeatureSetSerializer(FeatureSet(feature_list)).data
    self.assertEqual(obs_set, metadata[OBSERVATION_SET_KEY])

    # The assertion below was commented out when the feature metadata was
    # removed, as the size of the JSON object was causing database issues:
    # self.assertEqual(feature_set, metadata[FEATURE_SET_KEY])
    self.assertIsNone(metadata[FEATURE_SET_KEY])
    self.assertIsNone(metadata[PARENT_OP_KEY])

def setUp(self):
    float_attr = FloatAttribute(0.01)
    int_attr = IntegerAttribute(3)
    self.demo_element = Observation('my_identifier', {
        'keyA': float_attr,
        'keyB': int_attr
    })

    # the class that will execute the tests
    self.tester_class = ElementTester(Observation)

def test_observation_set_list_converter(self):
    '''
    Tests that we get properly formatted JSON-compatible arrays
    (of strings in this case). Used when we need to supply a WDL
    job with a list of relevant samples as an array of strings,
    for instance.
    '''
    obs1 = Observation('foo')
    obs2 = Observation('bar')
    obs_set = ObservationSet([obs1, obs2])
    d = obs_set.to_dict()
    c = ObservationSetListConverter()
    # order doesn't matter, so we need to check both orders:
    converted_input = c.convert('xyz', d, '', '')
    self.assertTrue(
        ({'xyz': ['foo', 'bar']} == converted_input)
        or ({'xyz': ['bar', 'foo']} == converted_input)
    )

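# By the same implied contract, the list variant returns the identifiers as
# a JSON-compatible array rather than a joined string. A hedged sketch
# mirroring the CSV converter sketch above (again a hypothetical
# illustration, not the project's actual implementation):

class SketchObservationSetListConverter:
    def convert(self, input_name, data, op_dir, staging_dir):
        # return the element identifiers as a plain list of strings
        return {input_name: [el['id'] for el in data['elements']]}
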
def extract_metadata(self, resource_path, parent_op_pk=None):
    super().extract_metadata(resource_path, parent_op_pk)

    # the FeatureSet comes from the rows:
    f_set = FeatureSet([Feature(x) for x in self.table.index])
    self.metadata[DataResource.FEATURE_SET] = FeatureSetSerializer(
        f_set).data

    # the ObservationSet comes from the columns:
    o_set = ObservationSet([Observation(x) for x in self.table.columns])
    self.metadata[DataResource.OBSERVATION_SET] = ObservationSetSerializer(
        o_set).data
    return self.metadata

def extract_metadata(self, resource_path, file_extension, parent_op_pk=None):
    super().extract_metadata(resource_path, file_extension, parent_op_pk)

    # Note: the FeatureSet (derived from the rows) was removed from the
    # metadata, as inserting the resulting large JSON objects into the
    # database was causing issues:
    # f_set = FeatureSet([Feature(x) for x in self.table.index])
    # self.metadata[DataResource.FEATURE_SET] = FeatureSetSerializer(f_set).data

    # the ObservationSet comes from the columns:
    o_set = ObservationSet([Observation(x) for x in self.table.columns])
    self.metadata[DataResource.OBSERVATION_SET] = ObservationSetSerializer(
        o_set).data
    return self.metadata

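# As exercised in the tests above, callers hand extract_metadata() a file
# path and extension and read the serialized sets back out of the returned
# dict. A minimal usage sketch, assuming TESTDIR and OBSERVATION_SET_KEY are
# the same names the tests import, and that OBSERVATION_SET_KEY matches
# DataResource.OBSERVATION_SET as those tests imply:

m = Matrix()
metadata = m.extract_metadata(
    os.path.join(TESTDIR, 'test_matrix.tsv'), 'tsv')
observation_set = metadata[OBSERVATION_SET_KEY]  # serialized ObservationSet
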
def setUp(self):
    float_attr = FloatAttribute(0.01)
    int_attr = IntegerAttribute(3)
    boolean_attr = BooleanAttribute(True)
    bounded_float_attr = BoundedFloatAttribute(0.1, min=0.0, max=1.0)

    self.demo_element = Observation('my_identifier', {
        'keyA': float_attr,
        'keyB': int_attr
    })
    self.demo_element2 = Observation('my_identifier', {})

    self.demo_element_data = {
        'id': 'my_identifier',
        'attributes': {
            'keyA': {'attribute_type': 'Float', 'value': 0.01},
            'keyB': {'attribute_type': 'Integer', 'value': 3}
        }
    }
    self.demo_element_data2 = {'id': 'my_identifier', 'attributes': {}}

    self.bad_element_data = {
        'id': 'my_identifier',
        'attributes': {
            'keyA': {'attribute_type': 'Float', 'value': 'abc'},
            'keyB': {'attribute_type': 'Integer', 'value': 3}
        }
    }

    self.demo_element_data_w_bounds = {
        'id': 'my_identifier',
        'attributes': {
            'pvalue': {
                'attribute_type': 'BoundedFloat',
                'value': 0.1,
                'min': 0.0,
                'max': 1.0
            },
            'keyB': {'attribute_type': 'Integer', 'value': 3}
        }
    }
    self.demo_element_w_bounds = Observation('my_identifier', {
        'pvalue': bounded_float_attr,
        'keyB': int_attr
    })

    self.bad_demo_element_data_w_bounds = {
        'id': 'my_identifier',
        'attributes': {
            'pvalue': {
                'attribute_type': 'BoundedFloat',
                'value': 1.1,  # out of bounds!!
                'min': 0.0,
                'max': 1.0
            },
            'keyB': {'attribute_type': 'Integer', 'value': 3}
        }
    }

    self.demo_element_w_bool = Observation('my_identifier', {
        'keyA': int_attr,
        'some_bool': boolean_attr
    })
    self.demo_element_data_w_bool1 = {
        'id': 'my_identifier',
        'attributes': {
            'keyA': {'attribute_type': 'Integer', 'value': 3},
            'some_bool': {'attribute_type': 'Boolean', 'value': 'true'}
        }
    }
    self.demo_element_data_w_bool2 = {
        'id': 'my_identifier',
        'attributes': {
            'keyA': {'attribute_type': 'Integer', 'value': 3},
            'some_bool': {'attribute_type': 'Boolean', 'value': 1}
        }
    }
    self.demo_element_data_w_bool3 = {
        'id': 'my_identifier',
        'attributes': {
            'keyA': {'attribute_type': 'Integer', 'value': 3},
            'some_bool': {'attribute_type': 'Boolean', 'value': True}
        }
    }
    self.bad_demo_element_data_w_bool = {
        'id': 'my_identifier',
        'attributes': {
            'keyA': {'attribute_type': 'Integer', 'value': 3},
            'some_bool': {'attribute_type': 'Boolean', 'value': -1}
        }
    }

    # the class that will execute the tests
    self.tester_class = ElementSerializerTester(ObservationSerializer)

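# The boolean fixtures above suggest the serializer normalizes several
# encodings ('true', 1, True) to a canonical boolean and rejects anything
# else (e.g. -1). A hedged sketch of what such a normalizer might look like;
# normalize_boolean is a hypothetical helper, and the project's actual
# validation logic may differ:

def normalize_boolean(value):
    # accept canonical booleans, the integers 0/1, and common string forms
    if isinstance(value, bool):
        return value
    if value in (0, 1):
        return bool(value)
    if isinstance(value, str) and value.lower() in ('true', 'false'):
        return value.lower() == 'true'
    raise ValueError(f'Cannot interpret {value!r} as a boolean.')
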
def setUp(self):
    self.establish_clients()

    self.new_resource1 = Resource.objects.create(
        name='foo.txt',
        owner=self.regular_user_1,
        is_active=True
    )
    self.new_resource2 = Resource.objects.create(
        name='bar.txt',
        owner=self.regular_user_1,
        is_active=True
    )
    self.new_resource3 = Resource.objects.create(
        name='baz.txt',
        owner=self.regular_user_1,
        is_active=True
    )

    # create a workspace to which we will eventually add resources
    self.workspace = Workspace.objects.create(owner=self.regular_user_1)
    self.empty_workspace = Workspace.objects.create(owner=self.regular_user_1)

    # create a few Observations to use with the different Resources
    obs1 = Observation('sampleA', {'phenotype': StringAttribute('WT')})
    obs1_serializer = ObservationSerializer(obs1)
    obs2 = Observation('sampleB', {'phenotype': StringAttribute('KO')})
    obs2_serializer = ObservationSerializer(obs2)
    obs3 = Observation('sampleC', {'phenotype': StringAttribute('KO')})
    obs3_serializer = ObservationSerializer(obs3)

    # create Features to use, along with corresponding serializers
    feature1 = Feature('featureA', {'pathway': StringAttribute('foo')})
    feature1_serializer = FeatureSerializer(feature1)
    feature2 = Feature('featureB', {'pathway': StringAttribute('bar')})
    feature2_serializer = FeatureSerializer(feature2)
    feature3 = Feature('featureC', {'pathway': StringAttribute('bar3')})
    feature3_serializer = FeatureSerializer(feature3)
    feature4 = Feature('featureD', {'pathway': StringAttribute('bar')})
    feature4_serializer = FeatureSerializer(feature4)

    # create an ObservationSet for resource1
    observation_set_data1 = {
        'multiple': True,
        'elements': [obs1_serializer.data, obs2_serializer.data]
    }
    # create an ObservationSet for resource2
    observation_set_data2 = {
        'multiple': True,
        'elements': [obs3_serializer.data]
    }
    # create a FeatureSet for resource1
    feature_set_data1 = {
        'multiple': True,
        'elements': [feature1_serializer.data, feature2_serializer.data]
    }
    # create a FeatureSet for resource2
    feature_set_data2 = {
        'multiple': True,
        'elements': [feature3_serializer.data, feature4_serializer.data]
    }

    metadata1 = {
        RESOURCE_KEY: self.new_resource1.pk,
        OBSERVATION_SET_KEY: observation_set_data1,
        FEATURE_SET_KEY: feature_set_data1,
        PARENT_OP_KEY: None
    }
    metadata2 = {
        RESOURCE_KEY: self.new_resource2.pk,
        OBSERVATION_SET_KEY: observation_set_data2,
        FEATURE_SET_KEY: feature_set_data2,
        PARENT_OP_KEY: None
    }
    rms1 = ResourceMetadataSerializer(data=metadata1)
    if rms1.is_valid(raise_exception=True):
        rms1.save()
    rms2 = ResourceMetadataSerializer(data=metadata2)
    if rms2.is_valid(raise_exception=True):
        rms2.save()