def setUp(self):
        """Prepare the Feature fixtures and the FeatureSet tester helper."""
        # two distinct Features, each carrying an 'oncogene' string attribute
        first_feature = Feature('geneA', {'oncogene': StringAttribute('Y')})
        second_feature = Feature('sampleB', {'oncogene': StringAttribute('N')})
        self.el1 = first_feature
        self.el2 = second_feature

        # shares its identifier ('geneA') with self.el1 but has no attributes
        self.duplicate_element = Feature('geneA', {})

        # the helper that actually executes the tests, built around FeatureSet
        self.tester_class = ElementSetTester(FeatureSet)
Example #2
0
 def test_feature_set_csv_converter(self):
     '''
     Tests that FeatureSetCsvConverter turns the dict form of a
     FeatureSet into a comma-delimited string of the feature
     identifiers, keyed by the input name ('xyz' here).
     '''
     f1 = Feature('foo')
     f2 = Feature('bar')
     f_set = FeatureSet([f1, f2])
     d = f_set.to_dict()
     c = FeatureSetCsvConverter()
     converted_input = c.convert('xyz', d, '', '')
     # order doesn't matter, so either ordering of the ids is acceptable.
     # assertIn reports the actual converted value when the check fails.
     acceptable_results = [{'xyz': 'foo,bar'}, {'xyz': 'bar,foo'}]
     self.assertIn(converted_input, acceptable_results)
Example #3
0
def create_feature_set():
    """
    Return the serialized payload describing a two-element feature set.

    The returned dict has 'multiple' set to True and an 'elements' list
    holding the FeatureSerializer output for each of the two Features.
    """
    features = [
        Feature('featureA', {'pathway': StringAttribute('foo')}),
        Feature('sampleB', {'pathway': StringAttribute('bar')}),
    ]
    serializers = [FeatureSerializer(feature) for feature in features]
    return {
        'multiple': True,
        'elements': [serializer.data for serializer in serializers]
    }
Example #4
0
 def create(self, validated_data):
     """
     Build a Feature from already-validated data.

     The attributes come from self._gather_attributes(validated_data)
     and the identifier from validated_data['id'].
     """
     attributes = self._gather_attributes(validated_data)
     identifier = validated_data['id']
     return Feature(identifier, attributes)
    def test_metadata_correct(self):
        '''
        Checks the metadata FeatureTable extracts from gene_annotations.tsv.

        The expected feature set is rebuilt by parsing the file directly:
        the header row names the attributes, and each following row gives
        a gene name followed by its attribute values.  The observation set
        and parent operation entries are expected to be None.
        '''
        resource_path = os.path.join(TESTDIR, 'gene_annotations.tsv')
        t = FeatureTable()
        column_dict = {}
        feature_list = []
        # the context manager guarantees the file handle gets closed
        with open(resource_path) as fin:
            for i, line in enumerate(fin):
                contents = line.strip().split('\t')
                if i == 0:
                    # header row: map column position -> attribute name
                    for j, c in enumerate(contents[1:]):
                        column_dict[j] = c
                else:
                    gene_name = contents[0]
                    attr_dict = {}
                    for j, v in enumerate(contents[1:]):
                        # values that parse as integers become integer
                        # attributes; everything else is kept as a string
                        try:
                            v = int(v)
                            attr = IntegerAttribute(v)
                        except ValueError:
                            attr = StringAttribute(v)

                        attr_dict[column_dict[j]] = attr
                    f = Feature(gene_name, attr_dict)
                    feature_list.append(f)
        expected_feature_set = FeatureSetSerializer(FeatureSet(feature_list)).data
        metadata = t.extract_metadata(resource_path)
        self.assertEqual(metadata[FEATURE_SET_KEY], expected_feature_set)
        self.assertIsNone(metadata[OBSERVATION_SET_KEY])
        self.assertIsNone(metadata[PARENT_OP_KEY])
    def test_metadata_correct_case2(self):
        '''
        Checks the metadata IntegerMatrix extracts from
        test_integer_matrix.tsv: the observation set is expected to match
        the header columns and the feature set the first field of every
        remaining row.  Note that no separate validation call is made
        before extract_metadata here.
        '''
        m = IntegerMatrix()
        resource_path = os.path.join(TESTDIR, 'test_integer_matrix.tsv')
        metadata = m.extract_metadata(resource_path)

        # Parse the test file to ensure we extracted the right content.
        # A single context-managed pass ensures the handle gets closed.
        with open(resource_path) as fin:
            header = fin.readline()
            # the first field of every remaining line is the gene name
            gene_list = [line.split('\t')[0] for line in fin]

        samplenames = header.strip().split('\t')[1:]
        obs_list = [Observation(x) for x in samplenames]
        feature_list = [Feature(x) for x in gene_list]

        obs_set = ObservationSetSerializer(ObservationSet(obs_list)).data
        feature_set = FeatureSetSerializer(FeatureSet(feature_list)).data

        self.assertEqual(obs_set, metadata[OBSERVATION_SET_KEY])
        self.assertEqual(feature_set, metadata[FEATURE_SET_KEY])
        self.assertIsNone(metadata[PARENT_OP_KEY])
Example #7
0
    def test_metadata_correct(self):
        '''
        Checks the metadata FeatureTable extracts from gene_annotations.tsv
        (passed with the 'tsv' format argument).  The feature set,
        observation set and parent operation entries are all expected
        to be None.
        '''
        resource_path = os.path.join(TESTDIR, 'gene_annotations.tsv')
        t = FeatureTable()
        column_dict = {}
        feature_list = []
        # the context manager guarantees the file handle gets closed
        with open(resource_path) as fin:
            for i, line in enumerate(fin):
                contents = line.strip().split('\t')
                if i == 0:
                    # header row: map column position -> attribute name
                    for j, c in enumerate(contents[1:]):
                        column_dict[j] = c
                else:
                    gene_name = contents[0]
                    attr_dict = {}
                    for j, v in enumerate(contents[1:]):
                        # values that parse as integers become integer
                        # attributes; everything else is kept as a string
                        try:
                            v = int(v)
                            attr = IntegerAttribute(v)
                        except ValueError:
                            attr = StringAttribute(v)

                        attr_dict[column_dict[j]] = attr
                    f = Feature(gene_name, attr_dict)
                    feature_list.append(f)
        # only needed by the disabled assertion below; kept so that the
        # assertion can be restored without rebuilding the expectation
        expected_feature_set = FeatureSetSerializer(
            FeatureSet(feature_list)).data
        metadata = t.extract_metadata(resource_path, 'tsv')
        # Commented out when we removed the automatic creation of Feature metadata
        # for FeatureTable resource types. For large files, it was causing issues
        # with exceptionally large JSON failing to store in db table.
        #self.assertEqual(metadata[FEATURE_SET_KEY], expected_feature_set)
        self.assertIsNone(metadata[FEATURE_SET_KEY])
        self.assertIsNone(metadata[OBSERVATION_SET_KEY])
        self.assertIsNone(metadata[PARENT_OP_KEY])
Example #8
0
    def test_metadata_correct_case2(self):
        '''
        Typically, the metadata is collected following a successful
        validation.  However, here we don't validate.  Check that
        it goes and collects the table in the process.

        The observation set is expected to match the header columns of
        test_matrix.tsv; the feature set and parent operation entries
        are expected to be None.
        '''
        m = Matrix()
        resource_path = os.path.join(TESTDIR, 'test_matrix.tsv')
        metadata = m.extract_metadata(resource_path, 'tsv')

        # Parse the test file to ensure we extracted the right content.
        # A single context-managed pass ensures the handle gets closed.
        with open(resource_path) as fin:
            header = fin.readline()
            # the first field of every remaining line is the gene name
            gene_list = [line.split('\t')[0] for line in fin]

        samplenames = header.strip().split('\t')[1:]
        obs_list = [Observation(x) for x in samplenames]
        feature_list = [Feature(x) for x in gene_list]

        obs_set = ObservationSetSerializer(ObservationSet(obs_list)).data
        # only needed by the disabled assertion below
        feature_set = FeatureSetSerializer(FeatureSet(feature_list)).data

        self.assertEqual(obs_set, metadata[OBSERVATION_SET_KEY])
        # Commented out when removed the feature metadata, as it was causing database
        # issues due to the size of the json object.
        #self.assertEqual(feature_set, metadata[FEATURE_SET_KEY])
        self.assertIsNone(metadata[FEATURE_SET_KEY])
        self.assertIsNone(metadata[PARENT_OP_KEY])
 def setUp(self):
     """Build a Feature with a float and an integer attribute, plus the tester."""
     attributes = {
         'keyA': FloatAttribute(0.01),
         'keyB': IntegerAttribute(3)
     }
     self.demo_element = Feature('my_identifier', attributes)
     # the helper that will execute the tests, built around Feature
     self.tester_class = ElementTester(Feature)
Example #10
0
 def test_feature_set_list_converter(self):
     '''
     Tests that we get properly formatted JSON-compatible
     arrays (of strings in this case). Used when we need to
     supply a WDL job with a list of relevant features as an
     array of strings, for instance.
     '''
     f1 = Feature('foo')
     f2 = Feature('bar')
     f_set = FeatureSet([f1, f2])
     d = f_set.to_dict()
     c = FeatureSetListConverter()
     converted_input = c.convert('xyz', d, '', '')
     # order doesn't matter, so either ordering of the ids is acceptable.
     # assertIn reports the actual converted value when the check fails.
     acceptable_results = [{'xyz': ['foo', 'bar']}, {'xyz': ['bar', 'foo']}]
     self.assertIn(converted_input, acceptable_results)
Example #11
0
    def extract_metadata(self, resource_path, parent_op_pk=None):
        """
        Extend the parent's metadata with feature and observation sets.

        After delegating to the parent implementation, the index of
        self.table becomes a serialized FeatureSet and its columns a
        serialized ObservationSet; both are stored on self.metadata,
        which is then returned.

        NOTE(review): self.table and self.metadata are presumably set up
        by the parent call -- confirm against the base class.
        """
        super().extract_metadata(resource_path, parent_op_pk)

        # rows of the table supply the features
        row_features = [Feature(row_id) for row_id in self.table.index]
        serialized_features = FeatureSetSerializer(FeatureSet(row_features)).data
        self.metadata[DataResource.FEATURE_SET] = serialized_features

        # columns of the table supply the observations
        column_observations = [
            Observation(column_id) for column_id in self.table.columns
        ]
        serialized_observations = ObservationSetSerializer(
            ObservationSet(column_observations)).data
        self.metadata[DataResource.OBSERVATION_SET] = serialized_observations

        return self.metadata
    def setUp(self):
        """Prepare Features, their serializers and the expected set payload."""
        # two Features, each paired with a FeatureSerializer
        self.el1 = Feature('geneA', {'oncogene': StringAttribute('WT')})
        self.el1_serializer = FeatureSerializer(self.el1)
        self.el2 = Feature('geneB', {'oncogene': StringAttribute('KO')})
        self.el2_serializer = FeatureSerializer(self.el2)

        # same identifier as el1 ('geneA'); used when testing the
        # addition of duplicate elements
        duplicate = Feature('geneA', {})
        self.duplicate_element = duplicate
        self.dup_element_serializer = FeatureSerializer(duplicate)

        # what a correctly serialized set of el1 and el2 looks like
        serialized_elements = [
            serializer.data
            for serializer in (self.el1_serializer, self.el2_serializer)
        ]
        self.expected_element_set_data = {
            'multiple': True,
            'elements': serialized_elements
        }

        # a properly constructed FeatureSet holding both Features
        self.element_set = FeatureSet([self.el1, self.el2])

        # the helper that will execute the tests
        self.tester_class = ElementSetSerializerTester(FeatureSetSerializer)
 def test_merge_of_different_types_fails(self):
     '''
     Sets of different types (e.g. an ObservationSet and a FeatureSet)
     cannot be merged. Check that merge_element_set raises an exception
     when handed one of each.
     '''
     lone_feature = Feature('geneA', {'oncogene': StringAttribute('Y')})
     obs_set = ObservationSet([self.el1, self.el2])
     feature_set = FeatureSet([lone_feature])
     with self.assertRaises(Exception):
         merge_element_set([obs_set, feature_set])
    def setUp(self):
        """
        Prepare Feature instances and matching raw payloads for the
        FeatureSerializer tests.

        Fixtures cover plain float/integer attributes, an element with no
        attributes, a bounded float (inside and outside its bounds) and
        several spellings of a boolean value.  The payloads prefixed with
        'bad_' carry a value that does not fit the declared attribute
        type or bounds.
        """
        identifier = 'my_identifier'

        # Each factory returns a fresh dict per call so that no two
        # payloads share a nested object.
        def integer_data():
            return {'attribute_type': 'Integer', 'value': 3}

        def float_data(value):
            return {'attribute_type': 'Float', 'value': value}

        def bounded_float_data(value):
            return {
                'attribute_type': 'BoundedFloat',
                'value': value,
                'min': 0.0,
                'max': 1.0
            }

        def boolean_data(value):
            return {'attribute_type': 'Boolean', 'value': value}

        def element_data(attributes):
            return {'id': identifier, 'attributes': attributes}

        def float_int_data(float_value):
            return element_data({
                'keyA': float_data(float_value),
                'keyB': integer_data()
            })

        def bounded_data(pvalue):
            return element_data({
                'pvalue': bounded_float_data(pvalue),
                'keyB': integer_data()
            })

        def bool_data(flag):
            return element_data({
                'keyA': integer_data(),
                'some_bool': boolean_data(flag)
            })

        float_attr = FloatAttribute(0.01)
        int_attr = IntegerAttribute(3)
        boolean_attr = BooleanAttribute(True)
        bounded_float_attr = BoundedFloatAttribute(0.1, min=0.0, max=1.0)

        # plain float + integer attributes
        self.demo_element = Feature(identifier, {
            'keyA': float_attr,
            'keyB': int_attr
        })
        # same identifier, no attributes at all
        self.demo_element2 = Feature(identifier, {})

        self.demo_element_data = float_int_data(0.01)
        self.demo_element_data2 = element_data({})
        # 'abc' is not a float
        self.bad_element_data = float_int_data('abc')

        # bounded float attribute
        self.demo_element_data_w_bounds = bounded_data(0.1)
        self.demo_element_w_bounds = Feature(identifier, {
            'pvalue': bounded_float_attr,
            'keyB': int_attr
        })
        # 1.1 is out of bounds!!
        self.bad_demo_element_data_w_bounds = bounded_data(1.1)

        # boolean attribute, supplied as a string, an int and a bool
        self.demo_element_w_bool = Feature(identifier, {
            'keyA': int_attr,
            'some_bool': boolean_attr
        })
        self.demo_element_data_w_bool1 = bool_data('true')
        self.demo_element_data_w_bool2 = bool_data(1)
        self.demo_element_data_w_bool3 = bool_data(True)
        self.bad_demo_element_data_w_bool = bool_data(-1)

        # the helper that will execute the tests
        self.tester_class = ElementSerializerTester(FeatureSerializer)
Example #15
0
    def setUp(self):
        """
        Create three Resources, two Workspaces and saved metadata for the
        first two Resources.

        Resource 1 gets observations sampleA/sampleB and features
        featureA/featureB; resource 2 gets observation sampleC and
        features featureC/featureD.  Resource 3 gets no metadata here.
        """
        self.establish_clients()
        owner = self.regular_user_1

        def make_resource(filename):
            # every Resource here is active and owned by the same user
            return Resource.objects.create(
                name=filename,
                owner=owner,
                is_active=True
            )

        self.new_resource1 = make_resource('foo.txt')
        self.new_resource2 = make_resource('bar.txt')
        self.new_resource3 = make_resource('baz.txt')

        # a workspace to which resources will eventually be added
        self.workspace = Workspace.objects.create(owner=owner)
        self.empty_workspace = Workspace.objects.create(owner=owner)

        # Observations (identifier, phenotype) and their serializers
        observation_specs = (
            ('sampleA', 'WT'),
            ('sampleB', 'KO'),
            ('sampleC', 'KO'),
        )
        observation_serializers = [
            ObservationSerializer(
                Observation(name, {'phenotype': StringAttribute(phenotype)}))
            for name, phenotype in observation_specs
        ]

        # Features (identifier, pathway) and their serializers
        feature_specs = (
            ('featureA', 'foo'),
            ('featureB', 'bar'),
            ('featureC', 'bar3'),
            ('featureD', 'bar'),
        )
        feature_serializers = [
            FeatureSerializer(
                Feature(name, {'pathway': StringAttribute(pathway)}))
            for name, pathway in feature_specs
        ]

        def as_set_data(serializers):
            # serialized form of an element set holding the given members
            return {
                'multiple': True,
                'elements': [serializer.data for serializer in serializers]
            }

        # first two observations/features go to resource1, the rest to
        # resource2
        metadata_payloads = [
            {
                RESOURCE_KEY: self.new_resource1.pk,
                OBSERVATION_SET_KEY: as_set_data(observation_serializers[:2]),
                FEATURE_SET_KEY: as_set_data(feature_serializers[:2]),
                PARENT_OP_KEY: None
            },
            {
                RESOURCE_KEY: self.new_resource2.pk,
                OBSERVATION_SET_KEY: as_set_data(observation_serializers[2:]),
                FEATURE_SET_KEY: as_set_data(feature_serializers[2:]),
                PARENT_OP_KEY: None
            },
        ]
        for payload in metadata_payloads:
            rms = ResourceMetadataSerializer(data=payload)
            if rms.is_valid(raise_exception=True):
                rms.save()