def setUp(self):
        """
        Build the fixtures used by the ObservationSet serializer tests:
        two distinct Observations, a bare Observation that reuses the
        first one's identifier, the expected serialized payload, and the
        tester object parameterized with ObservationSetSerializer.
        """
        # two observations that differ in both id and phenotype
        wild_type = Observation('sampleA', {'phenotype': StringAttribute('WT')})
        knockout = Observation('sampleB', {'phenotype': StringAttribute('KO')})
        self.el1, self.el2 = wild_type, knockout
        self.el1_serializer = ObservationSerializer(wild_type)
        self.el2_serializer = ObservationSerializer(knockout)

        # same identifier as el1 (but no attributes), used when testing
        # the addition of duplicate elements
        repeated = Observation('sampleA', {})
        self.duplicate_element = repeated
        self.dup_element_serializer = ObservationSerializer(repeated)

        # what a set holding el1 and el2 should look like once serialized
        serialized_members = [
            self.el1_serializer.data,
            self.el2_serializer.data,
        ]
        self.expected_element_set_data = {
            'multiple': True,
            'elements': serialized_members
        }

        # a well-formed ObservationSet holding the same two members
        self.element_set = ObservationSet([wild_type, knockout])

        # the object that actually runs the shared test logic
        self.tester_class = ElementSetSerializerTester(
            ObservationSetSerializer)
    def setUp(self):
        """
        Prepare the Observation fixtures and the tester object
        (parameterized with ObservationSet) that executes the tests.
        """
        # two observations with distinct ids and phenotypes
        self.el1, self.el2 = (
            Observation(sample_id, {'phenotype': StringAttribute(phenotype)})
            for sample_id, phenotype in (('sampleA', 'WT'), ('sampleB', 'KO'))
        )

        # shares its identifier with el1, but carries no attributes
        self.duplicate_element = Observation('sampleA', {})

        # the object that will actually run the tests
        self.tester_class = ElementSetTester(ObservationSet)
Example #3
0
 def test_observation_set_csv_converter(self):
     '''
     Tests that converting the dict form of an ObservationSet
     yields a comma-delimited string of the observation
     identifiers, stored under the key given as the first
     argument to `convert`.
     '''
     obs1 = Observation('foo')
     obs2 = Observation('bar')
     obs_set = ObservationSet([obs1, obs2])
     d = obs_set.to_dict()
     c = ObservationSetCsvConverter()
     converted_input = c.convert('xyz', d, '', '')
     # order doesn't matter, so either ordering is acceptable.
     # assertIn reports the actual value on failure, unlike
     # assertTrue on a combined boolean.
     acceptable = [{'xyz': 'foo,bar'}, {'xyz': 'bar,foo'}]
     self.assertIn(converted_input, acceptable)
Example #4
0
def create_observation_set():
    """
    Return the serialized representation of a set containing two
    Observations ('sampleA' and 'sampleB'), each carrying a single
    'phenotype' string attribute.
    """
    samples = (('sampleA', 'WT'), ('sampleB', 'KO'))

    # serialize each observation individually
    serialized_elements = [
        ObservationSerializer(
            Observation(sample_id, {'phenotype': StringAttribute(phenotype)})
        ).data
        for sample_id, phenotype in samples
    ]

    # the correct serialized representation of an ElementSet instance
    return {'multiple': True, 'elements': serialized_elements}
Example #5
0
 def create(self, validated_data):
     '''
     Build and return an Observation from `validated_data`.

     The attributes come from `self._gather_attributes` and the
     identifier is read from the 'id' key.
     '''
     attributes = self._gather_attributes(validated_data)
     identifier = validated_data['id']
     return Observation(identifier, attributes)
    def test_metadata_correct_case2(self):
        '''
        Checks that the metadata extracted from an integer matrix file
        agrees with what we parse directly from that file: the
        observations are taken from the header row and the features
        from the first column of the remaining rows.
        '''
        m = IntegerMatrix()
        resource_path = os.path.join(TESTDIR, 'test_integer_matrix.tsv')
        metadata = m.extract_metadata(resource_path)

        # Parse the test file to ensure we extracted the right content.
        # The file is read once inside a `with` block so the handle is
        # closed deterministically.
        with open(resource_path) as fin:
            # header row: sample names follow the leading column
            samplenames = fin.readline().strip().split('\t')[1:]
            # each remaining row starts with the gene identifier
            gene_list = [line.split('\t')[0] for line in fin]

        obs_list = [Observation(x) for x in samplenames]
        feature_list = [Feature(x) for x in gene_list]

        obs_set = ObservationSetSerializer(ObservationSet(obs_list)).data
        feature_set = FeatureSetSerializer(FeatureSet(feature_list)).data

        self.assertEqual(obs_set, metadata[OBSERVATION_SET_KEY])
        self.assertEqual(feature_set, metadata[FEATURE_SET_KEY])
        self.assertIsNone(metadata[PARENT_OP_KEY])
Example #7
0
 def test_metadata_correct(self):
     '''
     Checks that the metadata extracted from an annotation table
     matches the observations parsed directly from the file. Each
     non-header row gives one Observation whose id is the first
     column and whose attributes are the remaining columns, keyed
     by the header names.
     '''
     resource_path = os.path.join(TESTDIR, 'three_column_annotation.tsv')
     t = AnnotationTable()
     obs_list = []
     # `with` ensures the file handle is closed once parsing is done
     with open(resource_path) as fin:
         # header row: attribute names follow the leading id column
         column_names = fin.readline().strip().split('\t')[1:]
         for line in fin:
             contents = line.strip().split('\t')
             samplename = contents[0]
             # index into the header (rather than zip) so that a row
             # with more values than header columns still errors out
             attr_dict = {
                 column_names[j]: UnrestrictedStringAttribute(v)
                 for j, v in enumerate(contents[1:])
             }
             obs_list.append(Observation(samplename, attr_dict))
     expected_obs_set = ObservationSetSerializer(
         ObservationSet(obs_list)).data
     metadata = t.extract_metadata(resource_path, 'tsv')
     self.assertEqual(metadata[OBSERVATION_SET_KEY], expected_obs_set)
     self.assertIsNone(metadata[FEATURE_SET_KEY])
     self.assertIsNone(metadata[PARENT_OP_KEY])
Example #8
0
    def test_metadata_correct_case2(self):
        '''
        Typically, the metadata is collected following a successful
        validation.  However, here we don't validate.  Check that
        it goes and collects the table in the process
        '''
        m = Matrix()
        resource_path = os.path.join(TESTDIR, 'test_matrix.tsv')
        metadata = m.extract_metadata(resource_path, 'tsv')

        # Parse the header of the test file to get the expected sample
        # names. The `with` block closes the handle once it is read.
        with open(resource_path) as fin:
            samplenames = fin.readline().strip().split('\t')[1:]
        obs_list = [Observation(x) for x in samplenames]

        obs_set = ObservationSetSerializer(ObservationSet(obs_list)).data

        self.assertEqual(obs_set, metadata[OBSERVATION_SET_KEY])
        # Feature metadata was removed, as it was causing database
        # issues due to the size of the json object. Hence there is no
        # expected feature set to build here; the entry should be None.
        self.assertIsNone(metadata[FEATURE_SET_KEY])
        self.assertIsNone(metadata[PARENT_OP_KEY])
 def setUp(self):
     '''
     Create a demo Observation carrying one float and one integer
     attribute, plus the tester object parameterized with Observation.
     '''
     attributes = {
         'keyA': FloatAttribute(0.01),
         'keyB': IntegerAttribute(3)
     }
     self.demo_element = Observation('my_identifier', attributes)

     # the object that will actually run the tests
     self.tester_class = ElementTester(Observation)
Example #10
0
 def test_observation_set_list_converter(self):
     '''
     Tests that we get properly formatted JSON-compatible
     arrays (of strings in this case). Used when we need to
     supply a WDL job with a list of relevant samples as an
     array of strings, for instance.
     '''
     obs1 = Observation('foo')
     obs2 = Observation('bar')
     obs_set = ObservationSet([obs1, obs2])
     d = obs_set.to_dict()
     c = ObservationSetListConverter()
     converted_input = c.convert('xyz', d, '', '')
     # order doesn't matter, so either ordering is acceptable.
     # assertIn reports the actual value on failure, unlike
     # assertTrue on a combined boolean.
     acceptable = [{'xyz': ['foo', 'bar']}, {'xyz': ['bar', 'foo']}]
     self.assertIn(converted_input, acceptable)
Example #11
0
    def extract_metadata(self, resource_path, parent_op_pk=None):
        '''
        Calls the parent's `extract_metadata`, then records the
        serialized FeatureSet (from the index of `self.table`) and
        ObservationSet (from its columns) in `self.metadata`.

        Returns `self.metadata`.
        '''
        super().extract_metadata(resource_path, parent_op_pk)

        # rows of the table are the features
        row_features = [Feature(row_id) for row_id in self.table.index]
        feature_serializer = FeatureSetSerializer(FeatureSet(row_features))
        self.metadata[DataResource.FEATURE_SET] = feature_serializer.data

        # columns of the table are the observations
        col_observations = [
            Observation(col_id) for col_id in self.table.columns
        ]
        observation_serializer = ObservationSetSerializer(
            ObservationSet(col_observations))
        self.metadata[DataResource.OBSERVATION_SET] = \
            observation_serializer.data

        return self.metadata
Example #12
0
    def extract_metadata(self,
                         resource_path,
                         file_extension,
                         parent_op_pk=None):
        '''
        Calls the parent's `extract_metadata`, then records the
        serialized ObservationSet (built from the columns of
        `self.table`) in `self.metadata`.

        Returns `self.metadata`.
        '''
        super().extract_metadata(resource_path, file_extension, parent_op_pk)

        # Note: a FeatureSet (built from the table rows) is deliberately
        # NOT added to the metadata. It was removed since it was causing
        # issues with large json objects being inserted into the database.

        # columns of the table are the observations
        col_observations = [
            Observation(col_id) for col_id in self.table.columns
        ]
        observation_serializer = ObservationSetSerializer(
            ObservationSet(col_observations))
        self.metadata[DataResource.OBSERVATION_SET] = \
            observation_serializer.data

        return self.metadata
    def setUp(self):
        '''
        Build the Observation instances and the matching (or
        deliberately malformed) serialized payloads used by the
        ObservationSerializer tests, plus the tester object itself.
        '''
        element_id = 'my_identifier'

        def typed_value(type_name, value, **extras):
            # always returns a new dict so no two payloads share state
            entry = {'attribute_type': type_name, 'value': value}
            entry.update(extras)
            return entry

        def element_payload(**attributes):
            # serialized form of an element with the given attributes
            return {'id': element_id, 'attributes': attributes}

        a_float = FloatAttribute(0.01)
        an_int = IntegerAttribute(3)
        a_bool = BooleanAttribute(True)
        a_bounded_float = BoundedFloatAttribute(0.1, min=0.0, max=1.0)

        # --- plain float/integer attributes ---
        self.demo_element = Observation(
            element_id, {'keyA': a_float, 'keyB': an_int})

        self.demo_element2 = Observation(element_id, {})

        self.demo_element_data = element_payload(
            keyA=typed_value('Float', 0.01),
            keyB=typed_value('Integer', 3))

        self.demo_element_data2 = element_payload()

        # 'abc' is given as the value of a Float attribute
        self.bad_element_data = element_payload(
            keyA=typed_value('Float', 'abc'),
            keyB=typed_value('Integer', 3))

        # --- bounded float attribute ---
        self.demo_element_data_w_bounds = element_payload(
            pvalue=typed_value('BoundedFloat', 0.1, min=0.0, max=1.0),
            keyB=typed_value('Integer', 3))
        self.demo_element_w_bounds = Observation(
            element_id, {'pvalue': a_bounded_float, 'keyB': an_int})

        # value of 1.1 is out of bounds!!
        self.bad_demo_element_data_w_bounds = element_payload(
            pvalue=typed_value('BoundedFloat', 1.1, min=0.0, max=1.0),
            keyB=typed_value('Integer', 3))

        # --- boolean attribute, with several spellings of the value ---
        self.demo_element_w_bool = Observation(
            element_id, {'keyA': an_int, 'some_bool': a_bool})

        self.demo_element_data_w_bool1 = element_payload(
            keyA=typed_value('Integer', 3),
            some_bool=typed_value('Boolean', 'true'))

        self.demo_element_data_w_bool2 = element_payload(
            keyA=typed_value('Integer', 3),
            some_bool=typed_value('Boolean', 1))

        self.demo_element_data_w_bool3 = element_payload(
            keyA=typed_value('Integer', 3),
            some_bool=typed_value('Boolean', True))

        self.bad_demo_element_data_w_bool = element_payload(
            keyA=typed_value('Integer', 3),
            some_bool=typed_value('Boolean', -1))

        # the object that will actually run the tests
        self.tester_class = ElementSerializerTester(ObservationSerializer)
Example #14
0
    def setUp(self):
        '''
        Creates three active Resources and two Workspaces owned by
        `self.regular_user_1`, then builds and saves metadata
        (ObservationSet + FeatureSet, no parent operation) for the
        first two Resources.
        '''
        self.establish_clients()

        # three active resources, all owned by the same regular user
        self.new_resource1 = Resource.objects.create(
            name = 'foo.txt',
            owner = self.regular_user_1,
            is_active=True
        )
        self.new_resource2 = Resource.objects.create(
            name = 'bar.txt',
            owner = self.regular_user_1,
            is_active=True
        )
        self.new_resource3 = Resource.objects.create(
            name = 'baz.txt',
            owner = self.regular_user_1,
            is_active=True
        )

        # create a workspace to which we will eventually add resources
        self.workspace = Workspace.objects.create(
            owner = self.regular_user_1
        )

        # a second workspace; judging by the name it is meant to stay
        # without resources -- TODO confirm against the tests using it
        self.empty_workspace = Workspace.objects.create(
            owner = self.regular_user_1
        )

        # create a few Observations to use with the different Resources
        obs1 = Observation('sampleA', {
            'phenotype': StringAttribute('WT')
        })
        obs1_serializer = ObservationSerializer(obs1)

        obs2 = Observation('sampleB', {
            'phenotype': StringAttribute('KO')
        })
        obs2_serializer = ObservationSerializer(obs2)

        obs3 = Observation('sampleC', {
            'phenotype': StringAttribute('KO')
        })
        obs3_serializer = ObservationSerializer(obs3)

        # create Features to use and a corresponding serializer
        feature1 = Feature('featureA', {
            'pathway': StringAttribute('foo')
        })
        feature1_serializer = FeatureSerializer(feature1)

        feature2 = Feature('featureB', {
            'pathway': StringAttribute('bar')
        })
        feature2_serializer = FeatureSerializer(feature2)

        feature3 = Feature('featureC', {
            'pathway': StringAttribute('bar3')
        })
        feature3_serializer = FeatureSerializer(feature3)

        feature4 = Feature('featureD', {
            'pathway': StringAttribute('bar')
        })
        feature4_serializer = FeatureSerializer(feature4)

        # create an ObservationSet for resource1 (sampleA, sampleB)
        observation_set_data1 = {
            'multiple': True,
            'elements': [
                obs1_serializer.data,
                obs2_serializer.data
            ]
        }
        # create an ObservationSet for resource2 (only sampleC)
        observation_set_data2 = {
            'multiple': True,
            'elements': [
                obs3_serializer.data,
            ]
        }

        # create a FeatureSet for resource1 (featureA, featureB)
        feature_set_data1 = {
            'multiple': True,
            'elements': [
                feature1_serializer.data,
                feature2_serializer.data
            ]
        }
        # create a FeatureSet for resource2 (featureC, featureD)
        feature_set_data2 = {
            'multiple': True,
            'elements': [
                feature3_serializer.data,
                feature4_serializer.data
            ]
        }

        # assemble the metadata payloads; neither has a parent operation
        metadata1 = {
            RESOURCE_KEY: self.new_resource1.pk,
            OBSERVATION_SET_KEY: observation_set_data1,
            FEATURE_SET_KEY: feature_set_data1,
            PARENT_OP_KEY: None
        }
        metadata2 = {
            RESOURCE_KEY: self.new_resource2.pk,
            OBSERVATION_SET_KEY: observation_set_data2,
            FEATURE_SET_KEY: feature_set_data2,
            PARENT_OP_KEY: None
        }
        # validate and save each payload.
        # NOTE(review): assuming Django REST framework serializer semantics,
        # raise_exception=True makes invalid fixture data raise rather
        # than being skipped silently -- confirm.
        rms1 = ResourceMetadataSerializer(data=metadata1)
        if rms1.is_valid(raise_exception=True):
            rms1.save()
        rms2 = ResourceMetadataSerializer(data=metadata2)
        if rms2.is_valid(raise_exception=True):
            rms2.save()