def test_nullable_observation_set_serializer(self):
        '''
        Attributes of Observation instances may be allowed to hold null
        values; NullableObservationSetSerializer exists for that case.

        Feed the same payload (one element with a null attribute value,
        one with a regular string) to both serializers: the original,
        non-nullable serializer must reject it and the nullable one
        must accept it.
        '''
        null_valued_element = {
            'id': 'foo',
            'attributes': {
                'keyA': {'attribute_type': 'String', 'value': None}
            }
        }
        populated_element = {
            'id': 'bar',
            'attributes': {
                'keyA': {'attribute_type': 'String', 'value': 'abc'}
            }
        }
        payload = {
            'multiple': True,
            'elements': [null_valued_element, populated_element]
        }

        # the strict serializer refuses the null attribute value...
        strict_serializer = ObservationSetSerializer(data=payload)
        self.assertFalse(strict_serializer.is_valid())

        # ...while the nullable variant accepts the very same payload
        nullable_serializer = NullableObservationSetSerializer(data=payload)
        self.assertTrue(nullable_serializer.is_valid())
    def __init__(self, user, operation, workspace, key, submitted_value,
                 input_or_output_spec):
        '''
        Run the parent initializer, then check the submitted value with
        ObservationSetSerializer and store the resulting instance on
        `self.instance`.

        Raises a ValidationError whose detail is keyed by `key` when the
        serializer rejects the submitted value.
        '''
        super().__init__(user, operation, workspace, key, submitted_value,
                         input_or_output_spec)

        serializer = ObservationSetSerializer(data=self.submitted_value)
        try:
            serializer.is_valid(raise_exception=True)
        except ValidationError as err:
            # report the serializer's error detail under this input's key
            raise ValidationError({key: err.detail})
        else:
            # validation passed, so keep the deserialized instance
            self.instance = serializer.get_instance()
    def test_metadata_correct_case2(self):
        '''
        Extract metadata from the integer matrix test file and compare
        it against observation and feature sets parsed directly from
        that same file.
        '''
        m = IntegerMatrix()
        resource_path = os.path.join(TESTDIR, 'test_integer_matrix.tsv')
        metadata = m.extract_metadata(resource_path)

        # Parse the test file to ensure we extracted the right content.
        # The context manager closes the handle deterministically, and a
        # single pass yields both the header and the data rows.
        with open(resource_path) as fh:
            header = fh.readline()
            # the first field of every remaining row is the gene name
            gene_list = [row.split('\t')[0] for row in fh]

        # the first header field labels the gene column; the rest are samples
        samplenames = header.strip().split('\t')[1:]
        obs_list = [Observation(x) for x in samplenames]
        feature_list = [Feature(x) for x in gene_list]

        obs_set = ObservationSetSerializer(ObservationSet(obs_list)).data
        feature_set = FeatureSetSerializer(FeatureSet(feature_list)).data

        self.assertEqual(obs_set, metadata[OBSERVATION_SET_KEY])
        self.assertEqual(feature_set, metadata[FEATURE_SET_KEY])
        self.assertIsNone(metadata[PARENT_OP_KEY])
Exemple #4
0
 def test_metadata_correct(self):
     '''
     Build the expected ObservationSet by parsing the annotation file
     directly (first column is the sample name, remaining columns are
     attributes named by the header row), then check that it matches
     the metadata AnnotationTable extracts from the same file.
     '''
     resource_path = os.path.join(TESTDIR, 'three_column_annotation.tsv')
     t = AnnotationTable()
     obs_list = []
     # The context manager closes the file handle deterministically.
     with open(resource_path) as fh:
         # header row: map attribute column position -> attribute name
         header = fh.readline().strip().split('\t')
         column_dict = dict(enumerate(header[1:]))
         for line in fh:
             contents = line.strip().split('\t')
             samplename = contents[0]
             attr_dict = {
                 column_dict[j]: UnrestrictedStringAttribute(v)
                 for j, v in enumerate(contents[1:])
             }
             obs_list.append(Observation(samplename, attr_dict))
     expected_obs_set = ObservationSetSerializer(
         ObservationSet(obs_list)).data
     metadata = t.extract_metadata(resource_path, 'tsv')
     self.assertEqual(metadata[OBSERVATION_SET_KEY], expected_obs_set)
     self.assertIsNone(metadata[FEATURE_SET_KEY])
     self.assertIsNone(metadata[PARENT_OP_KEY])
Exemple #5
0
    def test_metadata_correct_case2(self):
        '''
        Typically, the metadata is collected following a successful
        validation.  However, here we don't validate.  Check that
        it goes and collects the table in the process
        '''
        m = Matrix()
        resource_path = os.path.join(TESTDIR, 'test_matrix.tsv')
        metadata = m.extract_metadata(resource_path, 'tsv')

        # Parse the header of the test file to build the expected
        # observations. The context manager closes the file handle
        # deterministically.
        with open(resource_path) as fh:
            header = fh.readline()
        # the first header field labels the row-name column; the rest are samples
        samplenames = header.strip().split('\t')[1:]
        obs_list = [Observation(x) for x in samplenames]
        obs_set = ObservationSetSerializer(ObservationSet(obs_list)).data

        self.assertEqual(obs_set, metadata[OBSERVATION_SET_KEY])
        # Feature metadata was removed, as it was causing database issues
        # due to the size of the json object, so the feature set entry is
        # expected to be None rather than a serialized FeatureSet.
        self.assertIsNone(metadata[FEATURE_SET_KEY])
        self.assertIsNone(metadata[PARENT_OP_KEY])
Exemple #6
0
    def extract_metadata(self, resource_path, parent_op_pk=None):
        '''
        Run the parent's metadata extraction, then add a serialized
        FeatureSet built from the table's index and a serialized
        ObservationSet built from the table's columns.

        Returns `self.metadata`.
        '''
        super().extract_metadata(resource_path, parent_op_pk)

        # rows of the table become the Features
        row_features = [Feature(row_id) for row_id in self.table.index]
        feature_serializer = FeatureSetSerializer(FeatureSet(row_features))
        self.metadata[DataResource.FEATURE_SET] = feature_serializer.data

        # columns of the table become the Observations
        column_observations = [
            Observation(col_id) for col_id in self.table.columns
        ]
        observation_serializer = ObservationSetSerializer(
            ObservationSet(column_observations))
        self.metadata[DataResource.OBSERVATION_SET] = \
            observation_serializer.data

        return self.metadata
Exemple #7
0
    def extract_metadata(self, resource_path, parent_op_pk=None):
        '''
        For an AnnotationTable the rows are expected to be the
        Observation instances, and the additional columns give the
        attributes of each Observation.

        Runs the parent's extraction, stores the serialized
        ObservationSet in the metadata and returns `self.metadata`.
        '''
        super().extract_metadata(resource_path, parent_op_pk)

        # the parent class prepares the list of Observation elements
        observations = super().prep_metadata(Observation)
        serializer = ObservationSetSerializer(ObservationSet(observations))
        self.metadata[DataResource.OBSERVATION_SET] = serializer.data

        return self.metadata
Exemple #8
0
    def extract_metadata(self,
                         resource_path,
                         file_extension,
                         parent_op_pk=None):
        '''
        Run the parent's metadata extraction, then add a serialized
        ObservationSet built from the table's columns.

        Note: a FeatureSet derived from the table's rows is deliberately
        NOT added to the metadata, as it was causing issues with large
        json objects being inserted into the database.

        Returns `self.metadata`.
        '''
        super().extract_metadata(resource_path, file_extension, parent_op_pk)

        # columns of the table become the Observations
        column_observations = [
            Observation(col_id) for col_id in self.table.columns
        ]
        serializer = ObservationSetSerializer(
            ObservationSet(column_observations))
        self.metadata[DataResource.OBSERVATION_SET] = serializer.data

        return self.metadata