Example #1
0
 def test_observation_set_csv_converter(self):
     '''
     Tests that the CSV converter produces a comma-delimited string
     of the sample identifiers, keyed by the input name.

     Element order within an ObservationSet is not guaranteed, so
     either ordering of the two names is accepted.
     '''
     obs1 = Observation('foo')
     obs2 = Observation('bar')
     obs_set = ObservationSet([obs1, obs2])
     d = obs_set.to_dict()
     c = ObservationSetCsvConverter()
     converted_input = c.convert('xyz', d, '', '')
     # order doesn't matter, so accept either ordering. The original
     # combined the two comparisons with bitwise `|`; assertIn states
     # the intent directly and gives a useful failure message.
     self.assertIn(converted_input, [
         {'xyz': 'foo,bar'},
         {'xyz': 'bar,foo'},
     ])
    def test_metadata_correct_case2(self):
        '''
        Typically, the metadata is collected following a successful
        validation.  Do that here
        '''
        m = IntegerMatrix()
        resource_path = os.path.join(TESTDIR, 'test_integer_matrix.tsv')
        metadata = m.extract_metadata(resource_path)

        # Parse the test file to ensure we extracted the right content.
        # Read once through a context manager; the original opened the
        # file twice and never closed either handle.
        with open(resource_path) as fin:
            lines = fin.readlines()

        # First line holds the sample names (everything after column 0):
        samplenames = lines[0].strip().split('\t')[1:]
        obs_list = [Observation(x) for x in samplenames]

        # Remaining lines hold the gene identifiers in column 0:
        gene_list = [line.split('\t')[0] for line in lines[1:]]
        feature_list = [Feature(x) for x in gene_list]

        obs_set = ObservationSetSerializer(ObservationSet(obs_list)).data
        feature_set = FeatureSetSerializer(FeatureSet(feature_list)).data

        self.assertEqual(obs_set, metadata[OBSERVATION_SET_KEY])
        self.assertEqual(feature_set, metadata[FEATURE_SET_KEY])
        self.assertIsNone(metadata[PARENT_OP_KEY])
    def setUp(self):
        '''
        Builds the Observation fixtures (and their serializers) shared
        by the tests in this class.
        '''
        # Two distinct Observations with a 'phenotype' attribute each:
        self.el1 = Observation('sampleA', {'phenotype': StringAttribute('WT')})
        self.el2 = Observation('sampleB', {'phenotype': StringAttribute('KO')})
        self.el1_serializer = ObservationSerializer(self.el1)
        self.el2_serializer = ObservationSerializer(self.el2)

        # Shares el1's identifier but has no attributes -- used to test
        # how duplicate elements are handled on addition.
        self.duplicate_element = Observation('sampleA', {})
        self.dup_element_serializer = ObservationSerializer(
            self.duplicate_element)

        # What a properly serialized ElementSet instance should look like:
        self.expected_element_set_data = {
            'multiple': True,
            'elements': [self.el1_serializer.data, self.el2_serializer.data]
        }

        # a correctly formed instance of an ObservationSet
        self.element_set = ObservationSet([self.el1, self.el2])

        # the class that will execute the tests
        self.tester_class = ElementSetSerializerTester(
            ObservationSetSerializer)
Example #4
0
 def test_metadata_correct(self):
     '''
     Parses the three-column annotation file by hand and checks that
     extract_metadata() produces the matching ObservationSet, with no
     feature set or parent operation recorded.
     '''
     resource_path = os.path.join(TESTDIR, 'three_column_annotation.tsv')
     t = AnnotationTable()
     column_dict = {}
     obs_list = []
     # Use a context manager so the file handle is closed; the
     # original left the open() handle dangling.
     with open(resource_path) as fin:
         for i, line in enumerate(fin):
             contents = line.strip().split('\t')
             if i == 0:
                 # header row: map column position -> attribute name
                 for j, c in enumerate(contents[1:]):
                     column_dict[j] = c
             else:
                 # data row: first field is the sample name, the rest
                 # become attributes keyed by the header names
                 samplename = contents[0]
                 attr_dict = {}
                 for j, v in enumerate(contents[1:]):
                     attr = UnrestrictedStringAttribute(v)
                     attr_dict[column_dict[j]] = attr
                 obs = Observation(samplename, attr_dict)
                 obs_list.append(obs)
     expected_obs_set = ObservationSetSerializer(
         ObservationSet(obs_list)).data
     metadata = t.extract_metadata(resource_path, 'tsv')
     self.assertEqual(metadata[OBSERVATION_SET_KEY], expected_obs_set)
     self.assertIsNone(metadata[FEATURE_SET_KEY])
     self.assertIsNone(metadata[PARENT_OP_KEY])
Example #5
0
    def test_metadata_correct_case2(self):
        '''
        Typically, the metadata is collected following a successful
        validation.  However, here we don't validate.  Check that 
        it goes and collects the table in the process
        '''
        m = Matrix()
        resource_path = os.path.join(TESTDIR, 'test_matrix.tsv')
        metadata = m.extract_metadata(resource_path, 'tsv')

        # Parse the test file to ensure we extracted the right content.
        # Read once through a context manager; the original opened the
        # file twice and never closed either handle.
        with open(resource_path) as fin:
            lines = fin.readlines()

        # First line holds the sample names (everything after column 0):
        samplenames = lines[0].strip().split('\t')[1:]
        obs_list = [Observation(x) for x in samplenames]

        # Remaining lines hold the gene identifiers in column 0:
        gene_list = [line.split('\t')[0] for line in lines[1:]]
        feature_list = [Feature(x) for x in gene_list]

        obs_set = ObservationSetSerializer(ObservationSet(obs_list)).data
        feature_set = FeatureSetSerializer(FeatureSet(feature_list)).data

        self.assertEqual(obs_set, metadata[OBSERVATION_SET_KEY])
        # Commented out when removed the feature metadata, as it was causing database
        # issues due to the size of the json object.
        #self.assertEqual(feature_set, metadata[FEATURE_SET_KEY])
        self.assertIsNone(metadata[FEATURE_SET_KEY])
        self.assertIsNone(metadata[PARENT_OP_KEY])
Example #6
0
 def test_observation_set_list_converter(self):
     '''
     Tests that we get properly formatted JSON-compatible
     arrays (of strings in this case). Used when we need to
     supply a WDL job with a list of relevant samples as an
     array of strings, for instance.

     Element order within an ObservationSet is not guaranteed,
     so either ordering of the two names is accepted.
     '''
     obs1 = Observation('foo')
     obs2 = Observation('bar')
     obs_set = ObservationSet([obs1, obs2])
     d = obs_set.to_dict()
     c = ObservationSetListConverter()
     converted_input = c.convert('xyz', d, '', '')
     # order doesn't matter, so accept either ordering. The original
     # combined the two comparisons with bitwise `|`; assertIn states
     # the intent directly and gives a useful failure message.
     self.assertIn(converted_input, [
         {'xyz': ['foo', 'bar']},
         {'xyz': ['bar', 'foo']},
     ])
 def _build_set(self, data):
     '''
     Helper that constructs an ObservationSet from the `data` arg.
     Assumes `data` already carries the expected keys
     ('elements' and 'multiple').
     '''
     # deserialize each element dict into a proper Observation:
     elements = [
         ObservationSerializer(data=element_dict).get_instance()
         for element_dict in data['elements']
     ]
     return ObservationSet(elements, data['multiple'])
Example #8
0
    def extract_metadata(self, resource_path, parent_op_pk=None):
        '''
        Populates self.metadata with a serialized FeatureSet built from
        the table's rows and a serialized ObservationSet built from the
        table's columns, then returns the metadata dict.
        '''
        super().extract_metadata(resource_path, parent_op_pk)

        # rows --> Features
        features = [Feature(idx) for idx in self.table.index]
        self.metadata[DataResource.FEATURE_SET] = FeatureSetSerializer(
            FeatureSet(features)).data

        # columns --> Observations
        observations = [Observation(col) for col in self.table.columns]
        self.metadata[DataResource.OBSERVATION_SET] = ObservationSetSerializer(
            ObservationSet(observations)).data
        return self.metadata
 def test_merge_of_different_types_fails(self):
     '''
     Merging element sets of two different types (e.g. an
     ObservationSet with a FeatureSet) is not permitted.
     Test that it raises an exception.
     '''
     obs_set = ObservationSet([self.el1, self.el2])
     feature_set = FeatureSet([
         Feature('geneA', {'oncogene': StringAttribute('Y')}),
     ])
     with self.assertRaises(Exception):
         merge_element_set([obs_set, feature_set])
Example #10
0
    def extract_metadata(self, resource_path, parent_op_pk=None):
        '''
        For an AnnotationTable the rows are expected to be Observation
        instances, and any additional columns supply attributes for
        each Observation.

        Collects those rows into an ObservationSet and stores its
        serialized form in the metadata, which is returned.
        '''
        super().extract_metadata(resource_path, parent_op_pk)

        observations = super().prep_metadata(Observation)
        self.metadata[DataResource.OBSERVATION_SET] = ObservationSetSerializer(
            ObservationSet(observations)).data
        return self.metadata
 def create(self, validated_data):
     '''
     Builds and returns an ObservationSet instance from the
     validated data.

     Each entry of validated_data['elements'] arrives as an
     OrderedDict; the ObservationSerializer turns it into a
     proper Observation instance.
     '''
     elements = [
         ObservationSerializer(data=element_dict).get_instance()
         for element_dict in validated_data['elements']
     ]
     return ObservationSet(elements, validated_data['multiple'])
Example #12
0
    def extract_metadata(self,
                         resource_path,
                         file_extension,
                         parent_op_pk=None):
        '''
        Populates self.metadata with a serialized ObservationSet built
        from the table's columns, then returns the metadata dict.

        NOTE: FeatureSets (derived from the table rows) are deliberately
        NOT added -- the serialized JSON was large enough to cause
        problems when inserted into the database.
        '''
        super().extract_metadata(resource_path, file_extension, parent_op_pk)

        # columns --> Observations
        observations = [Observation(col) for col in self.table.columns]
        self.metadata[DataResource.OBSERVATION_SET] = ObservationSetSerializer(
            ObservationSet(observations)).data
        return self.metadata