Beispiel #1
0
    def test_parse_object_string_sample(self):
        """Check sample-name extraction from an object string."""
        # 'foo_bar123' is expected to yield the sample name 'foo'.
        self.assertEqual(parse_object_string_sample('foo_bar123'), 'foo')

        # An input without an underscore-separated suffix ('foo') must be
        # rejected with ValueError.
        self.assertRaises(ValueError, parse_object_string_sample, 'foo')
Beispiel #2
0
def build_problem_data(group_map_files, mapping_file, prediction_field,
                       start_level, include_only, negate, n_processes):
    """Assemble the ProblemData and the initial FeatureVector.

    Parameters
    ----------
    group_map_files
        Iterable of inputs handed one by one to ``read_split_file``; each
        call yields a (group -> objects, object -> group) pair of maps.
    mapping_file
        Input handed to ``read_mapping_file`` to obtain the per-sample
        properties, indexed first by sample name and then by field.
    prediction_field
        Field of the sample properties used as the response value.
    start_level
        Index into the list of group maps selecting the map from which
        sample names and the initial features are taken.
    include_only
        ``None`` to keep every sample, otherwise an indexable whose item 0
        is a property field and whose item 1 is a container of accepted
        values for that field.
    negate
        XOR-ed with the membership test, so a truthy value inverts the
        ``include_only`` filter. Ignored when ``include_only`` is ``None``.
    n_processes
        Passed through unchanged to ``ProblemData``.

    Returns
    -------
    tuple
        ``(problem_data, feature_vector)``.
    """
    # One (group -> objects, object -> group) pair per map file, kept in
    # the same order as the input files.
    split_maps = [(forward, backward)
                  for forward, backward in map(read_split_file,
                                               group_map_files)]
    group_to_object = [forward for forward, _ in split_maps]
    object_to_group = [backward for _, backward in split_maps]

    # Derive the sample names from the objects of the starting-level groups.
    # Taking them from the mapping file instead could include records that
    # have no features at all.
    start_groups = group_to_object[start_level]
    samplenames = list({parse_object_string_sample(obj)
                        for group in start_groups
                        for obj in start_groups[group]})

    # Map of sample name to its properties.
    samplemap = read_mapping_file(mapping_file)

    sample_to_response = {}
    for name in samplenames:
        properties = samplemap[name]
        if include_only is not None:
            accepted = properties[include_only[0]] in include_only[1]
            # XOR with `negate` flips the filter when negation is requested.
            if not (accepted ^ negate):
                continue
        sample_to_response[name] = properties[prediction_field]

    problem_data = ProblemData(group_to_object, object_to_group,
                               sample_to_response, n_processes)

    # One feature per starting-level group, sized by its number of objects.
    records = [FeatureRecord(group, start_level, len(start_groups[group]))
               for group in start_groups]
    feature_vector = FeatureVector(records)

    return problem_data, feature_vector