def test_parse_object_string_sample(self):
    """Check sample-name extraction from an object string.

    Covers one accepted input ('foo_bar123', expected sample 'foo') and
    one rejected input ('foo', expected to raise ValueError).
    """
    # Accepted input: the parsed sample name must equal 'foo'.
    self.assertEqual(parse_object_string_sample('foo_bar123'), 'foo')

    # Rejected input: parsing 'foo' must raise ValueError.
    self.assertRaises(ValueError, parse_object_string_sample, 'foo')
def build_problem_data(group_map_files, mapping_file, prediction_field,
                       start_level, include_only, negate, n_processes):
    """Assemble the ProblemData and initial FeatureVector for a run.

    Parameters
    ----------
    group_map_files
        Iterable of split-file paths/handles, one per scope; each is
        passed to ``read_split_file``, which must return a
        (group-to-object, object-to-group) pair.
    mapping_file
        Passed to ``read_mapping_file`` to obtain, per sample name, a
        mapping of that sample's properties.
    prediction_field
        Key of the sample property used as the response value.
    start_level
        Index into the per-scope maps selecting the scope whose groups
        provide the sample names and the initial feature records.
    include_only
        ``None`` to keep every sample, otherwise an indexable pair
        ``(field, allowed_values)``: a sample is kept when its ``field``
        property is in ``allowed_values`` (subject to ``negate``).
    negate
        XOR-ed with the ``include_only`` membership test, so a true value
        inverts the filter. Ignored when ``include_only`` is ``None``.
    n_processes
        Forwarded unchanged to ``ProblemData``.

    Returns
    -------
    tuple
        ``(problem_data, feature_vector)`` where ``feature_vector`` holds
        one ``FeatureRecord(group, start_level, group_size)`` per group at
        ``start_level``.
    """
    # One (group -> objects, object -> group) pair of maps per scope.
    group_to_object = []
    object_to_group = []
    for split_file in group_map_files:
        forward_map, reverse_map = read_split_file(split_file)
        group_to_object.append(forward_map)
        object_to_group.append(reverse_map)

    # Derive the sample names from the objects grouped at the start level.
    # Using the mapping file's keys instead could pull in records that
    # have no features at all.
    sample_names = set()
    for members in group_to_object[start_level].values():
        sample_names.update(
            parse_object_string_sample(obj) for obj in members)

    # Map each sample name to its properties.
    sample_map = read_mapping_file(mapping_file)

    sample_to_response = {}
    for name in sample_names:
        properties = sample_map[name]
        if include_only is not None:
            is_listed = properties[include_only[0]] in include_only[1]
            # negate flips the membership test; skip samples filtered out.
            if not (is_listed ^ negate):
                continue
        sample_to_response[name] = properties[prediction_field]

    problem_data = ProblemData(group_to_object, object_to_group,
                               sample_to_response, n_processes)

    # One feature record per start-level group, sized by its member count.
    records = [
        FeatureRecord(group, start_level, len(members))
        for group, members in group_to_object[start_level].items()
    ]
    feature_vector = FeatureVector(records)

    return problem_data, feature_vector