def _parse_sample_sets(filename, sep):
    """Yield the sample sets stored in *filename*.

    The parser is selected from the file extension: ``.smx`` and ``.smt``
    files are handed to ``SampleSet.parse_smx`` / ``SampleSet.parse_smt``
    together with *sep*, while ``.json`` files go to
    ``SampleSet.parse_json`` (which does not receive *sep*).

    This is a generator, so nothing below runs until the first item is
    requested.  At that point a missing file is logged and the process is
    terminated via ``sys.exit(1)``; an unrecognized extension is logged
    and the generator simply finishes without yielding anything.
    """
    if not os.path.exists(filename):
        logging.error("Sample set file '%s' not found" % filename)
        sys.exit(1)

    _, suffix = os.path.splitext(filename)
    if suffix == '.smx':
        parsed = SampleSet.parse_smx(filename, sep)
    elif suffix == '.smt':
        parsed = SampleSet.parse_smt(filename, sep)
    elif suffix == '.json':
        parsed = SampleSet.parse_json(filename)
    else:
        # Unknown suffix: report it and end the generator empty-handed.
        logging.error('suffix not recognized (.smx, .smt, or .json)')
        return

    for sample_set in parsed:
        yield sample_set
def test_sample_set_smt_parser(self):
    """Round-trip random sample sets through an SMT file.

    Builds N sample sets with ``generate_random_sample_set``, writes them
    as a tab-delimited table (one header row of ``Name``, ``Description``
    and every sample name, then one row per sample set with an empty cell
    for samples absent from that set's ``value_dict``), reads the file
    back with ``SampleSet.parse_smt`` and checks that name, description,
    ``value_dict`` contents and ``get_array(samples)`` all match.

    The scratch file is created with ``tempfile.mkstemp`` and removed in a
    ``finally`` clause so a failing assertion does not leave it behind.
    """
    import tempfile  # local import: only this test needs a scratch file

    # generate samples
    samples = ['S%d' % i for i in range(10000)]
    # generate sample sets
    N = 100
    minsize = 1
    maxsize = N
    sample_sets = [generate_random_sample_set(minsize, maxsize, samples)
                   for _ in range(N)]

    # A unique temp file avoids clobbering (or colliding on) a fixed
    # 'tmp' path in the current working directory.
    fd, path = tempfile.mkstemp()
    try:
        # write to a temp file
        with os.fdopen(fd, 'w') as fileh:
            header = ['Name', 'Description']
            header.extend(samples)
            fileh.write('\t'.join(header) + '\n')
            for ss in sample_sets:
                row = [ss.name, ss.desc]
                # Samples missing from this set become empty cells.
                row.extend(ss.value_dict[s] if s in ss.value_dict else ''
                           for s in samples)
                fileh.write('\t'.join(map(str, row)) + '\n')

        # read into sample sets; list() keeps len()/ordering checks valid
        # whether parse_smt returns a list or a lazy iterator
        read_sample_sets = list(SampleSet.parse_smt(path))
        self.assertEqual(len(read_sample_sets), N)
        self.assertEqual(len(read_sample_sets), len(sample_sets))
        for ss, rss in zip(sample_sets, read_sample_sets):
            self.assertEqual(rss.name, ss.name)
            self.assertEqual(rss.desc, ss.desc)
            self.assertEqual(set(rss.value_dict.items()),
                             set(ss.value_dict.items()))
            a = ss.get_array(samples)
            b = rss.get_array(samples)
            self.assertTrue(np.array_equal(a, b))
    finally:
        # Always clean up, even when an assertion above fails.
        os.remove(path)