def _parse_sample_sets(filename, sep):
    """Yield the sample sets stored in ``filename``.

    The parser is selected from the file extension: ``.smx`` and ``.smt``
    files are handed to ``SampleSet.parse_smx`` / ``SampleSet.parse_smt``
    together with ``sep``; ``.json`` files are handed to
    ``SampleSet.parse_json`` (``sep`` is not used for them).

    This is a generator, so none of the checks below run until the caller
    starts iterating.

    If ``filename`` does not exist an error is logged and the process exits
    with status 1.  If the extension is not one of the three recognised
    suffixes an error is logged and nothing is yielded.
    """
    if not os.path.exists(filename):
        logging.error("Sample set file '%s' not found" % filename)
        sys.exit(1)
    suffix = os.path.splitext(filename)[1]
    if suffix == '.smx':
        parsed = SampleSet.parse_smx(filename, sep)
    elif suffix == '.smt':
        parsed = SampleSet.parse_smt(filename, sep)
    elif suffix == '.json':
        parsed = SampleSet.parse_json(filename)
    else:
        logging.error('suffix not recognized (.smx, .smt, or .json)')
        # Unknown format: end the generator without producing anything.
        return
    for sample_set in parsed:
        yield sample_set
Beispiel #2
0
 def test_sample_set_smx_parser(self):
     """Round-trip randomly generated sample sets through an SMX file.

     Writes ``N`` sample sets as a tab-delimited matrix (a ``Name`` header
     row, a ``Desc`` header row, then one row per sample with one column
     per sample set; an empty cell means the sample has no value in that
     set), reads the file back with ``SampleSet.parse_smx`` and checks that
     names, descriptions, value dictionaries and ``get_array`` output all
     match the originals.
     """
     import tempfile

     # generate samples
     samples = ['S%d' % (i) for i in range(10000)]
     # generate sample sets
     N = 100
     minsize = 1
     maxsize = N
     sample_sets = [generate_random_sample_set(minsize, maxsize, samples)
                    for _ in range(N)]
     names = ['Name'] + [ss.name for ss in sample_sets]
     descs = ['Desc'] + [ss.desc for ss in sample_sets]
     # Use a unique scratch file instead of a fixed 'tmp' in the current
     # directory so concurrent runs (or an existing 'tmp') cannot collide.
     fd, path = tempfile.mkstemp(suffix='.smx')
     try:
         # write to the temp file; the with-block closes it
         with os.fdopen(fd, 'w') as fileh:
             fileh.write('\t'.join(names) + '\n')
             fileh.write('\t'.join(descs) + '\n')
             for sample in samples:
                 fields = [sample]
                 for ss in sample_sets:
                     # empty cell when the sample is absent from the set
                     fields.append(ss.value_dict.get(sample, ''))
                 fileh.write('\t'.join(map(str, fields)) + '\n')
         # read into sample sets
         read_sample_sets = SampleSet.parse_smx(path)
         self.assertEqual(len(read_sample_sets), N)
         self.assertEqual(len(read_sample_sets), len(sample_sets))
         for ss, rss in zip(sample_sets, read_sample_sets):
             self.assertEqual(rss.name, ss.name)
             self.assertEqual(rss.desc, ss.desc)
             self.assertEqual(set(rss.value_dict.items()),
                              set(ss.value_dict.items()))
             a = ss.get_array(samples)
             b = rss.get_array(samples)
             self.assertTrue(np.array_equal(a, b))
     finally:
         # remove the scratch file even when an assertion above fails
         os.remove(path)