Ejemplo n.º 1
0
def _parse_sample_sets(filename, sep):
    """Yield the sample sets stored in ``filename``.

    The parser is chosen from the file extension: ``.smx`` and ``.smt``
    files are handed to ``SampleSet.parse_smx`` / ``SampleSet.parse_smt``
    together with ``sep``; ``.json`` files go to ``SampleSet.parse_json``,
    which is called without ``sep``.

    This is a generator, so none of the checks below run until the first
    item is requested.

    Args:
        filename: path of the sample set file.
        sep: separator forwarded to the ``.smx`` / ``.smt`` parsers.

    Yields:
        Each item produced by the selected ``SampleSet`` parser.

    Raises:
        SystemExit: with status 1 if the file does not exist or if its
            extension is not one of ``.smx``, ``.smt`` or ``.json``.
    """
    if not os.path.exists(filename):
        logging.error("Sample set file '%s' not found", filename)
        sys.exit(1)
    ext = os.path.splitext(filename)[-1]
    if ext == '.smx':
        parsed = SampleSet.parse_smx(filename, sep)
    elif ext == '.smt':
        parsed = SampleSet.parse_smt(filename, sep)
    elif ext == '.json':
        parsed = SampleSet.parse_json(filename)
    else:
        logging.error('suffix not recognized (.smx, .smt, or .json)')
        # Abort exactly like the missing-file case above; otherwise the
        # generator would just end and the caller would silently continue
        # with no sample sets at all.
        sys.exit(1)
    for ss in parsed:
        yield ss
Ejemplo n.º 2
0
 def test_sample_set_smt_parser(self):
     """Round-trip randomly generated sample sets through an SMT file.

     Writes N random sample sets as a tab-delimited table (one header row
     with 'Name', 'Description' and every sample id, then one row per
     sample set), reads the file back with ``SampleSet.parse_smt`` and
     checks that name, description, value dictionary and array view all
     match what was written.
     """
     # generate samples
     samples = ['S%d' % (i) for i in range(10000)]
     # generate sample sets
     N = 100
     minsize = 1
     maxsize = N
     sample_sets = [generate_random_sample_set(minsize, maxsize, samples)
                    for _ in range(N)]
     tmp_path = 'tmp'
     try:
         # write to a temp file; 'with' closes the handle even if a write
         # fails part-way through
         with open(tmp_path, 'w') as fileh:
             header = ['Name', 'Description']
             header.extend(samples)
             fileh.write('\t'.join(header) + '\n')
             for ss in sample_sets:
                 fields = [ss.name, ss.desc]
                 # samples missing from this set get an empty column
                 fields.extend(ss.value_dict.get(sample, '')
                               for sample in samples)
                 fileh.write('\t'.join(map(str, fields)) + '\n')
         # read into sample sets
         read_sample_sets = SampleSet.parse_smt(tmp_path)
         self.assertEqual(len(read_sample_sets), N)
         self.assertEqual(len(read_sample_sets), len(sample_sets))
         for ss, rss in zip(sample_sets, read_sample_sets):
             self.assertEqual(rss.name, ss.name)
             self.assertEqual(rss.desc, ss.desc)
             self.assertEqual(set(rss.value_dict.items()),
                              set(ss.value_dict.items()))
             a = ss.get_array(samples)
             b = rss.get_array(samples)
             self.assertTrue(np.array_equal(a, b))
     finally:
         # always clean up, so a failed assertion does not leave a stale
         # fixture behind for the next run
         if os.path.exists(tmp_path):
             os.remove(tmp_path)