Example #1
0
 def test_write(self):
     """Write then re-read a file, checking FileStream's open-file tracking.

     The FileStream.open_files registry must be empty before any file is
     opened, contain exactly the live stream while it is open, and be
     empty again after each close.
     """
     self.assertEqual(FileStream.open_files, set())
     out_stream = open(self.test_path, 'w')
     self.assertEqual(FileStream.open_files, set([out_stream]))
     content = 'This\nis\n\na\n\ttest\n'
     out_stream.write(content)
     out_stream.close()
     self.assertEqual(FileStream.open_files, set())
     in_stream = open(self.test_path, 'rU')
     self.assertEqual(FileStream.open_files, set([in_stream]))
     self.assertEqual(in_stream.read(), content)
     in_stream.close()
     self.assertEqual(FileStream.open_files, set())
Example #2
0
 def test_read(self):
     """Read a config file via FileStream and verify open-file tracking.

     The stream's contents must match a plain fopen() read of the same
     path, and FileStream.open_files must track the stream's lifetime.
     """
     self.assertEqual(FileStream.open_files, set())
     stream = open(self.cfg_path, 'rU')
     self.assertEqual(FileStream.open_files, set([stream]))
     self.assertEqual(stream.read(), fopen(self.cfg_path, 'rU').read())
     stream.close()
     self.assertEqual(FileStream.open_files, set())
 def prior_file_is_valid(self,
                         prior_path,
                         num_of_samples,
                         num_of_columns=None):
     """Check that a prior file has the expected numbers of rows and columns.

     Parameters:
         prior_path -- path to the prior sample file.
         num_of_samples -- expected number of data (non-header) rows.
         num_of_columns -- expected number of whitespace-separated columns;
             when None (or 0), it is inferred from the first line.

     Returns True when the file can be opened, every line has a consistent
     number of columns, and the data-row count equals num_of_samples;
     otherwise logs the specific problem and returns False.
     """
     try:
         prior_file = open(prior_path, 'rU')
     # Narrowed from a bare except: so e.g. KeyboardInterrupt still propagates.
     except Exception:
         _LOG.error('prior invalid: could not open prior path {0}'.format(
             prior_path))
         return False
     nrows = 0
     try:
         for i, line in enumerate(prior_file):
             # A header line (first line only) is not counted as data.
             if nrows == 0 and HEADER_PATTERN.match(line):
                 pass
             else:
                 nrows += 1
             ncols = len(line.strip().split())
             if not num_of_columns:
                 # Infer the expected width from the first line seen.
                 num_of_columns = ncols
             if num_of_columns != ncols:
                 _LOG.error('prior invalid: num of columns at line {0} is {1} '
                            'NOT {2}'.format(i + 1, ncols, num_of_columns))
                 return False
     finally:
         # Close the file even when a column mismatch returns early
         # (the original leaked the handle on that path).
         prior_file.close()
     if num_of_samples != nrows:
         _LOG.error('prior invalid: num of rows is {0} NOT {1}'.format(
             nrows, num_of_samples))
         return False
     return True
Example #4
0
 def prior_file_is_valid(self, prior_path, num_of_samples,
         num_of_columns=None):
     """Check that a prior file has the expected numbers of rows and columns.

     Parameters:
         prior_path -- path to the prior sample file.
         num_of_samples -- expected number of data (non-header) rows.
         num_of_columns -- expected number of whitespace-separated columns;
             when None (or 0), it is inferred from the first line.

     Returns True when the file can be opened, every line has a consistent
     number of columns, and the data-row count equals num_of_samples;
     otherwise logs the specific problem and returns False.
     """
     try:
         prior_file = open(prior_path, 'rU')
     # Narrowed from a bare except: so e.g. KeyboardInterrupt still propagates.
     except Exception:
         _LOG.error('prior invalid: could not open prior path {0}'.format(
                 prior_path))
         return False
     nrows = 0
     try:
         for i, line in enumerate(prior_file):
             # A header line (first line only) is not counted as data.
             if nrows == 0 and HEADER_PATTERN.match(line):
                 pass
             else:
                 nrows += 1
             ncols = len(line.strip().split())
             if not num_of_columns:
                 # Infer the expected width from the first line seen.
                 num_of_columns = ncols
             if num_of_columns != ncols:
                 _LOG.error('prior invalid: num of columns at line {0} is {1} '
                         'NOT {2}'.format(i+1, ncols, num_of_columns))
                 return False
     finally:
         # Close the file even when a column mismatch returns early
         # (the original leaked the handle on that path).
         prior_file.close()
     if num_of_samples != nrows:
         _LOG.error('prior invalid: num of rows is {0} NOT {1}'.format(
                 nrows, num_of_samples))
         return False
     return True
Example #5
0
 def test_read_compressed_read_uncompressed(self):
     """Decompress the gzipped fixture and compare it to the plain copy.

     Line endings are excluded from the comparison because decompression
     may normalize them differently across platforms.
     """
     gz_stream = GzipFileStream(self.gz_path, 'rb')
     target = open(self.test_path, 'wb')
     # Stream every line from the compressed source into the plain file.
     target.writelines(gz_stream)
     target.close()
     gz_stream.close()
     self.assertSameFiles([self.ungz_path, self.test_path],
             exclude_line_endings=True)
Example #6
0
 def test_file_object(self):
     """An already-open file passed to process_file_arg comes back as-is.

     The returned close flag must be False (the caller owns the handle),
     and the returned object must be the very file that was passed in.
     """
     handle = open(self.cfg_path, 'rU')
     returned, should_close = process_file_arg(handle)
     self.assertIsInstance(returned, file)
     self.assertFalse(should_close)
     # Neither name is closed yet; both refer to one open file object.
     self.assertFalse(returned.closed)
     self.assertFalse(handle.closed)
     self.assertEqual(handle, returned)
     handle.close()
     # Closing the original closes "both", since they are the same object.
     self.assertTrue(returned.closed)
     self.assertTrue(handle.closed)
 def get_parameter_summaries_from_msbayes_workers(self,
                                                  msbayes_workers,
                                                  shuffle_taus=True):
     """Summarize parameter samples from the prior files of msbayes workers.

     Parameters:
         msbayes_workers -- iterable of workers; all are asserted to share
             the same header and parameter indices.
         shuffle_taus -- when True, shuffle each row's tau values before
             summarizing (taus are sorted within prior files, which would
             otherwise bias the per-column summaries).

     Returns a dict mapping each parameter index to a SampleSummarizer
     fed with that parameter's values from every row of every worker.
     """
     msbayes_workers = list(msbayes_workers)
     s = dict(
         zip([i for i in msbayes_workers[0].parameter_indices], [
             SampleSummarizer(tag=msbayes_workers[0].header[i])
             for i in msbayes_workers[0].parameter_indices
         ]))
     ncols = None
     header = msbayes_workers[0].header
     pi = msbayes_workers[0].parameter_indices
     if shuffle_taus:
         # These lookups depend only on the (shared) header, so compute
         # them once instead of once per data row as before.
         psi_index = get_indices_of_patterns(header, PSI_PATTERNS)[0]
         tau_indices = get_indices_of_patterns(header, TAU_PATTERNS)
     for w in msbayes_workers:
         self.assertEqual(w.header, header)
         self.assertEqual(w.parameter_indices, pi)
         f = open(w.prior_path, 'rU')
         try:
             for line_idx, row in enumerate(f):
                 if ncols is None:
                     ncols = len(row.strip().split())
                 if HEADER_PATTERN.match(row.strip()):
                     continue
                 r = row.strip().split()
                 assert len(r) == ncols
                 if shuffle_taus:  # because taus are sorted in prior files
                     psi = int(r[psi_index])
                     taus = [float(r[i]) for i in tau_indices]
                     # psi is the number of distinct divergence times.
                     self.assertEqual(psi, len(set(taus)))
                     random.shuffle(taus)
                     for n, i in enumerate(tau_indices):
                         s[i].add_sample(taus[n])
                     # Remaining (non-tau) parameters are added unshuffled.
                     p_set = set(w.parameter_indices) - set(tau_indices)
                     for i in sorted(p_set):
                         s[i].add_sample(float(r[i]))
                 else:
                     for i in w.parameter_indices:
                         s[i].add_sample(float(r[i]))
         finally:
             # Close the prior file even if an assertion above fails
             # (the original leaked the handle on assertion errors).
             f.close()
     return s
Example #8
0
 def _assert_success(self, w, num_pairs, sample_size):
     """Assert worker w finished cleanly and produced a valid prior file.

     Checks exit status, output paths, the recorded header against the
     header file on disk, stat/parameter indices, and finally validates
     the prior file's row and column counts.
     """
     self.assertTrue(w.finished)
     self.assertEqual(0, w.exit_code)
     self.assertTrue(os.path.isdir(w.output_dir))
     self.assertTrue(os.path.isfile(w.prior_path))
     self.assertTrue(os.path.isfile(w.header_path))
     header_file = open(w.header_path, 'rU')
     observed_header = header_file.read().strip().split()
     header_file.close()
     self.assertEqual(w.header, observed_header)
     expected_p_indices, expected_s_indices = self.get_expected_indices(
             num_pairs = num_pairs,
             dummy_column = True,
             parameters_reported = True)
     self.assertEqual(w.stat_indices, expected_s_indices)
     self.assertEqual(w.parameter_indices, expected_p_indices)
     # The +1 accounts for the dummy column in the prior file.
     expected_ncols = len(expected_p_indices + expected_s_indices) + 1
     self.assertTrue(self.prior_file_is_valid(w.prior_path,
            num_of_samples = sample_size,
            num_of_columns = expected_ncols))
Example #9
0
 def get_parameter_summaries_from_msbayes_workers(self, msbayes_workers,
         shuffle_taus=True):
     """Summarize parameter samples from the prior files of msbayes workers.

     Parameters:
         msbayes_workers -- iterable of workers; all are asserted to share
             the same header and parameter indices.
         shuffle_taus -- when True, shuffle each row's tau values before
             summarizing (taus are sorted within prior files, which would
             otherwise bias the per-column summaries).

     Returns a dict mapping each parameter index to a SampleSummarizer
     fed with that parameter's values from every row of every worker.
     """
     msbayes_workers = list(msbayes_workers)
     s = dict(zip(
         [i for i in msbayes_workers[0].parameter_indices],
         [SampleSummarizer(
             tag=msbayes_workers[0].header[i]) for i in msbayes_workers[
                 0].parameter_indices]))
     ncols = None
     header = msbayes_workers[0].header
     pi = msbayes_workers[0].parameter_indices
     if shuffle_taus:
         # These lookups depend only on the (shared) header, so compute
         # them once instead of once per data row as before.
         psi_index = get_indices_of_patterns(header, PSI_PATTERNS)[0]
         tau_indices = get_indices_of_patterns(header, TAU_PATTERNS)
     for w in msbayes_workers:
         self.assertEqual(w.header, header)
         self.assertEqual(w.parameter_indices, pi)
         f = open(w.prior_path, 'rU')
         try:
             for line_idx, row in enumerate(f):
                 if ncols is None:
                     ncols = len(row.strip().split())
                 if HEADER_PATTERN.match(row.strip()):
                     continue
                 r = row.strip().split()
                 assert len(r) == ncols
                 if shuffle_taus: # because taus are sorted in prior files
                     psi = int(r[psi_index])
                     taus = [float(r[i]) for i in tau_indices]
                     # psi is the number of distinct divergence times.
                     self.assertEqual(psi, len(set(taus)))
                     random.shuffle(taus)
                     for n, i in enumerate(tau_indices):
                         s[i].add_sample(taus[n])
                     # Remaining (non-tau) parameters are added unshuffled.
                     p_set = set(w.parameter_indices) - set(tau_indices)
                     for i in sorted(p_set):
                         s[i].add_sample(float(r[i]))
                 else:
                     for i in w.parameter_indices:
                         s[i].add_sample(float(r[i]))
         finally:
             # Close the prior file even if an assertion above fails
             # (the original leaked the handle on assertion errors).
             f.close()
     return s
Example #10
0
 def test_read(self):
     """Reading the gzip stream must yield the uncompressed file's bytes."""
     gz_stream = GzipFileStream(self.gz_path, 'rb')
     decompressed = gz_stream.read()
     with open(self.ungz_path, 'rb') as raw:
         expected = raw.read()
     self.assertEqual(decompressed, expected)
     gz_stream.close()
Example #11
0
def output_stream(filename):
    """Return a writable stream for *filename* resolved via output_path()."""
    path = output_path(filename)
    return open(path, 'w')
Example #12
0
def data_stream(filename):
    """Return a universal-newline read stream for *filename* via data_path()."""
    path = data_path(filename)
    return open(path, 'rU')