def test_write(self):
    """Round-trip a string through FileStream and check the open-file registry.

    The registry must contain exactly the streams currently open and be
    empty again after each close().
    """
    self.assertEqual(FileStream.open_files, set())
    writer = open(self.test_path, 'w')
    self.assertEqual(FileStream.open_files, set([writer]))
    content = 'This\nis\n\na\n\ttest\n'
    writer.write(content)
    writer.close()
    self.assertEqual(FileStream.open_files, set())
    reader = open(self.test_path, 'rU')
    self.assertEqual(FileStream.open_files, set([reader]))
    self.assertEqual(reader.read(), content)
    reader.close()
    self.assertEqual(FileStream.open_files, set())
def test_read(self):
    """Reading via FileStream matches a plain read and updates the registry."""
    self.assertEqual(FileStream.open_files, set())
    stream = open(self.cfg_path, 'rU')
    self.assertEqual(FileStream.open_files, set([stream]))
    expected = fopen(self.cfg_path, 'rU').read()
    self.assertEqual(stream.read(), expected)
    stream.close()
    self.assertEqual(FileStream.open_files, set())
def prior_file_is_valid(self, prior_path, num_of_samples,
        num_of_columns=None):
    """Return True if the prior file at ``prior_path`` looks well-formed.

    A valid prior file has ``num_of_samples`` data rows (an optional
    single header row matching ``HEADER_PATTERN`` is not counted) and a
    consistent number of whitespace-separated columns on every line.

    Parameters:
        prior_path -- path to the prior sample file to validate.
        num_of_samples -- expected number of (non-header) rows.
        num_of_columns -- expected column count; if None/0, the count
            found on the first line is used as the expectation.

    Returns False (after logging the reason) on any problem.
    """
    try:
        prior_file = open(prior_path, 'rU')
    # Was a bare `except:`; narrowed to file-open failures so that
    # unrelated errors (e.g. KeyboardInterrupt) are not swallowed.
    except (IOError, OSError):
        _LOG.error('prior invalid: could not open prior path {0}'.format(
            prior_path))
        return False
    nrows = 0
    # try/finally guarantees the file is closed even when we bail out
    # early on a column mismatch (the original leaked the handle there).
    try:
        for i, line in enumerate(prior_file):
            # Only a header appearing as the very first row is skipped.
            if nrows == 0 and HEADER_PATTERN.match(line):
                pass
            else:
                nrows += 1
            # Adopt the first line's width when no expectation was given.
            if not num_of_columns:
                num_of_columns = len(line.strip().split())
            ncols = len(line.strip().split())
            if num_of_columns != ncols:
                _LOG.error('prior invalid: num of columns at line {0} is {1} '
                        'NOT {2}'.format(i + 1, ncols, num_of_columns))
                return False
    finally:
        prior_file.close()
    if num_of_samples != nrows:
        _LOG.error('prior invalid: num of rows is {0} NOT {1}'.format(
            nrows, num_of_samples))
        return False
    return True
def prior_file_is_valid(self, prior_path, num_of_samples,
        num_of_columns=None):
    """Return True if the prior file at ``prior_path`` looks well-formed.

    A valid prior file has ``num_of_samples`` data rows (an optional
    single header row matching ``HEADER_PATTERN`` is not counted) and a
    consistent number of whitespace-separated columns on every line.

    Parameters:
        prior_path -- path to the prior sample file to validate.
        num_of_samples -- expected number of (non-header) rows.
        num_of_columns -- expected column count; if None/0, the count
            found on the first line is used as the expectation.

    Returns False (after logging the reason) on any problem.
    """
    try:
        prior_file = open(prior_path, 'rU')
    # Was a bare `except:`; narrowed to file-open failures so that
    # unrelated errors (e.g. KeyboardInterrupt) are not swallowed.
    except (IOError, OSError):
        _LOG.error('prior invalid: could not open prior path {0}'.format(
            prior_path))
        return False
    nrows = 0
    # try/finally guarantees the file is closed even when we bail out
    # early on a column mismatch (the original leaked the handle there).
    try:
        for i, line in enumerate(prior_file):
            # Only a header appearing as the very first row is skipped.
            if nrows == 0 and HEADER_PATTERN.match(line):
                pass
            else:
                nrows += 1
            # Adopt the first line's width when no expectation was given.
            if not num_of_columns:
                num_of_columns = len(line.strip().split())
            ncols = len(line.strip().split())
            if num_of_columns != ncols:
                _LOG.error('prior invalid: num of columns at line {0} is {1} '
                        'NOT {2}'.format(i+1, ncols, num_of_columns))
                return False
    finally:
        prior_file.close()
    if num_of_samples != nrows:
        _LOG.error('prior invalid: num of rows is {0} NOT {1}'.format(
            nrows, num_of_samples))
        return False
    return True
def test_read_compressed_read_uncompressed(self):
    """Decompressing via GzipFileStream reproduces the original file."""
    gz_stream = GzipFileStream(self.gz_path, 'rb')
    plain = open(self.test_path, 'wb')
    for chunk in gz_stream:
        plain.write(chunk)
    plain.close()
    gz_stream.close()
    self.assertSameFiles([self.ungz_path, self.test_path],
            exclude_line_endings=True)
def test_file_object(self):
    """process_file_arg passes an already-open file through unchanged.

    The returned `close` flag must be False: the caller opened the file,
    so the caller remains responsible for closing it.
    """
    handle = open(self.cfg_path, 'rU')
    result, close = process_file_arg(handle)
    self.assertIsInstance(result, file)
    self.assertFalse(close)
    self.assertFalse(result.closed)
    self.assertFalse(handle.closed)
    self.assertEqual(handle, result)
    handle.close()
    # Closing the original must close the returned object too —
    # they are the same underlying file.
    self.assertTrue(result.closed)
    self.assertTrue(handle.closed)
def get_parameter_summaries_from_msbayes_workers(self, msbayes_workers,
        shuffle_taus=True):
    """Aggregate per-parameter sample summaries across worker prior files.

    Builds one SampleSummarizer per parameter column (keyed by column
    index) and feeds it every data row from every worker's prior file.
    All workers must share the same header and parameter indices.

    shuffle_taus -- when True, the tau values in each row are shuffled
        before being added, because taus are stored sorted in the prior
        files and that ordering would bias per-column summaries.

    Returns a dict mapping parameter column index -> SampleSummarizer.
    """
    msbayes_workers = list(msbayes_workers)
    # One summarizer per parameter column, tagged with its header name.
    s = dict(zip(
        [i for i in msbayes_workers[0].parameter_indices],
        [SampleSummarizer(tag=msbayes_workers[0].header[i])
            for i in msbayes_workers[0].parameter_indices]))
    ncols = None
    header = msbayes_workers[0].header
    pi = msbayes_workers[0].parameter_indices
    for w in msbayes_workers:
        # Every worker must agree with the first on layout.
        self.assertEqual(w.header, header)
        self.assertEqual(w.parameter_indices, pi)
        f = open(w.prior_path, 'rU')
        for line_idx, row in enumerate(f):
            # Column count is fixed by the first line seen (header or not —
            # presumably both have the same width; verify against the files).
            if not ncols:
                ncols = len(row.strip().split())
            # Skip header rows; only data rows are summarized.
            if HEADER_PATTERN.match(row.strip()):
                continue
            r = row.strip().split()
            assert len(r) == ncols
            if shuffle_taus: # because taus are sorted in prior files
                psi_index = get_indices_of_patterns(w.header,
                        PSI_PATTERNS)[0]
                tau_indices = get_indices_of_patterns(w.header,
                        TAU_PATTERNS)
                psi = int(r[psi_index])
                taus = [float(r[i]) for i in tau_indices]
                # psi is the number of distinct divergence times.
                self.assertEqual(psi, len(set(taus)))
                random.shuffle(taus)
                for n, i in enumerate(tau_indices):
                    s[i].add_sample(taus[n])
                # Non-tau parameters are added unshuffled.
                p_set = set(w.parameter_indices) - set(tau_indices)
                p = sorted(list(p_set))
                for i in p:
                    s[i].add_sample(float(r[i]))
            else:
                for i in w.parameter_indices:
                    s[i].add_sample(float(r[i]))
        f.close()
    return s
def _assert_success(self, w, num_pairs, sample_size):
    """Assert that worker `w` finished cleanly and wrote a valid prior.

    Checks exit status, output paths, header contents, stat/parameter
    index layout, and finally validates the prior file itself.
    """
    self.assertTrue(w.finished)
    self.assertEqual(0, w.exit_code)
    self.assertTrue(os.path.isdir(w.output_dir))
    self.assertTrue(os.path.isfile(w.prior_path))
    self.assertTrue(os.path.isfile(w.header_path))
    header_file = open(w.header_path, 'rU')
    header_tokens = header_file.read().strip().split()
    header_file.close()
    self.assertEqual(w.header, header_tokens)
    expected_p_indices, expected_s_indices = self.get_expected_indices(
            num_pairs=num_pairs,
            dummy_column=True,
            parameters_reported=True)
    self.assertEqual(w.stat_indices, expected_s_indices)
    self.assertEqual(w.parameter_indices, expected_p_indices)
    # +1 accounts for the dummy column included in the prior file.
    expected_ncols = len(expected_p_indices + expected_s_indices) + 1
    self.assertTrue(self.prior_file_is_valid(w.prior_path,
            num_of_samples=sample_size,
            num_of_columns=expected_ncols))
def get_parameter_summaries_from_msbayes_workers(self, msbayes_workers,
        shuffle_taus=True):
    """Aggregate per-parameter sample summaries across worker prior files.

    Builds one SampleSummarizer per parameter column (keyed by column
    index) and feeds it every data row from every worker's prior file.
    All workers must share the same header and parameter indices.

    shuffle_taus -- when True, the tau values in each row are shuffled
        before being added, because taus are stored sorted in the prior
        files and that ordering would bias per-column summaries.

    Returns a dict mapping parameter column index -> SampleSummarizer.
    """
    msbayes_workers = list(msbayes_workers)
    # One summarizer per parameter column, tagged with its header name.
    s = dict(zip(
        [i for i in msbayes_workers[0].parameter_indices],
        [SampleSummarizer(
            tag=msbayes_workers[0].header[i]) for i in msbayes_workers[
                0].parameter_indices]))
    ncols = None
    header = msbayes_workers[0].header
    pi = msbayes_workers[0].parameter_indices
    for w in msbayes_workers:
        # Every worker must agree with the first on layout.
        self.assertEqual(w.header, header)
        self.assertEqual(w.parameter_indices, pi)
        f = open(w.prior_path, 'rU')
        for line_idx, row in enumerate(f):
            # Column count is fixed by the first line seen (header or not —
            # presumably both have the same width; verify against the files).
            if not ncols:
                ncols = len(row.strip().split())
            # Skip header rows; only data rows are summarized.
            if HEADER_PATTERN.match(row.strip()):
                continue
            r = row.strip().split()
            assert len(r) == ncols
            if shuffle_taus: # because taus are sorted in prior files
                psi_index = get_indices_of_patterns(w.header,
                        PSI_PATTERNS)[0]
                tau_indices = get_indices_of_patterns(w.header,
                        TAU_PATTERNS)
                psi = int(r[psi_index])
                taus = [float(r[i]) for i in tau_indices]
                # psi is the number of distinct divergence times.
                self.assertEqual(psi, len(set(taus)))
                random.shuffle(taus)
                for n, i in enumerate(tau_indices):
                    s[i].add_sample(taus[n])
                # Non-tau parameters are added unshuffled.
                p_set = set(w.parameter_indices) - set(tau_indices)
                p = sorted(list(p_set))
                for i in p:
                    s[i].add_sample(float(r[i]))
            else:
                for i in w.parameter_indices:
                    s[i].add_sample(float(r[i]))
        f.close()
    return s
def test_read(self):
    """GzipFileStream.read() must equal the raw uncompressed contents."""
    gz_stream = GzipFileStream(self.gz_path, 'rb')
    decompressed = gz_stream.read()
    with open(self.ungz_path, 'rb') as raw:
        self.assertEqual(decompressed, raw.read())
    gz_stream.close()
def output_stream(filename):
    """Open `filename` (resolved via output_path) for writing."""
    path = output_path(filename)
    return open(path, 'w')
def data_stream(filename):
    """Open `filename` (resolved via data_path) for universal-newline reading."""
    path = data_path(filename)
    return open(path, 'rU')