def prior_file_is_valid(self, prior_path, num_of_samples,
        num_of_columns=None):
    """Check that a prior file has the expected number of rows and columns.

    A line matching ``HEADER_PATTERN`` is not counted as a row as long as
    no row has been counted yet; every other line counts as one row. Every
    line, header included, must split on whitespace into the same number
    of columns.

    Args:
        prior_path: Path of the prior file to check.
        num_of_samples: Required number of counted rows.
        num_of_columns: Required number of whitespace-delimited columns.
            If falsy, the column count of the first line read is used.

    Returns:
        True if the file could be opened and its dimensions match;
        otherwise False, after the reason is logged with ``_LOG.error``.
    """
    try:
        # Plain 'r' rather than 'rU': Python 3.11+ rejects the 'U' flag
        # with ValueError, which used to be reported as an unopenable
        # file. Python 3 text mode already does newline translation.
        prior_file = open(prior_path, 'r')
    except (IOError, OSError, TypeError, ValueError):
        # TypeError/ValueError cover unusable path objects, so the
        # "return False when it cannot be opened" contract is kept without
        # also swallowing KeyboardInterrupt or SystemExit.
        _LOG.error('prior invalid: could not open prior path {0}'.format(
                prior_path))
        return False
    nrows = 0
    # The with-block closes the file on the early return below as well.
    with prior_file:
        for i, line in enumerate(prior_file):
            # Only lines seen before the first counted row may be headers.
            if nrows > 0 or not HEADER_PATTERN.match(line):
                nrows += 1
            ncols = len(line.strip().split())
            if not num_of_columns:
                num_of_columns = ncols
            if num_of_columns != ncols:
                _LOG.error('prior invalid: num of columns at line {0} is {1} '
                        'NOT {2}'.format(i + 1, ncols, num_of_columns))
                return False
    if num_of_samples != nrows:
        _LOG.error('prior invalid: num of rows is {0} NOT {1}'.format(
                nrows, num_of_samples))
        return False
    return True
def prior_file_is_valid(self, prior_path, num_of_samples,
        num_of_columns=None):
    """Check that a prior file has the expected number of rows and columns.

    A line matching ``HEADER_PATTERN`` is not counted as a row as long as
    no row has been counted yet; every other line counts as one row. Every
    line, header included, must split on whitespace into the same number
    of columns.

    Args:
        prior_path: Path of the prior file to check.
        num_of_samples: Required number of counted rows.
        num_of_columns: Required number of whitespace-delimited columns.
            If falsy, the column count of the first line read is used.

    Returns:
        True if the file could be opened and its dimensions match;
        otherwise False, after the reason is logged with ``_LOG.error``.
    """
    try:
        # Plain 'r' rather than 'rU': Python 3.11+ rejects the 'U' flag
        # with ValueError, which used to be reported as an unopenable
        # file. Python 3 text mode already does newline translation.
        prior_file = open(prior_path, 'r')
    except (IOError, OSError, TypeError, ValueError):
        # TypeError/ValueError cover unusable path objects, so the
        # "return False when it cannot be opened" contract is kept without
        # also swallowing KeyboardInterrupt or SystemExit.
        _LOG.error('prior invalid: could not open prior path {0}'.format(
                prior_path))
        return False
    nrows = 0
    # The with-block closes the file on the early return below as well.
    with prior_file:
        for i, line in enumerate(prior_file):
            # Only lines seen before the first counted row may be headers.
            if nrows > 0 or not HEADER_PATTERN.match(line):
                nrows += 1
            ncols = len(line.strip().split())
            if not num_of_columns:
                num_of_columns = ncols
            if num_of_columns != ncols:
                _LOG.error('prior invalid: num of columns at line {0} is {1} '
                        'NOT {2}'.format(i+1, ncols, num_of_columns))
                return False
    if num_of_samples != nrows:
        _LOG.error('prior invalid: num of rows is {0} NOT {1}'.format(
                nrows, num_of_samples))
        return False
    return True
def get_number_of_header_lines(self, path):
    """Count the lines of a file that match ``HEADER_PATTERN``.

    Args:
        path: Handed to ``process_file_arg``, which returns the object to
            iterate over plus a flag saying whether it must be closed
            here.

    Returns:
        The number of lines whose stripped text matches
        ``HEADER_PATTERN``.
    """
    f, close = process_file_arg(path)
    try:
        return sum(1 for line in f if HEADER_PATTERN.match(line.strip()))
    finally:
        # Release the handle even if reading or matching raises.
        if close:
            f.close()
def get_parameter_summaries_from_msbayes_workers(self, msbayes_workers,
        shuffle_taus=True):
    """Summarize the parameter columns of the workers' prior files.

    One ``SampleSummarizer`` is created per parameter index of the first
    worker, tagged with the matching header entry. Every worker must have
    the same ``header`` and ``parameter_indices`` as the first one. Each
    non-header row of each worker's ``prior_path`` file is split on
    whitespace and its parameter values are added to the summarizers.

    Args:
        msbayes_workers: Non-empty iterable of worker objects providing
            ``header``, ``parameter_indices`` and ``prior_path``.
        shuffle_taus: If True, the tau values of each row are shuffled
            before being assigned to the tau columns (because taus are
            sorted in prior files), and the row's psi value is checked
            against the number of distinct taus.

    Returns:
        dict mapping each parameter index to its ``SampleSummarizer``.

    Raises:
        AssertionError: via ``self.assertEqual`` when workers disagree on
            header or parameter indices, a row has a different number of
            columns than the first line read, or psi does not equal the
            number of distinct taus.
    """
    msbayes_workers = list(msbayes_workers)
    header = msbayes_workers[0].header
    pi = msbayes_workers[0].parameter_indices
    s = dict((i, SampleSummarizer(tag=header[i])) for i in pi)
    # Column count of the first line read; shared across all workers.
    ncols = None
    for w in msbayes_workers:
        self.assertEqual(w.header, header)
        self.assertEqual(w.parameter_indices, pi)
        if shuffle_taus:
            # These depend only on the worker, not on the row, so look
            # them up once per file instead of once per line.
            psi_indices = get_indices_of_patterns(w.header, PSI_PATTERNS)
            tau_indices = list(
                    get_indices_of_patterns(w.header, TAU_PATTERNS))
            non_tau_indices = sorted(
                    set(w.parameter_indices) - set(tau_indices))
        # Plain 'r' rather than 'rU' (the 'U' flag is rejected by Python
        # 3.11+); the with-block closes the file even when a check fails.
        with open(w.prior_path, 'r') as f:
            for row in f:
                stripped = row.strip()
                r = stripped.split()
                if not ncols:
                    ncols = len(r)
                # Skip header lines in every file, not only the first.
                if HEADER_PATTERN.match(stripped):
                    continue
                # assertEqual instead of a bare assert, which is removed
                # when Python runs with -O.
                self.assertEqual(len(r), ncols)
                if shuffle_taus:
                    psi = int(r[psi_indices[0]])
                    taus = [float(r[i]) for i in tau_indices]
                    self.assertEqual(psi, len(set(taus)))
                    random.shuffle(taus)
                    for i, tau in zip(tau_indices, taus):
                        s[i].add_sample(tau)
                    for i in non_tau_indices:
                        s[i].add_sample(float(r[i]))
                else:
                    for i in w.parameter_indices:
                        s[i].add_sample(float(r[i]))
    return s
def get_parameter_summaries_from_msbayes_workers(self, msbayes_workers,
        shuffle_taus=True):
    """Summarize the parameter columns of the workers' prior files.

    One ``SampleSummarizer`` is created per parameter index of the first
    worker, tagged with the matching header entry. Every worker must have
    the same ``header`` and ``parameter_indices`` as the first one. Each
    non-header row of each worker's ``prior_path`` file is split on
    whitespace and its parameter values are added to the summarizers.

    Args:
        msbayes_workers: Non-empty iterable of worker objects providing
            ``header``, ``parameter_indices`` and ``prior_path``.
        shuffle_taus: If True, the tau values of each row are shuffled
            before being assigned to the tau columns (because taus are
            sorted in prior files), and the row's psi value is checked
            against the number of distinct taus.

    Returns:
        dict mapping each parameter index to its ``SampleSummarizer``.

    Raises:
        AssertionError: via ``self.assertEqual`` when workers disagree on
            header or parameter indices, a row has a different number of
            columns than the first line read, or psi does not equal the
            number of distinct taus.
    """
    msbayes_workers = list(msbayes_workers)
    header = msbayes_workers[0].header
    pi = msbayes_workers[0].parameter_indices
    s = dict((i, SampleSummarizer(tag=header[i])) for i in pi)
    # Column count of the first line read; shared across all workers.
    ncols = None
    for w in msbayes_workers:
        self.assertEqual(w.header, header)
        self.assertEqual(w.parameter_indices, pi)
        if shuffle_taus:
            # These depend only on the worker, not on the row, so look
            # them up once per file instead of once per line.
            psi_indices = get_indices_of_patterns(w.header, PSI_PATTERNS)
            tau_indices = list(
                    get_indices_of_patterns(w.header, TAU_PATTERNS))
            non_tau_indices = sorted(
                    set(w.parameter_indices) - set(tau_indices))
        # Plain 'r' rather than 'rU' (the 'U' flag is rejected by Python
        # 3.11+); the with-block closes the file even when a check fails.
        with open(w.prior_path, 'r') as f:
            for row in f:
                stripped = row.strip()
                r = stripped.split()
                if not ncols:
                    ncols = len(r)
                # Skip header lines in every file, not only the first.
                if HEADER_PATTERN.match(stripped):
                    continue
                # assertEqual instead of a bare assert, which is removed
                # when Python runs with -O.
                self.assertEqual(len(r), ncols)
                if shuffle_taus:
                    psi = int(r[psi_indices[0]])
                    taus = [float(r[i]) for i in tau_indices]
                    self.assertEqual(psi, len(set(taus)))
                    random.shuffle(taus)
                    for i, tau in zip(tau_indices, taus):
                        s[i].add_sample(tau)
                    for i in non_tau_indices:
                        s[i].add_sample(float(r[i]))
                else:
                    for i in w.parameter_indices:
                        s[i].add_sample(float(r[i]))
    return s