def prior_file_is_valid(self,
                        prior_path,
                        num_of_samples,
                        num_of_columns=None):
     """Check that a prior file has the expected number of rows and columns.

     Each line is split on whitespace and its field count is compared with
     ``num_of_columns``. Lines matching ``HEADER_PATTERN`` that occur before
     the first data row are not counted as rows, but their field count is
     still checked like any other line.

     Args:
         prior_path: Path of the prior file to inspect.
         num_of_samples: Number of (non-header) rows the file must contain.
         num_of_columns: Number of whitespace-delimited fields every line
             must have. If falsy, the field count of the first line read is
             used as the reference.

     Returns:
         True if the file could be opened and has the expected shape;
         False otherwise (the reason is logged via ``_LOG.error``).
     """
     try:
         # Plain 'r' instead of 'rU': the 'U' flag was removed in
         # Python 3.11 (open() raises ValueError), and text mode already
         # translates newlines on Python 3.
         prior_file = open(prior_path, 'r')
     except (IOError, OSError, TypeError, ValueError):
         # Any path that cannot be opened makes the prior invalid, but
         # KeyboardInterrupt/SystemExit are no longer swallowed.
         _LOG.error('prior invalid: could not open prior path {0}'.format(
             prior_path))
         return False
     nrows = 0
     # The context manager closes the file on every exit path, including
     # the early return on a column-count mismatch.
     with prior_file:
         for i, line in enumerate(prior_file):
             if not (nrows == 0 and HEADER_PATTERN.match(line)):
                 nrows += 1
             ncols = len(line.strip().split())
             if not num_of_columns:
                 num_of_columns = ncols
             if num_of_columns != ncols:
                 _LOG.error('prior invalid: num of columns at line {0} is {1} '
                            'NOT {2}'.format(i + 1, ncols, num_of_columns))
                 return False
     if num_of_samples != nrows:
         _LOG.error('prior invalid: num of rows is {0} NOT {1}'.format(
             nrows, num_of_samples))
         return False
     return True
Esempio n. 2
0
 def prior_file_is_valid(self, prior_path, num_of_samples,
         num_of_columns=None):
     """Validate the row and column counts of a prior file.

     Every line is split on whitespace; its number of fields must equal
     ``num_of_columns``. When ``num_of_columns`` is falsy, the field count
     of the first line read becomes the reference. Lines matching
     ``HEADER_PATTERN`` that precede the first data row are excluded from
     the row count (their field count is still checked).

     Args:
         prior_path: Path of the prior file to check.
         num_of_samples: Expected number of non-header rows.
         num_of_columns: Expected number of fields per line, or None to
             infer it from the file.

     Returns:
         True when the file opens and matches the expected shape, False
         otherwise. Each failure is reported through ``_LOG.error``.
     """
     try:
         # 'rU' is rejected with ValueError since Python 3.11; default text
         # mode already handles newline translation on Python 3.
         prior_file = open(prior_path, 'r')
     except (IOError, OSError, TypeError, ValueError):
         # Narrower than the former bare ``except`` so that
         # KeyboardInterrupt/SystemExit propagate.
         _LOG.error('prior invalid: could not open prior path {0}'.format(
                 prior_path))
         return False
     nrows = 0
     # ``with`` guarantees the handle is released even on the early return
     # below, which previously leaked it.
     with prior_file:
         for i, line in enumerate(prior_file):
             is_leading_header = (nrows == 0 and HEADER_PATTERN.match(line))
             if not is_leading_header:
                 nrows += 1
             ncols = len(line.strip().split())
             if not num_of_columns:
                 num_of_columns = ncols
             if num_of_columns != ncols:
                 _LOG.error('prior invalid: num of columns at line {0} is {1} '
                         'NOT {2}'.format(i+1, ncols, num_of_columns))
                 return False
     if num_of_samples != nrows:
         _LOG.error('prior invalid: num of rows is {0} NOT {1}'.format(
                 nrows, num_of_samples))
         return False
     return True
 def get_number_of_header_lines(self, path):
     """Count the lines of ``path`` that match ``HEADER_PATTERN``.

     Args:
         path: Passed straight to ``process_file_arg`` (defined elsewhere).
             From its use here, that helper returns an iterable of lines
             and a flag telling this method whether to close it.

     Returns:
         The number of lines whose stripped text matches
         ``HEADER_PATTERN``.
     """
     f, close = process_file_arg(path)
     try:
         return sum(1 for l in f if HEADER_PATTERN.match(l.strip()))
     finally:
         # Close the stream even if reading raises, but only when
         # process_file_arg says it is ours to close.
         if close:
             f.close()
Esempio n. 4
0
 def get_number_of_header_lines(self, path):
     """Return how many lines read from ``path`` match ``HEADER_PATTERN``.

     ``path`` is handed to ``process_file_arg`` (defined elsewhere), which,
     as used here, yields a line-iterable stream plus a flag indicating
     whether this method is responsible for closing that stream.

     Args:
         path: Argument forwarded to ``process_file_arg``.

     Returns:
         Count of lines whose stripped content matches ``HEADER_PATTERN``.
     """
     stream, should_close = process_file_arg(path)
     count = 0
     try:
         for raw_line in stream:
             if HEADER_PATTERN.match(raw_line.strip()):
                 count += 1
     finally:
         # Release the stream on the error path too; previously an
         # exception while reading left it open.
         if should_close:
             stream.close()
     return count
 def get_parameter_summaries_from_msbayes_workers(self,
                                                  msbayes_workers,
                                                  shuffle_taus=True):
     """Summarize the parameter columns of the workers' prior files.

     One ``SampleSummarizer`` is created per parameter index of the first
     worker, tagged with the matching header entry. Every non-header row of
     every worker's ``prior_path`` file is then added to those summarizers.

     Args:
         msbayes_workers: Non-empty iterable of worker objects exposing
             ``header``, ``parameter_indices`` and ``prior_path``. All
             workers must share the same header and parameter indices.
         shuffle_taus: If true, the tau values of each row are randomly
             shuffled before being added (because taus are sorted in prior
             files), and the row's psi value is checked to equal the number
             of distinct taus.

     Returns:
         dict mapping each parameter index to its ``SampleSummarizer``.

     Raises:
         AssertionError: via ``self.assertEqual`` if workers disagree on
             header/indices, a row has an unexpected number of fields, or
             psi does not match the number of distinct taus.
         IndexError: if ``msbayes_workers`` is empty.
     """
     msbayes_workers = list(msbayes_workers)
     header = msbayes_workers[0].header
     pi = msbayes_workers[0].parameter_indices
     s = dict((i, SampleSummarizer(tag=header[i])) for i in pi)
     ncols = None
     for w in msbayes_workers:
         self.assertEqual(w.header, header)
         self.assertEqual(w.parameter_indices, pi)
         if shuffle_taus:
             # These depend only on the worker, not on the row, so look
             # them up once per file instead of once per line.
             psi_index = get_indices_of_patterns(
                 w.header, PSI_PATTERNS)[0]
             tau_indices = get_indices_of_patterns(
                 w.header, TAU_PATTERNS)
             non_tau_indices = sorted(
                 set(w.parameter_indices) - set(tau_indices))
         # Plain 'r': the 'U' flag was removed in Python 3.11. ``with``
         # closes the file even when an assertion below fails.
         with open(w.prior_path, 'r') as f:
             for row in f:
                 r = row.strip().split()
                 if not ncols:
                     # The first line read (header included) fixes the
                     # expected field count for all files.
                     ncols = len(r)
                 if HEADER_PATTERN.match(row.strip()):
                     continue
                 # assertEqual rather than a bare ``assert`` so the check
                 # survives ``python -O``.
                 self.assertEqual(len(r), ncols)
                 if shuffle_taus:  # because taus are sorted in prior files
                     psi = int(r[psi_index])
                     taus = [float(r[i]) for i in tau_indices]
                     self.assertEqual(psi, len(set(taus)))
                     random.shuffle(taus)
                     for n, i in enumerate(tau_indices):
                         s[i].add_sample(taus[n])
                     for i in non_tau_indices:
                         s[i].add_sample(float(r[i]))
                 else:
                     for i in w.parameter_indices:
                         s[i].add_sample(float(r[i]))
     return s
Esempio n. 6
0
 def get_parameter_summaries_from_msbayes_workers(self, msbayes_workers,
         shuffle_taus=True):
     """Build per-parameter sample summaries from workers' prior files.

     A ``SampleSummarizer`` is created for each parameter index of the
     first worker (tagged with the corresponding header name). Each worker's
     ``prior_path`` file is then read, header lines are skipped, and the
     parameter values of every remaining row are fed to the summarizers.

     Args:
         msbayes_workers: Non-empty iterable of workers providing
             ``header``, ``parameter_indices`` and ``prior_path``; all are
             asserted to have identical headers and parameter indices.
         shuffle_taus: When true, each row's taus are shuffled at random
             before being recorded (because taus are sorted in prior
             files), after checking that psi equals the number of distinct
             tau values in the row.

     Returns:
         dict keyed by parameter index whose values are the populated
         ``SampleSummarizer`` objects.

     Raises:
         AssertionError: raised through ``self.assertEqual`` on mismatched
             worker headers/indices, a row with the wrong number of fields,
             or a psi value inconsistent with the row's taus.
         IndexError: if no workers are supplied.
     """
     msbayes_workers = list(msbayes_workers)
     first_worker = msbayes_workers[0]
     header = first_worker.header
     pi = first_worker.parameter_indices
     s = dict((i, SampleSummarizer(tag=header[i])) for i in pi)
     ncols = None
     for w in msbayes_workers:
         self.assertEqual(w.header, header)
         self.assertEqual(w.parameter_indices, pi)
         if shuffle_taus:
             # Row-independent lookups, hoisted out of the per-line loop.
             psi_index = get_indices_of_patterns(w.header,
                     PSI_PATTERNS)[0]
             tau_indices = get_indices_of_patterns(w.header,
                     TAU_PATTERNS)
             other_indices = sorted(
                     set(w.parameter_indices) - set(tau_indices))
         # 'rU' raises ValueError on Python 3.11+, so open in plain text
         # mode; the context manager also closes the file if a check fails.
         with open(w.prior_path, 'r') as f:
             for row in f:
                 fields = row.strip().split()
                 if not ncols:
                     # Reference field count comes from the first line
                     # read, which may be a header line.
                     ncols = len(fields)
                 if HEADER_PATTERN.match(row.strip()):
                     continue
                 # Not a bare ``assert``: that would vanish under -O.
                 self.assertEqual(len(fields), ncols)
                 if shuffle_taus: # because taus are sorted in prior files
                     psi = int(fields[psi_index])
                     taus = [float(fields[i]) for i in tau_indices]
                     self.assertEqual(psi, len(set(taus)))
                     random.shuffle(taus)
                     for i, tau in zip(tau_indices, taus):
                         s[i].add_sample(tau)
                     for i in other_indices:
                         s[i].add_sample(float(fields[i]))
                 else:
                     for i in w.parameter_indices:
                         s[i].add_sample(float(fields[i]))
     return s