Example #1
0
def collect_star_alignment_results(input, samples):
    """Collect star alignment results into a single data frame.

    Each file is read as a ``|``-separated two-column table (name, value),
    skipping the lines with 0-based indices 7, 22 and 27. Names are passed
    through ``trim_header(..., underscore=True, percent=True)`` and values
    through ``recast``; the resulting mapping becomes one row labelled with
    the corresponding sample.

    Args:
        input: iterable of paths (or file-like objects) readable by
            ``pd.read_table``.
        samples: iterable of row labels, paired positionally with ``input``.
            Pairing uses ``zip``, so surplus entries on either side are
            ignored.

    Returns:
        A ``pd.DataFrame`` with one row per (file, sample) pair, or ``None``
        when there are no pairs to process.
    """
    frames = []
    for (f, s) in zip(input, samples):
        # NOTE(review): the skipped lines are presumably section headers
        # without a "|" separator -- confirm against the input format.
        df_tmp = pd.read_table(f, sep="|", names=["name", "value"],
                               engine="python", skiprows=[7, 22, 27])
        d = {trim_header(x, underscore=True, percent=True): recast(y)
             for (x, y) in zip(df_tmp["name"], df_tmp["value"])}
        frames.append(pd.DataFrame(d, index=[s]))
    if not frames:
        # Keep the historical contract: nothing to collect yields None.
        return None
    # DataFrame.append was removed in pandas 2.0; a single concat is the
    # supported replacement and avoids re-copying the frame per sample.
    return pd.concat(frames)
Example #2
0
 def test_recast(self):
     """Check the type recast produces for int, float, percent and date strings."""
     # (raw string, exact type expected from recast), checked in order.
     expectations = (
         ("1234", int),
         ("123.45", float),
         ("123,45", float),
         ("23.45%", float),
         ("23,45%", float),
         ("Mar 23 00:24:12", datetime.datetime),
     )
     for raw, expected_type in expectations:
         self.assertEqual(type(recast(raw)), expected_type)
Example #3
0
 def _collect_results(self):
     """Read every input file and store the combined table as ``self['align']``.

     Files in ``self._inputfiles`` are paired positionally with
     ``self._samples``. Each file is read as a ``|``-separated two-column
     table (name, value), skipping the lines with 0-based indices 7, 22 and
     27. Names are passed through
     ``trim_header(..., underscore=True, percent=True)`` and values through
     ``recast``; each file contributes one row, indexed by its sample under
     the index name ``"Sample"``.

     Two derived columns are added before storing the frame:
     ``mismatch_sum`` (mismatch + deletion + insertion rates) and
     ``PCT_of_reads_unmapped`` (sum of the three unmapped categories).

     Raises:
         ValueError: if there are no (file, sample) pairs to collect.
     """
     smllogger.info("collecting results")
     frames = []
     for (f, s) in zip(self._inputfiles, self._samples):
         smllogger.debug("Reading input file {f} for sample {s}".format(f=f, s=s))
         # NOTE(review): the skipped lines are presumably section headers
         # without a "|" separator -- confirm against the input format.
         df_tmp = pd.read_table(f, sep="|",
                                names=["name", "value"],
                                engine="python", skiprows=[7, 22, 27])
         d = {trim_header(x, underscore=True, percent=True): recast(y)
              for (x, y) in zip(df_tmp["name"], df_tmp["value"])}
         frames.append(pd.DataFrame(data=d, index=pd.Index([s], name="Sample")))
     if not frames:
         # Previously this fell through with df = None and failed on the
         # column arithmetic below with an unhelpful TypeError.
         raise ValueError("no input files/samples to collect alignment results from")
     # DataFrame.append was removed in pandas 2.0; concatenate once instead.
     df = pd.concat(frames)
     df['mismatch_sum'] = df['Mismatch_rate_per_base__PCT'] +\
         df['Deletion_rate_per_base'] + df['Insertion_rate_per_base']
     df['PCT_of_reads_unmapped'] = df['PCT_of_reads_unmapped:_other'] +\
         df['PCT_of_reads_unmapped:_too_many_mismatches'] +\
         df['PCT_of_reads_unmapped:_too_short']
     self['align'] = df