Beispiel #1
0
    def _get_stats(self):
        """Build a table of read statistics, one row per JSON summary file.

        The files are those returned by ``self._get_files("*.json")``; the
        mode returned alongside them is not used here.

        Each row is labelled ``R1`` or ``R2`` (``R1`` when ``self.tag_R1``
        occurs in the file name) followed by ``.unmapped`` or ``.mapped``
        (``.unmapped`` when the file name contains ``"unmapped"``).

        The ``A``, ``C``, ``G``, ``T`` and ``N`` values of a row are divided
        by that row's ``n_reads``. A file that cannot be read or parsed
        (e.g. an empty file) keeps an all-zero row.

        Returns:
            pandas.DataFrame: one row per file, indexed by the labels
            described above, with ``n_reads`` and ``total bases`` cast to
            ``int64``.
        """
        filenames, _ = self._get_files("*.json")
        bases = ["A", "C", "G", "T", "N"]
        cols = bases + ["n_reads", "mean quality", "GC content",
            "average read length", "total bases"]
        df = pd.DataFrame(np.zeros((len(filenames), len(cols))), columns=cols)

        indices = []
        for i, filename in enumerate(filenames):
            read = "R1" if self.tag_R1 in filename else "R2"
            status = "unmapped" if "unmapped" in filename else "mapped"
            indices.append(read + "." + status)

            try:
                # Best effort: the summary file may be empty or malformed.
                row = pd.read_json(filename).iloc[0].reindex(cols).astype(float)
            except Exception:
                # Leave the pre-filled row of zeros for this file.
                continue
            else:
                # Normalise on a standalone Series and write the row back in
                # one assignment. Chained ``df.iloc[i][col] /= ...`` only
                # modifies a temporary object and may never reach ``df``.
                row.loc[bases] = row.loc[bases] / row["n_reads"]
                df.iloc[i] = row.to_numpy()

        df.index = indices
        df = df.astype({"n_reads": np.int64, "total bases": np.int64})
        return df
Beispiel #2
0
 def get_stats(self):
     """Return per-read statistics loaded from the JSON summary files.

     The files and the mode come from ``self._get_files("*.json")``. When
     the mode is ``"pe"`` the first two files are read and labelled ``R1``
     and ``R2`` in that order; otherwise only the first file is read and
     labelled ``R1``.

     The ``A``, ``C``, ``G``, ``T`` and ``N`` columns are converted to a
     percentage of the ``total bases`` column.

     Returns:
         pandas.DataFrame: indexed by ``R1`` (and ``R2``), restricted to
         the ten statistics columns.
     """
     import pandas as pd

     filenames, mode = self._get_files("*.json")
     if mode == "pe":
         df1 = pd.read_json(filenames[0])
         df2 = pd.read_json(filenames[1])
         df = pd.concat([df1, df2])
         # Relies on the file list being ordered R1 first, then R2.
         df.index = ["R1", "R2"]
     else:
         df = pd.read_json(filenames[0])
         df.index = ["R1"]

     cols = ["A", "C", "G", "T", "N", "n_reads", "mean quality", "GC content",
             "average read length", "total bases"]
     # Explicit copy: the columns are reassigned below, and doing that on a
     # frame obtained by column selection triggers SettingWithCopyWarning.
     df = df[cols].copy()
     for base in "ACGTN":
         df[base] = df[base] / df["total bases"] * 100
     return df
Beispiel #3
0
 def get_stats(self):
     """Return per-read statistics loaded from the JSON summary files.

     The files and the mode come from ``self._get_files("*.json")``. When
     the mode is ``"pe"`` the first two files are read and labelled ``R1``
     and ``R2`` in that order; otherwise only the first file is read and
     labelled ``R1``.

     The ``A``, ``C``, ``G``, ``T`` and ``N`` columns are converted to a
     percentage of the ``total bases`` column.

     Returns:
         pandas.DataFrame: indexed by ``R1`` (and ``R2``), restricted to
         the ten statistics columns.
     """
     import pandas as pd

     filenames, mode = self._get_files("*.json")
     if mode == "pe":
         df1 = pd.read_json(filenames[0])
         df2 = pd.read_json(filenames[1])
         df = pd.concat([df1, df2])
         # Relies on the file list being ordered R1 first, then R2.
         df.index = ["R1", "R2"]
     else:
         df = pd.read_json(filenames[0])
         df.index = ["R1"]

     cols = ["A", "C", "G", "T", "N", "n_reads", "mean quality", "GC content",
             "average read length", "total bases"]
     # Explicit copy: the columns are reassigned below, and doing that on a
     # frame obtained by column selection triggers SettingWithCopyWarning.
     df = df[cols].copy()
     for base in "ACGTN":
         df[base] = df[base] / df["total bases"] * 100
     return df
Beispiel #4
0
    def _get_stats(self):
        """Build a table of read statistics, one row per JSON summary file.

        The files are those returned by ``self._get_files("*.json")``; the
        mode returned alongside them is not used here.

        Each row is labelled ``R1`` or ``R2`` (``R1`` when ``self.tag_R1``
        occurs in the file name) followed by ``.unmapped`` or ``.mapped``
        (``.unmapped`` when the file name contains ``"unmapped"``).

        The ``A``, ``C``, ``G``, ``T`` and ``N`` values of a row are divided
        by that row's ``n_reads``. A file that cannot be read or parsed
        (e.g. an empty file) keeps an all-zero row.

        Returns:
            pandas.DataFrame: one row per file, indexed by the labels
            described above, with ``n_reads`` and ``total bases`` cast to
            ``int64``.
        """
        filenames, _ = self._get_files("*.json")
        bases = ["A", "C", "G", "T", "N"]
        cols = bases + [
            "n_reads", "mean quality", "GC content",
            "average read length", "total bases"
        ]
        df = pd.DataFrame(np.zeros((len(filenames), len(cols))), columns=cols)

        indices = []
        for i, filename in enumerate(filenames):
            read = "R1" if self.tag_R1 in filename else "R2"
            status = "unmapped" if "unmapped" in filename else "mapped"
            indices.append(read + "." + status)

            try:
                # Best effort: the summary file may be empty or malformed.
                row = pd.read_json(filename).iloc[0].reindex(cols).astype(float)
            except Exception:
                # Leave the pre-filled row of zeros for this file.
                continue
            else:
                # Normalise on a standalone Series and write the row back in
                # one assignment. Chained ``df.iloc[i][col] /= ...`` only
                # modifies a temporary object and may never reach ``df``.
                row.loc[bases] = row.loc[bases] / row["n_reads"]
                df.iloc[i] = row.to_numpy()

        df.index = indices
        df = df.astype({"n_reads": np.int64, "total bases": np.int64})
        return df