def _get_stats(self):
    """Collect per-file read statistics from the ``*.json`` files.

    One row is produced for each file returned by
    ``self._get_files("*.json")``. A row is labelled ``R1`` when
    ``self.tag_R1`` occurs in the file name and ``R2`` otherwise, followed by
    ``.unmapped`` when ``"unmapped"`` occurs in the file name and ``.mapped``
    otherwise.

    For each file the first record is loaded and its ``A``, ``C``, ``G``,
    ``T`` and ``N`` values are divided by that record's ``n_reads``. A file
    that cannot be loaded (empty or malformed JSON, no record, missing
    column) keeps an all-zero row and a warning is logged.

    :return: DataFrame indexed by the labels described above; ``n_reads`` and
        ``total bases`` are cast to ``int64``, the other columns are floats.
    """
    import logging

    filenames, _mode = self._get_files("*.json")
    cols = [
        "A",
        "C",
        "G",
        "T",
        "N",
        "n_reads",
        "mean quality",
        "GC content",
        "average read length",
        "total bases",
    ]
    bases = ["A", "C", "G", "T", "N"]

    # Rows start at zero so that an unreadable file still gets an entry.
    df = pd.DataFrame(np.zeros((len(filenames), len(cols))), columns=cols)
    indices = []
    for i, filename in enumerate(filenames):
        read = "R1" if self.tag_R1 in filename else "R2"
        status = ".unmapped" if "unmapped" in filename else ".mapped"
        indices.append(read + status)

        try:
            # Select by label so each value lands in the right column
            # whatever the column order (or extra columns) in the file.
            row = pd.read_json(filename).iloc[0].loc[cols].astype(float)
        except (ValueError, IndexError, KeyError, TypeError, OSError) as err:
            # Best effort: the statistics file may legitimately be empty.
            logging.getLogger(__name__).warning(
                "Could not load statistics from %s (%s); row left at zero",
                filename,
                err,
            )
            continue

        # Normalise on a standalone Series and write the row back in a single
        # assignment. ``df.iloc[i][col] /= ...`` would update a temporary
        # object and is not guaranteed to modify ``df``.
        row.loc[bases] = row.loc[bases] / row["n_reads"]
        df.iloc[i] = row.to_numpy()

    df.index = indices
    return df.astype({"n_reads": np.int64, "total bases": np.int64})
def get_stats(self):
    """Return summary statistics and base composition per read file.

    The files come from ``self._get_files("*.json")``. When the returned mode
    is ``"pe"`` the first two files are loaded and labelled ``R1`` and ``R2``
    (in the order received); otherwise only the first file is loaded and
    labelled ``R1``.

    The ``A``, ``C``, ``G``, ``T`` and ``N`` columns are converted into
    percentages of ``total bases``; the remaining columns are returned as
    read from the files.

    :return: DataFrame indexed by ``R1`` (and ``R2``) with the columns
        ``A``, ``C``, ``G``, ``T``, ``N``, ``n_reads``, ``mean quality``,
        ``GC content``, ``average read length`` and ``total bases``.
    """
    import pandas as pd

    filenames, mode = self._get_files("*.json")
    if mode == "pe":
        # The two files are assumed to arrive in R1, R2 order.
        df = pd.concat([pd.read_json(filenames[0]), pd.read_json(filenames[1])])
        df.index = ["R1", "R2"]
    else:
        df = pd.read_json(filenames[0])
        df.index = ["R1"]

    cols = [
        "A",
        "C",
        "G",
        "T",
        "N",
        "n_reads",
        "mean quality",
        "GC content",
        "average read length",
        "total bases",
    ]
    bases = ["A", "C", "G", "T", "N"]

    # Explicit copy: assigning columns on a bare column selection triggers
    # SettingWithCopyWarning on pandas versions without Copy-on-Write.
    df = df[cols].copy()
    df[bases] = df[bases].div(df["total bases"], axis=0) * 100
    return df
def _get_stats(self):
    """Collect per-file read statistics from the ``*.json`` files.

    One row is produced for each file returned by
    ``self._get_files("*.json")``. A row is labelled ``R1`` when
    ``self.tag_R1`` occurs in the file name and ``R2`` otherwise, followed by
    ``.unmapped`` when ``"unmapped"`` occurs in the file name and ``.mapped``
    otherwise.

    For each file the first record is loaded and its ``A``, ``C``, ``G``,
    ``T`` and ``N`` values are divided by that record's ``n_reads``. A file
    that cannot be loaded (empty or malformed JSON, no record, missing
    column) keeps an all-zero row and a warning is logged.

    :return: DataFrame indexed by the labels described above; ``n_reads`` and
        ``total bases`` are cast to ``int64``, the other columns are floats.
    """
    import logging

    filenames, _mode = self._get_files("*.json")
    cols = [
        "A",
        "C",
        "G",
        "T",
        "N",
        "n_reads",
        "mean quality",
        "GC content",
        "average read length",
        "total bases",
    ]
    bases = ["A", "C", "G", "T", "N"]

    # Rows start at zero so that an unreadable file still gets an entry.
    df = pd.DataFrame(np.zeros((len(filenames), len(cols))), columns=cols)
    indices = []
    for i, filename in enumerate(filenames):
        read = "R1" if self.tag_R1 in filename else "R2"
        status = ".unmapped" if "unmapped" in filename else ".mapped"
        indices.append(read + status)

        try:
            # Select by label so each value lands in the right column
            # whatever the column order (or extra columns) in the file.
            row = pd.read_json(filename).iloc[0].loc[cols].astype(float)
        except (ValueError, IndexError, KeyError, TypeError, OSError) as err:
            # Best effort: the statistics file may legitimately be empty.
            logging.getLogger(__name__).warning(
                "Could not load statistics from %s (%s); row left at zero",
                filename,
                err,
            )
            continue

        # Normalise on a standalone Series and write the row back in a single
        # assignment. ``df.iloc[i][col] /= ...`` would update a temporary
        # object and is not guaranteed to modify ``df``.
        row.loc[bases] = row.loc[bases] / row["n_reads"]
        df.iloc[i] = row.to_numpy()

    df.index = indices
    return df.astype({"n_reads": np.int64, "total bases": np.int64})