def kallisto_table(kallisto_dir, index):
    """
    Write kallisto's counts as a CSV matrix and return the path to it.

    The resulting table has one row per equivalence class and one column
    per cell. If ``matrix.csv`` is already present in the ``quant``
    directory it is returned as is and nothing is recomputed.

    kallisto_dir -- directory whose ``quant`` subdirectory contains
        ``matrix.tsv``, ``matrix.ec`` and ``matrix.cells``
    index -- path of the kallisto index; the FASTA file is expected at the
        same path with the extension replaced by ``.fa``
    """
    quant_dir = os.path.join(kallisto_dir, "quant")

    def in_quant(filename):
        # every input and output of this step lives in the quant directory
        return os.path.join(quant_dir, filename)

    out_file = in_quant("matrix.csv")
    if file_exists(out_file):
        return out_file

    # get_ec_names is handed the sequence names read from the FASTA file
    # sitting next to the index
    index_base, _ = os.path.splitext(index)
    fasta_names = fasta.sequence_names(index_base + ".fa")
    ec_names = get_ec_names(in_quant("matrix.ec"), fasta_names)

    # matrix.tsv has no header line: one (ec, cell, count) record per row
    counts = pd.read_csv(in_quant("matrix.tsv"), header=None,
                         names=["ec", "cell", "count"], sep="\t")
    counts["ec"] = [ec_names[ec_id] for ec_id in counts["ec"]]

    # reshape the long records into an ec x cell table
    matrix = counts.pivot(index="ec", columns="cell", values="count")

    # swap the cell identifiers used as column labels for their names
    cellnames = get_cell_names(in_quant("matrix.cells"))
    matrix.columns = [cellnames[cell_id] for cell_id in matrix.columns]

    matrix.to_csv(out_file)
    return out_file
def kallisto_table(kallisto_dir, index):
    """
    Turn kallisto's output into a CSV count table and return its path.

    Rows of the table are equivalence classes, columns are cells. An
    existing ``matrix.csv`` in the ``quant`` directory short-circuits the
    conversion and is returned unchanged.

    kallisto_dir -- directory holding the ``quant`` subdirectory with
        ``matrix.tsv``, ``matrix.ec`` and ``matrix.cells``
    index -- kallisto index path; the matching FASTA file is looked up by
        replacing its extension with ``.fa``
    """
    quant_dir = os.path.join(kallisto_dir, "quant")
    out_file = os.path.join(quant_dir, "matrix.csv")
    if file_exists(out_file):
        return out_file

    tsv_path, ec_path, cells_path = [
        os.path.join(quant_dir, filename)
        for filename in ("matrix.tsv", "matrix.ec", "matrix.cells")
    ]

    # the FASTA file shares the index's base name
    fasta_path = os.path.splitext(index)[0] + ".fa"
    sequence_names = fasta.sequence_names(fasta_path)
    ec_lookup = get_ec_names(ec_path, sequence_names)

    # headerless tab-separated records: equivalence class, cell, count
    column_names = ["ec", "cell", "count"]
    records = pd.read_table(tsv_path, header=None, names=column_names)
    records["ec"] = [ec_lookup[key] for key in records["ec"]]

    # one row per equivalence class, one column per cell
    table = records.pivot(index="ec", columns="cell", values="count")

    # relabel the columns through the lookup read from matrix.cells
    cell_lookup = get_cell_names(cells_path)
    table.columns = [cell_lookup[key] for key in table.columns]

    table.to_csv(out_file)
    return out_file