Example #1
0
 def _export_labels(self) -> ReportOutput:
     if self.dataset.encoded_data.labels is not None:
         labels_df = pd.DataFrame(self.dataset.encoded_data.labels)
         file_path = f"{self.result_path}labels.csv"
         labels_df.to_csv(file_path, sep=",", index=False)
         return ReportOutput(file_path, "exported labels")
Example #2
0
 def _write_results_table(self, data):
     """Save ``data`` as a CSV file and wrap its location in a ReportOutput.

     The file name is ``self.result_name`` with a ``.csv`` suffix, appended
     directly to ``self.result_path``; the index is not written.

     Args:
         data: object exposing ``to_csv(path, index=...)``
             (presumably a pandas DataFrame; confirm against callers).

     Returns:
         ReportOutput for the written table, labelled "feature values".
     """
     csv_location = f"{self.result_path}{self.result_name}.csv"
     data.to_csv(csv_location, index=False)
     output = ReportOutput(csv_location, "feature values")
     return output
Example #3
0
 def _export_matrix(self) -> ReportOutput:
     """Persist the design matrix and describe the resulting file.

     The data comes from ``self._get_data()`` and is handed to
     ``self._save_to_file`` together with the target path stem
     ``<result_path>design_matrix``; whatever path that helper returns is
     the one reported.

     Returns:
         ReportOutput for the saved file, labelled "design matrix".
     """
     matrix = self._get_data()
     target_stem = f"{self.result_path}design_matrix"
     written_path = self._save_to_file(matrix, target_stem)
     return ReportOutput(written_path, "design matrix")
Example #4
0
    def export_receptorlist(self, receptors, result_path):
        """Export paired receptor chains as network files.

        Four tab-separated files are written, each name appended directly to
        ``result_path``:

        * ``node_metadata.tsv`` - one row per distinct chain, with an
          ``n_duplicates`` count of how often that exact row occurred.
        * ``edge_metadata.tsv`` - one row per distinct chain pair.
        * ``all_chains.sif`` - every ``<first chain> pair <second chain>``
          relation (de-duplicated when ``self.drop_duplicates`` is set).
        * ``shared_chains.sif`` - the subset of relations whose first or
          second chain also appears in at least one other relation.

        Args:
            receptors: iterable of objects exposing ``get_chain(chain_name)``.
            result_path: path prefix the file names are appended to (no
                separator is inserted).

        Returns:
            list of four ReportOutput objects, in the order node metadata,
            edge metadata, all chains, shared chains.
        """
        first_label, second_label = self.chains[0], self.chains[1]

        export_list = []
        node_metadata_list = []
        edge_metadata_list = []

        for receptor in receptors:
            first_chain = receptor.get_chain(first_label)
            second_chain = receptor.get_chain(second_label)
            first_chain_name = self.get_shared_name(first_chain)
            second_chain_name = self.get_shared_name(second_chain)

            export_list.append([first_chain_name, "pair", second_chain_name])

            node_metadata_list.append(
                [first_chain_name, first_label] +
                self.get_formatted_node_metadata(first_chain))
            node_metadata_list.append(
                [second_chain_name, second_label] +
                self.get_formatted_node_metadata(second_chain))

            edge_metadata_list.append(
                [f"{first_chain_name} (pair) {second_chain_name}"] +
                self.get_formatted_edge_metadata(first_chain, second_chain))

        full_df = pd.DataFrame(
            export_list,
            columns=[first_label, "relationship", second_label])
        node_meta_df = pd.DataFrame(
            node_metadata_list,
            columns=[
                "shared_name", "chain", "sequence", "v_subgroup", "v_gene",
                "j_subgroup", "j_gene"
            ] + self.additional_node_attributes)
        edge_meta_df = pd.DataFrame(edge_metadata_list,
                                    columns=["shared_name"] +
                                    self.additional_edge_attributes)

        # Collapse identical node rows into one, counting the occurrences.
        # NOTE(review): groupby drops rows whose key columns contain NaN by
        # default - confirm node metadata is never missing, or such chains
        # will silently disappear from node_metadata.tsv.
        node_cols = list(node_meta_df.columns)
        node_meta_df["n_duplicates"] = 1
        node_meta_df = node_meta_df.groupby(
            node_cols, as_index=False)["n_duplicates"].sum()

        edge_meta_df.drop_duplicates(inplace=True)

        node_path = f"{result_path}node_metadata.tsv"
        edge_path = f"{result_path}edge_metadata.tsv"
        all_chains_path = f"{result_path}all_chains.sif"
        shared_chains_path = f"{result_path}shared_chains.sif"

        node_meta_df.to_csv(node_path, sep="\t", index=False, header=True)
        edge_meta_df.to_csv(edge_path, sep="\t", index=False, header=True)

        if self.drop_duplicates:
            full_df.drop_duplicates(inplace=True)

        full_df.to_csv(all_chains_path, sep="\t", index=False, header=False)

        # Use the configured chain names rather than hard-coded
        # "alpha"/"beta": the columns of full_df are named after self.chains,
        # so any other chain pair would otherwise raise a KeyError here.
        shared_df = full_df[full_df.duplicated([first_label], keep=False) |
                            full_df.duplicated([second_label], keep=False)]
        shared_df.to_csv(shared_chains_path,
                         sep="\t",
                         index=False,
                         header=False)

        return [
            ReportOutput(path=node_path),
            ReportOutput(path=edge_path),
            ReportOutput(path=all_chains_path),
            ReportOutput(path=shared_chains_path)
        ]