def build_probe_2_gene_training_data(self, data_dir, outcome_file,
                                         probe_2_gene_file):
        """Assemble the gene-level CNV training table joined with outcomes.

        The steps, in order:

        1. Read the outcome results from ``outcome_file`` with
           ``ExcelReader.get_cyto_cnv_result``.
        2. Load the CNV data found under ``data_dir`` with
           ``DataReader.cnv_data_reader_pipeline``.
        3. Load the probe/gene lookup tables from ``probe_2_gene_file`` with
           ``DataReader.get_probe_to_gene_table``.
        4. Convert the CNV data to gene level with
           ``DataReader.build_cnv_to_gene``.
        5. Merge the gene-level data with the outcomes using
           ``DataReader.combine_outcome_data``.

        Args:
            data_dir: Location handed to ``cnv_data_reader_pipeline``.
            outcome_file: File handed to ``get_cyto_cnv_result``.
            probe_2_gene_file: File handed to ``get_probe_to_gene_table``.

        Returns:
            The object produced by ``combine_outcome_data`` for the gene-level
            CNV data and the outcomes (presumably a DataFrame -- confirm
            against ``DataReader``).
        """
        excel_reader = ExcelReader()
        reader = DataReader()

        outcomes = excel_reader.get_cyto_cnv_result(outcome_file)
        probe_cnv = reader.cnv_data_reader_pipeline(data_dir)

        # The helper returns a (probe -> gene, gene -> probe) pair; only the
        # gene -> probe direction is needed to aggregate CNV per gene.
        _probe_to_gene, gene_to_probe = reader.get_probe_to_gene_table(
            probe_2_gene_file)

        gene_level_cnv = reader.build_cnv_to_gene(probe_cnv, gene_to_probe)

        # Outcomes are combined with the gene-level CNV, not the raw
        # probe-level data, which only serves as an intermediate here.
        return reader.combine_outcome_data(gene_level_cnv, outcomes)