Exemple #1
0
    def test_IgorModel_Parms(self):
        """Check that the 'anchor_index' column appears only after anchors are attached.

        The test reads the default human TCR beta model parameters into an
        empty ``IgorModel_Parms``, asserts that neither the V nor the J
        reference-genome dataframe has an 'anchor_index' column yet, attaches
        the default V and J CDR3 anchor files, and asserts the column is now
        present in both dataframes.
        """
        species = "human"
        chain = "tcr_beta"

        mdl_parms = IgorModel_Parms()

        # Only the parms path is used here; the marginals path is discarded.
        fln_model_parms, _ = get_default_models_paths_species_chain(
            species, chain)
        self.assertTrue(os.path.isfile(fln_model_parms))
        mdl_parms.read_model_parms(fln_model_parms)

        # Before attaching anchors: no 'anchor_index' column.
        # assertNotIn/assertIn report the actual column list on failure,
        # unlike assertTrue/assertFalse on a bare `in` expression.
        self.assertNotIn(
            'anchor_index', mdl_parms.df_V_ref_genome.columns.to_list())
        self.assertNotIn(
            'anchor_index', mdl_parms.df_J_ref_genome.columns.to_list())

        fln_dict = get_default_fln_dict_ref_genomes_species_chain(
            species, chain)
        self.assertTrue(os.path.isfile(fln_dict["fln_V_gene_CDR3_anchors"]))
        self.assertTrue(os.path.isfile(fln_dict["fln_J_gene_CDR3_anchors"]))

        mdl_parms.attach_V_anchors_from_file(
            fln_dict["fln_V_gene_CDR3_anchors"])
        mdl_parms.attach_J_anchors_from_file(
            fln_dict["fln_J_gene_CDR3_anchors"])

        # After attaching anchors: the column must exist in both dataframes.
        self.assertIn(
            'anchor_index', mdl_parms.df_V_ref_genome.columns.to_list())
        self.assertIn(
            'anchor_index', mdl_parms.df_J_ref_genome.columns.to_list())
Exemple #2
0
    def test_IgorModel_Parms_get_IgorRefGenome_VJ(self):
        """
        Build an IgorRefGenome from the GeneChoice events of a VJ model
        and write it as a ref_genome directory that can be used to run IGoR.

        Uses the default human TCR alpha model. The test asserts that the
        model has no D gene-choice event and no D reference dataframe, that
        the V/J fasta and anchor files are written, and that no
        ``genomicDs.fasta`` file is produced.
        """
        species = "human"
        chain = "tcr_alpha"
        # Only the parms path is used here; the marginals path is discarded.
        fln_model_parms, _ = get_default_models_paths_species_chain(
            species, chain)
        fln_dict = get_default_fln_dict_ref_genomes_species_chain(
            species, chain)

        # 1. Make an IgorModel_Parms from the parms file and anchor files
        mdl_parms = IgorModel_Parms(
            model_parms_file=fln_model_parms,
            fln_V_gene_CDR3_anchors=fln_dict["fln_V_gene_CDR3_anchors"],
            fln_J_gene_CDR3_anchors=fln_dict["fln_J_gene_CDR3_anchors"])

        self.assertIsNone(mdl_parms.event_GeneChoice_D)
        self.assertIsNone(mdl_parms.df_D_ref_genome)

        # 2. Get IgorRefGenome from events
        ref_genome = mdl_parms.get_IgorRefGenome()

        # 3. Write a ref_genome_dir. The context manager guarantees the
        #    directory is removed even when one of the assertions below fails.
        with tempfile.TemporaryDirectory(
                dir='.', prefix="ref_genome") as ref_genome_dir:
            ref_genome.write_ref_genome_dir(ref_genome_dir)

            expected_files = (
                "genomicVs.fasta",
                "genomicJs.fasta",
                "V_gene_CDR3_anchors.csv",
                "J_gene_CDR3_anchors.csv",
            )
            for file_name in expected_files:
                self.assertTrue(
                    os.path.isfile(os.path.join(ref_genome_dir, file_name)),
                    msg="missing file: " + file_name)

            # A VJ model must not produce a D genomic fasta file.
            self.assertFalse(
                os.path.isfile(os.path.join(ref_genome_dir,
                                            "genomicDs.fasta")))
Exemple #3
0
    def test_IgorModel_Parms_from_dataframe(self):
        """Build default VDJ parms and uniform marginals from genomic dataframes.

        Starts from a deep copy of the default human TCR beta genomic
        dataframes, relabels the V and J names with ``v_genLabel``, takes the
        V rows selected by ``.loc[10:15]`` plus one hand-written pseudo V gene,
        and asserts that ``IgorModel_Parms.make_default_VDJ`` returns an
        ``IgorModel_Parms``. Edges and network dictionaries of both models are
        printed for visual comparison.
        """
        import copy

        import pandas as pd

        mdl_hb = get_default_IgorModel("human", "tcr_beta")
        # Deep copy so the relabeling below does not touch mdl_hb's own frames.
        genomic_dict = copy.deepcopy(mdl_hb.genomic_dataframe_dict)

        genomic_dict['V']['name'] = v_genLabel(genomic_dict['V']['name'])
        genomic_dict['J']['name'] = v_genLabel(genomic_dict['J']['name'])

        new_V_gene_dict = {
            'name': 'my_pseudo_TRBV',
            'value':
            'AAACCCTTTGGGACCCAGAGCCCAAGACACAAGATCACAGAGACAGGAAGGCAGGTGACCTTGGCGTGTCACCAGACTTGGAACCACAACAATATGTTCTGGTATCGACAAGACCTGGGACATGGGCTGAGGCTGATCCATTACTCATATGGTGTTCACGACACTAACAAAGGAGAAGTCTCAGATGGCTACAGTGTCTCTAGATCAAACACAGAGGACCTCCCCCTCACTCTGTAGTCTGCTGCCTCCTCCCAGACATCTGTATATTTCTGCGCCAGCAGTGAGTC',
            'anchor_index': 270
        }
        # DataFrame.append was removed in pandas 2.0; pd.concat with a
        # one-row frame is the supported way to add the extra gene.
        df_V = pd.concat(
            [genomic_dict['V'].loc[10:15],
             pd.DataFrame([new_V_gene_dict])],
            ignore_index=True)
        df_V.index.name = 'id'

        mdl_parms_0 = IgorModel_Parms.make_default_VDJ(df_V, genomic_dict['D'],
                                                       genomic_dict['J'])
        mdl_marginals_0 = IgorModel_Marginals.make_uniform_from_parms(
            mdl_parms_0)
        print("parms.Edges: ")
        print(mdl_hb.parms.Edges)
        print(mdl_parms_0.Edges)

        print("parms.Edges_dict:")
        print(mdl_hb.parms.Edges_dict)
        print(mdl_parms_0.Edges_dict)
        print("marginals.network_dict")
        print(mdl_hb.marginals.network_dict)
        print(mdl_marginals_0.network_dict)

        self.assertIsInstance(mdl_parms_0, IgorModel_Parms)
Exemple #4
0
    def test_get_df_ref_genome_from_files(self):
        """Build V, D and J reference dataframes from in-memory mock files.

        The mock fasta/anchor strings are wrapped in ``io.StringIO`` objects
        and passed to ``get_dataframe_from_fasta_and_csv_anchors`` (D has no
        anchors file). Each result must be a ``pd.DataFrame``, and
        ``IgorModel_Parms.make_default_VDJ`` built from them must return an
        ``IgorModel_Parms``.
        """
        # File-like objects standing in for the files on disk.
        v_fasta = io.StringIO(str_mock_VDJ_fln_genomicVs)
        v_anchors = io.StringIO(str_mock_VDJ_fln_V_gene_CDR3_anchors)
        d_fasta = io.StringIO(str_mock_VDJ_fln_genomicDs)
        j_fasta = io.StringIO(str_mock_VDJ_fln_genomicJs)
        j_anchors = io.StringIO(str_mock_VDJ_fln_J_gene_CDR3_anchors)

        df_V = get_dataframe_from_fasta_and_csv_anchors(v_fasta, v_anchors)
        # D genes are loaded from the fasta only, without an anchors file.
        df_D = get_dataframe_from_fasta_and_csv_anchors(d_fasta)
        df_J = get_dataframe_from_fasta_and_csv_anchors(j_fasta, j_anchors)

        for ref_frame in (df_V, df_D, df_J):
            self.assertIsInstance(ref_frame, pd.DataFrame)

        parms = IgorModel_Parms.make_default_VDJ(df_V, df_D, df_J)
        self.assertIsInstance(parms, IgorModel_Parms)
Exemple #5
0
    def test_IgorModel_Parms_get_IgorRefGenome_VDJ(self):
        """
        Build an IgorRefGenome from a VDJ model and write it as a ref_genome
        directory.

        Uses the default human TCR beta model. The test asserts that V/J
        anchors, the D gene-choice event and the D reference dataframe are all
        set, then checks that the V, D and J fasta files and the V and J
        anchor files are written to the output directory.
        """
        species = "human"
        chain = "tcr_beta"
        # Only the parms path is used here; the marginals path is discarded.
        fln_model_parms, _ = get_default_models_paths_species_chain(
            species, chain)
        fln_dict = get_default_fln_dict_ref_genomes_species_chain(
            species, chain)

        mdl_parms = IgorModel_Parms(
            model_parms_file=fln_model_parms,
            fln_V_gene_CDR3_anchors=fln_dict["fln_V_gene_CDR3_anchors"],
            fln_J_gene_CDR3_anchors=fln_dict["fln_J_gene_CDR3_anchors"])

        self.assertIsNotNone(mdl_parms.df_V_anchors)
        self.assertIsNotNone(mdl_parms.df_J_anchors)
        self.assertIsNotNone(mdl_parms.event_GeneChoice_D)
        self.assertIsNotNone(mdl_parms.df_D_ref_genome)
        ref_genome = mdl_parms.get_IgorRefGenome()
        mdl_parms.gen_EventDict_DataFrame()

        # The context manager guarantees the directory is removed even when
        # one of the assertions below fails.
        with tempfile.TemporaryDirectory(
                dir='.', prefix="ref_genome") as ref_genome_dir:
            ref_genome.write_ref_genome_dir(ref_genome_dir)
            print(ref_genome.to_dict())

            expected_files = (
                "genomicVs.fasta",
                "genomicDs.fasta",
                "genomicJs.fasta",
                "V_gene_CDR3_anchors.csv",
                "J_gene_CDR3_anchors.csv",
            )
            for file_name in expected_files:
                self.assertTrue(
                    os.path.isfile(os.path.join(ref_genome_dir, file_name)),
                    msg="missing file: " + file_name)
Exemple #6
0
    def test_IgorModel_write_model(self):
        """Write the default human TCR beta model to disk and reload it.

        The model is written as parms, marginals and V/J anchor files in the
        current working directory, reloaded as a second ``IgorModel``, and the
        'j_choice' event realizations are replaced by their first four rows.
        The test asserts that the reloaded object is an ``IgorModel`` and that
        the four files exist. The files are removed afterwards, whether or not
        the test passed.
        """
        import copy

        mdl_hb = IgorModel.load_default("human", "tcr_beta")
        fln_model_parms = 'model_parms.txt'
        fln_model_marginals = 'model_marginals.txt'
        fln_V_gene_CDR3_anchors = 'V_gene_CDR3_anchors.csv'
        fln_J_gene_CDR3_anchors = 'J_gene_CDR3_anchors.csv'
        output_files = (fln_model_parms, fln_model_marginals,
                        fln_V_gene_CDR3_anchors, fln_J_gene_CDR3_anchors)

        try:
            ## TODO: ADD anchors
            mdl_hb.write_model(fln_model_parms, fln_model_marginals,
                               fln_V_gene_CDR3_anchors,
                               fln_J_gene_CDR3_anchors)
            mdl_hb_2 = IgorModel(
                fln_model_parms,
                fln_model_marginals,
                fln_V_gene_CDR3_anchors=fln_V_gene_CDR3_anchors,
                fln_J_gene_CDR3_anchors=fln_J_gene_CDR3_anchors)

            # TODO: CHANGE ANCHORS WITH DATAFRAME.
            # FIXME: SOLVE THE REFERENCES TO ANCHORS AND SEQUENCES NAMES PROBLEM
            print("mdl_hb_2.parms.dictNameNickname: ",
                  mdl_hb_2.parms.dictNameNickname)
            df = mdl_hb_2.get_event_realizations_DataFrame('j_choice')

            print("mdl_hb_2.genomic_dataframe_dict: ",
                  mdl_hb_2.genomic_dataframe_dict)
            new_df = df[:4]
            mdl_hb_2.parms.gen_NameNickname_dict()

            # NOTE(review): two differently named setters are called with the
            # same arguments; confirm both exist on IgorModel.
            mdl_hb_2.set_realization_event_from_DataFrame('j_choice', new_df)
            mdl_hb_2.set_event_realizations_from_DataFrame('j_choice', new_df)
            print("mdl_hb_2.parms.dictNameNickname: ",
                  mdl_hb_2.parms.dictNameNickname)

            # `mdl_copy` was previously used without being defined (its
            # definition was commented out), which raised NameError before
            # any assertion ran. Restored from that commented-out definition.
            # NOTE(review): confirm set_genomic_dataframe_dict() takes no
            # arguments.
            mdl_copy = copy.deepcopy(mdl_hb)
            mdl_copy.set_genomic_dataframe_dict()

            # A freshly constructed IgorModel_Parms must expose Event_list.
            mdl_parms = IgorModel_Parms()
            self.assertTrue(hasattr(mdl_parms, "Event_list"))

            self.assertIsInstance(mdl_hb_2, IgorModel)
            for fln in output_files:
                self.assertTrue(os.path.isfile(fln),
                                msg="missing file: " + fln)
        finally:
            # Portable, best-effort cleanup that also runs when the test
            # fails (replaces shelling out to `rm`).
            for fln in output_files:
                try:
                    os.remove(fln)
                except FileNotFoundError:
                    pass
Exemple #7
0
 def test_IgorModel_Parms_from_IgorRefGenome(self):
     """Load the default human TCR alpha reference genome and build parms from it.

     Asserts only that the loaded object is an ``IgorRefGenome``; the
     dataframes are printed for inspection and the result of
     ``make_default_VDJ_from_IgorRefGenome`` is not checked.
     """
     default_genome = IgorRefGenome.load_default("human", "tcr_alpha")
     self.assertIsInstance(default_genome, IgorRefGenome)

     # Called for its execution only; the returned dict is not inspected.
     genome_as_dict = default_genome.to_dict()

     # Debug output of the V sequences, the V anchors and the class-level `V`.
     print(default_genome.df_genomicVs)
     print(default_genome.df_V_anchors)
     print(IgorRefGenome.V)

     parms_from_genome = IgorModel_Parms.make_default_VDJ_from_IgorRefGenome(default_genome)
Exemple #8
0
    def test_IgorModel_Parms_with_anchors(self):
        """Check that anchors passed to the constructor add 'anchor_index'.

        Builds an ``IgorModel_Parms`` for the default human TCR beta model,
        passing the V and J CDR3 anchor files to the constructor, and asserts
        that both reference-genome dataframes have an 'anchor_index' column.
        """
        species = "human"
        chain = "tcr_beta"
        # Only the parms path is used here; the marginals path is discarded.
        fln_model_parms, _ = get_default_models_paths_species_chain(
            species, chain)
        fln_dict = get_default_fln_dict_ref_genomes_species_chain(
            species, chain)
        print("fln_dict: ", fln_dict)

        mdl_parms = IgorModel_Parms(
            model_parms_file=fln_model_parms,
            fln_V_gene_CDR3_anchors=fln_dict["fln_V_gene_CDR3_anchors"],
            fln_J_gene_CDR3_anchors=fln_dict["fln_J_gene_CDR3_anchors"])
        # NOTE(review): the original comment here read "no function in
        # anchors file"; its meaning is not clear from this test. Confirm
        # or drop.
        # assertIn reports the actual column list on failure, unlike
        # assertTrue on a bare `in` expression.
        self.assertIn(
            'anchor_index', mdl_parms.df_V_ref_genome.columns.to_list())
        self.assertIn(
            'anchor_index', mdl_parms.df_J_ref_genome.columns.to_list())