def test_IgorModel_Parms(self):
    """Check that ``anchor_index`` only appears once anchors are attached.

    Reads the default human TCR beta model parameters into an empty
    ``IgorModel_Parms``, verifies that neither the V nor the J reference
    dataframe has an ``anchor_index`` column, attaches the default V/J
    CDR3 anchor files and verifies that the column is then present.
    """
    species = "human"
    chain = "tcr_beta"

    mdl_parms = IgorModel_Parms()
    fln_model_parms, fln_model_marginals = \
        get_default_models_paths_species_chain(species, chain)
    self.assertTrue(os.path.isfile(fln_model_parms))
    mdl_parms.read_model_parms(fln_model_parms)

    # Freshly read parameters: no anchor information yet.
    # assertNotIn/assertIn report the actual column list on failure,
    # unlike assertFalse/assertTrue on a pre-evaluated boolean.
    self.assertNotIn(
        'anchor_index', mdl_parms.df_V_ref_genome.columns.to_list())
    self.assertNotIn(
        'anchor_index', mdl_parms.df_J_ref_genome.columns.to_list())

    fln_dict = get_default_fln_dict_ref_genomes_species_chain(
        species, chain)
    self.assertTrue(os.path.isfile(fln_dict["fln_V_gene_CDR3_anchors"]))
    self.assertTrue(os.path.isfile(fln_dict["fln_J_gene_CDR3_anchors"]))

    mdl_parms.attach_V_anchors_from_file(
        fln_dict["fln_V_gene_CDR3_anchors"])
    mdl_parms.attach_J_anchors_from_file(
        fln_dict["fln_J_gene_CDR3_anchors"])

    # After attaching, both dataframes must expose the anchor column.
    self.assertIn(
        'anchor_index', mdl_parms.df_V_ref_genome.columns.to_list())
    self.assertIn(
        'anchor_index', mdl_parms.df_J_ref_genome.columns.to_list())
def test_IgorModel_Parms_get_IgorRefGenome_VJ(self):
    """
    Get an IgorRefGenome object generated from the GeneChoice events of a
    VJ model and use it to write a ref_genome directory that will be used
    to run IGoR.

    The V/J fasta and anchor files must be written; no D fasta file is
    expected for this chain.
    """
    species = "human"
    chain = "tcr_alpha"
    fln_model_parms, fln_model_marginals = \
        get_default_models_paths_species_chain(species, chain)
    fln_dict = get_default_fln_dict_ref_genomes_species_chain(
        species, chain)

    # 1. Make an IgorModel_Parms from the default model and anchor files
    mdl_parms = IgorModel_Parms(
        model_parms_file=fln_model_parms,
        fln_V_gene_CDR3_anchors=fln_dict["fln_V_gene_CDR3_anchors"],
        fln_J_gene_CDR3_anchors=fln_dict["fln_J_gene_CDR3_anchors"])

    self.assertIsNone(mdl_parms.event_GeneChoice_D)
    self.assertIsNone(mdl_parms.df_D_ref_genome)

    # 2. Get IgorRefGenome from events
    ref_genome = mdl_parms.get_IgorRefGenome()

    # 3. Write a ref_genome_dir.
    # The context manager removes the directory even when one of the
    # assertions below fails (an explicit cleanup() call at the end of the
    # test would be skipped in that case).
    with tempfile.TemporaryDirectory(
            dir='.', prefix="ref_genome") as tmp_ref_genome_dir:
        ref_genome.write_ref_genome_dir(tmp_ref_genome_dir)

        expected_files = (
            "genomicVs.fasta",
            "genomicJs.fasta",
            "V_gene_CDR3_anchors.csv",
            "J_gene_CDR3_anchors.csv",
        )
        for fln in expected_files:
            self.assertTrue(
                os.path.isfile(os.path.join(tmp_ref_genome_dir, fln)))

        self.assertFalse(
            os.path.isfile(
                os.path.join(tmp_ref_genome_dir, "genomicDs.fasta")))
def test_IgorModel_Parms_from_dataframe(self):
    """Build default VDJ parameters from hand-assembled genomic dataframes.

    Takes a deep copy of the genomic dataframes of the default human TCR
    beta model, relabels the V and J names with ``v_genLabel``, extends a
    slice of the V dataframe with one extra pseudo V gene and checks that
    ``IgorModel_Parms.make_default_VDJ`` returns an ``IgorModel_Parms``.
    """
    import copy

    mdl_hb = get_default_IgorModel("human", "tcr_beta")

    # Work on a copy so the loaded model's dataframes are left untouched.
    genomic_dict = copy.deepcopy(mdl_hb.genomic_dataframe_dict)
    genomic_dict['V']['name'] = v_genLabel(genomic_dict['V']['name'])
    genomic_dict['J']['name'] = v_genLabel(genomic_dict['J']['name'])

    new_V_gene_dict = {
        'name': 'my_pseudo_TRBV',
        'value': 'AAACCCTTTGGGACCCAGAGCCCAAGACACAAGATCACAGAGACAGGAAGGCAGGTGACCTTGGCGTGTCACCAGACTTGGAACCACAACAATATGTTCTGGTATCGACAAGACCTGGGACATGGGCTGAGGCTGATCCATTACTCATATGGTGTTCACGACACTAACAAAGGAGAAGTCTCAGATGGCTACAGTGTCTCTAGATCAAACACAGAGGACCTCCCCCTCACTCTGTAGTCTGCTGCCTCCTCCCAGACATCTGTATATTTCTGCGCCAGCAGTGAGTC',
        'anchor_index': 270
    }

    # DataFrame.append was removed in pandas 2.0; pd.concat with a one-row
    # frame is the supported way to add the extra gene as a new row.
    df_V = pd.concat(
        [genomic_dict['V'].loc[10:15], pd.DataFrame([new_V_gene_dict])],
        ignore_index=True)
    df_V.index.name = 'id'

    mdl_parms_0 = IgorModel_Parms.make_default_VDJ(
        df_V, genomic_dict['D'], genomic_dict['J'])
    mdl_marginals_0 = IgorModel_Marginals.make_uniform_from_parms(
        mdl_parms_0)

    # Side-by-side dump of the reference model and the freshly built one.
    print("parms.Edges: ")
    print(mdl_hb.parms.Edges)
    print(mdl_parms_0.Edges)

    print("parms.Edges_dict:")
    print(mdl_hb.parms.Edges_dict)
    print(mdl_parms_0.Edges_dict)

    print("marginals.network_dict")
    print(mdl_hb.marginals.network_dict)
    print(mdl_marginals_0.network_dict)

    self.assertIsInstance(mdl_parms_0, IgorModel_Parms)
def test_get_df_ref_genome_from_files(self):
    """Build V, D and J reference dataframes from in-memory mock files.

    Each mock string is wrapped in an ``io.StringIO`` handle and passed to
    ``get_dataframe_from_fasta_and_csv_anchors``. The three results must be
    pandas DataFrames, and ``IgorModel_Parms.make_default_VDJ`` built from
    them must return an ``IgorModel_Parms``.
    """
    # V genes: fasta plus CDR3 anchors.
    v_fasta = io.StringIO(str_mock_VDJ_fln_genomicVs)
    v_anchors = io.StringIO(str_mock_VDJ_fln_V_gene_CDR3_anchors)
    df_V_ref_genome = get_dataframe_from_fasta_and_csv_anchors(
        v_fasta, v_anchors)

    # D genes: only the fasta handle is passed.
    d_fasta = io.StringIO(str_mock_VDJ_fln_genomicDs)
    df_D_ref_genome = get_dataframe_from_fasta_and_csv_anchors(d_fasta)

    # J genes: fasta plus CDR3 anchors.
    j_fasta = io.StringIO(str_mock_VDJ_fln_genomicJs)
    j_anchors = io.StringIO(str_mock_VDJ_fln_J_gene_CDR3_anchors)
    df_J_ref_genome = get_dataframe_from_fasta_and_csv_anchors(
        j_fasta, j_anchors)

    self.assertIsInstance(df_V_ref_genome, pd.DataFrame)
    self.assertIsInstance(df_D_ref_genome, pd.DataFrame)
    self.assertIsInstance(df_J_ref_genome, pd.DataFrame)

    mdl_parms = IgorModel_Parms.make_default_VDJ(
        df_V_ref_genome, df_D_ref_genome, df_J_ref_genome)
    self.assertIsInstance(mdl_parms, IgorModel_Parms)
def test_IgorModel_Parms_get_IgorRefGenome_VDJ(self):
    """Write a ref_genome directory from the events of a VDJ model.

    Loads the default human TCR beta parameters together with the V/J CDR3
    anchor files, checks that anchors and the D gene information are
    present, then writes the derived ``IgorRefGenome`` to a temporary
    directory and checks that the V, D and J fasta files and the V/J
    anchor files exist.
    """
    species = "human"
    chain = "tcr_beta"
    fln_model_parms, fln_model_marginals = \
        get_default_models_paths_species_chain(species, chain)
    fln_dict = get_default_fln_dict_ref_genomes_species_chain(
        species, chain)

    mdl_parms = IgorModel_Parms(
        model_parms_file=fln_model_parms,
        fln_V_gene_CDR3_anchors=fln_dict["fln_V_gene_CDR3_anchors"],
        fln_J_gene_CDR3_anchors=fln_dict["fln_J_gene_CDR3_anchors"])

    self.assertIsNotNone(mdl_parms.df_V_anchors)
    self.assertIsNotNone(mdl_parms.df_J_anchors)
    self.assertIsNotNone(mdl_parms.event_GeneChoice_D)
    self.assertIsNotNone(mdl_parms.df_D_ref_genome)

    ref_genome = mdl_parms.get_IgorRefGenome()
    mdl_parms.gen_EventDict_DataFrame()

    # The context manager removes the directory even when an assertion
    # below fails; a trailing cleanup() call would be skipped in that case.
    with tempfile.TemporaryDirectory(
            dir='.', prefix="ref_genome") as tmp_ref_genome_dir:
        ref_genome.write_ref_genome_dir(tmp_ref_genome_dir)
        print(ref_genome.to_dict())

        expected_files = (
            "genomicVs.fasta",
            "genomicDs.fasta",
            "genomicJs.fasta",
            "V_gene_CDR3_anchors.csv",
            "J_gene_CDR3_anchors.csv",
        )
        for fln in expected_files:
            self.assertTrue(
                os.path.isfile(os.path.join(tmp_ref_genome_dir, fln)))
def test_IgorModel_write_model(self):
    """Write the default human TCR beta model to disk and load it back.

    The model is written as four files in the current directory (parms,
    marginals, V anchors, J anchors), reloaded into a second ``IgorModel``,
    and the ``j_choice`` realizations of the reloaded model are replaced by
    their first four rows. The test then checks the reloaded object type
    and that all four files exist. The files are removed afterwards,
    whether or not the test passes.
    """
    mdl_hb = IgorModel.load_default("human", "tcr_beta")

    fln_model_parms = 'model_parms.txt'
    fln_model_marginals = 'model_marginals.txt'
    fln_V_gene_CDR3_anchors = 'V_gene_CDR3_anchors.csv'
    fln_J_gene_CDR3_anchors = 'J_gene_CDR3_anchors.csv'
    output_files = (fln_model_parms, fln_model_marginals,
                    fln_V_gene_CDR3_anchors, fln_J_gene_CDR3_anchors)

    try:
        # TODO: ADD anchors
        mdl_hb.write_model(fln_model_parms, fln_model_marginals,
                           fln_V_gene_CDR3_anchors, fln_J_gene_CDR3_anchors)

        mdl_hb_2 = IgorModel(
            fln_model_parms, fln_model_marginals,
            fln_V_gene_CDR3_anchors=fln_V_gene_CDR3_anchors,
            fln_J_gene_CDR3_anchors=fln_J_gene_CDR3_anchors)

        # TODO: CHANGE ANCHORS WITH DATAFRAME.
        # FIXME: SOLVE THE REFERENCES TO ANCHORS AND SEQUENCES NAMES PROBLEM
        print("mdl_hb_2.parms.dictNameNickname: ",
              mdl_hb_2.parms.dictNameNickname)
        df = mdl_hb_2.get_event_realizations_DataFrame('j_choice')
        print("mdl_hb_2.genomic_dataframe_dict: ",
              mdl_hb_2.genomic_dataframe_dict)

        # Keep only the first four realizations of the J gene choice.
        new_df = df[:4]
        mdl_hb_2.parms.gen_NameNickname_dict()
        # NOTE(review): both setter spellings are called, as in the
        # original test - confirm that both exist on IgorModel.
        mdl_hb_2.set_realization_event_from_DataFrame('j_choice', new_df)
        mdl_hb_2.set_event_realizations_from_DataFrame('j_choice', new_df)
        print("mdl_hb_2.parms.dictNameNickname: ",
              mdl_hb_2.parms.dictNameNickname)

        # NOTE(review): a call ``mdl_copy.set_genomic_dataframe_dict()``
        # used to sit here, but ``mdl_copy`` was never defined (its
        # deepcopy was commented out), so the test always died with a
        # NameError before reaching the assertions. Re-add it on a real
        # object once the intended target and arguments are settled.

        self.assertIsInstance(mdl_hb_2, IgorModel)
        for fln in output_files:
            self.assertTrue(os.path.isfile(fln))
    finally:
        # Portable cleanup that also runs when the test fails; replaces
        # the former ``rm`` shell command executed only on success.
        for fln in output_files:
            if os.path.isfile(fln):
                os.remove(fln)
def test_IgorModel_Parms_from_IgorRefGenome(self):
    """Build default VDJ parameters from a default reference genome.

    Loads the default human TCR alpha ``IgorRefGenome``, checks its type,
    prints some of its contents and passes it to
    ``IgorModel_Parms.make_default_VDJ_from_IgorRefGenome``. The result is
    not inspected; the test only checks that the calls complete.
    """
    genome = IgorRefGenome.load_default("human", "tcr_alpha")
    self.assertIsInstance(genome, IgorRefGenome)

    # Exercise the dict export; the value itself is not used afterwards.
    genome_as_dict = genome.to_dict()

    # Debug output of the V gene tables.
    print(genome.df_genomicVs)
    print(genome.df_V_anchors)
    # NOTE(review): this reads ``V`` from the class, not the instance -
    # confirm that IgorRefGenome defines such a class-level attribute.
    print(IgorRefGenome.V)

    parms_from_genome = IgorModel_Parms.make_default_VDJ_from_IgorRefGenome(
        genome)
def test_IgorModel_Parms_with_anchors(self):
    """Check that anchors given to the constructor reach the dataframes.

    Builds an ``IgorModel_Parms`` for the default human TCR beta model,
    passing the V and J CDR3 anchor files as keyword arguments, and
    verifies that both reference-genome dataframes have an
    ``anchor_index`` column.
    """
    species = "human"
    chain = "tcr_beta"
    fln_model_parms, fln_model_marginals = \
        get_default_models_paths_species_chain(species, chain)
    fln_dict = get_default_fln_dict_ref_genomes_species_chain(
        species, chain)
    print("fln_dict: ", fln_dict)

    mdl_parms = IgorModel_Parms(
        model_parms_file=fln_model_parms,
        fln_V_gene_CDR3_anchors=fln_dict["fln_V_gene_CDR3_anchors"],
        fln_J_gene_CDR3_anchors=fln_dict["fln_J_gene_CDR3_anchors"])

    # no function in anchors file
    # assertIn reports the actual column list on failure, unlike
    # assertTrue on a pre-evaluated boolean.
    self.assertIn(
        'anchor_index', mdl_parms.df_V_ref_genome.columns.to_list())
    self.assertIn(
        'anchor_index', mdl_parms.df_J_ref_genome.columns.to_list())