def test_IgorRefGenome_from_imgt(self):
    # Smoke test for the IMGT loaders: print the species list, then load a
    # VDJ reference genome (TRB) and a VJ reference genome (TRA) and check
    # that each loader hands back an IgorRefGenome.
    # NOTE(review): the species string "H**o+sapiens" looks like a mangled
    # "Homo+sapiens" -- kept verbatim here, confirm against the IMGT names.
    imgt_species = "H**o+sapiens"

    print(IgorRefGenome.get_imgt_list_species())

    trb_ref_genome = IgorRefGenome.load_VDJ_from_IMGT_website(
        imgt_species, "TRB")
    self.assertIsInstance(trb_ref_genome, IgorRefGenome)

    tra_ref_genome = IgorRefGenome.load_VJ_from_IMGT_website(
        imgt_species, "TRA")
    self.assertIsInstance(tra_ref_genome, IgorRefGenome)
def test_IgorRefGenome_default(self):
    # Build one IgorRefGenome from the default file-name dictionary and one
    # through IgorRefGenome.load_default, then compare the first two values
    # of row 0 of their V tables.
    species, chain = "human", "tcr_beta"

    default_files = get_default_fln_dict_ref_genomes_species_chain(
        species, chain)
    genomes = IgorRefGenome(**default_files)
    genomes_default = IgorRefGenome.load_default(species, chain)

    # Positions 0 and 1 of the first V row must agree between both objects.
    for position in (0, 1):
        self.assertEqual(
            genomes.V.loc[0].values[position],
            genomes_default.V.loc[0].values[position],
        )
def test_IgorRefGenome(self):
    # Start from an empty IgorRefGenome, point it at the fixture directory,
    # refresh its file names, load the dataframes and print the dict form.
    genome_dir = self.ref_genome_path_dir

    ref_geno = IgorRefGenome()
    ref_geno.path_ref_genome = genome_dir
    ref_geno.update_fln_names(path_ref_genome=ref_geno.path_ref_genome)
    ref_geno.load_dataframes_from_ref_genome_files()

    print(ref_geno.to_dict())

    # NOTE(review): placeholder check -- assertTrue(True, True) always
    # passes (the second argument is only the failure message), so this test
    # currently verifies nothing beyond "no exception was raised".
    self.assertTrue(True, True)
def test_IgorRefGenome_write_ref_genome(self):
    # Write the default human tcr_beta reference genome into a temporary
    # directory and check that the expected fasta/csv files were created.
    species = "human"
    chain = "tcr_beta"

    # 1. Load the default reference genome and show what will be written.
    genomes = IgorRefGenome.load_default(species, chain)
    print(genomes.df_genomicVs)
    print(genomes.df_V_anchors)
    print(genomes.df_V_ref_genome)

    # 2. Use the temporary directory as a context manager so it is removed
    #    even when one of the assertions below fails (the original only
    #    cleaned up on the success path).
    # NOTE(review): the original passed "." positionally, which
    # TemporaryDirectory interprets as `suffix`, not `dir`; that behaviour
    # is kept and made explicit -- confirm whether dir="." was intended.
    with tempfile.TemporaryDirectory(suffix=".", prefix="testando") as tmp_dir:
        # 3. Write the reference genome files.
        genomes.write_ref_genome_dir(tmp_dir)

        # 4. File expected for every file-name key.
        expected_files = {
            'fln_genomicVs': os.path.join(tmp_dir, "genomicVs.fasta"),
            'fln_genomicDs': os.path.join(tmp_dir, "genomicDs.fasta"),
            'fln_genomicJs': os.path.join(tmp_dir, "genomicJs.fasta"),
            'fln_V_gene_CDR3_anchors': os.path.join(
                tmp_dir, "V_gene_CDR3_anchors.csv"),
            'fln_J_gene_CDR3_anchors': os.path.join(
                tmp_dir, "J_gene_CDR3_anchors.csv"),
        }

        # The keys checked are the ones of the fixture dictionary, as in the
        # original test.
        for fln_key in self.fln_dict:
            self.assertTrue(os.path.isfile(expected_files[fln_key]))
def test_IgorModel_from_dataframes(self): ref_genome = IgorRefGenome.load_from_path(self.ref_genome_path_dir) # Because the model depends has VDJ genes self.assertIsInstance(ref_genome, IgorRefGenome) mdl_from_ref_genome = IgorModel.make_default_model_from_IgorRefGenome( ref_genome) self.assertIsInstance(mdl_from_ref_genome, IgorModel) print("mdl_from_ref_genome.V_anchors: ", mdl_from_ref_genome.V_anchors) print("mdl_from_ref_genome.J_anchors: ", mdl_from_ref_genome.J_anchors) path_mdl_data = self.tmp_dir.name + "/batch_mdldata" aaa = path_mdl_data + "/ref_genome" mdl_from_ref_genome mdl_from_ref_genome.write_mdldata_dir() ref_genome_again = mdl_from_ref_genome.parms.get_IgorRefGenome() self.assertIsInstance(ref_genome_again, IgorRefGenome) os.system("mkdir -p " + aaa) ref_genome_again.write_ref_genome_dir(aaa) fln_dict_tmp = get_default_ref_genome_fln_paths(ref_genome_path=aaa) print(fln_dict_tmp, str_mock_VDJ_fln_dict.keys()) for fln_key in str_mock_VDJ_fln_dict.keys(): print("fln_dict_tmp[" + fln_key + "]:", fln_dict_tmp[fln_key]) self.assertTrue(os.path.isfile(fln_dict_tmp[fln_key])) """
def test_IgorModel_Parms_from_IgorRefGenome(self):
    # Smoke test: load the default human tcr_alpha reference genome and make
    # sure model parameters can be built from it without raising.
    ref_genome = IgorRefGenome.load_default("human", "tcr_alpha")
    self.assertIsInstance(ref_genome, IgorRefGenome)

    # Result is not inspected; the call is kept so a failure still surfaces.
    ref_genome.to_dict()

    print(ref_genome.df_genomicVs)
    print(ref_genome.df_V_anchors)
    # NOTE(review): this reads V from the class, not from the loaded
    # instance -- presumably ref_genome.V was meant; confirm.
    print(IgorRefGenome.V)

    # NOTE(review): no assertion is made on the returned parameters.
    IgorModel_Parms.make_default_VDJ_from_IgorRefGenome(ref_genome)
def test_IgorRefGenome_from_path(self):
    # Load the fixture reference genome from its directory, reload the J
    # anchors from file, and check the J entry stored at index label 9.
    expected_name = "TRBJ2-2P*01"
    expected_value = "CTGAGAGGCGCTGCTGGGCGTCTGGGCGGAGGACTCCTGGTTCTGG"

    ref_genome = IgorRefGenome.load_from_path(self.ref_genome_path_dir)
    ref_genome.load_J_anchors_from_file(ref_genome.fln_J_gene_CDR3_anchors)
    print(ref_genome.df_J_anchors)

    j_row = ref_genome.df_J_ref_genome.loc[9]
    self.assertEqual(j_row['name'], expected_name)
    self.assertEqual(j_row['value'], expected_value)
    # This entry is expected to have neither an anchor index nor a function.
    self.assertTrue(np.isnan(j_row['anchor_index']))
    self.assertTrue(np.isnan(j_row['function']))
def test_IgorRefGenome_clean_empty_anchors(self):
    # After clean_empty_anchors(), the J row at label 9 (which has NaN
    # anchor_index/function beforehand) must equal what used to be row 10.
    ref_genome = IgorRefGenome.load_from_path(self.ref_genome_path_dir)

    # Snapshot both rows before the table is modified.
    j_table = ref_genome.df_J_ref_genome
    row_9 = j_table.loc[9].copy()
    row_10 = j_table.loc[10].copy()

    # Precondition: row 9 is the anchor-less entry.
    self.assertEqual(row_9['name'], "TRBJ2-2P*01")
    self.assertEqual(row_9['value'],
                     "CTGAGAGGCGCTGCTGGGCGTCTGGGCGGAGGACTCCTGGTTCTGG")
    self.assertTrue(np.isnan(row_9['anchor_index']))
    self.assertTrue(np.isnan(row_9['function']))

    ref_genome.clean_empty_anchors()

    # Re-read through the attribute: the snapshot above may be stale.
    row_new_9 = ref_genome.df_J_ref_genome.loc[9].copy()
    for column in ('name', 'value', 'anchor_index', 'function'):
        self.assertEqual(row_10[column], row_new_9[column])
def test_infer_VJ(self):
    # End-to-end check: infer a model from the fixture sequences, starting
    # from a default model built on a reference genome fetched from IMGT.

    # 0. Input sequences come from the fixture.
    sequences = self.pd_sequences
    print(sequences)
    self.assertIsInstance(sequences, pd.DataFrame)

    # 1. Reference genome from the IMGT website, anchor-less genes removed.
    # NOTE(review): "H**o+sapiens" looks like a mangled "Homo+sapiens", and
    # the VJ loader is used with the "TRB" chain -- both kept verbatim,
    # confirm they are intended.
    hb_ref_genome = IgorRefGenome.load_VJ_from_IMGT_website(
        "H**o+sapiens", "TRB")
    hb_ref_genome.clean_empty_anchors()
    self.assertIsInstance(hb_ref_genome, IgorRefGenome)

    # 2. Initial model derived from that reference genome.
    hb_mdl_ini = IgorModel.make_default_model_from_IgorRefGenome(
        hb_ref_genome)
    self.assertIsInstance(hb_mdl_ini, IgorModel)

    # 3. Inference seeded with the initial model.
    new_mdl = infer(sequences, hb_mdl_ini, batch_clean=False)
    self.assertIsInstance(new_mdl, IgorModel)
def test_IgorRefGenome_from_dataframe_genomics_dict(self):
    # Build the V/J/D dataframes from the fixture fasta (+ anchor csv) files,
    # create an IgorRefGenome from them, and check the J entry at label 9.
    files = self.fln_dict

    # Same construction order as before: V, then J, then D (no anchors file
    # is passed for D).
    ref_genome_pd_dict = {
        'V': get_dataframe_from_fasta_and_csv_anchors(
            files['fln_genomicVs'], files['fln_V_gene_CDR3_anchors']),
        'J': get_dataframe_from_fasta_and_csv_anchors(
            files['fln_genomicJs'], files['fln_J_gene_CDR3_anchors']),
        'D': get_dataframe_from_fasta_and_csv_anchors(
            files['fln_genomicDs']),
    }

    ref_genome = IgorRefGenome.load_from_dataframe_genomics_dict(
        ref_genome_pd_dict)

    j_row = ref_genome.df_J_ref_genome.loc[9]
    self.assertEqual(j_row['name'], "TRBJ2-2P*01")
    self.assertEqual(j_row['value'],
                     "CTGAGAGGCGCTGCTGGGCGTCTGGGCGGAGGACTCCTGGTTCTGG")
    # This entry is expected to have neither an anchor index nor a function.
    self.assertTrue(np.isnan(j_row['anchor_index']))
    self.assertTrue(np.isnan(j_row['function']))