def _test_secondary(obj_test: Screen, parameters: dict) -> bool:
    """
    Report whether ``obj_test.secondary_mean(**parameters)`` raises.

    Parameters
    -----------
    obj_test : Screen
        Object whose ``secondary_mean`` method is exercised.

    parameters : dict
        Keyword arguments unpacked into the ``secondary_mean`` call.

    Returns
    --------
    bool
        True if the call raised an exception (the exception is logged and
        the traceback printed), False if it completed normally.
    """
    raised = False
    try:
        # The dictionary is expanded into keyword arguments of the method
        obj_test.secondary_mean(**parameters)
    except Exception as error:  # pylint: disable=broad-except
        logging.exception(error)
        print(traceback.format_exc())
        raised = True
    return raised
def _test_differential_output(obj_test: Screen, parameters: dict) -> bool:
    """
    Report whether ``obj_test.differential(obj_test, **parameters)`` raises.

    The object is compared against itself: it is passed as the first
    positional argument of its own ``differential`` method.

    Parameters
    -----------
    obj_test : Screen
        Object whose ``differential`` method is exercised.

    parameters : dict
        Keyword arguments unpacked into the ``differential`` call.

    Returns
    --------
    bool
        True if the call raised an exception (the exception is logged and
        the traceback printed), False if it completed normally.
    """
    raised = False
    try:
        # The dictionary is expanded into keyword arguments of the method
        obj_test.differential(obj_test, **parameters)
    except Exception as error:  # pylint: disable=broad-except
        logging.exception(error)
        print(traceback.format_exc())
        raised = True
    return raised
def _generate_aph_obj(self) -> Screen:  # pylint: disable=no-self-use
    """
    Build the Screen object for the aph demo dataset.

    The dataframe stored under ``DEMO_DATASETS['df_aph']`` is log10
    transformed before being handed to ``Screen``; missing values are
    filled with 0.
    """
    df_aph = DEMO_DATASETS['df_aph']

    # Row labels give the amino acid order, first column label the start
    residues = list(df_aph.index)
    first_position = df_aph.columns[0]

    # Full protein sequence, split in chunks only for readability
    full_sequence = (
        'MIEQDGLHAGSPAAWVERLFGYDWAQQTIGCSDAAVFRLSAQGRPVLFVKTDLSGALNELQ'
        'DEAARLSWLATTGVPCAAVLDVVTEAGRDWLLLGEVPGQDLLSSHLAPAEKVSIMADAMRR'
        'LHTLDPATCPFDHQAKHRIERARTRMEAGLVDQDDLDEEHQGLAPAELFARLKARMPDGED'
        'LVVTHGDACLPNIMVENGRFSGFIDCGRLGVADRYQDIALATRDIAEELGGEWADRFLVLY'
        'GIAAPDSQRIAFYRLLDEFF'
    )

    # Secondary structure as (label, number of residues) pairs. Lengths are
    # written as "end - previous end" so the boundaries stay visible.
    segments = (
        ('L1', 11),
        ('α1', 16 - 11),
        ('L2', 22 - 16),
        ('β1', 26 - 22),
        ('L3', 34 - 26),
        ('β2', 40 - 34),
        ('L4', 46 - 40),
        ('β3', 52 - 46),
        ('L5', 58 - 52),
        ('α2', 72 - 58),
        ('L6', 79 - 72),
        ('β4', 85 - 79),
        ('L7', 89 - 85),
        ('β5', 95 - 89),
        ('L8', 99 - 95),
        ('β6', 101 - 99),
        ('L9', 107 - 101),
        ('α3', 131 - 107),
        ('L10', 135 - 131),
        ('α4', 150 - 135),
        ('L11', 158 - 150),
        ('α5', 163 - 158),
        ('L12', 165 - 163),
        ('α6', 177 - 165),
        ('L13', 183 - 177),
        ('β7', 187 - 183),
        ('L14', 191 - 187),
        ('α7', 194 - 191),
        ('L15', 1),
        ('β8', 199 - 195),
        ('L16', 201 - 199),
        ('β9', 206 - 201),
        ('L17', 212 - 206),
        ('β10', 216 - 212),
        ('α8', 245 - 216),
        ('L18', 4),
        ('α9', 264 - 249),
    )
    secondary_aph: List[List[str]] = [[label] * length for label, length in segments]

    return Screen(
        datasets=np.log10(df_aph),
        sequence=full_sequence,
        aminoacids=residues,
        start_position=first_position,
        fillna=0,
        secondary=secondary_aph,
    )
def _generate_rev_obj(self) -> Screen:  # pylint: disable=no-self-use
    """
    Build the Screen object for the rev demo dataset.

    The dataframe stored under ``DEMO_DATASETS['df_rev']`` is passed to
    ``Screen`` unchanged; missing values are filled with 0.
    """
    # Row labels give the amino acid order, first column label the start
    aminoacids = list(DEMO_DATASETS['df_rev'].index)
    start_position = DEMO_DATASETS['df_rev'].columns[0]

    # Full sequence (116 residues)
    sequence_rev = (
        'MAGRSGDSDEDLLKAVRLIKFLYQSNPPPNPEGTRQARRNRRRRWRERQRQIHSISERIL'
        'STYLGRSAEPVPLQLPPLERLTLDCNEDCGTSGTQGVGSPQILVESPTILESGAKE'
    )

    # Define secondary structure. Each length is "end - previous end".
    # The last loop previously used (116 - 41), which ignored that α2 ends
    # at 68 and produced 143 annotations for a 116-residue sequence; it now
    # runs from 68 to 116 so the annotation length matches the sequence.
    secondary_rev: List[List[str]] = [
        ['L1'] * (8),
        ['α1'] * (25 - 8),
        ['L2'] * (33 - 25),
        ['α2'] * (68 - 33),
        ['L3'] * (116 - 68),
    ]

    return Screen(
        datasets=DEMO_DATASETS['df_rev'],
        sequence=sequence_rev,
        aminoacids=aminoacids,
        start_position=start_position,
        fillna=0,
        secondary=secondary_rev,
    )
def _generate_asynuclein_obj(self) -> Screen:  # pylint: disable=no-self-use
    """
    Build the Screen object for the alpha-synuclein demo dataset.

    The dataframe stored under ``DEMO_DATASETS['df_asynuclein']`` is passed
    to ``Screen`` unchanged; missing values are filled with 0.
    """
    df_asynuclein = DEMO_DATASETS['df_asynuclein']

    # Row labels give the amino acid order, first column label the start
    residues = list(df_asynuclein.index)
    first_position = df_asynuclein.columns[0]

    # Full protein sequence, split in chunks only for readability
    full_sequence = (
        'MDVFMKGLSKAKEGVVAAAEKTKQGVAEAAGKTKEGVLYVGSKTKEGVVHGVATVAEKTK'
        'EQVTNVGGAVVTGVTAVAQKTVEGAGSIAAATGFVKKDQLGKNEEGAPQEGILEDMPVDP'
        'DNEAYEMPSEEGYQDYEPEA'
    )

    # Secondary structure as (label, number of residues) pairs
    segments = (
        ('L1', 1),
        ('α1', 37 - 1),
        ('L2', 44 - 37),
        ('α2', 92 - 44),
        ('L3', 140 - 92),
    )
    secondary_asynuclein: List[List[str]] = [
        [label] * length for label, length in segments
    ]

    return Screen(
        datasets=df_asynuclein,
        sequence=full_sequence,
        aminoacids=residues,
        start_position=first_position,
        fillna=0,
        secondary=secondary_asynuclein,
    )
def _generate_tat_obj(self) -> Screen:  # pylint: disable=no-self-use
    """
    Build the Screen object for the tat demo dataset.

    The dataframe stored under ``DEMO_DATASETS['df_tat']`` is passed to
    ``Screen`` unchanged; missing values are filled with 0.
    """
    df_tat = DEMO_DATASETS['df_tat']

    # Row labels give the amino acid order, first column label the start
    residues = list(df_tat.index)
    first_position = df_tat.columns[0]

    # Full protein sequence, split in chunks only for readability
    full_sequence = (
        'MEPVDPRLEPWKHPGSQPKTACTNCYCKKCCFHCQVCFITKALGISYGRKKRRQRRRAHQ'
        'NSQTHQASLSKQPTSQPRGDPTGPKE'
    )

    # Secondary structure as (label, number of residues) pairs.
    # NOTE(review): L2 is written as (28 - 14) although α1 ends at 13, so
    # the usual "end - previous end" pattern is broken here and later
    # segments sit one residue earlier than their nominal boundaries.
    # Kept as-is; confirm the intended boundary against the structure.
    segments = (
        ('L1', 8),
        ('α1', 13 - 8),
        ('L2', 28 - 14),
        ('α2', 41 - 28),
        ('L3', 90 - 41),
    )
    secondary_tat: List[List[str]] = [[label] * length for label, length in segments]

    return Screen(
        datasets=df_tat,
        sequence=full_sequence,
        aminoacids=residues,
        start_position=first_position,
        fillna=0,
        secondary=secondary_tat,
    )
def _generate_ube2i_obj(self) -> Screen:  # pylint: disable=no-self-use
    """
    Build the Screen object for the ube2i demo dataset.

    The dataframe stored under ``DEMO_DATASETS['df_ube2i']`` is passed to
    ``Screen`` unchanged; missing values are filled with 1.
    """
    df_ube2i = DEMO_DATASETS['df_ube2i']

    # Row labels give the amino acid order, first column label the start
    residues = list(df_ube2i.index)
    first_position = df_ube2i.columns[0]

    # Full protein sequence, split in chunks only for readability
    full_sequence = (
        'MSGIALSRLAQERKAWRKDHPFGFVAVPTKNPDGTMNLMNWECAIPGKKGTP'
        'WEGGLFKLRMLFKDDYPSSPPKCKFEPPLFHPNVYPSGTVCLSILEEDKDWRPAITIKQ'
        'ILLGIQELLNEPNIQDPAQAEAYTIYCQNRVEYEKRVRAQAKKFAPS'
    )

    # Secondary structure as (label, number of residues) pairs
    segments = (
        ('α1', 20 - 1),
        ('L1', 24 - 20),
        ('β1', 30 - 24),
        ('L2', 5),
        ('β2', 46 - 35),
        ('L3', 56 - 46),
        ('β3', 63 - 56),
        ('L4', 73 - 63),
        ('β4', 77 - 73),
        ('L5', 93 - 77),
        ('α2', 98 - 93),
        ('L6', 107 - 98),
        ('α3', 122 - 107),
        ('L7', 129 - 122),
        ('α4', 155 - 129),
        ('L8', 160 - 155),
    )
    secondary_ube2i: List[List[str]] = [[label] * length for label, length in segments]

    return Screen(
        datasets=df_ube2i,
        sequence=full_sequence,
        aminoacids=residues,
        start_position=first_position,
        fillna=1,
        secondary=secondary_ube2i,
    )
def _generate_sumo_obj(self) -> Screen:  # pylint: disable=no-self-use
    """
    Build the Screen object for the sumo1 demo dataset.

    The dataframe stored under ``DEMO_DATASETS['df_sumo1']`` is passed to
    ``Screen`` unchanged; missing values are filled with 1.
    """
    df_sumo1 = DEMO_DATASETS['df_sumo1']

    # Row labels give the amino acid order, first column label the start
    residues = list(df_sumo1.index)
    first_position = df_sumo1.columns[0]

    # Full protein sequence, split in chunks only for readability
    full_sequence = (
        'MSDQEAKPSTEDLGDKKEGEYIKLKVIGQDSSEIHFKVKMTTHLKKLKESYCQRQGVPMN'
        'SLRFLFEGQRIADNHTPKELGMEEEDVIEVYQEQTGGHSTV'
    )

    # Secondary structure as (label, number of residues) pairs
    segments = (
        ('L0', 20),
        ('β1', 28 - 20),
        ('L1', 3),
        ('β2', 39 - 31),
        ('L2', 4),
        ('α1', 55 - 43),
        ('L3', 6),
        ('β3', 65 - 61),
        ('L4', 75 - 65),
        ('α2', 80 - 75),
        ('L5', 85 - 80),
        ('β4', 92 - 85),
        ('L6', 101 - 92),
    )
    secondary_sumo1: List[List[str]] = [[label] * length for label, length in segments]

    return Screen(
        datasets=df_sumo1,
        sequence=full_sequence,
        aminoacids=residues,
        start_position=first_position,
        fillna=1,
        secondary=secondary_sumo1,
    )
def _generate_bla_obj(self) -> Screen:  # pylint: disable=no-self-use
    """
    Build the Screen object for the beta lactamase demo dataset.

    The dataframe stored under ``DEMO_DATASETS['df_bla']`` is passed to
    ``Screen`` unchanged; missing values are filled with 0.
    """
    df_bla = DEMO_DATASETS['df_bla']

    # Row labels give the amino acid order, first column label the start
    residues: List[str] = list(df_bla.index)
    first_position = df_bla.columns[0]

    # Protein sequence, split in chunks only for readability. The original
    # note advises padding with X's when the start of the sequence is unknown.
    full_sequence = (
        'MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRP'
        'EERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVREL'
        'CSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTM'
        'PAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGS'
        'RGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW'
    )

    # Secondary structure as (label, number of residues) pairs
    segments = (
        ('L0', 23),
        ('α1', 38 - 23),
        ('L1', 2),
        ('β1', 48 - 40),
        ('L2', 5),
        ('β2', 57 - 53),
        ('L3', 68 - 57),
        ('α2', 84 - 68),
        ('L4', 95 - 84),
        ('α3', 100 - 95),
        ('L5', 103 - 100),
        ('α4', 110 - 103),
        ('L6', 116 - 110),
        ('α5', 140 - 116),
        ('L7', 1),
        ('α6', 153 - 141),
        ('L8', 164 - 153),
        ('α7', 169 - 164),
        ('L9', 179 - 169),
        ('α8', 194 - 179),
        ('L10', 3),
        ('α9', 210 - 197),
        ('L11', 227 - 210),
        ('β3', 235 - 227),
        ('L12', 240 - 235),
        ('β4', 249 - 240),
        ('L13', 254 - 249),
        ('β5', 262 - 254),
        ('L14', 266 - 262),
        ('α10', 286 - 266),
    )
    secondary_bla: List[List[str]] = [[label] * length for label, length in segments]

    return Screen(
        datasets=df_bla,
        sequence=full_sequence,
        aminoacids=residues,
        start_position=first_position,
        fillna=0,
        secondary=secondary_bla,
    )
def _test_screen_output(parameters: dict) -> bool:
    """
    Report whether constructing ``Screen(**parameters)`` raises.

    Parameters
    -----------
    parameters : dict
        Keyword arguments unpacked into the ``Screen`` constructor.

    Returns
    --------
    bool
        True if construction raised an exception (the exception is logged
        and the traceback printed), False if it completed normally.
    """
    raised = False
    try:
        # The created object is discarded; only success or failure matters
        Screen(**parameters)
    except Exception as error:  # pylint: disable=broad-except
        logging.exception(error)
        print(traceback.format_exc())
        raised = True
    return raised
def _generate_b11l5f_obj(self) -> Screen:  # pylint: disable=no-self-use
    """
    Build the Screen object for the b11l5f demo dataset.

    The dataframe stored under ``DEMO_DATASETS['df_b11l5f']`` is passed to
    ``Screen`` unchanged; missing values are filled with 0. No secondary
    structure is supplied for this dataset.
    """
    df_b11l5f = DEMO_DATASETS['df_b11l5f']

    # Row labels of the dataframe give the amino acid order
    residues = list(df_b11l5f.index)

    # Protein sequence, split in chunks only for readability
    full_sequence = (
        'CRAASLLPGTWQVTMTNEDGQTSQGQMHFQPRSPYTLDVKAQGTISDGRPI'
        'SGKGKVTCKTPDTMDVDITYPSLGNMKVQGQVTLDSPTQFKFDVTTSDGSKVTGTLQRQE'
    )

    # First column label is used as the start position
    first_position = df_b11l5f.columns[0]

    return Screen(
        datasets=df_b11l5f,
        sequence=full_sequence,
        aminoacids=residues,
        start_position=first_position,
        fillna=0,
    )
def _generate_mapk1_obj(self) -> Screen:  # pylint: disable=no-self-use
    """
    Build the Screen object for the mapk1 demo dataset.

    The dataframe stored under ``DEMO_DATASETS['df_mapk1']`` is passed to
    ``Screen`` unchanged; missing values are filled with 0. No secondary
    structure is supplied for this dataset.
    """
    df_mapk1 = DEMO_DATASETS['df_mapk1']

    # Row labels give the amino acid order, first column label the start
    residues = list(df_mapk1.index)
    first_position = df_mapk1.columns[0]

    # Full protein sequence, split in chunks only for readability
    full_sequence = (
        'MAAAAAAGAGPEMVRGQVFDVGPRYTNLSYIGEGAYGMVCSAYDNVNKVRVAIK'
        'KISPFEHQTYCQRTLREIKILLRFRHENIIGINDIIRAPTIEQMKDVYIVQDLMETDLYKLLKTQ'
        'HLSNDHICYFLYQILRGLKYIHSANVLHRDLKPSNLLLNTTCDLKICDFGLARVADPDHDHTGFL'
        'TEYVATRWYRAPEIMLNSKGYTKSIDIWSVGCILAEMLSNRPIFPGKHYLDQLNHILGILGSPSQ'
        'EDLNCIINLKARNYLLSLPHKNKVPWNRLFPNADSKALDLLDKMLTFNPHKRIEVEQALAHPYLE'
        'QYYDPSDEPIAEAPFKFDMELDDLPKEKLKELIFEETARFQPGYRS'
    )

    return Screen(
        datasets=df_mapk1,
        sequence=full_sequence,
        aminoacids=residues,
        start_position=first_position,
        fillna=0,
    )
def _generate_hras_gapgef_object(self) -> Screen:  # pylint: disable=no-self-use
    """
    Build the Screen object for the hras_gapgef demo dataset.

    The data stored under ``DEMO_DATASETS['array_hras_gapgef']`` is passed
    to ``Screen`` together with a fixed amino acid order; no start position
    or fill value is given, so ``Screen`` uses its own defaults for those.
    """
    # Protein sequence
    full_sequence: str = 'MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHQYREQIKRVKDSDDVPMVLVGNKCDLAARTVESRQAQDLARSYGIPYIETSAKTRQGVEDAFYTLVREIRQHKLRKLNPPDESGPG'  # pylint: disable=line-too-long

    # Secondary structure as (label, number of residues) pairs
    segments = (
        ('L0', 1),
        ('β1', 9 - 1),
        ('L1', 15 - 9),
        ('α1', 25 - 15),
        ('L2', 36 - 25),
        ('β2', 46 - 36),
        ('L3', 48 - 46),
        ('β3', 58 - 48),
        ('L4', 64 - 58),
        ('α2', 74 - 64),
        ('L5', 76 - 74),
        ('β4', 83 - 76),
        ('L6', 86 - 83),
        ('α3', 103 - 86),
        ('L7', 110 - 103),
        ('β5', 116 - 110),
        ('L8', 126 - 116),
        ('α4', 137 - 126),
        ('L9', 140 - 137),
        ('β6', 143 - 140),
        ('L10', 151 - 143),
        ('α5', 172 - 151),
        ('L11', 190 - 172),
    )
    structure: List[List[str]] = [[label] * length for label, length in segments]

    return Screen(
        datasets=DEMO_DATASETS['array_hras_gapgef'],
        sequence=full_sequence,
        aminoacids=list('ACDEFGHIKLMNPQRSTVWY*'),
        secondary=structure,
    )
def create_anc_ras_object() -> Tuple[Screen, List[Tuple[int, int]]]:
    """
    Build the Screen object for the ancestral Ras dataset.

    The original note states that this dataset is not available since it
    is not published yet.

    Returns
    --------
    Tuple[Screen, List[Tuple[int, int]]]
        The Screen built from ``ANC_RAS_ENRICHMENT`` and the ancestral
        sequence, followed by a list of ``(position, position)`` pairs
        (1-based) for every position where the H-Ras and ancestral Ras
        sequences differ.
    """
    ancestral_seq: str = 'MTEYKLVVVGGGGVGKSALTIQFIQSHFVDEYDPTIEDSYRKQVVIDDEVAILDILDTAGQEEYSAMREQYMRNGEGFLLVYSITDRSSFDEISTYHEQILRVKDTDDVPMVLVGNKADLESRAVSMQEGQNLAKQLNVPFIETSAKQRMNVDEAFYTLVRVVRRH'
    hras_seq: str = 'MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHQYREQIKRVKDSDDVPMVLVGNKCDLAARTVESRQAQDLARSYGIPYIETSAKTRQGVEDAFYTLVREIRQH'

    # Secondary structure as (label, number of residues) pairs
    segments = (
        ('β1', 9 - 1),
        ('L1', 16 - 9),
        ('α1', 25 - 16),
        ('L2', 37 - 25),
        ('β2', 46 - 37),
        ('L3', 49 - 46),
        ('β3', 58 - 49),
        ('L4', 65 - 58),
        ('α2', 74 - 65),
        ('L5', 77 - 74),
        ('β4', 83 - 77),
        ('L6', 87 - 83),
        ('α3', 103 - 87),
        ('L7', 111 - 103),
        ('β5', 116 - 111),
        ('L8', 127 - 116),
        ('α4', 137 - 127),
        ('L9', 141 - 137),
        ('β6', 143 - 141),
        ('L10', 152 - 143),
        ('α5', 173 - 152),
        ('L11', 189 - 173),
    )
    structure: list = [[label] * length for label, length in segments]
    residues: List[str] = list('ACDEFGHIKLMNPQRSTVWY*')

    # Positions (counted from 1) where the two sequences disagree; each
    # position is paired with itself.
    differing_positions: List[Tuple[int, int]] = [
        (position, position)
        for position, (hras_res, anc_res) in enumerate(zip(hras_seq, ancestral_seq), start=1)
        if hras_res != anc_res
    ]

    screen = Screen(ANC_RAS_ENRICHMENT, ancestral_seq, residues, secondary=structure)
    return screen, differing_positions
# Secondary structure shared by every Screen below. Each inner list repeats
# one motif label; its length is written as "end - previous end".
secondary: list = [['β1'] * (9 - 1), ['L1'] * (16 - 9), ['α1'] * (25 - 16), ['L2'] * (37 - 25),
                   ['β2'] * (46 - 37), ['L3'] * (49 - 46), ['β3'] * (58 - 49), ['L4'] * (65 - 58),
                   ['α2'] * (74 - 65), ['L5'] * (77 - 74), ['β4'] * (83 - 77), ['L6'] * (87 - 83),
                   ['α3'] * (103 - 87), ['L7'] * (111 - 103), ['β5'] * (116 - 111),
                   ['L8'] * (127 - 116), ['α4'] * (137 - 127), ['L9'] * (141 - 137),
                   ['β6'] * (143 - 141), ['L10'] * (152 - 143), ['α5'] * (173 - 152),
                   ['L11'] * (189 - 173)]
# Keyword arguments forwarded unchanged to every Screen constructor below
kwargs: dict = {'secondary': secondary}
# Amino acid order passed to every Screen; the last entry is '*'
aminoacids: List[str] = list('ACDEFGHIKLMNPQRSTVWY*')

# Create objects: one Screen per experiment, each built from a list of
# replicate datasets, the matching sequence (hras or kras, defined outside
# this section) and the shared amino acid order and secondary structure.
HRAS_166_GAP: Screen = Screen([HRas166_GAP_r0, HRas166_GAP_r1], hras, aminoacids, **kwargs)
HRAS_166_RBD: Screen = Screen([HRas166_RBD_r0, HRas166_RBD_r1, HRas166_RBD_r2], hras, aminoacids,
                              **kwargs)
HRAS_166_GAPGEF: Screen = Screen([HRas166_GAPGEF_r0, HRas166_GAPGEF_r1], hras, aminoacids,
                                 **kwargs)
HRAS_188_BAF3: Screen = Screen([HRas188_BaF3_r0, HRas188_BaF3_r1], hras, aminoacids, **kwargs)
# NOTE(review): replicate r2 is not included here (r0, r1, r3) — presumably
# deliberate; confirm it is not a typo.
HRAS_180_GAP: Screen = Screen([HRas180_GAP_r0, HRas180_GAP_r1, HRas180_GAP_r3], hras, aminoacids,
                              **kwargs)
HRAS_180_RBD: Screen = Screen(
    [HRas180_RBD_r0, HRas180_RBD_r1, HRas180_RBD_r2, HRas180_RBD_r3], hras, aminoacids, **kwargs)
KRAS_165_GAP: Screen = Screen([KRas165_GAP_r0, KRas165_GAP_r1, KRas165_GAP_r2], kras, aminoacids,
                              **kwargs)
KRAS_165_RBD: Screen = Screen([KRas165_RBD_r0, KRas165_RBD_r1], kras, aminoacids, **kwargs)
def run_demo(figure: FIGURE_OPTIONS = 'heatmap', show: bool = True) -> None:
    """
    Performs a demonstration of the mutagenesis_visualization software.

    Parameters
    -----------
    figure : str, default 'heatmap'
        There are a few example plots that can be displayed to test the
        package is working on your station. The options are 'heatmap',
        'miniheatmap', 'mean', 'kernel', 'pca', 'position',
        'secondary_mean', 'correlation', 'individual_correlation' and
        'pymol'. The name is matched case-insensitively. Check the
        documentation for more information.

    show : boolean, default True
        Forwarded as the ``show`` argument of the selected plotting method
        (documented upstream as executing plt.show() when True). The
        'pymol' option does not receive it.

    Raises
    -------
    NameError
        If ``figure`` does not match any of the options above. The demo
        object is built before the name is checked.
    """
    # Load enrichment scores
    enrichment_scores: npt.NDArray = np.genfromtxt(HRAS_RBD_COUNTS_CSV, delimiter=',')

    # Protein sequence
    protein_sequence: str = 'MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHQYREQIKRVKDSDDVPMVLVGNKCDLAARTVESRQAQDLARSYGIPYIETSAKTRQGVEDAFYTLVREIRQHKLRKLNPPDESGPG'  # pylint: disable=line-too-long

    # Amino acid order
    residues: List[str] = list('ACDEFGHIKLMNPQRSTVWY*')

    # Secondary structure as (label, number of residues) pairs
    segments = (
        ('L0', 1),
        ('β1', 9 - 1),
        ('L1', 15 - 9),
        ('α1', 25 - 15),
        ('L2', 36 - 25),
        ('β2', 46 - 36),
        ('L3', 48 - 46),
        ('β3', 58 - 48),
        ('L4', 64 - 58),
        ('α2', 74 - 64),
        ('L5', 76 - 74),
        ('β4', 83 - 76),
        ('L6', 86 - 83),
        ('α3', 103 - 86),
        ('L7', 110 - 103),
        ('β5', 116 - 110),
        ('L8', 126 - 116),
        ('α4', 137 - 126),
        ('L9', 140 - 137),
        ('β6', 143 - 140),
        ('L10', 151 - 143),
        ('α5', 172 - 151),
        ('L11', 190 - 172),
    )
    structure: list = [[label] * length for label, length in segments]

    # Create object
    hras_rbd: Screen = Screen(
        datasets=enrichment_scores,
        sequence=protein_sequence,
        aminoacids=residues,
        secondary=structure,
    )

    # Lower-case once, then look the plot up in a table of lazy callables so
    # that only the selected plotting method is ever touched.
    choice = figure.lower()
    demos = {
        # Full heatmap
        'heatmap': lambda: hras_rbd.heatmap(
            title='H-Ras 2-166', mask_selfsubstitutions=False, show_cartoon=True, show=show
        ),
        # Condensed heatmap
        'miniheatmap': lambda: hras_rbd.miniheatmap(
            mask_selfsubstitutions=False, title='Wt residue H-Ras', show=show
        ),
        # Mean enrichment by position
        'mean': lambda: hras_rbd.enrichment_bar(
            figsize=[6, 2.5],
            mode='mean',
            show_cartoon=True,
            yscale=[-2, 0.5],
            title='',
            show=show,
        ),
        # Kernel distribution with histogram
        'kernel': lambda: hras_rbd.kernel(
            histogram=True, title='H-Ras 2-166', xscale=[-2, 1], show=show
        ),
        # PCA by amino acid substitution
        'pca': lambda: hras_rbd.pca(
            dimensions=(0, 1), figsize=(2, 2), adjustlabels=True, title='', show=show
        ),
        # Bar plot for position 117
        'position': lambda: hras_rbd.position_bar(
            position=117,
            yscale=(-1.5, 0.8),
            figsize=(3.5, 2),
            title='Position 117',
            output_file=None,
            show=show,
        ),
        # Mean of each secondary structure motif
        'secondary_mean': lambda: hras_rbd.secondary_mean(
            yscale=[-1, 0],
            figsize=[3, 2],
            title='Mean of secondary motifs',
            show=show,
            show_error_bars=False,
        ),
        # Correlation between amino acids
        'correlation': lambda: hras_rbd.correlation(
            colorbar_scale=[0.5, 1], title='Correlation', show=show
        ),
        # Explained variability by amino acid
        'individual_correlation': lambda: hras_rbd.individual_correlation(
            yscale=[0, 0.6],
            title='Explained variability by amino acid',
            output_file=None,
            show=show,
        ),
        'pymol': lambda: hras_rbd.pymol(PDB_5P21),
    }

    if choice not in demos:
        raise NameError('Select a valid name for a demo figure.')
    demos[choice]()