def main():
    """Plot mean GO similarity of interaction partners for all root ontologies.

    Loads one pickled GO similarity result per root ontology for the selected
    reference interactome, computes the mean and standard error of the "gosim"
    values for each interactome category, and draws a grouped bar plot that is
    saved to the figure directory.
    """
    # reference interactome name
    # options: HI-II-14, IntAct
    ref_interactome_name = 'HI-II-14'

    # similarity measure used to calculate GO similarity
    sim_measure = 'SimGIC'

    # root ontology labels used to label output files and figures
    ont_abv = {
        'biological_process': 'bp',
        'molecular_function': 'mf',
        'cellular_component': 'cc'
    }

    # root ontologies on which GO similarity was calculated
    ont_root = [
        'biological_process', 'molecular_function', 'cellular_component'
    ]

    # structural interactome names
    struc_name = {'HI-II-14': 'Y2H-SI', 'IntAct': 'IntAct-SI'}

    # interactome names (keys of each loaded result dictionary)
    interactome_names = [
        'Random interactions', 'Reference interactome',
        'Structural interactome'
    ]

    # interactome colors
    interactome_colors = ['limegreen', 'steelblue', 'orangered']

    # show figures
    showFigs = False

    # directory of processed data files specific to interactome
    interactomeDir = Path('../data') / 'processed' / ref_interactome_name

    # directory holding the GO similarity input files
    gosimDir = interactomeDir / 'gosim'

    # figure directory
    figDir = Path('../figures') / ref_interactome_name / 'gosim'

    # create output directories if not existing
    if not figDir.exists():
        os.makedirs(figDir)

    # load the GO similarity results of each root ontology
    allgosim = {}
    for root in ont_root:
        pklName = 'allPPI_gosim_%s_%s.pkl' % (ont_abv[root], sim_measure)
        with open(gosimDir / pklName, 'rb') as f:
            allgosim[root] = pickle.load(f)

    # one row per interactome category, one column per root ontology
    means, errors = [], []
    for category in interactome_names:
        meanRow, errorRow = [], []
        for root in ont_root:
            values = allgosim[root][category]["gosim"]
            meanRow.append(np.mean(values))
            errorRow.append(sderror(values))
        means.append(meanRow)
        errors.append(errorRow)

    # axis and legend labels
    xlabels = [r.title().replace('_', '\n') for r in ont_root]
    ylabels = [round(x, 1) for x in np.arange(0, 0.6, 0.1)]
    legend = [
        'Random pairs',
        '%s reference interactome' % ref_interactome_name,
        struc_name[ref_interactome_name]
    ]

    multi_bar_plot(means,
                   errors=errors,
                   xlabels=xlabels,
                   ylabel='Gene ontology similarity\nof interaction partners',
                   ylabels=ylabels,
                   colors=interactome_colors,
                   edgecolor='k',
                   ewidth=1.5,
                   barwidth=0.2,
                   bargap=0.03,
                   fontsize=18,
                   capsize=5,
                   msize=8,
                   leg=legend,
                   show=showFigs,
                   figdir=figDir,
                   figname='interactome_gosim_%s' % sim_measure)
def main():
    """Compute and compare GO similarity of interaction partners.

    GO similarity is obtained for randomly sampled protein pairs, for all PPIs
    of the reference interactome, and for the PPIs of the structural
    (interface-annotated) interactome. The three result tables are pickled to
    one file, summary statistics and bootstrap tests are printed, and a bar
    plot of the mean similarities is saved to the figure directory.
    """
    # reference interactome name
    # options: HI-II-14, IntAct
    interactome_name = 'HI-II-14'

    # similarity measure to calculate GO similarity
    # options: Resnik, Lin, Jiang-Conrath, SimGIC, SimUI, SimIC, SimRel, Dice, SimTO
    #          SimNTO, Jaccard, Czekanowski-Dice, Cosine, GSESAME, SimICND, SimICNP
    sim_measure = 'SimGIC'

    # mixing strategy for merging GO term semantic similarities
    # options: max, avg, BMA (best match average)
    mix_method = 'BMA'

    # root ontology on which GO similarity is calculated
    # options: biological_process, molecular_function, cellular_component
    ont_root = 'biological_process'

    # root ontology labels used to label output files and figures
    ont_abv = {
        'biological_process': 'bp',
        'molecular_function': 'mf',
        'cellular_component': 'cc'
    }

    # list of ontological relationships to ignore
    ont_ignore = None

    # list of evidence codes to ignore
    ec_ignore = None

    # number of random interactions
    numRandPairs = 10000

    # interactome colors
    interactome_colors = ['limegreen', 'steelblue', 'orangered']

    # show figures
    showFigs = False

    # parent directory of all data files
    dataDir = Path('../data')

    # directory of data files from external sources
    extDir = dataDir / 'external'

    # directory of processed data files specific to interactome
    interactomeDir = dataDir / 'processed' / interactome_name

    # directory of GO similarity output data files
    gosimDir = interactomeDir / 'gosim'

    # figure directory
    figDir = Path('../figures') / interactome_name / 'gosim'

    # get root ontology label to label output files and figures
    ont_label = ont_abv[ont_root]

    # values substituted into every output file name template
    tag = (ont_label, sim_measure)

    # input data files
    ontologyFile = extDir / 'go-basic.obo'
    annotationFile = extDir / 'goa_human.gaf'
    interactomeFile = interactomeDir / 'human_interactome.txt'
    structuralInteractomeFile = interactomeDir / 'human_interface_annotated_interactome.txt'

    # output data files
    refPPIListFile = gosimDir / 'refPPIs.txt'
    randPairListFile = gosimDir / 'randPairs.txt'
    refPPIgosimParamFile = gosimDir / ('fastsemsim_parameters_refPPIs_%s_%s' % tag)
    randPairgosimParamFile = gosimDir / ('fastsemsim_parameters_randPairs_%s_%s' % tag)
    refPPIfastsemsimOutFile = gosimDir / ('fastsemsim_output_refPPIs_%s_%s' % tag)
    randPairfastsemsimOutFile = gosimDir / ('fastsemsim_output_randPairs_%s_%s' % tag)
    refPPIgosimFile = gosimDir / ('gosim_refPPIs_%s_%s.pkl' % tag)
    randPairgosimFile = gosimDir / ('gosim_randPairs_%s_%s.pkl' % tag)
    allPPIgosimFile = gosimDir / ('allPPI_gosim_%s_%s.pkl' % tag)

    # create directories if not existing
    for outDir in (gosimDir, figDir):
        if not outDir.exists():
            os.makedirs(outDir)

    def lookup_gosim(pairTable, gosimDict):
        # similarity of each protein pair, keyed by the sorted pair; NaN if absent
        sortedPairs = map(
            tuple, map(sorted, pairTable[["Protein_1", "Protein_2"]].values))
        return [gosimDict.get(pair, np.nan) for pair in sortedPairs]

    def drop_missing_gosim(pairTable):
        # keep only the pairs for which a similarity value was found
        hasValue = ~np.isnan(pairTable["gosim"])
        return pairTable[hasValue].reset_index(drop=True)

    def write_pair_list(pairTable, listFile):
        # tab-separated list of protein pairs without header or index
        pairTable[["Protein_1", "Protein_2"]].to_csv(listFile,
                                                     index=False,
                                                     header=False,
                                                     sep='\t')

    #------------------------------------------------------------------------------------
    # load reference and structural interactomes
    #------------------------------------------------------------------------------------

    interactome = pd.read_table(interactomeFile)
    interactomeProteins = list(
        set(interactome[["Protein_1", "Protein_2"]].values.flatten()))
    print('\n' + 'reference interactome:')
    print('%d PPIs' % len(interactome))
    print('%d proteins' % len(interactomeProteins))

    structuralInteractome = read_single_interface_annotated_interactome(
        structuralInteractomeFile)
    strucInteractomeProteins = set(
        structuralInteractome[["Protein_1", "Protein_2"]].values.flatten())
    print('\n' + 'interface-annotated interactome:')
    print('%d PPIs' % len(structuralInteractome))
    print('%d proteins' % len(strucInteractomeProteins))

    #------------------------------------------------------------------------------------
    # Calculate GO similarity for random interactions
    #------------------------------------------------------------------------------------

    firstProteins, secondProteins = zip(
        *sample_random_pairs(interactomeProteins, numRandPairs))
    randPairs = pd.DataFrame()
    randPairs["Protein_1"] = firstProteins
    randPairs["Protein_2"] = secondProteins

    # discard pairs of a protein with itself
    randPairs = randPairs[randPairs["Protein_1"] != randPairs["Protein_2"]]

    # produce protein GO similarity dictionary
    print('\n' + 'producing GO similarity dictionary for random pairs')
    write_pair_list(randPairs, randPairListFile)
    produce_fastsemsim_protein_gosim_dict(
        randPairListFile,
        randPairgosimFile,
        sim_measure=sim_measure,
        mix_method=mix_method,
        ont_root=ont_root,
        ont_ignore=ont_ignore,
        ec_ignore=ec_ignore,
        ontologyFile=ontologyFile,
        annotationFile=annotationFile,
        paramOutFile=randPairgosimParamFile,
        fastsemsimOutFile=randPairfastsemsimOutFile)
    with open(randPairgosimFile, 'rb') as f:
        randGosimDict = pickle.load(f)

    randPairs["gosim"] = lookup_gosim(randPairs, randGosimDict)
    randPairs = drop_missing_gosim(randPairs)

    #------------------------------------------------------------------------------------
    # Calculate GO similarity for all interaction partners in the reference interactome
    #------------------------------------------------------------------------------------

    refPPIs = pd.DataFrame()
    refPPIs["Protein_1"], refPPIs["Protein_2"] = zip(
        *interactome[["Protein_1", "Protein_2"]].values)

    # produce protein GO similarity dictionary (reused if already on disk)
    if not refPPIgosimFile.is_file():
        print('\n' + 'producing GO similarity dictionary for reference PPIs')
        write_pair_list(refPPIs, refPPIListFile)
        produce_fastsemsim_protein_gosim_dict(
            refPPIListFile,
            refPPIgosimFile,
            sim_measure=sim_measure,
            mix_method=mix_method,
            ont_root=ont_root,
            ont_ignore=ont_ignore,
            ec_ignore=ec_ignore,
            ontologyFile=ontologyFile,
            annotationFile=annotationFile,
            paramOutFile=refPPIgosimParamFile,
            fastsemsimOutFile=refPPIfastsemsimOutFile)
    with open(refPPIgosimFile, 'rb') as f:
        refGosimDict = pickle.load(f)

    refPPIs["gosim"] = lookup_gosim(refPPIs, refGosimDict)
    refPPIs = drop_missing_gosim(refPPIs)

    #------------------------------------------------------------------------------------
    # Calculate GO similarity for all interaction partners in the structural interactome
    #------------------------------------------------------------------------------------

    strucPPIs = pd.DataFrame()
    strucPPIs["Protein_1"], strucPPIs["Protein_2"] = zip(
        *structuralInteractome[["Protein_1", "Protein_2"]].values)

    # structural PPIs are looked up in the reference PPI similarity dictionary
    strucPPIs["gosim"] = lookup_gosim(strucPPIs, refGosimDict)
    strucPPIs = drop_missing_gosim(strucPPIs)

    #------------------------------------------------------------------------------------
    # Save GO similarity results to file
    #------------------------------------------------------------------------------------

    allgosim = {
        'Random interactions': randPairs,
        'Reference interactome': refPPIs,
        'Structural interactome': strucPPIs
    }
    with open(allPPIgosimFile, 'wb') as fout:
        pickle.dump(allgosim, fout)

    #------------------------------------------------------------------------------------
    # Compare GO similarity of interaction partners between reference interactome,
    # structural interactome and random interactions
    #------------------------------------------------------------------------------------

    # similarity values (pairs without a value were already dropped above)
    randGOsim = randPairs["gosim"].tolist()
    refGOsim = refPPIs["gosim"].tolist()
    strucGOsim = strucPPIs["gosim"].tolist()

    # summary statistics shared by the printout and the plot
    randMean, randSE = np.mean(randGOsim), sderror(randGOsim)
    refMean, refSE = np.mean(refGOsim), sderror(refGOsim)
    strucMean, strucSE = np.mean(strucGOsim), sderror(strucGOsim)

    # print results
    print('\n' + 'Mean %s similarity for interaction partners:' % ont_root)
    print('Random pairs: %f (SE = %g, n = %d)' %
          (randMean, randSE, len(randGOsim)))
    print('Reference interactome: %f (SE = %g, n = %d)' %
          (refMean, refSE, len(refGOsim)))
    print('Structural interactome: %f (SE = %g, n = %d)' %
          (strucMean, strucSE, len(strucGOsim)))

    print('\n' + 'Statistical significance')
    print('reference interactome vs random interactions:')
    bootstrap_test(refGOsim, randGOsim, iter=100000)
    print('structural interactome vs reference interactome:')
    bootstrap_test(strucGOsim, refGOsim, iter=100000)

    # plot results
    bar_plot([randMean, refMean, strucMean],
             [randSE, refSE, strucSE],
             xlabels=[
                 'Random\ninteractions', 'Reference\ninteractome',
                 'Structural\ninteractome'
             ],
             ylabel='%s similarity of\ninteraction partners' % ont_root,
             colors=interactome_colors,
             edgecolor='k',
             barwidth=0.5,
             fontsize=24,
             show=showFigs,
             figdir=figDir,
             figname='interactome_gosim_%s_%s' % (ont_label, sim_measure))
def main():
    """Plot mean tissue co-expression of interaction partners per database.

    Loads one pickled co-expression result per tissue expression database for
    the selected reference interactome, computes the mean and standard error
    of the "coexpr" values for each interactome category, and draws a grouped
    bar plot that is saved to the figure directory.
    """
    # reference interactome name
    # options: HI-II-14, IntAct
    ref_interactome_name = 'HI-II-14'

    # tissue expression databases
    expr_db = ['Illumina', 'GTEx', 'HPA', 'Fantom5']

    # structural interactome names
    struc_name = {'HI-II-14': 'Y2H-SI', 'IntAct': 'IntAct-SI'}

    # interactome names (keys of each loaded result dictionary)
    interactome_names = [
        'Random interactions', 'Reference interactome',
        'Structural interactome'
    ]

    # interactome colors
    interactome_colors = ['limegreen', 'steelblue', 'orangered']

    # show figures
    showFigs = False

    # directory of processed data files specific to interactome
    interactomeDir = Path('../data') / 'processed' / ref_interactome_name

    # directory holding the co-expression input files
    coexprDir = interactomeDir / 'coexpr'

    # figure directory
    figDir = Path('../figures') / ref_interactome_name / 'coexpr'

    # create output directories if not existing
    if not figDir.exists():
        os.makedirs(figDir)

    # load the co-expression results of each expression database
    allcoexpr = {}
    for db in expr_db:
        with open(coexprDir / ('interactome_coexpr_%s.pkl' % db), 'rb') as f:
            allcoexpr[db] = pickle.load(f)

    # one row per interactome category, one column per expression database
    means, errors = [], []
    for category in interactome_names:
        meanRow, errorRow = [], []
        for db in expr_db:
            values = allcoexpr[db][category]["coexpr"]
            meanRow.append(np.mean(values))
            errorRow.append(sderror(values))
        means.append(meanRow)
        errors.append(errorRow)

    # y-axis tick labels and legend entries
    ylabels = [round(x, 1) for x in np.arange(0, 0.9, 0.2)]
    legend = [
        'Random pairs',
        '%s reference interactome' % ref_interactome_name,
        struc_name[ref_interactome_name]
    ]

    multi_bar_plot(means,
                   errors=errors,
                   xlabels=expr_db,
                   ylabel='Tissue co-expression of\ninteraction partners',
                   ylabels=ylabels,
                   colors=interactome_colors,
                   edgecolor='k',
                   ewidth=1.5,
                   barwidth=0.2,
                   bargap=0.03,
                   fontsize=18,
                   capsize=5,
                   msize=8,
                   leg=legend,
                   show=showFigs,
                   figdir=figDir,
                   figname='interactome_coexpr')
def main():
    """Compare substitution-matrix scores of non-disease and disease mutations.

    Produces (or reuses) a pickled substitution table for the matrix named by
    ``matrixName``, loads the natural and disease mutations with overlaps
    removed, scores every mutation by its (wild-type, mutant) residue pair,
    prints the mean scores, runs ``bootstrap_test`` on the two score lists and
    saves a box plot to the figure directory.
    """
    # substitution matrix name
    matrixName = 'PAM30'

    # show figures
    showFigs = False

    # parent directory of all data files
    dataDir = Path('../data')

    # parent directory of all processed data files
    procDir = dataDir / 'processed'

    # figure directory
    figDir = Path('../figures') / 'combined'

    # input data files
    naturalMutationsFile = procDir / 'dbsnp_mutations4.txt'
    diseaseMutationsFile = procDir / 'clinvar_mutations6.txt'

    # output data files
    subsMatrixFile = procDir / (matrixName + '.pkl')

    # create output directories if not existing
    if not procDir.exists():
        os.makedirs(procDir)
    if not figDir.exists():
        os.makedirs(figDir)

    #------------------------------------------------------------------------------------
    # produce substitution matrix
    #------------------------------------------------------------------------------------

    # the pickled table is reused when it already exists on disk
    if not subsMatrixFile.is_file():
        produce_substitution_matrix(matrixName, subsMatrixFile)
    with open(subsMatrixFile, 'rb') as f:
        subsTable = pickle.load(f)

    #------------------------------------------------------------------------------------
    # load mutations
    #------------------------------------------------------------------------------------

    naturalMutations, diseaseMutations = remove_mutation_overlaps(
        naturalMutationsFile, diseaseMutationsFile)

    #------------------------------------------------------------------------------------
    # calculate mutation substitution scores
    #------------------------------------------------------------------------------------

    # the table is indexed by (wild-type residue, mutant residue) tuples
    natMutScore = [
        subsTable[x]
        for x in zip(naturalMutations["wt_res"], naturalMutations["mut_res"])
    ]
    disMutScore = [
        subsTable[x]
        for x in zip(diseaseMutations["wt_res"], diseaseMutations["mut_res"])
    ]

    print()
    print('Avg. score for natural mutations: %.1f (SE = %g, n = %d)' %
          (np.mean(natMutScore), sderror(natMutScore), len(natMutScore)))
    print('Avg. score for disease mutations: %.1f (SE = %g, n = %d)' %
          (np.mean(disMutScore), sderror(disMutScore), len(disMutScore)))

    bootstrap_test(natMutScore, disMutScore, 10000)

    # NOTE(review): the tick labels and y-limits below look tuned to PAM30
    # scores - confirm them if matrixName is changed.
    box_plot([natMutScore, disMutScore],
             xlabels=('Non-disease\nmutations', 'Disease\nmutations'),
             ylabels=[-15, -10, -5, 0, 5],
             # label follows the configured matrix instead of a fixed name
             ylabel='%s substitution score' % matrixName,
             ylim=[-16, 5],
             colors=['turquoise', 'magenta'],
             fontsize=26,
             show=showFigs,
             figdir=figDir,
             figname='substitution_score')
def main():
    """Compute and compare tissue co-expression of interaction partners.

    Produces (or reuses) a pickled protein tissue expression dictionary for
    the database named by ``expr_db``, computes co-expression for randomly
    sampled protein pairs, for all PPIs of the reference interactome and for
    the PPIs of the structural (interface-annotated) interactome, pickles the
    three result tables to one file, prints summary statistics and bootstrap
    tests, and saves a bar plot of the mean co-expression values.

    Raises:
        ValueError: if the expression dictionary has to be produced and
            ``expr_db`` is not one of the supported database names.
    """
    # reference interactome name
    # options: HI-II-14, IntAct
    interactome_name = 'HI-II-14'

    # tissue expression database name
    # options: Illumina, GTEx, HPA, Fantom5
    expr_db = 'Illumina'

    # minimum number of tissue expression values required for protein pair tissue
    # co-expression to be considered
    coexprMinTissues = 5

    # number of random interactions
    numRandPairs = 10000

    # interactome colors
    interactome_colors = ['limegreen', 'steelblue', 'orangered']

    # show figures
    showFigs = False

    # parent directory of all data files
    dataDir = Path('../data')

    # directory of data files from external sources
    extDir = dataDir / 'external'

    # parent directory of all processed data files
    procDir = dataDir / 'processed'

    # directory of processed data files specific to interactome
    interactomeDir = procDir / interactome_name

    # directory of tissue coexpression output data files
    coexprDir = interactomeDir / 'coexpr'

    # figure directory
    figDir = Path('../figures') / interactome_name / 'coexpr'

    # input data files
    illuminaExprFile = extDir / 'E-MTAB-513.tsv.txt'
    gtexDir = extDir / 'GTEx_Analysis_v7_eQTL_expression_matrices'
    hpaExprFile = extDir / 'normal_tissue.tsv'
    fantomExprFile = extDir / 'hg38_fair+new_CAGE_peaks_phase1and2_tpm_ann.osc.txt'
    fantomSampleTypeFile = extDir / 'fantom5_sample_type.xlsx'
    uniprotIDmapFile = procDir / 'to_human_uniprotID_map.pkl'
    uniqueGeneSwissProtIDFile = procDir / 'uniprot_unique_gene_reviewed_human_proteome.list'
    interactomeFile = interactomeDir / 'human_interactome.txt'
    structuralInteractomeFile = interactomeDir / 'human_interface_annotated_interactome.txt'

    # output data files
    proteinExprFile = procDir / ('protein_expr_%s.pkl' % expr_db)
    coexprFile = coexprDir / ('interactome_coexpr_%s.pkl' % expr_db)

    # create directories if not existing
    if not coexprDir.exists():
        os.makedirs(coexprDir)
    if not figDir.exists():
        os.makedirs(figDir)

    #------------------------------------------------------------------------------------
    # load reference and structural interactomes
    #------------------------------------------------------------------------------------

    interactome = pd.read_table(interactomeFile)
    print('\n' + 'reference interactome:')
    print('%d PPIs' % len(interactome))
    print('%d proteins' %
          len(set(interactome[["Protein_1", "Protein_2"]].values.flatten())))

    structuralInteractome = read_single_interface_annotated_interactome(
        structuralInteractomeFile)
    print('\n' + 'interface-annotated interactome:')
    print('%d PPIs' % len(structuralInteractome))
    print('%d proteins' % len(
        set(structuralInteractome[["Protein_1", "Protein_2"]].values.flatten())))

    #------------------------------------------------------------------------------------
    # Produce tissue expression dictionary
    #------------------------------------------------------------------------------------

    # produce protein tissue expression profiles (reused if already on disk)
    if not proteinExprFile.is_file():
        print('\n' + 'producing protein tissue expression dictionary')
        # Database names are compared by value: identity checks ("is") against
        # string literals depend on interpreter string interning and trigger a
        # SyntaxWarning on Python 3.8+.
        if expr_db == 'Illumina':
            produce_illumina_expr_dict(illuminaExprFile,
                                       uniprotIDmapFile,
                                       proteinExprFile,
                                       headers=list(range(1, 18)))
        elif expr_db == 'GTEx':
            produce_gtex_expr_dict(gtexDir,
                                   uniprotIDmapFile,
                                   proteinExprFile,
                                   uniprotIDlistFile=uniqueGeneSwissProtIDFile)
        elif expr_db == 'HPA':
            produce_hpa_expr_dict(hpaExprFile, uniprotIDmapFile,
                                  proteinExprFile)
        elif expr_db == 'Fantom5':
            produce_fantom5_expr_dict(
                fantomExprFile,
                uniprotIDmapFile,
                proteinExprFile,
                sampleTypes='tissues',
                sampleTypeFile=fantomSampleTypeFile,
                uniprotIDlistFile=uniqueGeneSwissProtIDFile)
        else:
            # fail with a clear message instead of a later missing-file error
            raise ValueError('Unknown tissue expression database: %s' %
                             expr_db)

    with open(proteinExprFile, 'rb') as f:
        expr = pickle.load(f)

    # HPA expression levels are categorical: map them to numbers, with any
    # level not listed in exprMap becoming NaN
    if expr_db == 'HPA':
        exprMap = {'Not detected': 0, 'Low': 1, 'Medium': 2, 'High': 3}
        for k, v in expr.items():
            for i, e in enumerate(v):
                v[i] = exprMap.get(e, np.nan)
            expr[k] = np.array(v)

    #-----------------------------------------------------------------------------------------
    # Calculate tissue co-expression for random interactions
    #-----------------------------------------------------------------------------------------

    proteins = list(
        set(interactome[["Protein_1", "Protein_2"]].values.flatten()))
    randPairs = pd.DataFrame()
    randPairs["Protein_1"], randPairs["Protein_2"] = zip(
        *sample_random_pairs(proteins, numRandPairs))

    # discard pairs of a protein with itself
    randPairs = randPairs[randPairs["Protein_1"] != randPairs["Protein_2"]]

    randPairs["coexpr"] = randPairs.apply(lambda x: coexpr(
        x["Protein_1"], x["Protein_2"], expr, minTissues=coexprMinTissues),
                                          axis=1)

    # keep only the pairs with a defined co-expression value
    randPairs = randPairs[~np.isnan(randPairs["coexpr"])].reset_index(
        drop=True)

    #-----------------------------------------------------------------------------------------
    # Calculate tissue co-expression for all interaction partners in the reference interactome
    #-----------------------------------------------------------------------------------------

    refPPIs = pd.DataFrame()
    refPPIs["Protein_1"] = interactome["Protein_1"].tolist()
    refPPIs["Protein_2"] = interactome["Protein_2"].tolist()

    refPPIs["coexpr"] = refPPIs.apply(lambda x: coexpr(
        x["Protein_1"], x["Protein_2"], expr, minTissues=coexprMinTissues),
                                      axis=1)

    refPPIs = refPPIs[~np.isnan(refPPIs["coexpr"])].reset_index(drop=True)

    #------------------------------------------------------------------------------------------
    # Calculate tissue co-expression for all interaction partners in the structural interactome
    #------------------------------------------------------------------------------------------

    strucPPIs = pd.DataFrame()
    strucPPIs["Protein_1"] = structuralInteractome["Protein_1"].tolist()
    strucPPIs["Protein_2"] = structuralInteractome["Protein_2"].tolist()

    strucPPIs["coexpr"] = strucPPIs.apply(lambda x: coexpr(
        x["Protein_1"], x["Protein_2"], expr, minTissues=coexprMinTissues),
                                          axis=1)

    strucPPIs = strucPPIs[~np.isnan(strucPPIs["coexpr"])].reset_index(
        drop=True)

    #-------------------------------------------------------------------------------------
    # Save tissue co-expression results to file
    #-------------------------------------------------------------------------------------

    allcoexpr = {
        'Random interactions': randPairs,
        'Reference interactome': refPPIs,
        'Structural interactome': strucPPIs
    }
    with open(coexprFile, 'wb') as fout:
        pickle.dump(allcoexpr, fout)

    #------------------------------------------------------------------------------------
    # Compare tissue co-expression of interaction partners between reference interactome,
    # structural interactome and random interactions
    #------------------------------------------------------------------------------------

    # co-expression values (pairs with NaN were already dropped above)
    randCoexpr = randPairs["coexpr"].tolist()
    refCoexpr = refPPIs["coexpr"].tolist()
    strucCoexpr = strucPPIs["coexpr"].tolist()

    # print results
    print('\n' + 'Mean tissue co-expression for interaction partners:')
    print('Random pairs: %f (SE = %g, n = %d)' %
          (np.mean(randCoexpr), sderror(randCoexpr), len(randCoexpr)))
    print('Reference interactome: %f (SE = %g, n = %d)' %
          (np.mean(refCoexpr), sderror(refCoexpr), len(refCoexpr)))
    print('Structural interactome: %f (SE = %g, n = %d)' %
          (np.mean(strucCoexpr), sderror(strucCoexpr), len(strucCoexpr)))

    print('\n' + 'Statistical significance')
    print('reference interactome vs random interactions:')
    bootstrap_test(refCoexpr, randCoexpr, iter=100000)
    print('structural interactome vs reference interactome:')
    bootstrap_test(strucCoexpr, refCoexpr, iter=100000)

    # plot results
    bar_plot([np.mean(randCoexpr),
              np.mean(refCoexpr),
              np.mean(strucCoexpr)],
             [sderror(randCoexpr),
              sderror(refCoexpr),
              sderror(strucCoexpr)],
             xlabels=[
                 'Random\ninteractions', 'Reference\ninteractome',
                 'Structural\ninteractome'
             ],
             ylabel='Tissue co-expression of\ninteraction partners',
             colors=interactome_colors,
             edgecolor='k',
             barwidth=0.5,
             fontsize=24,
             show=showFigs,
             figdir=figDir,
             figname='interactome_coexpression_%s' % expr_db)
def main():
    """Analyze mutation binding ∆∆G values and predict PPI perturbations.

    Reads the binding ∆∆G values of non-disease and disease interfacial
    mutations, writes them to tab-separated files, prints and plots their
    distribution and the fraction exceeding ``ddgCutoff``, then updates the
    geometry-based perturbation predictions with ``energy_based_perturbation``
    and pickles the result. Returns early if the geometry-based prediction
    file is missing. Optionally plots the perturbed networks.
    """
    # reference interactome name
    # options: HI-II-14, IntAct
    interactome_name = 'HI-II-14'

    # structural interactome names
    struc_name = {'HI-II-14': 'Y2H-SI', 'IntAct': 'IntAct-SI'}

    # method of calculating mutation ∆∆G for which results will be used
    # options: bindprofx, foldx
    ddg_method = 'bindprofx'

    # Minimum reduction in binding free energy DDG required for interaction perturbation
    ddgCutoff = 0.5

    # % confidence interval
    CI = 95

    # plot perturbed interactome and produce files for use by Cytoscape
    plot_perturbations = False

    # show figures
    showFigs = False

    # directory of processed data files specific to interactome
    interactomeDir = Path('../data') / 'processed' / interactome_name

    # directory of network perturbation output data files for use by Cytoscape
    cytoscapeDir = interactomeDir / 'cytoscape'

    # figure directory
    figDir = Path('../figures') / interactome_name

    # input data files
    geometryPerturbsFile = interactomeDir / 'unique_mutation_perturbs_geometry.pkl'
    natMutDDGinFile = interactomeDir / ('nondisease_mutations_%s_ddg.txt' % ddg_method)
    disMutDDGinFile = interactomeDir / ('disease_mutations_%s_ddg.txt' % ddg_method)
    interfaceAnnotatedInteractomeFile = interactomeDir / 'human_interface_annotated_interactome.txt'

    # output data files
    natMutDDGoutFile = interactomeDir / ('nondisMut_%s_∆∆G_used.txt' % ddg_method)
    disMutDDGoutFile = interactomeDir / ('disMut_%s_∆∆G_used.txt' % ddg_method)
    physicsPerturbsFile = interactomeDir / ('mutation_perturbs_physics_%s.pkl' % ddg_method)
    naturalMutEdgeFile = cytoscapeDir / ('nondiseaseMut_perturbed_edges_physics_%s' % ddg_method)
    naturalMutNodeFile = cytoscapeDir / ('nondiseaseMut_node_colors_physics_%s' % ddg_method)
    diseaseMutEdgeFile = cytoscapeDir / ('diseaseMut_perturbed_edges_physics_%s' % ddg_method)
    diseaseMutNodeFile = cytoscapeDir / ('diseaseMut_node_colors_physics_%s' % ddg_method)

    # create output directories if not existing
    for outDir in (interactomeDir, cytoscapeDir, figDir):
        if not outDir.exists():
            os.makedirs(outDir)

    # column names of the mutation ∆∆G tables
    ddgColumns = [
        "protein", "partner", "protein_pos", "mut_res", "pdb_id", "chain_id",
        "chain_partner", "chain_mut", "ddg"
    ]

    def ddg_table(ddgDict):
        # one row per dictionary entry: key fields followed by value fields
        table = pd.DataFrame(columns=ddgColumns)
        for rowNum, (mutKey, ddgInfo) in enumerate(ddgDict.items()):
            table.loc[rowNum] = mutKey + ddgInfo
        return table

    #------------------------------------------------------------------------------------
    # Fraction of mutation-targeted PPIs with ∆∆G exceeding a specified cutoff
    #------------------------------------------------------------------------------------

    # read change in binding free energy for interfacial mutations mapped on PDB chains
    naturalMutationsDDG = read_protein_mutation_ddg(natMutDDGinFile, 'binding')
    diseaseMutationsDDG = read_protein_mutation_ddg(disMutDDGinFile, 'binding')

    naturalMutations = ddg_table(naturalMutationsDDG)
    diseaseMutations = ddg_table(diseaseMutationsDDG)

    naturalMutations.to_csv(natMutDDGoutFile, index=False, sep='\t')
    diseaseMutations.to_csv(disMutDDGoutFile, index=False, sep='\t')

    natMutDDGs = naturalMutations["ddg"].values
    disMutDDGs = diseaseMutations["ddg"].values

    numNatural_ddg_considered = len(natMutDDGs)
    numDisease_ddg_considered = len(disMutDDGs)

    print('\n' + 'Avg. change in binding energy (∆∆G) for mutation-targeted PPIs:')
    print('Non-disease: %.1f (SE = %g, n = %d)' %
          (np.mean(natMutDDGs), sderror(natMutDDGs), numNatural_ddg_considered))
    print('Disease: %.1f (SE = %g, n = %d)' %
          (np.mean(disMutDDGs), sderror(disMutDDGs), numDisease_ddg_considered))

    # Statistical significance of difference in means
    t_test(natMutDDGs, disMutDDGs)

    histogramLegend = [
        'Disease interfacial mutations', 'Non-disease interfacial mutations'
    ]
    multi_histogram_plot([disMutDDGs, natMutDDGs], ['red', 'green'],
                         xlabel='Change in binding free energy (∆∆G)',
                         ylabel='Number of mutations',
                         leg=histogramLegend,
                         bins=25,
                         alpha=0.7,
                         fontsize=24,
                         show=showFigs,
                         figdir=figDir,
                         figname='mut_ddg_histogram_%s' % ddg_method)

    # number and fraction of mutations whose ∆∆G exceeds the cutoff
    numNatural_ddg = sum(natMutDDGs > ddgCutoff)
    numDisease_ddg = sum(disMutDDGs > ddgCutoff)
    fracNatural_ddg = numNatural_ddg / numNatural_ddg_considered
    fracDisease_ddg = numDisease_ddg / numDisease_ddg_considered
    fracNatural_ddg_error = sderror_on_fraction(numNatural_ddg,
                                                numNatural_ddg_considered)
    fracDisease_ddg_error = sderror_on_fraction(numDisease_ddg,
                                                numDisease_ddg_considered)

    print('\n' + 'Fraction of mutation-targeted PPIs with ∆∆G > %.1f kcal/mol:' %
          ddgCutoff)
    print('Non-disease: %.3f (SE = %g, ntot = %d)' %
          (fracNatural_ddg, fracNatural_ddg_error, numNatural_ddg_considered))
    print('Disease: %.3f (SE = %g, ntot = %d)' %
          (fracDisease_ddg, fracDisease_ddg_error, numDisease_ddg_considered))

    # Statistical significance of difference in fractions
    fisher_test([numNatural_ddg, numNatural_ddg_considered - numNatural_ddg],
                [numDisease_ddg, numDisease_ddg_considered - numDisease_ddg])

    structuralName = struc_name[interactome_name]
    fractionLabels = (
        '%s PPIs\nwith non-disease\nmutations\nat interface' % structuralName,
        '%s PPIs\nwith disease\nmutations\nat interface' % structuralName)
    bar_plot([fracNatural_ddg, fracDisease_ddg],
             error=[fracNatural_ddg_error, fracDisease_ddg_error],
             xlabels=fractionLabels,
             ylabel=('Fraction with ∆∆G > %.1f kcal / mol' % ddgCutoff),
             ylabels=[0, 0.2, 0.4, 0.6, 0.8],
             ylim=[0, 0.8],
             colors=['turquoise', 'magenta'],
             edgecolor='black',
             ewidth=2.5,
             barwidth=0.6,
             fontsize=24,
             capsize=10,
             msize=26,
             show=showFigs,
             figdir=figDir,
             figname='mut_ddg_frac_>%.1f_%s' % (ddgCutoff, ddg_method))

    #------------------------------------------------------------------------------------
    # predict PPI perturbations
    #------------------------------------------------------------------------------------

    # nothing to update without the geometry-based predictions
    if not geometryPerturbsFile.is_file():
        print('\n' + 'Geometry-based PPI perturbation prediction file not found')
        return

    print('\n' + 'Loading geometry-based PPI perturbation predictions')
    with open(geometryPerturbsFile, 'rb') as f:
        naturalPerturbs, diseasePerturbs = pickle.load(f)

    # only the first returned value is used; the other two are discarded
    print('\n' +
          'Performing physics-based edgotype prediction for non-disease mutations')
    naturalPredictions, _, _ = energy_based_perturbation(
        naturalPerturbs, naturalMutationsDDG, ddgCutoff)
    naturalPerturbs["perturbations"] = naturalPredictions

    print('\n' +
          'Performing physics-based edgotype prediction for disease mutations')
    diseasePredictions, _, _ = energy_based_perturbation(
        diseasePerturbs, diseaseMutationsDDG, ddgCutoff)
    diseasePerturbs["perturbations"] = diseasePredictions

    with open(physicsPerturbsFile, 'wb') as fOut:
        pickle.dump([naturalPerturbs, diseasePerturbs], fOut)

    #------------------------------------------------------------------------------------
    # plot network perturbations
    #------------------------------------------------------------------------------------

    if plot_perturbations:
        structuralInteractome = read_interface_annotated_interactome(
            interfaceAnnotatedInteractomeFile)

        # (message, perturbations, edge file, node file, figure name)
        networkJobs = [
            ('Creating network perturbed by non-disease mutations',
             naturalPerturbs, naturalMutEdgeFile, naturalMutNodeFile,
             'nondisease_mut_perturbed_interactome_physics_%s' % ddg_method),
            ('Creating network perturbed by disease mutations',
             diseasePerturbs, diseaseMutEdgeFile, diseaseMutNodeFile,
             'disease_mut_perturbed_interactome_physics_%s' % ddg_method),
        ]
        for message, perturbs, edgeFile, nodeFile, figName in networkJobs:
            print('\n' + message)
            nodes, edges, nodeColors, edgeColors = create_perturbed_network(
                structuralInteractome, perturbs, edgeFile, nodeFile)
            network_plot(edges,
                         nodes=nodes,
                         nodeSizes=[20] * len(nodes),
                         edgeWidth=1,
                         nodeColors=nodeColors,
                         edgeColors=edgeColors,
                         show=showFigs,
                         figdir=figDir,
                         figname=figName)