def write_MOHGP_results(organ, strain, path, fit, geneID):
    """
    A helper function akin to 'write_gene_model_results' to save MOHGP
    results to disk

    Arguments
    =========
    organ - Blood/Spleen
    strain - AS/CB
    path - a structured dictionary of paths returned by config
    fit - a Mixture of Hierarchical Gaussian Process model
    geneID - a pandas data frame containing ordered probeID/geneSymbols
             NOTE: this is different from probesToCluster order
             NOTE: a 'Cluster' column is added to this data frame in place

    Returns
    =========
    None - data saved to disk
    """
    fileName = organ + strain
    prefix = strain + '_' + organ[:2]  # e.g. AS_Bl

    # Cluster assigned to each probe: argmax over fit.phi, numbered from 1
    clustNum = np.argmax(fit.phi, axis=1) + 1
    geneID['Cluster'] = [prefix + '_%02d' % i for i in clustNum]

    # Gene/probe list per non-empty cluster. Names are generated from the
    # numerically sorted cluster numbers, so the column order stays correct
    # even when a name has three digits ('_100' sorts before '_11' as text).
    header = [prefix + '_%02d' % i for i in np.unique(clustNum)]
    geneList = []
    probeList = []
    for name in header:
        bWant = geneID['Cluster'] == name
        geneList.append(list(geneID.loc[bWant, 'Symbol']))
        probeList.append(list(geneID.loc[bWant, 'ProbeID']))

    # Save to disk
    io.write_list_to_csv(os.path.join(path['Clust']['GeneList'], fileName + '.csv'),
                         header, geneList)
    io.write_list_to_csv(os.path.join(path['Clust']['ProbeList'], fileName + '.csv'),
                         header, probeList)

    # Save model and standard plot
    io.save_pickle(os.path.join(path['Clust']['Model'], fileName + ".pickle"), fit)
    io.save_pdf(os.path.join(path['Clust']['Plot'], fileName + ".pdf"),
                standard_plot(fit))

    # Compute cluster predictions for xTest where xTest is taken from the
    # column names of SmoothExprs (every column except ProbeID/Symbol)
    data = pd.read_csv(os.path.join(path['GPFit']['SmoothExprs'], fileName + ".csv"))
    xTest = data.drop(['ProbeID', 'Symbol'], axis=1).columns.values.astype('float64')[:, None]
    mu, _ = fit.predict_components(xTest)  # posterior mean (variance unused)

    # One row per model component. Row i is named after cluster number i + 1
    # rather than after the list of non-empty clusters: that list can be
    # shorter than mu (a component with no probe assigned) and is ordered as
    # text, either of which would mislabel the centres or fail outright.
    centres = np.array(mu)
    df = pd.DataFrame(data=centres, columns=list(map(str, xTest.flatten())))
    df['Cluster'] = [prefix + '_%02d' % (i + 1) for i in range(centres.shape[0])]
    df.to_csv(os.path.join(path['Clust']['Centres'], fileName + '.csv'), index=False)
    clustCentre = df  # for readability

    # Merge smooth expression data frame with gene ID
    smoothExprs = pd.merge(geneID, data, how='left', on=['ProbeID', 'Symbol'])

    # Produce alternate plot
    hFig = alternate_plot(smoothExprs, clustCentre, config.COL[organ])
    io.save_pdf(os.path.join(path['Clust']['Plot'], fileName + '2.pdf'), hFig)
def cluster_membership_boxplot(organ, strain, path):
    """
    Draw, per cluster, a boxplot of the posterior probability with which
    its genes were assigned to it

    Arguments
    =========
    organ - Blood/Spleen
    strain - AS/CB
    path - dictionary with all results paths

    Returns
    =========
    None - Figure is saved to 'Misc' folder
    """
    stem = organ + strain

    # Stored model, reordered before reading the assignments
    fit = io.load_pickle(os.path.join(path['Clust']['Model'], stem + ".pickle"))
    fit.reorder()
    phi = fit.phi
    assigned = np.argmax(phi, axis=1)  # 0, 1, 2, etc.

    # Cluster names are looked up by index in the centres file
    # e.g 0 --> AS_Bl_01
    centres = pd.read_csv(os.path.join(path['Clust']['Centres'], stem + ".csv"),
                          sep=",")

    # For every cluster that has genes: their probability of belonging to it
    present = np.unique(assigned)
    membership = [phi[assigned == k, k] for k in present]
    clustName = [centres['Cluster'][k] for k in present]

    # Boxplot
    hFig = plt.figure(figsize=(16, 5))
    hAx = hFig.gca()
    boxStyle = {"facecolor": COL[organ]}
    medianStyle = {"color": "black", "linewidth": 3}
    whiskerStyle = {"color": "black"}
    hAx.boxplot(membership,
                labels=clustName,
                patch_artist=True,
                notch=True,
                boxprops=boxStyle,
                medianprops=medianStyle,
                whiskerprops=whiskerStyle)
    hAx.set_ylabel("Posterior probability of assigning gene to cluster")
    hAx.set_ylim(0, 1)

    # Write the figure out
    io.save_pdf(os.path.join(path['Misc'], "ClusterMembership" + stem + '.pdf'),
                hFig)
def fit_plot_save(k, smoothExprs, day, probeID, geneSymbol, organ, strain, path):
    """
    Run k-means on the expression profiles, plot the outcome and write the
    model, gene/probe lists, cluster centres and plot to disk

    Arguments
    =========
    k - no. of clusters
    smoothExprs - gene expression rows = genes, columns = day
    day - day
    probeID - probeID
    geneSymbol - geneSymbol
    organ - organ (used in output file names)
    strain - strain (used in output file names)
    path - path

    Returns
    =========
    None - results are plotted and saved
    """
    stem = organ + strain

    model = KMeans(n_clusters=k)
    model.fit(smoothExprs)
    labels = model.labels_
    clustCentre = model.cluster_centers_

    # Diagnostic plots
    plot_silhouette(silhouette_samples(smoothExprs, labels), labels)
    clust.multi_plot(smoothExprs, clustCentre, day, labels)

    # Hierarchical clustering of the centres (Ward + Euclidean)
    clusterNames = ["Cluster%i" % label for label in np.unique(labels)]
    hclust = hc.linkage(clustCentre, method='ward', metric='euclidean')
    plt.figure()
    plt.title("Hclust() Ward + Euclidean")
    hc.dendrogram(hclust, color_threshold=0.0, labels=clusterNames)

    # Model
    io.save_pickle(os.path.join(path['Clust']['Model'], stem + ".pickle"), model)

    # Gene/Probe lists
    geneList = clust.get_gene_list(labels, geneSymbol)
    probeList = clust.get_gene_list(labels, probeID)
    io.write_list_to_csv(os.path.join(path['Clust']['GeneList'], stem + ".csv"),
                         clusterNames, geneList)
    io.write_list_to_csv(os.path.join(path['Clust']['ProbeList'], stem + ".csv"),
                         clusterNames, probeList)

    # Cluster "centres": first column holds the cluster name
    nameColumn = np.array(clusterNames)[:, None]
    dataMatrix = np.hstack((nameColumn, clustCentre))
    centreHeader = ["Cluster"] + list(day)
    io.write_to_csv(os.path.join(path['Clust']['Centres'], stem + ".csv"),
                    centreHeader, dataMatrix)

    # Alternate plot
    hFig = clust.multi_plot(smoothExprs, clustCentre, day, labels)
    io.save_pdf(os.path.join(path['Clust']['Plot'], stem + "2.pdf"), hFig)
def merge(organ, strain, groupToMerge, groupLabel, originalLabel, path):
    """
    Merge modules (clusters) into groups and save the merged gene/probe
    lists, cluster centres and plot under path['ClustMerge']

    Arguments
    =========
    organ - organ (used in input/output file names)
    strain - strain (used in input/output file names)
    groupToMerge - list of lists e.g [[1,2], [3]]
    groupLabel - list of unique group labels e.g ["A", "B"]
    originalLabel - numpy array with the original cluster label of each gene
    path - dictionary with all results paths

    Returns
    =========
    newLabel - A, B, C etc.
    """
    fileName = organ + strain + ".csv"

    # Load gene/probe list
    oldGeneList = pandas.read_csv(os.path.join(path['Clust']['GeneList'], fileName), sep=",")
    oldProbeList = pandas.read_csv(os.path.join(path['Clust']['ProbeList'], fileName), sep=",")

    # NOTE(review): for a list groupLabel this presumably yields an
    # object-dtype copy so that arbitrary labels fit - confirm
    newLabel = originalLabel.astype(type(groupLabel))
    newGeneList = []
    newProbeList = []
    for iGroup, group in enumerate(groupToMerge):
        tempGeneList = []
        tempProbeList = []
        for label in group:
            column = 'Cluster' + str(label)
            newLabel[originalLabel == label] = groupLabel[iGroup]
            # Columns of the list files can be padded with NaN; the mask is
            # taken from the gene list and applied to both files
            bWant = ~pandas.isnull(oldGeneList[column])
            tempGeneList.append(np.array(oldGeneList[column][bWant]))
            tempProbeList.append(np.array(oldProbeList[column][bWant]))
        newGeneList.append(list(itertools.chain.from_iterable(tempGeneList)))
        newProbeList.append(list(itertools.chain.from_iterable(tempProbeList)))

    # Save Gene/Probe List
    clusterNames = ["Cluster%s" % label for label in groupLabel]
    io.write_list_to_csv(os.path.join(path['ClustMerge']['GeneList'], fileName),
                         clusterNames, newGeneList)
    io.write_list_to_csv(os.path.join(path['ClustMerge']['ProbeList'], fileName),
                         clusterNames, newProbeList)

    # Days are taken from the header of the old cluster centres file
    data = pandas.read_csv(os.path.join(path['Clust']['Centres'], fileName), sep=",")
    day = data.columns.values[1:].astype('float')

    # Get smooth exprs of the probes that ended up in any merged group.
    # A set gives constant-time membership tests instead of scanning an
    # array once per probe.
    data = pandas.read_csv(os.path.join(path['GPFit']['SmoothExprs'], fileName), sep=",")
    wantedProbeID = set(itertools.chain.from_iterable(newProbeList))
    bSelect = np.array([probe in wantedProbeID for probe in np.array(data['ProbeID'])])
    smoothExprs = data.values[:, 2:].astype('float')[bSelect, :]

    # Naively take the mean of each merged group
    # NOTE(review): indexing with newLabel assumes the selected rows line up
    # one-to-one, in order, with originalLabel - confirm against caller
    clustCentre = np.empty((len(groupLabel), len(day)))
    for i, label in enumerate(groupLabel):
        clustCentre[i, :] = np.mean(smoothExprs[newLabel == label, :], axis=0)

    # Save Cluster "centres": first column holds the cluster name
    dataMatrix = np.hstack((np.array(clusterNames)[:, None], clustCentre))
    centreHeader = list(itertools.chain.from_iterable([["Cluster"], list(day)]))
    io.write_to_csv(os.path.join(path['ClustMerge']['Centres'], fileName),
                    centreHeader, dataMatrix)

    # Save Alternate plot
    hFig = multi_plot(smoothExprs, clustCentre, day, newLabel)
    io.save_pdf(os.path.join(path['ClustMerge']['Plot'], organ + strain + "2.pdf"), hFig)

    return newLabel
def MOHGP(probesToCluster, organ, strain, prefix, K, alpha, path, seed=0):
    """
    Fit a Mixture of Hierarchical Gaussian Processes (MOHGP) to the selected
    probes and save the model, plots, gene/probe lists and cluster centres

    Arguments
    =========
    probesToCluster - a set of unique probeIDs to cluster
    organ - blood/spleen
    strain - AS/CB
    prefix - all/common/only
    K - init no. of clusters
    alpha - concentration parameter/strength parameter of the Dirichlet Process Prior
    path - dictionary with all results paths
    seed - to reproduce results due to multiple local optima

    Returns
    =========
    None - a Mixture of Hierarchical Gaussian Process model is fitted and saved to disk
    """
    # To reproduce results (seeds NumPy's global random state)
    np.random.seed(seed)

    # Load gene expression data; the first two columns are identifiers
    data = pandas.read_csv(os.path.join(path['RawData']['Log2FC'], organ + strain + ".csv"), sep=",")
    probeID = np.array(data['ProbeID'])
    yTrain = data.values[:, 2:].astype('float')
    # floor the column names to get int 0, 0, 2, 2, ..., 12
    xTrain = np.floor(data.columns.values[2:].astype('float64'))[:, None]

    # Subset the data by keeping only probesToCluster (vector of T, F, T)
    bWant = np.array([probe in probesToCluster for probe in probeID])
    yTrain = yTrain[bWant, :]
    probeID = np.array(data['ProbeID'][bWant])
    geneSymbol = np.array(data['GeneSymbol'][bWant])

    # MOHGP fitting
    # Define the covariance functions for the hierarchical GP structure
    # The model of any cluster of genes has a hierarchical structure, with the unknown cluster-specific
    # mean drawn from a GP, and then each gene in that cluster being drawn from a GP with said unknown mean function.
    totalVariance = np.var(yTrain.ravel())

    # Covariance function for the latent function that describes EACH cluster.
    covFunCluster = GPy.kern.RBF(input_dim=1, variance=totalVariance, lengthscale=LENGTHSCALE)

    # Covariance function that describes how EACH time-course (gene) deviates from the cluster
    covFunGene = GPy.kern.RBF(input_dim=1, variance=totalVariance / 10, lengthscale=LENGTHSCALE) + \
        GPy.kern.White(1, variance=NOISE_VARIANCE)

    # Set-up the clustering problem NB: For large alpha P resembles Po (i.e the base distribution)
    fit = GPclust.MOHGP(X=xTrain, kernF=covFunCluster, kernY=covFunGene, Y=yTrain,
                        K=K, prior_Z='DP', alpha=alpha)

    # Constrain lengthscales (to avoid very short lengthscales as per Topa et al. (2012) on arXiv)
    fit.rbf.lengthscale.constrain_bounded(LOWER_BOUND_LENGTHSCALE, UPPER_BOUND_LENGTHSCALE, warning=False)
    fit.add.rbf.lengthscale.constrain_bounded(LOWER_BOUND_LENGTHSCALE, UPPER_BOUND_LENGTHSCALE, warning=False)

    fit.hyperparam_opt_interval = 1000  # how often to optimize the hyperparameters

    # Optimise hyperparameters
    fit.optimize()
    fit.systematic_splits(verbose=False)

    # Name and reorder fit
    fit.name = prefix + organ + strain
    fit.reorder()
    labels = np.argmax(fit.phi, axis=1) + 1  # cluster number

    # Compute cluster prediction for xTest where xTest is taken from SmoothExprs
    # NOTE(review): reusing bWant assumes SmoothExprs has the same row order
    # as the Log2FC file - confirm
    data = pandas.read_csv(os.path.join(path['GPFit']['SmoothExprs'], organ + strain + ".csv"), sep=",")
    smoothExprs = data.values[:, 2:].astype('float')[bWant, :]
    xTest = data.columns.values[2:].astype('float64')[:, None]
    mu, _ = fit.predict_components(xTest)  # predicted variance is not used
    clustCentre = np.empty((len(mu), xTest.shape[0]))
    for iClust, thisMu in enumerate(mu):
        clustCentre[iClust, :] = thisMu

    # Save model and plot
    io.save_pickle(os.path.join(path['Clust']['Model'], prefix + organ + strain + ".pickle"), fit)
    io.save_pdf(os.path.join(path['Clust']['Plot'], prefix + organ + strain + ".pdf"), plot(fit))

    # Save Gene/Probe List
    geneList = get_gene_list(labels, geneSymbol)
    probeList = get_gene_list(labels, probeID)
    clusterNames = ["Cluster%i" % label for label in np.unique(labels)]
    io.write_list_to_csv(os.path.join(path['Clust']['GeneList'], prefix + organ + strain + ".csv"),
                         clusterNames, geneList)
    io.write_list_to_csv(os.path.join(path['Clust']['ProbeList'], prefix + organ + strain + ".csv"),
                         clusterNames, probeList)

    # Save Cluster "centres": first column holds the cluster name
    dataMatrix = np.hstack((np.array(clusterNames)[:, None], clustCentre))
    centreHeader = list(itertools.chain.from_iterable([["Cluster"], list(xTest.ravel())]))
    io.write_to_csv(os.path.join(path['Clust']['Centres'], prefix + organ + strain + ".csv"),
                    centreHeader, dataMatrix)

    # Save Alternate plot
    hFig = multi_plot(smoothExprs, clustCentre, xTest, labels)
    io.save_pdf(os.path.join(path['Clust']['Plot'], prefix + organ + strain + "2.pdf"), hFig)

    # Heatmap
    vis.heatmap_plot_by_clusters(organ, strain, prefix, path)
def _probes_in_clusters(probeList, iiCluster):
    """Return the non-NA entries of the 'Cluster<i>' columns listed in iiCluster, concatenated."""
    columns = [probeList['Cluster' + str(iClust)].dropna() for iClust in iiCluster]
    return list(itertools.chain.from_iterable(columns))


def _plot_condition(hAx, day, allProbeID, smoothExprs, probeID, commonProbes, colour, title):
    """Plot the time-course of every probe in probeID on hAx, shared probes in thick grey."""
    for probe in probeID:
        iiWant = np.where(allProbeID == probe)[0][0]  # should return only one index
        if probe in commonProbes:
            hAx.plot(day, smoothExprs[iiWant, :], linewidth=2, color="#525252", alpha=ALPHA)
        else:
            hAx.plot(day, smoothExprs[iiWant, :], linewidth=0.5, color=colour, alpha=ALPHA)
    hAx.axhline(y=0, color="black", linestyle='--', linewidth=2)

    # Set x/y limits and title
    hAx.set_xlim(XLIM)
    hAx.set_ylim(YLIM)
    hAx.set_title(title)
    # raw string: "\l" is not a valid escape sequence in a normal literal
    hAx.set_ylabel(r"$\log_2$(FC) wrt naive")
    hAx.set_xlabel("Days")
    hAx.grid(axis="y")


def compare_arbitrary_clusters(organA, strainA, iiClusterA, colourA,
                               organB, strainB, iiClusterB, colourB, prefix, path):
    """
    Compare arbitrary cluster e.g CB_Bl_10 + CB_Bl_11 vs CB_Sp_07

    Arguments
    =========
    organA/B - blood/spleen
    strainA/B - AS/CB
    iiClusterA/B - list of indices of clusters to compare e.g [10, 11]
    colourA/B - colour of conditions red/blue "#e41a1c"/"#377eb8"
    prefix - all/common/only
    path - dictionary with all results paths

    Returns
    =========
    None - figures saved to disk + .csv file of common genes saved to disk
    """
    # Load probe list by cluster
    probeListA = pandas.read_csv(
        os.path.join(path['Clust']['ProbeList'], prefix + organA + strainA + ".csv"), sep=",")
    probeListB = pandas.read_csv(
        os.path.join(path['Clust']['ProbeList'], prefix + organB + strainB + ".csv"), sep=",")

    # Pick only the clusters we're interested in
    probeIDA = _probes_in_clusters(probeListA, iiClusterA)
    probeIDB = _probes_in_clusters(probeListB, iiClusterB)

    # Find common probes
    commonProbes = set(probeIDA) & set(probeIDB)

    # Read GPR smoothed gene expression; the first two columns are identifiers
    dataA = pandas.read_csv(
        os.path.join(path['GPFit']['SmoothExprs'], organA + strainA + ".csv"), sep=",")
    allProbeIDA = np.array(dataA['ProbeID'])
    smoothExprsA = dataA.values[:, 2:].astype('float')
    day = dataA.columns.values[2:].astype('float')  # days of A are used for both panels

    dataB = pandas.read_csv(
        os.path.join(path['GPFit']['SmoothExprs'], organB + strainB + ".csv"), sep=",")
    allProbeIDB = np.array(dataB['ProbeID'])
    smoothExprsB = dataB.values[:, 2:].astype('float')

    titleA = strainA + '_' + organA[:2] + str(iiClusterA)
    titleB = strainB + '_' + organB[:2] + str(iiClusterB)

    # Create figure (a4 = 11.69 x 8.27 inches)
    hFig, (hAx1, hAx2, hAx3) = plt.subplots(1, 3, sharex=False, sharey=False,
                                            figsize=(16, 8.27/1.8))

    # Condition A
    _plot_condition(hAx1, day, allProbeIDA, smoothExprsA, probeIDA, commonProbes,
                    colourA, titleA)

    # Venn diagram
    plt.sca(hAx2)
    hVen = venn2([set(probeIDA), set(probeIDB)], set_colors=(colourA, colourB),
                 set_labels=("", ""), alpha=0.9)
    for label in hVen.subset_labels:
        # entries without set_fontsize are skipped
        if hasattr(label, 'set_fontsize'):
            label.set_fontsize(20)
    if len(commonProbes) > 0:
        hVen.get_patch_by_id('11').set_color('#525252')  # make the intersection grey

    # Condition B
    _plot_condition(hAx3, day, allProbeIDB, smoothExprsB, probeIDB, commonProbes,
                    colourB, titleB)

    # Save figure as .pdf
    fileName = titleA + '_vs_' + titleB
    io.save_pdf(os.path.join(path['Comparison'], fileName) + ".pdf", hFig)

    # Write common probes to .csv (gene symbols are looked up in condition B)
    header = ["ProbeID", "GeneSymbol"]
    commonProbeList = list(commonProbes)
    geneSymbolB = dataB['GeneSymbol'].values
    commonGeneSymbol = []
    for probe in commonProbeList:
        iiWant = np.where(allProbeIDB == probe)[0][0]  # should return one value (probeIDs are unique)
        commonGeneSymbol.append(geneSymbolB[iiWant])
    dataMatrix = np.hstack((np.array(commonProbeList)[:, None],
                            np.array(commonGeneSymbol)[:, None]))
    io.write_to_csv(os.path.join(path['Comparison'], fileName) + "shared_genes.csv",
                    header, dataMatrix)
import enrichr
import compare

#*************************************************************************************************#
# Create the results folders; every path is kept in one structured dictionary
#*************************************************************************************************#
path = setup_folders()

#*************************************************************************************************#
# PCA plots: each organ on its own, then blood and spleen together.
# First pass ('All') uses every measured probe, second pass ('Top') only the top ranked ones.
#*************************************************************************************************#
strain = 'AS'
for pcaTag in ('All', 'Top'):
    # Only the 'Top' pass restricts the probes
    pcaArgs = {'topRanked': TOP_RANKED} if pcaTag == 'Top' else {}

    # One organ at a time
    for organ in ORGANS:
        hWt, hPCA = util.pca_plot([organ], strain, path, bLegend=True, **pcaArgs)
        io.save_pdf(os.path.join(path['Misc'], 'PCA' + organ + strain + pcaTag + '.pdf'), hPCA)
        io.save_pdf(os.path.join(path['Misc'], 'PCAWts' + organ + strain + pcaTag + '.pdf'), hWt)

    # Spleen + Blood together
    hWt, hPCA = util.pca_plot(ORGANS, strain, path, bLegend=True, **pcaArgs)
    io.save_pdf(os.path.join(path['Misc'], 'PCA' + "".join(ORGANS) + strain + pcaTag + '.pdf'), hPCA)
    io.save_pdf(os.path.join(path['Misc'], 'PCAWts' + "".join(ORGANS) + strain + pcaTag + '.pdf'), hWt)
hFig, hAxs= plt.subplots(2, 2, sharex=True, sharey=False, figsize=(11.69, 8.27)) # a4 = 11.69 x 8.27 inches metrics = ['score', 'SNR', 'maxLogFC', 'rank'] for i, hAx in enumerate(hFig.axes): for organ in ORGANS: for strain in STRAINS: data = pandas.read_csv(os.path.join(path['GPFit']['Metrics'], organ + strain + ".csv"), sep=",") metric = np.array(data[metrics[i]]) hAx.plot(np.sort(metric), label=organ + strain, linewidth=2) hAx.set_xlabel("No. of Probes") hAx.set_ylabel(metrics[i]) hAx.set_xlim((0, 46000)) hAx.axvline(x=45281-topRanked, c="black", linewidth=2, linestyle='--') hAx.legend(loc=2) #plt.tight_layout() filePath = os.path.join(path['Misc'], 'DEG.pdf') io.save_pdf(filePath, hFig) #SNR = np.array(data['SNR']) #maxLogFC = np.array(data['maxLogFC']) #score = np.array(data['score']) #hFig = plt.figure("DEG") # Handle to figure #hAx = hFig.add_subplot(111, projection='3d') # Handle to axis #hAx.scatter(SNR, maxLogFC, score, color='grey', marker='.', s=10) # marker size #----------------------------------------------------------------------------------------------------------------------# # D) Crude estimation for number of clusters #----------------------------------------------------------------------------------------------------------------------# from sklearn.cluster import KMeans hFig = plt.figure() hAx = plt.gca() for organ in ORGANS:
def clusters(organs, strains, path, clustIndices):
    """
    Visually compare arbitrary clusters across different conditions
    organ/strain A/B and save the plot and gene lists to a new folder
    under path['Comparison']

    Arguments
    =========
    organs - a list of two organs e.g ['Blood', 'Spleen']
    strains - a list of two strains e.g ['AS', 'CB']
    path - a structured dictionary of paths returned by config
    clustIndices - a list of cluster indices e.g [[1, 4, 5], [6, 7]]

    Returns
    =========
    hFig - figure handle, or None (after a UserWarning) when any of the
           three list arguments does not have exactly two entries
    """
    # Simple data checking
    if len(organs) != 2 or len(strains) != 2 or len(clustIndices) != 2:
        warnings.warn("Arguments not compatible with function requirements!",
                      UserWarning)
        return None

    # Find common probes i.e to plot in grey
    shared, left, right = shared_genes(organs, strains, path, clustIndices)

    # Create figure (a4 = 11.69 x 8.27 inches)
    hFig, hAx = plt.subplots(1, 3, sharex=False, sharey=False,
                             figsize=(16, 8.27 / 1.8))
    title = [
        '{}_{}{}'.format(strains[i], organs[i][:2], clustIndices[i])
        for i in range(2)
    ]

    # Plot shared and only genes; the second condition goes on the THIRD
    # axis because the middle one holds the venn diagram
    cluster_subplot(organs[0], strains[0], path, shared, left, title[0], hAx[0])
    cluster_subplot(organs[1], strains[1], path, shared, right, title[1], hAx[2])

    # Plot venn diagram from the three subset sizes (left, right, shared)
    plt.sca(hAx[1])
    hVen = venn2([left.shape[0], right.shape[0], shared.shape[0]],
                 set_colors=(COL[organs[0]], COL[organs[1]]),
                 set_labels=("", ""),
                 alpha=0.9)
    for label in hVen.subset_labels:
        # entries without set_fontsize are skipped
        if hasattr(label, 'set_fontsize'):
            label.set_fontsize(20)
    if shared.shape[0] > 0:
        hVen.get_patch_by_id('11').set_color(COL['Shade'])  # make intersection grey

    # Create folder where to put results
    folderName = '{}_{}'.format(title[0], title[1])
    thisDir = io.create_folder(path['Comparison'], folderName)

    # Save plot and gene lists
    io.save_pdf(os.path.join(thisDir, 'timeplot.pdf'), hFig)
    shared.to_csv(os.path.join(thisDir, 'shared.csv'), index=False)
    left.to_csv(os.path.join(thisDir, 'left.csv'), index=False)
    right.to_csv(os.path.join(thisDir, 'right.csv'), index=False)

    # The docstring promises the figure handle; hand it back to the caller
    return hFig