continue #export the pVals2 as a csv for rep in pVals2: pVals2[rep].to_csv(os.path.join(savedir, str(rep) + 'sigPs.csv' )) #make some violin plots of the significant features from feature_swarms import swarms cmap2 = sns.color_palette("Blues", len(allConcs)) sns.set_style('whitegrid') for rep in pVals2: for feat in range(0,10): #swarms (str(rep), sig_feats[rep][feat][0], featMatZ2[featMatZ2['date']==rep], savedir, '.tif', cmap) swarms('all', sig_feats[rep][feat][0], featMatZ2 ,savedir, '.tif', cmap2) # ============================================================================= # Things to do: # 1. Train classifier to distinguish between Atypical and typical drugs # 2. check DMSO controls on all dates and plot to compare # 3. Make a list of the drugs to repeat - DONE # ============================================================================= #%% Compare features across reps for date in allDates: plt.figure()
# Drop temporaries created by the preceding step (not visible in this fragment).
# NOTE(review): original indentation was lost; these may belong to enclosing loops.
del ps, temp3, temp4
del temp, temp2, conc

#%% Export pVals2: one csv per rep
for rep in pVals2:
    # NOTE(review): directoryA[:-7] strips the last seven characters of the path;
    # which suffix that removes cannot be seen from here -- confirm.
    # str(rep) keeps the export working when the rep keys are not strings.
    pVals2[rep].to_csv(os.path.join(directoryA[:-7], str(rep) + 'sigPs.csv'))

#%% Swarm/violin plots for the first (up to) ten entries of sig_feats per rep
import feature_swarms as swarm
cmap = sns.color_palette("tab20", len(uniqueDrugs))
sns.set_style('whitegrid')
for rep in featuresA2:
    # Bound the loop by the list length: a fixed range(0, 10) raises IndexError
    # when a rep has fewer than ten entries in sig_feats.
    for feat in range(min(10, len(sig_feats[rep]))):
        swarm.swarms(rep, sig_feats[rep][feat][0], featuresA2[rep],
                     directoryA, '.tif', cmap)

#%% Collect every feature name that appears in sig_feats for at least one rep
# (np.unique over the concatenation is a union across reps, not an intersection).
stats_feats = []
for rep in sig_feats:
    # First element of each entry is the feature name; the comprehension also
    # copes with an empty list, where list(zip(*x))[0] raised IndexError.
    stats_feats += [entry[0] for entry in sig_feats[rep]]
stats_feats = np.unique(stats_feats)

#%% reduce the feature set to the significant ones - for Clozapine10
# Build one list combined from all reps.
cloz10Feats = []
for rep in pVals2:
    # One combined mask evaluated on the same frame. The original chained
    # df[mask_a][mask_b] applied a mask built on the full frame to the
    # already-filtered frame, which pandas has to reindex (UserWarning).
    cloz10 = pVals2[rep][(pVals2[rep]['drug'] == 'Clozapine')
                         & (pVals2[rep]['concentration'] == 10.0)]
    # Column label of every non-null cell. Labels repeat once per matching row,
    # and any non-null metadata column (e.g. 'drug', 'concentration') is
    # included as well.
    cloz10Feats += list(cloz10.columns[np.where(cloz10.notnull())[1]])
    del cloz10
# NOTE(review): the next three statements are the tail of loops whose headers lie
# above this fragment; original indentation was lost -- confirm nesting.
# Each entry stored is (column label from bh_p[rep], sum of that column of top_feats[rep]).
sig_feats[rep].append(
    (bh_p[rep].columns[feat], np.sum(top_feats[rep][:, feat])))
# Order entries from largest to smallest summed value. Kept as sort + reverse
# so the relative order of tied entries is exactly what it was before.
sig_feats[rep].sort(key=lambda tup: tup[1])
sig_feats[rep].reverse()

#%% Swarm/violin plots for the first (up to) ten entries of sig_feats per rep
import feature_swarms as swarm
cmap = sns.color_palette("tab20", len(uniqueDrugs))
sns.set_style('whitegrid')
for rep in featuresL2:
    # Bound the loop by the list length: a fixed range(0, 10) raises IndexError
    # when a rep has fewer than ten entries in sig_feats.
    for feat in range(min(10, len(sig_feats[rep]))):
        swarm.swarms(rep, sig_feats[rep][feat][0], featuresL2[rep],
                     directoryL, '.tif', cmap)
#so looks like can pull out differences between DMSO and chloropromazine and clozapine. Data is messier probably
#because only one repeat

#%% classifier v1
# Use only features that are significant to train the classifier.
# Convert the list of tuples into a list of feature names.
# NOTE(review): `rep` here is whatever value the plotting loop above left behind
# (the last key of featuresL2) -- confirm that a single rep is intended.
bh_list = [i[0] for i in sig_feats[rep]]  #only 120 features

# Take an explicit copy of the column subset before adding columns to it;
# assigning onto the bare selection triggers pandas' SettingWithCopyWarning.
featMatAllL3 = featMatAllL[bh_list].copy()
featMatAllL3['drug'] = drug_allL
featMatAllL3['concentration'] = conc_allL
featMatAllL3['date'] = date_allL
# One column per name in mr_Feats2, one row per entry of temp.
feat_scores = pd.DataFrame(data=np.array(temp), columns=mr_Feats2)
del temp

# Now find out which features are in the top and bottom 10%, and only take
# forward those that are in the top/bottom 10% in 50% of the CVs.
# NOTE(review): the cut-offs 15 / 135 and the count 5 are hard-coded; presumably
# they correspond to 150 ranked features and 10 CV rounds -- confirm.
# .sum(axis=0) counts per column explicitly; np.sum(DataFrame) relies on the
# axis=None column-wise reduction that pandas has deprecated.
bottom = list(feat_scores.columns[(feat_scores <= 15).sum(axis=0) >= 5])  #bottom 10%
top = list(feat_scores.columns[(feat_scores >= 135).sum(axis=0) >= 5])  #top 10%

#combine
combi = top + bottom  #22 features

#plot these as swarms:
import feature_swarms as swarm
# NOTE(review): cmap1 is read here but only assigned further down in this
# fragment, so it must already exist from earlier in the file -- confirm.
for item in combi:
    swarm.swarms('all', item, featMatAll, directoryA, '.tif', cmap1)
    # NOTE(review): original indentation was lost; plt.close() is placed inside
    # the loop so the current figure is closed after each plot -- confirm.
    plt.close()

# Final feature space: the selected feature columns plus the last three columns
# of mrFeatMatFinal, with the drug name added as 'class'.
final_feat = pd.concat([mrFeatMatFinal[combi],
                        mrFeatMatFinal.iloc[:, -3:]], axis=1)
final_feat['class'] = conds2['drug']

#make a clustergram
#1. make lut for drug colors
# Colormap and lookup table: one tab20 colour per distinct class label.
drug_classes = np.unique(final_feat['class'])
cmap1 = sns.color_palette("tab20", drug_classes.shape[0])
lut = dict(zip(drug_classes, cmap1))

# Row colours: map each row's class label to its colour.
row_colors = final_feat['class'].map(lut)  #map onto the feature Matrix
# Drop temporaries created by the preceding step (not visible in this fragment).
# NOTE(review): original indentation was lost; these may belong to enclosing loops.
del ps, temp3, temp4
del temp, temp2, conc

#%% Export pVals2: one csv per rep
for rep in pVals2:
    # NOTE(review): directoryA[:-7] strips the last seven characters of the path;
    # which suffix that removes cannot be seen from here -- confirm.
    # str(rep) keeps the export working when the rep keys are not strings.
    pVals2[rep].to_csv(os.path.join(directoryA[:-7], str(rep) + 'sigPs.csv'))

#%% Swarm/violin plots for the first (up to) ten entries of sig_feats per rep
import feature_swarms as swarm
cmap = sns.color_palette("Blues", len(uniqueDrugs))
sns.set_style('whitegrid')
for rep in featuresA2:
    # Bound the loop by the list length: a fixed range(0, 10) raises IndexError
    # when a rep has fewer than ten entries in sig_feats.
    for feat in range(min(10, len(sig_feats[rep]))):
        # Plots use the pooled featMatAll under the label 'all', not the per-rep data.
        swarm.swarms('all', sig_feats[rep][feat][0], featMatAll,
                     directoryA, '.tif', cmap)

#%% Collect every feature name that appears in sig_feats for at least one rep
# (np.unique over the concatenation is a union across reps, not an intersection).
stats_feats = []
for rep in sig_feats:
    # First element of each entry is the feature name; the comprehension also
    # copes with an empty list, where list(zip(*x))[0] raised IndexError.
    stats_feats += [entry[0] for entry in sig_feats[rep]]
stats_feats = np.unique(stats_feats)

#%% reduce the feature set to the significant ones - for Clozapine10
# Build one list combined from all reps.
cloz10Feats = []
for rep in pVals2:
    # One combined mask evaluated on the same frame. The original chained
    # df[mask_a][mask_b] applied a mask built on the full frame to the
    # already-filtered frame, which pandas has to reindex (UserWarning).
    cloz10 = pVals2[rep][(pVals2[rep]['drug'] == 'Clozapine')
                         & (pVals2[rep]['concentration'] == 10.0)]
    # Column label of every non-null cell. Labels repeat once per matching row,
    # and any non-null metadata column (e.g. 'drug', 'concentration') is
    # included as well.
    cloz10Feats += list(cloz10.columns[np.where(cloz10.notnull())[1]])