def plt_3d(trgt):
    """Render two 3-D views of one slice of ``trgt`` and save them as PDFs.

    ``figs/0.pdf`` overlays a coarse surface and a 3-D bar chart of
    ``trgt[-5, :, :, 0]``; ``figs/3.pdf`` shows a cubic interpolation of the
    same slice on a 160 x 160 grid, shifted by its minimum and divided by
    its sum.

    Args:
        trgt: 4-D array; the code reads ``trgt.shape[1]``, ``trgt.shape[2]``
            and the slice ``trgt[-5, :, :, 0]``.
            # presumably laid out as (time, row, col, channel) -- confirm
    """
    row, col = trgt.shape[1], trgt.shape[2]
    z = trgt[-5, :, :, 0]
    # Build the mesh with the same (row, col) shape as ``z``.  For square
    # inputs this equals the previous meshgrid(arange(row), arange(col));
    # for non-square inputs the old form produced mismatched shapes.
    x, y = np.meshgrid(np.arange(0, col), np.arange(0, row))

    fig = plt.figure()
    ax = ax3d(fig)
    ax.plot_surface(x, y, z, rstride=3, cstride=3, cmap='rainbow')

    dz = z.ravel()
    offset = dz + np.abs(dz.min())
    fracs = offset / offset.max()
    cmp = plt.get_cmap('Blues')
    cnorm = colors.Normalize(vmin=0, vmax=1)
    scalar = cm.ScalarMappable(norm=cnorm, cmap=cmp)
    # Map all fractions in one call.  The former list comprehension used
    # ``x`` as its loop variable, which clobbers the mesh ``x`` on Python 2.
    color_values = scalar.to_rgba(fracs)
    ax.bar3d(x.ravel(), y.ravel(), np.zeros_like(y.ravel()),
             dx=1, dy=1, dz=dz, color=color_values)
    ax.view_init(75, 165)
    # The context manager closes the PDF even if savefig raises.
    with pdf('figs/0.pdf') as pp:
        plt.savefig(pp, format='pdf')

    fig = plt.figure()
    ax = ax3d(fig)
    # Interpolate over the actual index extent (0..31 for a 32 x 32 map,
    # which is what used to be hard-coded).
    gx, gy = np.mgrid[0:col - 1:160j, 0:row - 1:160j]
    gxy = np.array([x.ravel(), y.ravel()])
    gz = griddata(gxy.transpose(), dz, (gx, gy), method='cubic')
    gz = (gz - gz.min()) / gz.sum()
    ax.plot_surface(gx, gy, gz, rstride=1, cstride=1, cmap='Reds')
    ax.view_init(75, 165)
    with pdf('figs/3.pdf') as pp:
        plt.savefig(pp, format='pdf')
    print('end')
def compar_band_multipage(self, df, fout, xname='Li_Mean_val', yname='Lsky', cname='Solar_Zenith', title=''):
    """Write one scatter page per column group of ``df`` into a PDF.

    The columns of ``df`` are grouped on level 2 of their MultiIndex; groups
    whose key is the empty string are skipped.  For every other group the
    ``xname`` values are plotted against the ``yname`` values, coloured by
    ``cname``, on square axes with a dashed 1:1 line.

    Args:
        df: DataFrame with (at least) three column levels; level 1 must
            contain ``xname``, ``yname`` and ``cname``.
        fout: path of the PDF file to create.
        xname, yname, cname: level-1 column labels used for x, y and colour.
        title: prefix of each page title; `` at <key> nm`` is appended.
    """
    plt.ioff()
    with pdf(fout) as p:
        for (wl, group) in df.groupby(level=2, axis=1):
            if wl == '':
                continue
            print(wl)
            # Work on a cleaned copy instead of mutating the groupby slice.
            group = group.dropna()
            if group.empty:
                # Nothing left to plot; max() on empty arrays would raise.
                continue
            x = group.xs(xname, level=1, axis=1).values[:, 0]
            y = group.xs(yname, level=1, axis=1).values[:, 0]
            c = group.xs(cname, level=1, axis=1).values[:, 0]

            fig, self.ax = plt.subplots(figsize=(6, 6))
            ymax = max(x.max(), y.max())
            self.ax.set(xlim=(0, ymax), ylim=(0, ymax), aspect=1)
            self.ax.plot([0, ymax], [0, ymax], '--', color='grey')
            im = self.ax.scatter(x, y, c=c, cmap='gnuplot')
            self.annot(x, y, ymax)
            fig.colorbar(im, ax=self.ax)
            fig.suptitle(title + ' at ' + str(wl) + ' nm')
            fig.tight_layout()
            p.savefig(fig)
            # Figure has no close() method; figures are released via pyplot.
            plt.close(fig)

        d = p.infodict()
        d['Title'] = 'Simulations vs measurements comparison '
        d['Author'] = u'T. Harmel (SOLVO)'
        d['CreationDate'] = datetime.datetime.today()
def plt_6steps(dataset):
    """Plot per-step RMSE curves of all models for one dataset.

    Reads ``result-collect/<dataset>.csv`` (one row per model, one column
    per prediction step) and writes the figure to ``figs/<dataset>x.pdf``.

    Args:
        dataset: dataset name used to build the input and output paths.
    """
    data = pd.read_csv('result-collect/' + dataset + '.csv').values
    colors = ['r', 'g', 'b', 'y', 'black', 'grey', 'c', 'm']
    marker = ['.', 's', '^', '+', '*', '2', 'x', 'o']
    linestyle = [':', '-.', '--', '-', ':', '-.', '--', '-']
    models = [
        'ResNet', 'ST-UNet', 'ST-ANN', 'MNNs', 'ConvLSTM', 'AttConvLSTM',
        'PCRN', 'ST-Attn'
    ]
    # One x position per CSV column ([1..6] for the usual six-step files).
    steps = list(range(1, data.shape[1] + 1))

    plt.figure(figsize=(6, 4))
    for i, model in enumerate(models):
        plt.plot(steps, data[i, :], color=colors[i], linestyle=linestyle[i],
                 lw=1, marker=marker[i], ms=6, label=model)

    fontsize = 10
    plt.xticks(fontsize=fontsize, color='black')
    plt.ylabel('RMSE', fontsize=fontsize, color='black')
    plt.xlabel('predicting step', fontsize=fontsize, color='black')
    plt.yticks(fontsize=fontsize, color='black')
    plt.legend(fontsize=fontsize - 2)
    plt.grid()
    # The context manager closes the PDF even if savefig raises.
    with pdf('figs/' + dataset + 'x.pdf') as pp:
        plt.savefig(pp, format='pdf')
def save(path):
    """
    Wrapper for saving the current plot using PdfPages.

    Returns True if the page was written, False when ``pdf`` is falsy
    (presumably because PdfPages could not be imported -- confirm).
    """
    if not pdf:
        return False
    page = pdf(path)
    try:
        page.savefig()
    finally:
        # Close the file this function opened, also on failure.  The former
        # trailing ``pp.close()`` referred to a name this function never
        # defines and has been dropped.
        page.close()
    return True
def plot_with_date(tb_i, te_i, trgt, dataset='citybike', fname='fig1', ylabel='In-Flow of [16,16]'):
    """Plot the aggregated in-flow of one grid cell against calendar dates.

    The series ``trgt[:, g, g, 0]`` (``g`` is 16 for ``'BJTaxi'`` and 8
    otherwise) is cut to the length of the dataset's date index, summed in
    blocks of ``resh`` slots and saved to ``figs/<fname>.pdf``.

    Args:
        tb_i, te_i: positions in the aggregated date index used as the left
            and right x-axis limits.
        trgt: 4-D array indexed as ``[slot, row, col, channel]``.
        dataset: ``'BJTaxi'`` selects the BJTaxi settings; any other value
            uses the alternative settings.
        fname: base name of the output PDF.
        ylabel: y-axis label.
    """
    is_bj = dataset == 'BJTaxi'
    gridi = 16 if is_bj else 8
    tslot = 30 if is_bj else 60   # presumably slot length in minutes -- confirm
    resh = 48 if is_bj else 24    # slots summed into one plotted point
    if is_bj:
        start, end = '2015-11-01', '2016-04-09'
    else:
        start, end = '2015-07-01', '2016-06-30'

    # The fine-grained index only determines how many trailing slots to use.
    n_slots = len(dateindex(start, end, tslot))
    inflow = trgt[-n_slots:, gridi, gridi, 0]
    inflow = inflow.reshape([-1, resh]).sum(axis=1)
    tindex = list(dateindex(start, end, resh * tslot))

    fig = plt.figure()
    fig.set_size_inches(5, 2)
    ax = fig.add_subplot(111)
    ax.xaxis.set_major_locator(dates.MonthLocator())
    ax.xaxis.set_major_formatter(dates.DateFormatter('%b'))
    ax.set_xlim(tindex[tb_i], tindex[te_i])

    plt.plot(tindex, inflow, color='r', linewidth=0.5, label='In-Flow')
    plt.xticks(fontsize=6, color='black')
    plt.ylabel(ylabel, fontsize=6, color='black')
    plt.yticks(fontsize=6, color='black')
    plt.grid()
    # The context manager closes the PDF even if savefig raises.
    with pdf('figs/' + fname + '.pdf') as pp:
        plt.savefig(pp, format='pdf')
def plt_heatmap(dataset, results_root='/cluster/zhouyirong09/peer-work/ST-Attn/result-collect/'):
    """Save a heat map of one stored target frame as ``figs/<dataset>_heatmap.pdf``.

    Loads ``<results_root><dataset>/ST-Attn/target.npy``, stacks it with
    ``np.vstack`` and rescales it by ``preprocess_max[dataset]``.  The frame
    ``[-112, 3, :, :, 0]`` is drawn; ``'citybike'`` and ``'nyctaxi'`` are
    shown as ``log(value + 1)`` and ``'nyctaxi'`` is additionally flipped
    along its first image axis.

    Args:
        dataset: dataset name; selects the input folder, the scale factor
            and the display variant.
        results_root: folder containing the per-dataset result folders
            (must end with a path separator).  Defaults to the location
            that used to be hard-coded.
    """
    results_path = results_root + dataset + '/'
    trgt = np.vstack(
        np.load(results_path + 'ST-Attn/target.npy')) * preprocess_max[dataset]

    if dataset == 'citybike':
        image = np.log(trgt[-112, 3, :, :, 0] + 1)
    elif dataset == 'nyctaxi':
        image = np.log(trgt[-112, 3, ::-1, :, 0] + 1)
    else:
        image = trgt[-112, 3, :, :, 0]
    plt.imshow(image, cmap='Reds')
    plt.tick_params(which='both', left=False, bottom=False,
                    labelleft=False, labelbottom=False)
    # The context manager closes the PDF even if savefig raises.
    with pdf('figs/' + dataset + '_heatmap.pdf') as pp:
        plt.savefig(pp, format='pdf')
def improvements():
    """Plot the relative gap between two result rows for each dataset.

    For every dataset the CSV ``result-collect/<dataset>.csv`` is read and
    ``(row[-3] - row[-1]) / row[-3]`` is computed per column; the three
    resulting curves are saved to ``figs/improvement.pdf``.
    """
    datasets = ['BJTaxi', 'nyctaxi', 'citybike']
    colors = ['r', 'g', 'b']
    marker = ['.', 's', '^']
    linestyle = [':', '-.', '--']

    xi = []
    for dataset in datasets:
        x = pd.read_csv('result-collect/' + dataset + '.csv').values
        x1 = x[-1, :]
        x2 = x[-3, :]
        xi.append((x2 - x1) / x2)
    xi = np.array(xi)
    # One x position per CSV column ([1..6] for the usual six-step files).
    steps = list(range(1, xi.shape[1] + 1))

    plt.figure(figsize=(6, 4))
    for i, dataset in enumerate(datasets):
        plt.plot(steps, xi[i, :], color=colors[i], linestyle=linestyle[i],
                 lw=1, marker=marker[i], ms=6, label=dataset)

    fontsize = 10
    plt.xticks(fontsize=fontsize, color='black')
    # NOTE(review): the plotted values are relative differences, yet the
    # axis is labelled 'RMSE'; label left unchanged -- confirm intent.
    plt.ylabel('RMSE', fontsize=fontsize, color='black')
    plt.xlabel('predicting step', fontsize=fontsize, color='black')
    plt.yticks(fontsize=fontsize, color='black')
    plt.legend(fontsize=fontsize - 2)
    plt.grid()
    # The context manager closes the PDF even if savefig raises.
    with pdf('figs/improvement.pdf') as pp:
        plt.savefig(pp, format='pdf')
def multipage_compar(self, df, fout, title=''):
    """Write one measured-vs-simulated scatter page per ``df.wl`` value.

    Each page plots ``Lsky_sim`` against ``Lsky_mes`` coloured by ``sza`` on
    square axes with a dashed 1:1 line, after calling ``self.annot``.  PDF
    metadata (title, author, creation date) is set once all pages exist.

    Args:
        df: DataFrame with columns ``wl``, ``Lsky_mes``, ``Lsky_sim``, ``sza``.
        fout: path of the PDF file to create.
        title: prefix of each page title; `` at <wl> nm`` is appended.
    """
    plt.ioff()
    with pdf(fout) as pages:
        for wavelength, subset in df.groupby(df.wl):
            fig, self.ax = plt.subplots(figsize=(6, 6))
            measured = subset.Lsky_mes
            simulated = subset.Lsky_sim
            upper = max(measured.max(), simulated.max())
            self.ax.set(xlim=(0, upper), ylim=(0, upper), aspect=1)
            self.ax.plot([0, upper], [0, upper], '--', color='grey')
            self.annot(measured, simulated, upper)
            page_title = title + ' at ' + str(wavelength) + ' nm'
            subset.plot(x='Lsky_mes', y='Lsky_sim', c="sza", kind='scatter',
                        cmap='gnuplot', ax=self.ax, title=page_title)
            pages.savefig()
            plt.close()

        pages.infodict().update(
            Title='Simulations vs measurements comparison ',
            Author=u'T. Harmel (SOLVO)',
            CreationDate=datetime.datetime.today(),
        )
def _plotPredictedOverTrue(tPheno, pPheno, title, pdfFileName):
    '''Plot predicted over true values on figure 4 and save it as a one-page PDF.'''
    fig4 = plt.figure(4, figsize=(8.5, 11), dpi=150)
    fig4.suptitle('predicted over true age')
    tSP4 = fig4.add_subplot(111, title=title)
    tSP4.plot(tPheno, tPheno)
    tSP4.plot(tPheno, pPheno, 'co')
    fig4.subplots_adjust(hspace=0.5, wspace=0.5)
    pdfPages = pdf(pdfFileName)
    try:
        pdfPages.savefig(fig4)
    finally:
        # always release the file and the fixed-number figure
        pdfPages.close()
        plt.close(4)


def Main(outFile, pdfFile, strategy):
    '''
    Run the regression test strategies on the Boston data and plot them.

    With strategy None every strategy is run, the collected results are
    pickled (gzip) to outFile and one PDF per strategy is written to
    <pdfFile>_<name>.pdf.  Otherwise only the named strategy ('old', 'own',
    'manual', 'cv', 'nocv' or 'clean') is run and plotted to
    <pdfFile>_strategy_<strategy>.pdf; nothing is pickled in that case.
    An unknown strategy name prints a message and returns.
    '''
    print('\n\nHello there, welcome to testing things. These are our params:'
          + '\nstrategy:' + str(strategy)
          + ' / outFile:' + outFile
          + ' / pdfFile:' + pdfFile)
    print('Not happy with it? Probably your fault! Enjoy!\n')

    dataset = load_boston()
    features = dataset.data
    labels = dataset.target
    numberCases = len(labels)

    cDict = {}
    cDict['pheno'] = 'houseprice'
    cDict['fs'] = 'None'
    cDict['cValue'] = 1000
    cDict['eValue'] = 0.001
    cDict['kernel'] = 'rbf'
    cDict['numberCores'] = 10
    cDict['gridCv'] = 5

    cvObject = an.cv.KFold(numberCases, 10, shuffle=True)

    if strategy is None:
        stSt = {}
        stSt['oldway'] = runShitTheOldWay(features, labels, cvObject, cDict)
        stSt['ownTrain'] = runShitButNotAll(features, labels, cvObject, cDict)
        stSt['manualCv'] = runShitHereYourself(features, labels, cDict)
        stSt['CvOwnTrain'] = runShitOnCv(features, labels, cvObject, cDict)
        stSt['noCv'] = runShitNoCv(features, labels, cDict)
        stSt['clean'] = runShitClean(features, labels)

        # close the gzip stream explicitly so the archive is flushed to disk
        outF = gzip.open(outFile, 'wb')
        try:
            cPickle.dump(stSt, outF, protocol=2)
        finally:
            outF.close()

        for result in stSt:
            # 'oldway' yields (predicted, true); the others append two more
            # values that are not needed for plotting
            pPheno, tPheno = stSt[result][0], stSt[result][1]
            print('Plotting ' + result)
            print('    tPheno ' + str(tPheno.shape))
            print('    tPheno ' + str(np.max(tPheno)))
            _plotPredictedOverTrue(tPheno, pPheno, result,
                                   pdfFile + '_' + result + '.pdf')
        print('Just created ' + pdfFile + '\nAll done here!')
        return

    if strategy == 'old':
        outcome = runShitTheOldWay(features, labels, cvObject, cDict)
    elif strategy == 'own':
        outcome = runShitButNotAll(features, labels, cvObject, cDict)
    elif strategy == 'manual':
        outcome = runShitHereYourself(features, labels, cDict)
    elif strategy == 'cv':
        outcome = runShitOnCv(features, labels, cvObject, cDict)
    elif strategy == 'nocv':
        outcome = runShitNoCv(features, labels, cDict)
    elif strategy == 'clean':
        outcome = runShitClean(features, labels)
    else:
        # previously execution continued and crashed on the undefined pPheno
        print('Bullshit arguments!')
        return
    pPheno, tPheno = outcome[0], outcome[1]

    print('Plotting ' + strategy)
    print('    pPheno: ' + str(pPheno.shape) + '/' + str(np.max(pPheno)))
    print('    tPheno' + str(tPheno.shape) + '/' + str(np.max(tPheno)))
    _plotPredictedOverTrue(tPheno, pPheno, strategy,
                           pdfFile + '_strategy_' + strategy + '.pdf')
    print('Just created ' + pdfFile + '\nAll done here!')
return self.var def getStd(self): return self.std def getErr(self): return self.err #Setup PDF File Info spacing = 11000 date = datetime.datetime.now() path = 'PDF/' file = '{}_{}_{}_AnchorSpacing-{}.pdf'.format(date.year, date.month, date.day, spacing) page = pdf(path + file) #Ask to select relevant files file = tkFileDialog.askopenfilenames() #Lump SampleData class into a list for entire anchor configuration samples = [] for f in file: s = SampleData(f) samples.append(s) dataMap_Figs = dataMap( samples, spacing) #Plot data points, standard deviations, errorbar heat_Figs = heat(samples, spacing) #Generate error and variance heat meaps tables = dataTable( samples
def Visualize(study, analysis):
    '''
    Build the multi-page PDF report <analysis>_results.pdf for one analysis.

    For every network of study.analyses[analysis] the prediction error,
    absolute error, MAE, Shapiro-Wilk p value and per-network feature
    weights are collected.  A one-way ANOVA over the error vectors is run
    only when every network passes the Shapiro-Wilk test (p >= 0.05);
    otherwise the placeholder (999, 999) is reported.  Kendall's W is
    computed from the rank matrix of the predictions.  The report holds a
    text page, error boxplots, three scatter grids, an MAE bar chart, a
    pcolor page and one weight boxplot per network.

    study:    object with an `analyses` mapping
    analysis: key into study.analyses; also used as the report name
    '''
    print('Fetching analysis ' + analysis + ' now. Hold on to your heads!')
    tempAnalysis = study.analyses[analysis]
    # sorted()/list() behave the same on Python 2 and 3, unlike the former
    # in-place keys().sort() and values()[0]
    networkNames = sorted(tempAnalysis.networks.keys())
    numberNetworks = float(len(networkNames))
    tempNet = list(tempAnalysis.networks.values())[0]
    numberSubjects = float(len(tempNet.truePheno))
    aName = analysis
    netFeatInd, networkNumbers = FeatureIndex(tempAnalysis)

    valueDict = {}
    errList = []
    maeList = []
    rankRows = []
    normCount = 0

    for network in networkNames:
        tempNetwork = tempAnalysis.networks[network]
        tempTrue = tempNetwork.truePheno
        tempPred = tempNetwork.predictedPheno
        tempErr = tempPred - tempTrue
        # error vectors are the ANOVA groups
        errList.append(tempErr)
        tempAbs = np.absolute(tempErr)
        tempMae = np.mean(tempAbs)
        maeList.append(tempMae)

        # rank the predictions (1 = smallest) for Kendall's W; subjects must
        # be in the same order for all networks
        tempRanks = np.argsort(tempPred)
        ranks = np.empty(len(tempRanks), int)
        ranks[tempRanks] = np.arange(len(tempRanks))
        ranks += 1
        rankRows.append(ranks)

        # split the mean features by the network they connect to
        meanFeatures = NetworkFeatures(tempNetwork)
        netInd = netFeatInd[network][None, ...]
        print('meanFeat ' + str(meanFeatures.shape))
        print('netInd ' + str(netInd.shape))
        tempFeatStore = {}
        for netNum in networkNumbers.keys():
            netNumber = networkNumbers[netNum]
            tempFeatStore[netNum] = meanFeatures[netInd == netNumber]

        # p value of the Shapiro-Wilk test
        tempShapp = st.shapiro(tempErr)[1]
        if tempShapp >= 0.05:
            normCount += 1

        tempDict = {}
        tempDict['true'] = tempTrue
        tempDict['pred'] = tempPred
        tempDict['error'] = tempErr
        tempDict['abs'] = tempAbs
        tempDict['std'] = np.std(tempErr)
        tempDict['shapp'] = tempShapp
        tempDict['mae'] = tempMae
        tempDict['weights'] = tempFeatStore
        valueDict[network] = tempDict

    # networks x subjects matrix of ranks
    kendallMat = np.vstack(rankRows)

    # The ANOVA is only meaningful when ALL networks look normal.  The old
    # test (max p >= 0.05) already passed when a single network did.
    if normCount == len(networkNames):
        print('All networks are nicely normally distributed')
        anova = st.f_oneway(*errList)
        print('\nANOVA has run')
        print('Behold the amazing F of ' + str(round(anova[0], 4))
              + ' and p ' + str(round(anova[1], 4)))
    else:
        print('not all networks are normally distributed')
        print(str(normCount) + ' out of ' + str(numberNetworks)
              + ' networks are normally distributed')
        anova = (999, 999)

    # Kendall's W from the summed ranks per subject (columns)
    print('Kendalls')
    print('nNet = ' + str(numberNetworks) + ' nSub = ' + str(numberSubjects))
    print(kendallMat.shape)
    sumRankVec = np.sum(kendallMat, axis=0)
    print(sumRankVec)
    meanRank = 1.0 / 2.0 * numberNetworks * (numberSubjects + 1)
    print(meanRank)
    sumSquaredDevs = np.sum((sumRankVec - meanRank) ** 2)
    print(sumSquaredDevs)
    kendallsW = 12.0 * sumSquaredDevs / ((numberNetworks ** 2.0)
                                         * ((numberSubjects ** 3)
                                            - numberSubjects))
    txtKendallsW = ('Kendall\'s W = ' + str(kendallsW))
    print('Kendall\'s W = ' + str(kendallsW))

    # two columns; add_subplot needs integer grid dimensions
    cols = 2
    rows = int(np.ceil(numberNetworks / cols))

    # figure 0 carries the text page
    fig0 = plt.figure(0, figsize=(8.5, 11), dpi=150)
    fig0.suptitle(aName)
    fig1 = plt.figure(1)
    fig1.suptitle('boxplots of error variance')
    fig2 = plt.figure(2, figsize=(8.5, 11), dpi=150)
    fig2.suptitle('error over true age')
    fig3 = plt.figure(3, figsize=(8.5, 11), dpi=150)
    fig3.suptitle('absolute error over true age')
    fig4 = plt.figure(4, figsize=(8.5, 11), dpi=150)
    fig4.suptitle('predicted over true age')
    fig5 = plt.figure(5)
    fig5.suptitle('mean absolute error of the networks')
    fig7 = plt.figure(7)
    fig7.suptitle('correlation of absolute errors between networks')

    loc = 1
    txtMae = ''
    txtCorr = ''
    txtParm = ''
    errorVarList = []
    errorNameList = []
    numberFolds = None
    nitFigList = []
    trueAge = None
    loopFigId = 99
    figIds = []

    for network in networkNames:
        tD = valueDict[network]
        txtMae = (txtMae + 'MAE of ' + network
                  + ' = ' + str(np.round(tD['mae'], 3)) + '\n')
        tempNet = tempAnalysis.networks[network]
        tpCorr = st.pearsonr(tempNet.truePheno, tempNet.predictedPheno)[0]
        txtCorr = (txtCorr + 'Pearson\'s r for ' + network
                   + ' = ' + str(np.round(tpCorr, 3)) + '\n')
        txtParm = (txtParm + 'Parameters for ' + network
                   + ': C = ' + str(np.round(tempNet.cValue, 3))
                   + ' E = ' + str(np.round(tempNet.eValue, 6)) + '\n')
        numberFolds = len(tempNet.cvObject)
        trueAge = tempNet.truePheno

        errorVarList.append(tD['error'])
        errorNameList.append(network)

        tSP2 = fig2.add_subplot(rows, cols, loc, title=network)
        tSP2.plot(tD['true'], tD['error'], 'co')
        tSP3 = fig3.add_subplot(rows, cols, loc, title=network)
        tSP3.plot(tD['true'], tD['abs'], 'co')
        tSP4 = fig4.add_subplot(rows, cols, loc, title=network)
        tSP4.plot(tD['true'], tD['true'])
        tSP4.plot(tD['true'], tD['pred'], 'co')

        # one weight boxplot figure per network, numbered from loopFigId
        weightDict = tD['weights']
        netWeightList = []
        for netName in weightDict.keys():
            netWeightList.append(weightDict[netName])
            print(network + ' ' + netName + ' '
                  + str(len(weightDict[netName])))
        print(network + ' netweightlength ' + str(len(netWeightList)))

        tempFigure = plt.figure(loopFigId)
        tempSubPlot = tempFigure.add_subplot(111)
        tempSubPlot.boxplot(netWeightList)
        tempSubPlot.set_ylabel('weight distribution for network ' + network)
        # NOTE(review): boxes follow weightDict's key order while the labels
        # are the sorted network names -- confirm they line up
        plt.setp(tempSubPlot, xticklabels=networkNames)
        tempFigure.autofmt_xdate()
        nitFigList.append(tempFigure)

        loc += 1
        figIds.append(loopFigId)
        loopFigId += 1

    # text for the whole analysis
    txtName = ('The name of the current analysis is ' + aName)
    txtKernel = ('Here, a ' + tempAnalysis.kernel + ' kernel was used')
    txtFeat = ('The feature selection was ' + str(tempAnalysis.featureSelect))
    txtFolds = (str(numberFolds) + ' folds were run while estimating age')
    txtAnova = ('ANOVA of Network effect on prediction error returned:\nF = '
                + str(np.round(anova[0], 3))
                + ' p = ' + str(np.round(anova[1], 3)))
    txtAge = ('Their ages ranged from ' + str(np.round(trueAge.min(), 2))
              + ' to ' + str(np.round(trueAge.max(), 2))
              + ' years of age (SD = ' + str(np.round(np.std(trueAge), 2))
              + ')')

    statString = (txtName + '\n' + txtKernel + '\n' + txtFeat
                  + '\n' + txtFolds + '\n' + txtAnova
                  + '\n' + txtAge + '\n' + txtKendallsW)
    dynString = (txtMae + '\n\n' + txtCorr + '\n\n' + txtParm)
    fullString = (statString + '\n\n\n' + dynString)
    fig0.text(0.1, 0.2, fullString)

    # figure 1: error boxplots
    tSP1 = fig1.add_subplot(111)
    tSP1.boxplot(errorVarList)
    plt.setp(tSP1, xticklabels=errorNameList)
    fig1.autofmt_xdate()

    # figure 5: MAE per network
    tSP5 = fig5.add_subplot(111)
    indMae = range(len(maeList))
    tSP5.bar(indMae, maeList, facecolor='#99CCFF', align='center')
    tSP5.set_ylabel('MAE for network')
    tSP5.set_xticks(indMae)
    tSP5.set_xticklabels(networkNames)
    fig5.autofmt_xdate()

    # NOTE(review): figure 7 is titled as an error correlation but shows the
    # rank matrix, as before -- confirm which one is intended
    tSP7 = fig7.add_subplot(111)
    tSP7.pcolor(kendallMat)

    for figure in (fig1, fig2, fig3, fig4, fig5, fig7):
        figure.subplots_adjust(hspace=0.5, wspace=0.5)

    # save everything and release the figures; figure 0 is closed as well so
    # that a later call does not write on top of the old text page
    pp = pdf((aName + '_results.pdf'))
    try:
        for figure in [fig0, fig1, fig2, fig3, fig4, fig5, fig7] + nitFigList:
            pp.savefig(figure)
    finally:
        pp.close()
        for figId in [0, 1, 2, 3, 4, 5, 7] + figIds:
            plt.close(figId)
    print('\nDone saving. Have a nice day.')
def Visualize(study, analysis):
    '''
    Build the PDF report <analysis>_results.pdf for a single-network analysis.

    Only the first network of study.analyses[analysis] is reported; a
    message is printed when more than one is present.  The report holds a
    text page, an error boxplot, error and absolute error over the true
    values, and predicted over true values.

    study:    object with an `analyses` mapping
    analysis: key into study.analyses; also used as the report name
    '''
    print('Fetching analysis ' + analysis + ' now. Hold on to your heads!')
    tempAnalysis = study.analyses[analysis]
    # list() keeps the indexing valid for Python 3 dict views as well
    availableNames = list(tempAnalysis.networks.keys())
    networkName = availableNames[0]
    if len(availableNames) > 1:
        print('more than one network in there smartass')

    network = tempAnalysis.networks[networkName]
    tempTrue = network.truePheno
    tempPred = network.predictedPheno
    tempErr = tempPred - tempTrue
    tempAbs = np.absolute(tempErr)
    tempMae = np.mean(tempAbs)

    # figure 0 carries the text page
    fig0 = plt.figure(0, figsize=(8.5, 11), dpi=150)
    fig0.suptitle(analysis)

    fig1 = plt.figure(1)
    fig1.suptitle('boxplots of error variance')
    tSP1 = fig1.add_subplot(111)
    tSP1.boxplot([tempErr])

    fig2 = plt.figure(2, figsize=(8.5, 11), dpi=150)
    fig2.suptitle('error over true age')
    tSP2 = fig2.add_subplot(111, title=networkName)
    tSP2.plot(tempTrue, tempErr, 'co')

    fig3 = plt.figure(3, figsize=(8.5, 11), dpi=150)
    fig3.suptitle('absolute error over true age')
    tSP3 = fig3.add_subplot(111, title=networkName)
    tSP3.plot(tempTrue, tempAbs, 'co')

    fig4 = plt.figure(4, figsize=(8.5, 11), dpi=150)
    fig4.suptitle('predicted over true age')
    tSP4 = fig4.add_subplot(111, title=networkName)
    tSP4.plot(tempTrue, tempTrue)
    tSP4.plot(tempTrue, tempPred, 'co')

    # per-network texts
    txtMae = ('MAE of ' + networkName
              + ' = ' + str(np.round(tempMae, 3)) + '\n')
    tpCorr = st.pearsonr(tempTrue, tempPred)[0]
    txtCorr = ('Pearson\'s r for ' + networkName
               + ' = ' + str(np.round(tpCorr, 3)) + '\n')
    txtParm = ('Parameters for ' + networkName
               + ': C = ' + str(np.round(network.cValue, 3))
               + ' E = ' + str(np.round(network.eValue, 6)) + '\n')
    numberFolds = len(network.cvObject)
    trueAge = tempTrue

    # texts for the whole analysis
    txtName = ('The name of the current analysis is ' + analysis)
    txtKernel = ('Here, a ' + tempAnalysis.kernel + ' kernel was used')
    txtFeat = ('The feature selection was ' + str(tempAnalysis.featureSelect))
    txtFolds = (str(numberFolds) + ' folds were run while estimating age')
    txtAge = ('Their ages ranged from ' + str(np.round(trueAge.min(), 2))
              + ' to ' + str(np.round(trueAge.max(), 2))
              + ' years of age (SD = ' + str(np.round(np.std(trueAge), 2))
              + ')')

    statString = (txtName + '\n' + txtKernel + '\n' + txtFeat
                  + '\n' + txtFolds + '\n'
                  + '\n' + txtAge)
    dynString = (txtMae + '\n\n' + txtCorr + '\n\n' + txtParm)
    fullString = (statString + '\n\n\n' + dynString)
    fig0.text(0.1, 0.2, fullString)

    # Save and release.  The figures use fixed numbers, so leaving them open
    # would make a second call draw on top of this call's content.
    pp = pdf((analysis + '_results.pdf'))
    try:
        for figure in (fig0, fig1, fig2, fig3, fig4):
            pp.savefig(figure)
    finally:
        pp.close()
        for figId in range(5):
            plt.close(figId)
    print('\nDone saving. Have a nice day.')
def Main(loadFile):
    """
    Load a pickled analysis and write the report <analysis.name>_results.pdf.

    For every network the raw error (true - predicted), absolute error,
    MAE and Shapiro-Wilk p value are collected.  A one-way ANOVA over the
    error vectors is run only when every network passes the Shapiro-Wilk
    test (p >= 0.05); otherwise the report states that it was not run.

    Report pages:
    0) text summary (study-wide facts, then MAE, Pearson's r and
       parameters per network)
    1) boxplots of the network-specific raw errors
    2) raw error over true age
    3) absolute error over true age
    4) predicted over true age
    5) MAE per network

    loadFile: path of a gzip-compressed pickle of the analysis object.
        Unpickling fails if the class of the analysis object is not on the
        current PYTHONPATH.
    """
    # NOTE(review): pickle executes arbitrary code while loading -- only
    # open files from trusted sources.
    openFile = gzip.open(loadFile, "rb")
    try:
        analysis = cPickle.load(openFile)
    finally:
        openFile.close()
    aName = analysis.name

    # collect the per-network values
    valueDict = {}
    errList = []
    maeList = []
    normCount = 0

    networks = analysis.networks
    networkNames = sorted(networks.keys())
    for networkName in networkNames:
        tempNet = networks[networkName]
        tempTrue = tempNet.trueData
        tempPred = tempNet.predictedData
        tempErr = tempTrue - tempPred
        # error vectors are the ANOVA groups
        errList.append(tempErr)
        tempAbs = np.absolute(tempErr)
        tempMae = np.mean(tempAbs)
        maeList.append(tempMae)
        # p value of the Shapiro-Wilk test
        tempShapp = st.shapiro(tempErr)[1]
        if tempShapp >= 0.05:
            normCount += 1

        tempDict = {}
        tempDict["true"] = tempTrue
        tempDict["pred"] = tempPred
        tempDict["error"] = tempErr
        tempDict["abs"] = tempAbs
        tempDict["std"] = np.std(tempErr)
        tempDict["shapp"] = tempShapp
        tempDict["mae"] = tempMae
        valueDict[networkName] = tempDict

    # The ANOVA is only meaningful when ALL networks look normal.  The old
    # test (max p >= 0.05) already passed when a single network did, and
    # the failing branch left `anova` undefined, crashing further down.
    anova = None
    if normCount == len(networkNames):
        print("All networks are nicely normally distributed")
        anova = st.f_oneway(*errList)
        print("\nANOVA has run")
        print("Behold the amazing F of " + str(round(anova[0], 4))
              + " and p " + str(round(anova[1], 4)))
    else:
        print("not all networks are normally distributed")
        print(str(normCount) + " out of " + str(len(networkNames))
              + " networks are normally distributed")

    numberNetworks = len(networkNames)
    # two columns; add_subplot needs integer grid dimensions
    cols = 2
    rows = int(np.ceil(numberNetworks / 2.0))

    # figure 0 carries the text page
    fig0 = plt.figure(0, figsize=(8.5, 11), dpi=150)
    fig0.suptitle(aName)
    fig1 = plt.figure(1)
    fig1.suptitle("boxplots of error variance")
    fig2 = plt.figure(2, figsize=(8.5, 11), dpi=150)
    fig2.suptitle("error over true age")
    fig3 = plt.figure(3, figsize=(8.5, 11), dpi=150)
    fig3.suptitle("absolute error over true age")
    fig4 = plt.figure(4, figsize=(8.5, 11), dpi=150)
    fig4.suptitle("predicted over true age")
    fig5 = plt.figure(5)
    fig5.suptitle("mean absolute error of the networks")

    loc = 1
    txtMae = ""
    txtCorr = ""
    txtParm = ""
    errorVarList = []
    errorNameList = []
    numberFolds = None
    numberSubs = None
    trueAge = None

    for networkName in networkNames:
        tD = valueDict[networkName]
        txtMae = (txtMae + "MAE of " + networkName + " = "
                  + str(np.round(tD["mae"], 3)) + "\n")
        tempNet = networks[networkName]
        tpCorr = st.pearsonr(tempNet.trueData, tempNet.predictedData)[0]
        txtCorr = (txtCorr + "Pearson's r for " + networkName + " = "
                   + str(np.round(tpCorr, 3)) + "\n")
        txtParm = (
            txtParm
            + "Parameters for "
            + networkName
            + ": C = "
            + str(np.round(tempNet.C, 3))
            + " E = "
            + str(np.round(tempNet.E, 3))
            + "\n"
        )
        numberFolds = tempNet.numberFolds
        numberSubs = len(tempNet.subNames)
        trueAge = tempNet.trueData

        # boxplot input
        errorVarList.append(tD["error"])
        errorNameList.append(networkName)

        tSP2 = fig2.add_subplot(rows, cols, loc, title=networkName)
        tSP2.plot(tD["true"], tD["error"], "co")
        tSP3 = fig3.add_subplot(rows, cols, loc, title=networkName)
        tSP3.plot(tD["true"], tD["abs"], "co")
        tSP4 = fig4.add_subplot(rows, cols, loc, title=networkName)
        tSP4.plot(tD["true"], tD["true"])
        tSP4.plot(tD["true"], tD["pred"], "co")
        loc += 1

    # text for the whole study
    txtName = "The name of the current analysis is " + aName
    txtKernel = "Here, a " + analysis.kernel + " kernel was used"
    txtFeat = "The feature selection was " + str(analysis.fs)
    txtFolds = str(numberFolds) + " folds were run while estimating age"
    if anova is None:
        txtAnova = ("ANOVA of Network effect on prediction error was not run"
                    " (not all networks are normally distributed)")
    else:
        txtAnova = (
            "ANOVA of Network effect on prediction error returned:\nF = "
            + str(np.round(anova[0], 3))
            + " p = "
            + str(np.round(anova[1], 3))
        )
    txtSubs = "There were " + str(numberSubs) + " subjects in this analysis"
    txtAge = (
        "Their ages ranged from "
        + str(np.round(trueAge.min(), 2))
        + " to "
        + str(np.round(trueAge.max(), 2))
        + " years of age (SD = "
        + str(np.round(np.std(trueAge), 2))
        + ")"
    )

    statString = (
        txtName
        + "\n"
        + txtKernel
        + "\n"
        + txtFeat
        + "\n"
        + txtFolds
        + "\n"
        + txtAnova
        + "\n"
        + txtSubs
        + "\n"
        + txtAge
    )
    dynString = txtMae + "\n\n" + txtCorr + "\n\n" + txtParm
    fullString = statString + "\n\n\n" + dynString
    fig0.text(0.1, 0.2, fullString)

    # figure 1: error boxplots
    tSP1 = fig1.add_subplot(111)
    tSP1.boxplot(errorVarList)
    plt.setp(tSP1, xticklabels=errorNameList)

    # figure 5: MAE per network
    tSP5 = fig5.add_subplot(111)
    indMae = range(len(maeList))
    tSP5.bar(indMae, maeList, facecolor="#99CCFF", align="center")
    tSP5.set_ylabel("MAE for network")
    tSP5.set_xticks(indMae)
    tSP5.set_xticklabels(networkNames)
    fig5.autofmt_xdate()

    for figure in (fig1, fig2, fig3, fig4, fig5):
        figure.subplots_adjust(hspace=0.5, wspace=0.5)

    # Save and release.  The figures use fixed numbers, so leaving them open
    # would make a second call draw on top of this call's content.
    pp = pdf((aName + "_results.pdf"))
    try:
        for figure in (fig0, fig1, fig2, fig3, fig4, fig5):
            pp.savefig(figure)
    finally:
        pp.close()
        for figId in range(6):
            plt.close(figId)
    print("\nDone saving. Have a nice day.")
def Main(inFile, outFile, pdfFile):
    '''
    Build a test Network from a whitespace-separated table, run it, and
    save the pickled network and a predicted-vs-true plot.

    Every line of inFile becomes one subject named 'case_<n>' (1-based).
    Columns are counted from 1: columns 4 and 9 are skipped, column 14 is
    stored as the 'houseprice' phenotype and every other column is
    appended to the subject's feature vector.

    inFile  -- path of the table to read
    outFile -- path of the gzip-compressed pickle of the network
    pdfFile -- path of the PDF that receives the comparison figure

    Raises ValueError when a line has no 14th column (previously the
    phenotype of the preceding line was silently reused).
    '''
    # read everything up front so the handle is released immediately
    with open(inFile, 'rb') as loadFile:
        fileLines = loadFile.readlines()

    subDir = {}
    # storage for comparing other models
    featRows = []
    labels = []

    for subCount, line in enumerate(fileLines, 1):
        useLine = line.strip().split()
        # make a new subject
        subName = ('case_' + str(subCount))
        tempSub = pp.Subject(subName, 'test')
        tempFeat = np.array([])
        tempPheno = None
        for run, word in enumerate(useLine, 1):
            if run == 4 or run == 9:
                continue
            if run == 14:
                tempPheno = float(word)
            else:
                tempFeat = np.append(tempFeat, float(word))
        if tempPheno is None:
            raise ValueError('line %d of %s has no 14th column to use as label'
                             % (subCount, inFile))

        tempSub.pheno = {}
        tempSub.pheno['houseprice'] = tempPheno
        tempSub.feature = tempFeat
        subDir[subName] = tempSub

        # and add them also to the storage vars
        featRows.append(tempFeat)
        labels.append(tempPheno)

    # stack once instead of re-allocating the matrix for every subject
    if featRows:
        featMat = np.vstack(featRows)
    else:
        featMat = np.array([])
    labVec = np.array(labels, dtype=float)

    print(str(featMat.shape) + '/' + str(labVec.shape))

    # now make a network of it and run that stuff
    numberSubjects = len(subDir)
    print(numberSubjects)
    # make a crossvalidation object
    cvObject = an.cv.KFold(numberSubjects, 10, shuffle=True)

    testNetwork = an.Network('test', cvObject)
    testNetwork.subjects = subDir
    testNetwork.pheno = 'houseprice'
    testNetwork.featureSelect = 'None'
    testNetwork.cValue = 1000
    testNetwork.gridCv = 5
    testNetwork.gridCores = 1
    testNetwork.eValue = 0.001
    testNetwork.kernel = 'rbf'
    # set number of parallel processes in Network
    testNetwork.numberCores = 10

    # make the runs
    print(len(testNetwork.subjects.keys()))
    print(len(testNetwork.cvObject))
    testNetwork.makeRuns()
    # now run the runs
    testNetwork.executeRuns()

    # and also run the other model quickly
    (modelPred, modelTrue) = compareLogReg(featMat, labVec)

    print('\nGot here')
    # now save the result; closing the gzip stream is what flushes the
    # trailer, so the file must not be left open
    with gzip.open(outFile, 'wb') as outF:
        cPickle.dump(testNetwork, outF, protocol=2)

    # and display the rest
    pPheno = testNetwork.predictedPheno
    tPheno = testNetwork.truePheno

    fig4 = plt.figure(4, figsize=(8.5, 11), dpi=150)
    fig4.suptitle('predicted over true age')

    tSP4 = fig4.add_subplot(111, title=testNetwork.name)
    tSP4.plot(tPheno, tPheno)           # identity line
    tSP4.plot(tPheno, pPheno, 'co')     # network prediction
    tSP4.plot(modelTrue, modelPred, 'mo')  # comparison model

    fig4.subplots_adjust(hspace=0.5, wspace=0.5)

    pdfOut = pdf(pdfFile)
    try:
        pdfOut.savefig(fig4)
    finally:
        pdfOut.close()
        plt.close(4)
    print('Just created ' + pdfFile + '\nAll done here!')
def _list_variable_dirs(src_dir, model, inargs, ordinal):
    """Return the variable directories below *src_dir* after filtering.

    Prints a diagnostic and exits when nothing is found or when the
    include/exclude options leave nothing to plot. *ordinal* ('First' or
    'Second') only selects the wording of the error messages.
    """
    var_dirs = glob.glob(src_dir + '*/*/*',
                         recursive=True)  # identify all reported variables
    if not var_dirs:
        print(
            'INPUT ERROR: Directory specification error (no variables found).')
        print(src_dir)
        sys.exit()
    if model[0:7] == 'GISS-E2':  # skip *fx* and *fy* variables
        var_dirs = [d for d in var_dirs if 'fx' not in d and 'fy' not in d]
        print(
            'WARNING: Skipping *fx* and *fy* from E2 owing to potential dimensionality differences.'
        )
    # NOTE(review): the previous code repeated the 'fx' filter once more for
    # the second source only; with the original indentation lost it is
    # treated here as a redundant duplicate of the filter above - confirm.
    if inargs.include is not None:
        kept = []
        for key in inargs.include.split(','):  # comma-separated list
            kept.extend(d for d in var_dirs if key in d)
        var_dirs = kept
        if not var_dirs:
            print('DATA ERROR: ' + ordinal +
                  ' directory output is missing specified variable(s).')
            print(inargs.include)
            sys.exit()
    elif inargs.exclude is not None:
        for key in inargs.exclude.split(','):  # comma-separated list
            var_dirs = [d for d in var_dirs if key not in d]
        if not var_dirs:
            print('DATA ERROR: ' + ordinal +
                  ' directory output is empty beyond excluded variable(s).')
            print(inargs.exclude)
            sys.exit()
    return var_dirs


def _select_files(var_dirs, month, use_first):
    """Pick one NetCDF file per variable directory.

    Within each variable directory the first (``use_first``) or last sorted
    version sub-directory is searched. A file whose path contains '201412'
    is preferred; otherwise the first file whose path contains *month*
    without the dash is taken. Directories without a hit are skipped.
    """
    stamp = month.replace('-', '')
    picked = []
    for var_dir in var_dirs:
        versions = sorted(glob.glob(var_dir + '/*', recursive=True))
        if not versions:
            continue
        version = versions[0] if use_first else versions[-1]
        files = sorted(glob.glob(version + '/*.nc'))
        hits = [f for f in files if '201412' in f]
        if not hits:
            hits = [f for f in files if stamp in f]
        if hits:
            picked.append(hits[0])
    return picked


def _require_month(files, month, ordinal):
    """Exit with a diagnostic unless the first file can be selected on *month*."""
    try:
        with xr.open_dataset(files[0]) as probe:
            probe.sel(time=month)
    except Exception:
        # Boundary check: an empty file list, an unreadable file and a
        # missing month are all reported the same way. Unlike a bare
        # ``except`` this lets KeyboardInterrupt/SystemExit through.
        print('DATA ERROR: ' + ordinal +
              ' directory output is missing specified year and month.')
        print(files)
        sys.exit()


def _plot_map(field, ax):
    """Draw *field* on map axes with a colour bar and coastlines."""
    # rasterize to reduce file size
    field.plot(ax=ax,
               transform=ccrs.PlateCarree(),
               cbar_kwargs={'label': field.units},
               rasterized=True)
    ax.coastlines()


def _draw_panel(fig, slot, path, month, model, primary=False):
    """Plot the last data variable of *path* into subplot *slot* of *fig*.

    The panel type follows the variable's number of dimensions: 1 is a
    scalar placeholder, 2 a map of the whole field, 3 a map (or a line plot
    when the second dimension is 'basin'), 4 a section at basin=0 or lon=0,
    anything else a profile at lat=0/lon=0. Except for 2-D fields the data
    are first selected on *month* and the first time step is plotted, so
    the annotation always matches the data shown.
    """
    directory, fname = os.path.split(path)
    with xr.open_dataset(path) as data:
        var = list(data.data_vars.keys())[-1]  # identify the variable name
        dims = data[var].dims
        ndims = len(dims)  # determine dimensionality
        # most fields have time; 2-D fields are plotted as they are
        selected = data if ndims == 2 else data.sel(time=month)

        subtitle = ''
        note_y = -0.25
        if ndims == 1:  # data is a scalar (dummy plot)
            ax = fig.add_subplot(slot)
            field = selected[var].isel(time=0)
            ax.annotate('SCALAR VALUE',
                        xy=(0.4, 0.5),
                        xycoords='axes fraction')
            note_y = -0.15
        elif ndims == 2:
            ax = fig.add_subplot(
                slot, projection=ccrs.PlateCarree(central_longitude=180))
            field = selected[var]
            _plot_map(field, ax)
        elif ndims == 3:
            if dims[1] == 'basin':  # line plot
                ax = fig.add_subplot(slot)
                field = selected[var].isel(basin=0, time=0)
                subtitle = ' (basin=0)'
                field.plot(ax=ax)
            else:  # lat/lon map
                ax = fig.add_subplot(
                    slot, projection=ccrs.PlateCarree(central_longitude=180))
                field = selected[var].isel(time=0)
                _plot_map(field, ax)
        elif ndims == 4:  # narrow down to one basin or longitude
            ax = fig.add_subplot(slot)
            if dims[1] == 'basin':
                field = selected[var].isel(basin=0, time=0)
                subtitle = ' (basin=0)'
            else:
                field = selected[var].isel(lon=0, time=0)
                subtitle = ' (lon=0)'
            field.plot(ax=ax,
                       cbar_kwargs={'label': field.units},
                       rasterized=True)
            if dims[1] in ('lev', 'plev'):
                ax.invert_yaxis()
            if dims[2] == 'lev':
                ax.invert_yaxis()  # ocean basin case
        else:  # more than 4 dimensions: also choose a latitude
            ax = fig.add_subplot(slot)
            field = selected[var].isel(lat=0, lon=0, time=0)
            subtitle = ' (Lat/Lon=0/0)'
            field.plot(ax=ax,
                       cbar_kwargs={'label': field.units},
                       rasterized=True)
            note_y = -0.2

        # parse directory name for title
        parr = directory.split(model)
        title = (parr[0] + model + '\n' + parr[1] + '/\n' + fname + '\n' +
                 field.attrs['long_name'] + subtitle)
        plt.title(title)

        # calculate statistics and report below figure
        if ndims == 1:
            val_str = "value = " + "{:.5e}".format(field.data)
        else:
            # the first-source 2-D panel has always been labelled 'avg';
            # kept so existing output does not change
            if ndims == 2 and primary:
                label = "min, max, avg = "
            else:
                label = "min, max, mean = "
            val_str = (label + "{:.5e}".format(field.min().data) + ", " +
                       "{:.5e}".format(field.max().data) + ", " +
                       "{:.5e}".format(field.mean().data))
        ax.annotate(month + ' ' + val_str,
                    xy=(0, note_y),
                    xycoords='axes fraction')


def main(inargs):
    """Run the program.

    Writes one letter-size page per variable into a multipage PDF. The
    first source is drawn in the top half of each page; with
    ``inargs.compare`` the matching variable of the second source is drawn
    below it, and variables found only in the second source get pages of
    their own afterwards.

    Attributes read from *inargs*: ``dir_1`` and ``mon_1`` ('YYYY-MM'),
    ``include`` / ``exclude`` (comma-separated substrings or None),
    ``first`` (use the first instead of the last version directory),
    ``compare``, and, when comparing, ``dir_2`` and ``mon_2`` (None means
    the same month as the first source). The model, run and scenario names
    are the 4th-, 3rd- and 2nd-to-last '/'-separated parts of the source
    directory string.

    Exits via ``sys.exit()`` after printing a message when the input is
    unusable.
    """
    # specify source files from a model run, and year and month to plot
    dir_1 = inargs.dir_1
    tim_1 = inargs.mon_1
    if len(tim_1) != 7:
        print('INPUT ERROR: Date(s) must be in YYYY-MM format.')
        sys.exit()  # abort
    print('First source: ', dir_1)
    mod_1 = dir_1.split('/')[-4]
    run_1 = dir_1.split('/')[-3]
    sce_1 = dir_1.split('/')[-2]
    dir_1_var = _list_variable_dirs(dir_1, mod_1, inargs, 'First')
    ncs_1 = _select_files(dir_1_var, tim_1, inargs.first)
    _require_month(ncs_1, tim_1, 'First')

    # optionally specify source files from a second model run
    ncs_2 = []
    if inargs.compare:
        dir_2 = inargs.dir_2
        # optional different year and month from second run
        tim_2 = inargs.mon_2 if inargs.mon_2 is not None else tim_1
        print('Second source: ', dir_2, tim_2)
        mod_2 = dir_2.split('/')[-4]
        run_2 = dir_2.split('/')[-3]
        sce_2 = dir_2.split('/')[-2]
        dir_2_var = _list_variable_dirs(dir_2, mod_2, inargs, 'Second')
        # files of the second run are looked up with the second run's month
        ncs_2 = _select_files(dir_2_var, tim_2, inargs.first)
        _require_month(ncs_2, tim_2, 'Second')

    # specify local destination for output comparison plots
    if inargs.compare:
        out_pdf = (mod_1 + '_' + run_1 + '_' + sce_1 + '_vs_' + mod_2 + '_' +
                   run_2 + '_' + sce_2 + '.pdf')
    else:
        out_pdf = mod_1 + '_' + run_1 + '_' + sce_1 + '.pdf'

    # the context manager finalizes the document even if a page fails
    with pdf('multipage.pdf') as pp:
        # loop over source files in first model run
        print('Processing first source ...')
        for f_1 in ncs_1:
            print(f_1)
            fig = plt.figure(figsize=[8.5, 11])  # letter-size page
            _draw_panel(fig, 211, f_1, tim_1, mod_1, primary=True)

            # optionally search for matching variable in second directory
            fname = os.path.split(f_1)[1]
            search_str = '/' + fname.split('_')[0] + '_' + fname.split('_')[1]
            matching_file = [i for i in ncs_2 if search_str in i]
            if matching_file:
                f_2 = matching_file[0]
                print(f_2)
                _draw_panel(fig, 212, f_2, tim_2, mod_2)

            fig.tight_layout(pad=6)
            pp.savefig(fig)  # completed page
            plt.close(fig)  # clear matplotlib for next page

        # loop over source files in second model run
        # (plot only any missing from first run)
        if inargs.compare:
            print('Processing second source ...')
            for f_2 in ncs_2:
                fname = os.path.split(f_2)[1]
                key = fname.split('_')[0] + '_' + fname.split('_')[1]
                if any(key in i for i in ncs_1):
                    continue
                print(f_2)
                fig = plt.figure(figsize=[8.5, 11])
                _draw_panel(fig, 212, f_2, tim_2, mod_2)
                pp.savefig(fig)
                plt.close(fig)

    # save document to descriptive file name (no shell involved, and the
    # rename has finished before the name is reported)
    os.replace('multipage.pdf', out_pdf)
    print('Output file: ', out_pdf)
def Plot_splines():
    """Plot the stored flux tables and their spline fits into PDF files.

    Reads the module-level names ``dirname``, ``name`` and ``particles``.
    Each entry of ``particles`` is indexed as (identifier, label, line
    style). For every particle that belongs to a flavor a two-page PDF
    ``<dirname>/plots/<name>_<identifier>[_mag<mag>].pdf`` is written
    (energy dependence, then zenith dependence), and for every flavor a
    two-page summary ``<dirname>/plots/perflavor_<flavor>[_mag<mag>].pdf``.
    All figures are closed once they have been saved.
    """
    ### Plot'em!
    import seaborn as sb
    import matplotlib.pyplot as plt
    from matplotlib.lines import Line2D
    from matplotlib.backends.backend_pdf import PdfPages as pdf

    # Choose magnification factor. Fluxes will be multiplied by
    # GeV**mag to better bring out features in steeper regions
    # (mag = 0 for "pure" fluxes):
    mag = 3
    mag_tag = '_mag' + str(mag) if mag != 0 else ''

    def Load_data(savename):
        ### Loading the plot data previously stored by Save_data_for_plots():
        filename = dirname + '/plotdata/' + savename + '_plotdata.dat'
        # Load coszen axis from the '# coszen' header line:
        with open(filename) as f:
            xc = [line for line in f if line.startswith('# coszen')]
        xc = [float(i) for i in xc[0].split()[2:]]
        # Load energy axis, datapoints and splines; after the energy
        # column, data and spline columns alternate:
        loaddata = np.loadtxt(filename, unpack=True)
        xe = loaddata[0]
        datapoints = loaddata[1::2]
        splines = loaddata[2::2]
        return (xe, xc, datapoints, splines)

    def Create_title(particle, flavor):
        ### Create plot title.
        title = ('atmospheric ' + particle[1] + ' flux for ' + name +
                 ', tabulated data and spline fits')
        title_flavor = ('atmospheric ' + flavor[1] + ' fluxes for ' + name +
                        ', tabulated data and spline fits')
        return title, title_flavor

    def Create_axlabels():
        ### Create axis labels.
        xlabel = r'kinetic energy $E$ [GeV]'
        ylabel = (r'flux $\Phi$ [GeV$^{' + str(mag - 1) +
                  r'}$cm$^{-2}$s$^{-1}$sr$^{-1}$]')
        return xlabel, ylabel

    def Create_label(particle):
        if 'from' in particle[1]:
            return particle[1].split(')')[0].replace('(', '')
        return particle[1].split(' ')[0]

    def Belongs_to(particle, flavor):
        ### An antineutrino flavor matches by substring; a neutrino flavor
        ### additionally requires that the particle is not a 'bar' variant.
        if flavor[0] not in particle[0]:
            return False
        return ('bar' in flavor[0]) or ('bar' not in particle[0])

    sb.set_context(context='notebook', font_scale=1.2,
                   rc={"lines.linewidth": 2.0})
    sb.set_style('whitegrid')

    xlabel, ylabel = Create_axlabels()
    coszen_label = r'cosine of zenith angle $\cos(\theta)$'
    # the zenith panels show unmagnified fluxes, hence GeV^-1
    raw_flux_label = r'flux $\Phi$ [GeV$^{-1}$cm$^{-2}$s$^{-1}$sr$^{-1}$]'

    markers = '+'
    custom_legend = [Line2D([0], [0], color='gray', lw=0, marker=markers,
                            label=r'data for $\cos(\theta)=1$')]
    short_legend = [Line2D([0], [0], color='gray', lw=0, marker=markers,
                           label=r'data')]
    numcols = 3

    flavors = [
        ('nue', r'$\nu_{e}$'),
        ('nuebar', r'$\bar{\nu}_{e}$'),
        ('numu', r'$\nu_{\mu}$'),
        ('numubar', r'$\bar{\nu}_{\mu}$'),
        ('nutau', r'$\nu_{\tau}$'),
        ('nutaubar', r'$\bar{\nu}_{\tau}$')
    ]

    # Currently we have 9 different flux variants to display (total, conv, pi,
    # k, K0, K0L, K0S, prompt, mu). Adjust the length of this linspace
    # according to changes in number of variants:
    flavor_colors = plt.cm.jet(np.linspace(0, 1, 9))

    for flavor in flavors:
        fig3, ax3 = plt.subplots(1, 1, figsize=(9, 5))
        fig3.subplots_adjust(bottom=0.14, top=0.91, left=0.12, right=0.95,
                             wspace=0.2)
        fig4, axes4 = plt.subplots(3, numcols, figsize=(9, 5), sharex='col')
        fig4.subplots_adjust(bottom=0.13, top=0.76, left=0.1, right=0.95,
                             wspace=0.2, hspace=0.3)
        fig4.text(0.5, 0.03, coszen_label, ha='center')
        fig4.text(0.02, 0.5, raw_flux_label, va='center',
                  rotation='vertical')

        # legend entries of the summary panels; stay empty when nothing was
        # plotted instead of being unbound or left over from another flavor
        handles, labels = [], []
        p = 0  # number of particles drawn for this flavor so far
        for particle in particles:
            if not Belongs_to(particle, flavor):
                continue

            savename = name + '_' + particle[0]
            xe, xc, datapoints, splines = Load_data(savename)
            # Transpose the data for coszen dependence plots:
            datapointsT = np.transpose(datapoints)
            splinesT = np.transpose(splines)
            # Stride for the zenith curves (because we don't want hundreds
            # of fluxes in a plot); at least 1 so that fewer than nine
            # zenith rows do not give a zero slice step:
            cit = max(1, int(len(datapoints) / 9))
            colors = plt.cm.jet(np.linspace(0, 1, len(splines[::cit])))

            title, title_flavor = Create_title(particle, flavor)

            fig1, ax1 = plt.subplots(1, 1, figsize=(9, 5))
            fig1.subplots_adjust(bottom=0.14, top=0.91, left=0.12,
                                 right=0.95, wspace=0.2)
            fig2, axes2 = plt.subplots(3, numcols, figsize=(9, 5),
                                       sharex='col')
            fig2.subplots_adjust(bottom=0.13, top=0.87, left=0.1, right=0.95,
                                 wspace=0.2, hspace=0.3)
            fig2.suptitle(title, fontsize=14)
            fig2.text(0.5, 0.03, coszen_label, ha='center')
            fig2.text(0.02, 0.5, raw_flux_label, va='center',
                      rotation='vertical')
            fig4.suptitle(title_flavor, fontsize=14)

            ##--- Energy dependence plots ----------------------------------
            # Plot the splines:
            for spline, label, color in zip(splines[::cit], xc[::cit],
                                            colors):
                ax1.loglog(xe, spline * xe**mag, label='%.2f' % label,
                           color=color)
            # Plot the data points of the last zenith row:
            ax1.loglog(xe, datapoints[-1] * xe**mag, lw=0, marker=markers,
                       color='gray', alpha=0.4)
            ax1.set_title(title)
            ax1.set_xlabel(xlabel)
            ax1.set_ylabel(ylabel)
            # A second legend() call replaces the first one, so the data
            # legend is re-attached as a plain artist afterwards.
            leg11 = ax1.legend(handles=custom_legend, loc='upper right')
            ax1.legend(title=r'$\cos(\theta)$', loc='lower left')
            ax1.add_artist(leg11)

            # Per flavor:
            ax3.loglog(xe, datapoints[-1] * xe**mag, lw=0, marker=markers,
                       color='gray', alpha=0.4)
            ax3.loglog(xe, splines[-1] * xe**mag,
                       label=Create_label(particle), ls=particle[2],
                       color=flavor_colors[p], alpha=0.8)
            ax3.set_title(title_flavor)
            ax3.set_xlabel(xlabel)
            ax3.set_ylabel(ylabel)
            leg31 = ax3.legend(handles=custom_legend, loc='upper right')
            ax3.legend(loc='lower left', ncol=2)
            ax3.add_artist(leg31)

            ##--- Coszen dependence plots ----------------------------------
            # Plot the data points and splines:
            for ax2, ax4, dataset, spline, label, color in zip(
                    axes2.flatten(), axes4.flatten(), datapointsT[21::10],
                    splinesT[21::10], xe[21::10], colors):
                panel_title = r'at $E\approx$%.0e' % label + ' GeV'

                ax2.tick_params(axis='both', which='major', labelsize=10)
                ax2.yaxis.offsetText.set_fontsize(10)
                ax2.set_title(panel_title, fontsize=12, loc='right')
                ax2.plot(xc, dataset, lw=0, marker=markers, color='gray',
                         alpha=0.4)
                ax2.plot(xc, spline, color=color)
                ax2.ticklabel_format(axis='y', style='sci', scilimits=(0, 0))

                ax4.tick_params(axis='both', which='major', labelsize=10)
                ax4.yaxis.offsetText.set_fontsize(10)
                ax4.set_title(panel_title, fontsize=12, loc='right')
                ax4.plot(xc, dataset, lw=0, marker=markers, color='gray',
                         alpha=0.4)
                ax4.plot(xc, spline, color=flavor_colors[p], alpha=0.9,
                         ls=particle[2], label=Create_label(particle))
                ax4.ticklabel_format(axis='y', style='sci', scilimits=(0, 0))
                handles, labels = ax4.get_legend_handles_labels()
            p += 1

            with pdf(dirname + '/plots/' + savename + mag_tag +
                     '.pdf') as pdf_particle:
                pdf_particle.savefig(fig1)
                pdf_particle.savefig(fig2)
            # release the per-particle figures; otherwise two more stay
            # open for every particle plotted
            plt.close(fig1)
            plt.close(fig2)

        if p:
            fig4.legend(handles=short_legend, loc='upper left', fontsize=12,
                        bbox_to_anchor=(0.05, 0.45, 0.86, 0.5))
            fig4.legend(handles, labels, loc='upper left', ncol=5,
                        mode='expand',
                        bbox_to_anchor=(0.16, 0.45, 0.80, 0.5), fontsize=12)
        with pdf(dirname + '/plots/perflavor_' + flavor[0] + mag_tag +
                 '.pdf') as pdf_flavor:
            pdf_flavor.savefig(fig3)
            pdf_flavor.savefig(fig4)
        plt.close(fig3)
        plt.close(fig4)