Esempio n. 1
0
def plt_3d(trgt):
    """Render two 3-D views of one slice of ``trgt`` and save them as PDFs.

    ``figs/0.pdf`` overlays a surface plot and a colour-coded bar chart of
    ``trgt[-5, :, :, 0]``; ``figs/3.pdf`` shows the same slice after cubic
    interpolation onto a 160 x 160 grid.

    Args:
        trgt: 4-D array; axes 1 and 2 are used as the plotted grid
            (presumably [sample, row, col, channel] -- confirm with caller).
    """
    n_rows, n_cols = trgt.shape[1], trgt.shape[2]
    x, y = np.meshgrid(np.arange(0, n_rows), np.arange(0, n_cols))
    z = trgt[-5, :, :, 0]
    heights = z.ravel()

    # Colour every bar by its height: shift by |min|, scale by the maximum,
    # then look the fraction up in the 'Blues' colormap.
    offset = heights + np.abs(heights.min())
    fracs = offset / offset.max()
    scalar = cm.ScalarMappable(norm=colors.Normalize(vmin=0, vmax=1),
                               cmap=plt.get_cmap('Blues'))
    # to_rgba is vectorised; this also avoids the old comprehension variable
    # that shadowed the meshgrid array ``x``.
    color_values = scalar.to_rgba(fracs)

    fig = plt.figure()
    ax = ax3d(fig)
    ax.plot_surface(x, y, z, rstride=3, cstride=3, cmap='rainbow')
    ax.bar3d(x.ravel(),
             y.ravel(),
             np.zeros_like(y.ravel()),
             dx=1,
             dy=1,
             dz=heights,
             color=color_values)
    ax.view_init(75, 165)
    # The context manager closes the PDF even if rendering fails.
    with pdf('figs/0.pdf') as pp:
        pp.savefig(fig)
    plt.close(fig)  # release the figure; pyplot keeps it alive otherwise

    fig = plt.figure()
    ax = ax3d(fig)
    # NOTE(review): the 0..31 range looks tailored to a 32 x 32 grid --
    # confirm before using other shapes.
    gx, gy = np.mgrid[0:31:160j, 0:31:160j]
    points = np.array([x.ravel(), y.ravel()]).transpose()
    gz = griddata(points, heights, (gx, gy), method='cubic')
    # Shift to a zero minimum, then divide by the sum of the unshifted values.
    gz = (gz - gz.min()) / gz.sum()
    ax.plot_surface(gx, gy, gz, rstride=1, cstride=1, cmap='Reds')
    ax.view_init(75, 165)
    with pdf('figs/3.pdf') as pp:
        pp.savefig(fig)
    plt.close(fig)
    print('end')
Esempio n. 2
0
    def compar_band_multipage(self,
                              df,
                              fout,
                              xname='Li_Mean_val',
                              yname='Lsky',
                              cname='Solar_Zenith',
                              title=''):
        """Write one scatter page per wavelength into a multi-page PDF.

        The columns of ``df`` are grouped on column level 2; every group
        with a non-empty label gets its own page comparing ``xname``
        against ``yname``, coloured by ``cname``.

        Args:
            df: DataFrame with multi-level columns; the three names below
                are looked up on column level 1.
            fout: path of the PDF file to create.
            xname: column plotted on the x axis.
            yname: column plotted on the y axis.
            cname: column used to colour the points.
            title: prefix of each page title.
        """
        plt.ioff()
        with pdf(fout) as p:
            for (wl, group) in df.groupby(level=2, axis=1):
                if wl == '':
                    continue
                print(wl)
                # Rebind instead of dropna(inplace=True): the group is a
                # slice of df and must not be mutated in place.
                group = group.dropna()
                x = group.xs(xname, level=1, axis=1).values[:, 0]
                y = group.xs(yname, level=1, axis=1).values[:, 0]
                c = group.xs(cname, level=1, axis=1).values[:, 0]

                fig, self.ax = plt.subplots(figsize=(6, 6))
                ymax = max(x.max(), y.max())
                self.ax.set(xlim=(0, ymax), ylim=(0, ymax), aspect=1)
                # 1:1 reference line
                self.ax.plot([0, ymax], [0, ymax], '--', color='grey')
                im = self.ax.scatter(x, y, c=c, cmap='gnuplot')
                self.annot(x, y, ymax)
                fig.colorbar(im, ax=self.ax)
                fig.suptitle(title + ' at ' + str(wl) + ' nm')
                fig.tight_layout()

                p.savefig(fig)
                # Figure has no close() method; pyplot owns the figure.
                plt.close(fig)

            d = p.infodict()
            d['Title'] = 'Simulations vs measurements comparison '
            d['Author'] = u'T. Harmel (SOLVO)'
            d['CreationDate'] = datetime.datetime.today()
Esempio n. 3
0
def plt_6steps(dataset):
    """Plot eight result curves over six prediction steps and save a PDF.

    Reads ``result-collect/<dataset>.csv``, draws rows 0-7 against the
    steps 1-6 (one styled line per entry of the table below) and writes
    the figure to ``figs/<dataset>x.pdf``.
    """
    scores = pd.read_csv('result-collect/' + dataset + '.csv').values
    steps = [1, 2, 3, 4, 5, 6]
    # (legend label, colour, marker, line style), in CSV row order.
    curve_styles = [
        ('ResNet', 'r', '.', ':'),
        ('ST-UNet', 'g', 's', '-.'),
        ('ST-ANN', 'b', '^', '--'),
        ('MNNs', 'y', '+', '-'),
        ('ConvLSTM', 'black', '*', ':'),
        ('AttConvLSTM', 'grey', '2', '-.'),
        ('PCRN', 'c', 'x', '--'),
        ('ST-Attn', 'm', 'o', '-'),
    ]

    plt.figure(figsize=(6, 4))
    for row, (label, color, marker, linestyle) in enumerate(curve_styles):
        plt.plot(steps,
                 scores[row, :],
                 color=color,
                 linestyle=linestyle,
                 lw=1,
                 marker=marker,
                 ms=6,
                 label=label)

    text_size = 10
    plt.xticks(fontsize=text_size, color='black')
    plt.ylabel('RMSE', fontsize=text_size, color='black')
    plt.xlabel('predicting step', fontsize=text_size, color='black')
    plt.yticks(fontsize=text_size, color='black')
    plt.legend(fontsize=text_size - 2)
    plt.grid()

    out_pages = pdf('figs/' + dataset + 'x.pdf')
    plt.savefig(out_pages, format='pdf')
    out_pages.close()
Esempio n. 4
0
def save(path):
    """
    Save the current plot to ``path`` through the ``pdf`` (PdfPages) wrapper.

    Returns False without writing anything when ``pdf`` is falsy (no PDF
    backend available), True once the page has been written and the file
    closed. Errors raised while saving propagate to the caller.
    """
    if not pdf:
        return False
    page = pdf(path)
    try:
        page.savefig()
    finally:
        # Close the page itself; the former extra ``pp.close()`` referenced
        # an undefined name and raised NameError after every save.
        page.close()
    return True
Esempio n. 5
0
def plot_with_date(tb_i,
                   te_i,
                   trgt,
                   dataset='citybike',
                   fname='fig1',
                   ylabel='In-Flow of [16,16]'):
    """Plot the aggregated in-flow of one grid cell over time as a PDF.

    The series ``trgt[-n:, g, g, 0]`` (``n`` = number of time slots returned
    by ``dateindex``, ``g`` = 16 for 'BJTaxi' and 8 otherwise) is summed in
    groups of ``resh`` consecutive slots and drawn against a coarser date
    index (presumably one point per day -- confirm against ``dateindex``).
    The figure is written to ``figs/<fname>.pdf``.

    Args:
        tb_i: index into the coarse date index used as left x limit.
        te_i: index into the coarse date index used as right x limit.
        trgt: 4-D array holding the flow values.
        dataset: 'BJTaxi' selects the BJTaxi settings; any other value
            selects the settings of the default dataset.
        fname: stem of the output file name.
        ylabel: label of the y axis.
    """
    if dataset == 'BJTaxi':
        gridi, tslot, resh = 16, 30, 48
        first_day, last_day = '2015-11-01', '2016-04-09'
    else:
        gridi, tslot, resh = 8, 60, 24
        first_day, last_day = '2015-07-01', '2016-06-30'

    # Fine-grained index: only its length is needed to cut the series.
    n_slots = len(dateindex(first_day, last_day, tslot))
    cell_series = trgt[-n_slots:, gridi, gridi, 0]
    inflow = cell_series.reshape([-1, resh]).sum(axis=1)

    # Coarse index: one entry per group of ``resh`` slots.
    tindex = list(dateindex(first_day, last_day, resh * tslot))

    year_locator = dates.YearLocator()  # only for the disabled minor locator
    month_locator = dates.MonthLocator()
    month_format = dates.DateFormatter('%b')

    figure = plt.figure()
    figure.set_size_inches(5, 2)
    axes = figure.add_subplot(111)
    axes.xaxis.set_major_locator(month_locator)
    # axes.xaxis.set_minor_locator(year_locator)
    axes.xaxis.set_major_formatter(month_format)
    axes.set_xlim(tindex[tb_i], tindex[te_i])

    plt.plot(tindex, inflow, color='r', linewidth=0.5, label='In-Flow')

    small = 6
    plt.xticks(fontsize=small, color='black')
    plt.ylabel(ylabel, fontsize=small, color='black')
    plt.yticks(fontsize=small, color='black')
    plt.grid()

    out_pages = pdf('figs/' + fname + '.pdf')
    plt.savefig(out_pages, format='pdf')
    out_pages.close()
Esempio n. 6
0
def plt_heatmap(dataset):
    """Save a heat map of one stored target frame to ``figs/<dataset>_heatmap.pdf``.

    Loads ``ST-Attn/target.npy`` from the dataset's result directory,
    stacks it, rescales it by ``preprocess_max[dataset]`` and shows the
    frame ``[-112, 3, :, :, 0]``. 'citybike' and 'nyctaxi' are displayed as
    ``log(value + 1)``; 'nyctaxi' is additionally flipped along axis 2.
    """
    results_path = '/cluster/zhouyirong09/peer-work/ST-Attn/result-collect/' + dataset + '/'
    stacked = np.vstack(np.load(results_path + 'ST-Attn/target.npy'))
    trgt = stacked * preprocess_max[dataset]

    if dataset == 'citybike':
        frame = np.log(trgt[-112, 3, :, :, 0] + 1)
    elif dataset == 'nyctaxi':
        frame = np.log(trgt[-112, 3, ::-1, :, 0] + 1)
    else:
        frame = trgt[-112, 3, :, :, 0]
    plt.imshow(frame, cmap='Reds')

    # Hide every tick and tick label on both axes.
    plt.tick_params(which='both',
                    left=False, bottom=False,
                    labelleft=False, labelbottom=False)

    out_pages = pdf('figs/' + dataset + '_heatmap.pdf')
    plt.savefig(out_pages, format='pdf')
    out_pages.close()
Esempio n. 7
0
def improvements():
    """Plot, per dataset, the relative gap between two result rows.

    For each of the three datasets the CSV ``result-collect/<name>.csv`` is
    read and ``(row[-3] - row[-1]) / row[-3]`` is computed column-wise.
    The three resulting curves are drawn over the steps 1-6 and saved to
    ``figs/improvement.pdf``.
    """
    datasets = ['BJTaxi', 'nyctaxi', 'citybike']

    ratios = []
    for name in datasets:
        table = pd.read_csv('result-collect/' + name + '.csv').values
        last_row = table[-1, :]
        third_last_row = table[-3, :]
        ratios.append((third_last_row - last_row) / third_last_row)
    ratios = np.array(ratios)

    steps = [1, 2, 3, 4, 5, 6]
    # (colour, marker, line style) for the datasets, in the order above.
    curve_styles = [('r', '.', ':'), ('g', 's', '-.'), ('b', '^', '--')]

    plt.figure(figsize=(6, 4))
    for row, (color, marker, linestyle) in enumerate(curve_styles):
        plt.plot(steps,
                 ratios[row, :],
                 color=color,
                 linestyle=linestyle,
                 lw=1,
                 marker=marker,
                 ms=6,
                 label=datasets[row])

    text_size = 10
    plt.xticks(fontsize=text_size, color='black')
    plt.ylabel('RMSE', fontsize=text_size, color='black')
    plt.xlabel('predicting step', fontsize=text_size, color='black')
    plt.yticks(fontsize=text_size, color='black')
    plt.legend(fontsize=text_size - 2)
    plt.grid()

    out_pages = pdf('figs/improvement.pdf')
    plt.savefig(out_pages, format='pdf')
    out_pages.close()
Esempio n. 8
0
    def multipage_compar(self, df, fout, title=''):
        """Write one measured-vs-simulated scatter page per ``wl`` value.

        ``df`` is grouped by its ``wl`` column; each group is drawn as a
        scatter of ``Lsky_mes`` against ``Lsky_sim`` coloured by ``sza`` on
        square axes with a 1:1 reference line, and appended as a page to the
        PDF at ``fout``. ``title`` prefixes every page title.
        """
        plt.ioff()
        with pdf(fout) as pages:
            for wavelength, subset in df.groupby(df.wl):
                _fig, self.ax = plt.subplots(figsize=(6, 6))

                measured = subset.Lsky_mes
                simulated = subset.Lsky_sim
                upper = max(measured.max(), simulated.max())
                span = (0, upper)

                self.ax.set(xlim=span, ylim=span, aspect=1)
                self.ax.plot([0, upper], [0, upper], '--', color='grey')
                self.annot(measured, simulated, upper)

                page_title = title + ' at ' + str(wavelength) + ' nm'
                subset.plot(x='Lsky_mes',
                            y='Lsky_sim',
                            c="sza",
                            kind='scatter',
                            cmap='gnuplot',
                            ax=self.ax,
                            title=page_title)
                pages.savefig()
                plt.close()

            # Document metadata stored in the PDF itself.
            info = pages.infodict()
            info['Title'] = 'Simulations vs measurements comparison '
            info['Author'] = u'T. Harmel (SOLVO)'
            info['CreationDate'] = datetime.datetime.today()
Esempio n. 9
0
def _plotPredictedOverTrue(pPheno, tPheno, title, pdfFileName):
    '''Scatter predicted over true values and save the page to pdfFileName.'''
    fig4 = plt.figure(4, figsize=(8.5, 11), dpi=150)
    fig4.suptitle('predicted over true age')

    tSP4 = fig4.add_subplot(111, title=title)
    tSP4.plot(tPheno, tPheno)  # identity line as reference
    tSP4.plot(tPheno, pPheno, 'co')

    fig4.subplots_adjust(hspace=0.5, wspace=0.5)
    pdfPage = pdf(pdfFileName)
    try:
        pdfPage.savefig(fig4)
    finally:
        pdfPage.close()
        # figure number 4 is reused by every call, so it must be discarded
        plt.close(4)


def Main(outFile, pdfFile, strategy):
    '''
    Get the data from sklearn

    there are now three strategies:
        1) do everything with my classes and see the result
        2) do everything with my classes but run the runs separately
        3) replicate what my classes do without my classes

    outFile:  path of the gzipped pickle holding all results; it is only
              written when strategy is None
    pdfFile:  prefix of the generated PDF file names
    strategy: None runs every strategy; otherwise one of 'old', 'own',
              'manual', 'cv', 'nocv', 'clean'. Any other value prints an
              error message and returns without plotting.
    '''
    print('\n\nHello there, welcome to testing things. These are our params:'
          + '\nstrategy:' + str(strategy) + ' / outFile:' + outFile + ' / pdfFile:'
          + pdfFile)
    print('Not happy with it? Probably your fault! Enjoy!\n')
    dataset = load_boston()
    # dataset = load_diabetes()
    features = dataset.data
    labels = dataset.target
    numberCases = len(labels)
    stSt = {}

    cDict = {}
    cDict['pheno'] = 'houseprice'
    cDict['fs'] = 'None'
    cDict['cValue'] = 1000
    cDict['eValue'] = 0.001
    cDict['kernel'] = 'rbf'
    cDict['numberCores'] = 10
    cDict['gridCv'] = 5

    cvObject = an.cv.KFold(numberCases, 10, shuffle=True)

    # now see if we only run one or multiple
    if strategy is None:
        stSt['oldway'] = runShitTheOldWay(features, labels, cvObject, cDict)
        stSt['ownTrain'] = runShitButNotAll(features, labels, cvObject, cDict)
        stSt['manualCv'] = runShitHereYourself(features, labels, cDict)
        stSt['CvOwnTrain'] = runShitOnCv(features, labels, cvObject, cDict)
        stSt['noCv'] = runShitNoCv(features, labels, cDict)
        stSt['clean'] = runShitClean(features, labels)
        # now save the result; the with-block closes the file so the gzip
        # stream is flushed and properly terminated
        with gzip.open(outFile, 'wb') as outF:
            cPickle.dump(stSt, outF, protocol=2)

        # and show the results
        for result in stSt.keys():
            # 'oldway' yields two values, every other run yields four
            if result == 'oldway':
                pPheno, tPheno = stSt[result]
            else:
                pPheno, tPheno, _errors, _cValues = stSt[result]

            # tell a bit about the data
            print('Plotting ' + result)
            print('    tPheno ' + str(tPheno.shape))
            print('    tPheno ' + str(np.max(tPheno)))
            _plotPredictedOverTrue(pPheno, tPheno, result,
                                   pdfFile + '_' + result + '.pdf')
            print('Just created ' + pdfFile + '\nAll done here!')
        return

    # single strategy: the runners take different arguments, so wrap them
    runners = {
        'old': lambda: runShitTheOldWay(features, labels, cvObject, cDict),
        'own': lambda: runShitButNotAll(features, labels, cvObject, cDict),
        'manual': lambda: runShitHereYourself(features, labels, cDict),
        'cv': lambda: runShitOnCv(features, labels, cvObject, cDict),
        'nocv': lambda: runShitNoCv(features, labels, cDict),
        'clean': lambda: runShitClean(features, labels),
    }
    if strategy not in runners:
        print('Bullshit arguments!')
        # nothing was computed; carrying on used to crash on unbound names
        return

    # only the first two values (predicted, true) are needed for the plot
    pPheno, tPheno = tuple(runners[strategy]())[:2]

    # tell a bit about the data
    print('Plotting ' + strategy)
    print('    pPheno: ' + str(pPheno.shape) + '/' + str(np.max(pPheno)))
    print('    tPheno' + str(tPheno.shape) + '/' + str(np.max(tPheno)))

    # and now display the stuff
    _plotPredictedOverTrue(pPheno, tPheno, strategy,
                           pdfFile + '_strategy_' + strategy + '.pdf')
    print('Just created ' + pdfFile + '\nAll done here!')
Esempio n. 10
0
        return self.var

    def getStd(self):
        """Return the value stored in ``self.std``."""
        return self.std

    def getErr(self):
        """Return the value stored in ``self.err``."""
        return self.err


# Set up the output PDF: PDF/<year>_<month>_<day>_AnchorSpacing-<spacing>.pdf
# NOTE(review): the unit of `spacing` is not visible here -- confirm.
spacing = 11000
date = datetime.datetime.now()
path = 'PDF/'
file = '{}_{}_{}_AnchorSpacing-{}.pdf'.format(date.year, date.month, date.day,
                                              spacing)
page = pdf(path + file)

# Ask the user to select the relevant input files.
# NOTE: `file` is rebound here; from now on it holds the selected paths,
# not the output file name built above.
file = tkFileDialog.askopenfilenames()

# Build one SampleData object per selected file and collect them in a list
# for the entire anchor configuration
samples = []
for f in file:
    s = SampleData(f)
    samples.append(s)

dataMap_Figs = dataMap(
    samples, spacing)  # Plot data points, standard deviations, error bars
heat_Figs = heat(samples, spacing)  # Generate error and variance heat maps
tables = dataTable(
    samples
Esempio n. 11
0
def Visualize(study, analysis):
    """Build the multi-network PDF report for one analysis of a study.

    For every network in ``study.analyses[analysis]`` the prediction error
    (predicted minus true phenotype) is computed, tested for normality with
    the Shapiro-Wilk test and ranked for Kendall's W. The results are drawn
    into several figures that are written, one page each, to
    ``<analysis>_results.pdf`` in the current working directory.

    The one-way ANOVA over the networks' errors is only run when *every*
    network passes the normality test (p >= 0.05); otherwise the placeholder
    ``(999, 999)`` is reported in its place.

    Args:
        study: object exposing an ``analyses`` mapping.
        analysis: key into ``study.analyses``; it is also used as report
            title and output file prefix, so it has to be a string.
    """
    print('Fetching analysis ' + analysis + ' now. Hold on to your heads!')
    tempAnalysis = study.analyses[analysis]
    # sorted()/list() work whether keys()/values() return lists or views
    networkNames = sorted(tempAnalysis.networks.keys())
    numberNetworks = float(len(networkNames))
    tempNet = list(tempAnalysis.networks.values())[0]
    numberSubjects = float(len(tempNet.truePheno))
    aName = analysis
    netFeatInd, networkNumbers = FeatureIndex(tempAnalysis)

    valueDict = {}
    shappStore = np.array([])
    errList = []
    maeList = []
    normCount = 0
    # networks-by-subjects matrix of prediction ranks for Kendall's W
    kendallMat = np.array([])

    for network in networkNames:
        tempNetwork = tempAnalysis.networks[network]
        tempTrue = tempNetwork.truePheno
        tempPred = tempNetwork.predictedPheno
        tempErr = tempPred - tempTrue
        # append error to errorlist for ANOVA
        errList.append(tempErr)
        tempAbs = np.absolute(tempErr)
        tempMae = np.mean(tempAbs)
        # now rank the predictions and store the ranks in the matrix to
        # calculate Kendall's W
        # must be in the same order for all networks
        tempRanks = np.argsort(tempPred)
        ranks = np.empty(len(tempRanks), int)
        ranks[tempRanks] = np.arange(len(tempRanks))
        ranks += 1  # ranks start at 1
        if kendallMat.size == 0:
            kendallMat = ranks[None, ...]
        else:
            kendallMat = np.concatenate((kendallMat, ranks[None, ...]),
                                        axis=0)

        # now get the features for this network
        meanFeatures = NetworkFeatures(tempNetwork)
        # store the features under the name of the network they connect to
        netInd = netFeatInd[network]
        netInd = netInd[None, ...]
        print('meanFeat ' + str(meanFeatures.shape))
        print('netInd ' + str(netInd.shape))

        tempFeatStore = {}
        for netNum in networkNumbers.keys():
            netNumber = networkNumbers[netNum]
            # store this stuff
            tempFeatStore[netNum] = meanFeatures[netInd == netNumber]

        # append mae to maelist for display
        maeList.append(tempMae)
        tempStd = np.std(tempErr)
        # get the p value of the shapiro-wilk test
        tempShapp = st.shapiro(tempErr)[1]
        if tempShapp >= 0.05:
            normCount += 1
        shappStore = np.append(shappStore, tempShapp)
        # collect everything for this network in the valueDict
        valueDict[network] = {
            'true': tempTrue,
            'pred': tempPred,
            'error': tempErr,
            'abs': tempAbs,
            'std': tempStd,
            'shapp': tempShapp,
            'mae': tempMae,
            'weights': tempFeatStore,
        }

    # now run the tests to determine if we can run the ANOVA
    # min(), not max(): *all* networks must pass, which is what both
    # messages below state (max() let a single normal network through)
    if shappStore.min() >= 0.05:
        print('All networks are nicely normally distributed')
        # now run the ANOVA thing - right now, we run just everything
        anova = st.f_oneway(*errList)
        print('\nANOVA has run')
        print('Behold the amazing F of '
              + str(round(anova[0], 4))
              + ' and p '
              + str(round(anova[1], 4)))
    else:
        print('not all networks are normally distributed')
        print(str(normCount)
              + ' out of '
              + str(numberNetworks)
              + ' networks are normally distributed')
        anova = (999, 999)

    # now do the fancy Kendall's W business
    # first get the vector of summed total ranks across all networks (cols)
    print('Kendalls')
    print('nNet = ' + str(numberNetworks) + ' nSub = ' + str(numberSubjects))
    print(kendallMat.shape)
    sumRankVec = np.sum(kendallMat, axis=0)
    print(sumRankVec)
    meanRank = 1.0 / 2.0 * numberNetworks * (numberSubjects + 1)
    print(meanRank)
    sumSquaredDevs = np.sum((sumRankVec - meanRank) ** 2)
    print(sumSquaredDevs)
    kendallsW = 12.0 * sumSquaredDevs / ((numberNetworks ** 2.0) *
                                         ((numberSubjects ** 3)
                                          - numberSubjects))
    txtKendallsW = ('Kendall\'s W = ' + str(kendallsW))
    print('Kendall\'s W = ' + str(kendallsW))

    # now cols are hardcoded and rows depend on them; the subplot grid
    # needs integers
    cols = 2
    rows = int(np.ceil(numberNetworks / cols))

    # figure for text displays
    fig0 = plt.figure(0, figsize=(8.5, 11), dpi=150)
    fig0.suptitle(aName)

    fig1 = plt.figure(1)
    fig1.suptitle('boxplots of error variance')

    fig2 = plt.figure(2, figsize=(8.5, 11), dpi=150)
    fig2.suptitle('error over true age')

    fig3 = plt.figure(3, figsize=(8.5, 11), dpi=150)
    fig3.suptitle('absolute error over true age')

    fig4 = plt.figure(4, figsize=(8.5, 11), dpi=150)
    fig4.suptitle('predicted over true age')

    fig5 = plt.figure(5)
    fig5.suptitle('mean absolute error of the networks')

    # figure 6 (correlation of raw errors between networks) is disabled

    fig7 = plt.figure(7)
    fig7.suptitle('correlation of absolute errors between networks')

    loc = 1

    txtMae = ''
    txtCorr = ''
    txtParm = ''

    errorVarList = []
    errorNameList = []
    numberFolds = None
    nitFigList = []
    trueAge = None
    # per-network weight figures get numbers from 99 upwards so they cannot
    # collide with the fixed figures 0-7
    loopFigId = 99
    figIds = []
    # now loop over the networks and get the data
    for network in networkNames:
        # first get the values from the dict
        tD = valueDict[network]

        # then start with the texts
        txtMae = (txtMae + 'MAE of ' + network
                  + ' = ' + str(np.round(tD['mae'], 3)) + '\n')
        # read out temporary network file
        tempNet = tempAnalysis.networks[network]

        tpCorr = st.pearsonr(tempNet.truePheno,
                             tempNet.predictedPheno)[0]
        txtCorr = (txtCorr + 'Pearson\'s r for ' + network
                   + ' = ' + str(np.round(tpCorr, 3)) + '\n')
        txtParm = (txtParm + 'Parameters for ' + network
                   + ': C = ' + str(np.round(tempNet.cValue, 3)) + ' E = '
                   + str(np.round(tempNet.eValue, 6)) + '\n')

        numberFolds = len(tempNet.cvObject)
        trueAge = tempNet.truePheno

        errorVarList.append(tD['error'])
        errorNameList.append(network)

        tSP2 = fig2.add_subplot(rows, cols, loc, title=network)
        tSP2.plot(tD['true'], tD['error'], 'co')

        tSP3 = fig3.add_subplot(rows, cols, loc, title=network)
        tSP3.plot(tD['true'], tD['abs'], 'co')

        tSP4 = fig4.add_subplot(rows, cols, loc, title=network)
        tSP4.plot(tD['true'], tD['true'])
        tSP4.plot(tD['true'], tD['pred'], 'co')

        # make the loop for the network boxplot figures
        # for the boxplots, we have to append the data to a list
        weightDict = tD['weights']
        netWeightList = []
        for netName in weightDict.keys():
            netWeightList.append(weightDict[netName])
            print(network + ' ' + netName + ' ' + str(len(weightDict[netName])))

        print(network + ' netweightlength ' + str(len(netWeightList)))

        # got all the weight vectors in here, now create a figure and
        # use loopFigId as index
        tempFigure = plt.figure(loopFigId)
        tempSubPlot = tempFigure.add_subplot(111)
        tempSubPlot.boxplot(netWeightList)
        tempSubPlot.set_ylabel('weight distribution for network ' + network)
        plt.setp(tempSubPlot, xticklabels=networkNames)
        tempFigure.autofmt_xdate()
        # now store figure in list
        nitFigList.append(tempFigure)

        loc += 1
        figIds.append(loopFigId)
        loopFigId += 1

    # now create the text for the whole study
    txtName = ('The name of the current analysis is ' + aName)
    txtKernel = ('Here, a ' + tempAnalysis.kernel + ' kernel was used')
    txtFeat = ('The feature selection was ' + str(tempAnalysis.featureSelect))
    txtFolds = (str(numberFolds) + ' folds were run while estimating age')
    txtAnova = ('ANOVA of Network effect on prediction error returned:\nF = '
                + str(np.round(anova[0], 3)) + ' p = '
                + str(np.round(anova[1], 3)))
    txtAge = ('Their ages ranged from ' + str(np.round(trueAge.min(), 2))
              + ' to ' + str(np.round(trueAge.max(), 2))
              + ' years of age (SD = '
              + str(np.round(np.std(trueAge), 2)) + ')')

    statString = (txtName + '\n' + txtKernel + '\n' + txtFeat
                  + '\n' + txtFolds + '\n' + txtAnova + '\n' + txtAge + '\n'
                  + txtKendallsW)
    dynString = (txtMae + '\n\n' + txtCorr + '\n\n'
                 + txtParm)

    fullString = (statString + '\n\n\n' + dynString)

    # let's build the text
    fig0.text(0.1, 0.2, fullString)

    # now we can build figure 1
    tSP1 = fig1.add_subplot(111)
    tSP1.boxplot(errorVarList)
    plt.setp(tSP1, xticklabels=errorNameList)
    fig1.autofmt_xdate()

    # and now we build figure 5
    tSP5 = fig5.add_subplot(111)
    indMae = list(range(len(maeList)))
    tSP5.bar(indMae, maeList, facecolor='#99CCFF', align='center')
    tSP5.set_ylabel('MAE for network')
    tSP5.set_xticks(indMae)
    # set x-labels to the network names
    tSP5.set_xticklabels(networkNames)
    fig5.autofmt_xdate()

    # NOTE(review): despite its title, figure 7 shows the rank matrix; the
    # correlation display of the absolute errors was disabled. The absolute
    # errors stay available as valueDict[network]['abs'] if it is restored.
    tSP7 = fig7.add_subplot(111)
    tSP7.pcolor(kendallMat)

    # adjust the images
    fig1.subplots_adjust(hspace=0.5, wspace=0.5)
    fig2.subplots_adjust(hspace=0.5, wspace=0.5)
    fig3.subplots_adjust(hspace=0.5, wspace=0.5)
    fig4.subplots_adjust(hspace=0.5, wspace=0.5)
    fig5.subplots_adjust(hspace=0.5, wspace=0.5)
    fig7.subplots_adjust(hspace=0.5, wspace=0.5)

    # now save all that to a pdf
    pp = pdf((aName + '_results.pdf'))
    try:
        for figure in [fig0, fig1, fig2, fig3, fig4, fig5, fig7] + nitFigList:
            pp.savefig(figure)
    finally:
        pp.close()
        # Close every figure used here, including the text figure 0:
        # figures are addressed by fixed numbers, so anything left open
        # would be drawn on again by the next call.
        for figId in [0, 1, 2, 3, 4, 5, 7] + figIds:
            plt.close(figId)

    print('\nDone saving. Have a nice day.')
Esempio n. 12
0
def Visualize(study, analysis):
    """Write the single-network result report of one analysis to a PDF.

    Only the first network of ``study.analyses[analysis]`` is evaluated; a
    message is printed when the analysis holds more than one. A text page,
    an error boxplot and three scatter plots (error, absolute error and
    prediction over the true phenotype) are written to
    ``<analysis>_results.pdf`` in the current working directory.

    Args:
        study: object exposing an ``analyses`` mapping.
        analysis: key into ``study.analyses``; it is also used as report
            title and output file prefix, so it has to be a string.
    """
    print('Fetching analysis ' + analysis + ' now. Hold on to your heads!')
    tempAnalysis = study.analyses[analysis]
    # list() works whether keys() returns a list or a view
    networkNames = list(tempAnalysis.networks.keys())
    networkName = networkNames[0]
    if len(networkNames) > 1:
        print('more than one network in there smartass')

    # begin with single network analysis
    network = tempAnalysis.networks[networkName]
    tempTrue = network.truePheno
    tempPred = network.predictedPheno
    tempErr = tempPred - tempTrue
    tempAbs = np.absolute(tempErr)
    tempMae = np.mean(tempAbs)

    # figure for text displays
    fig0 = plt.figure(0, figsize=(8.5, 11), dpi=150)
    fig0.suptitle(analysis)

    fig1 = plt.figure(1)
    fig1.suptitle('boxplots of error variance')
    tSP1 = fig1.add_subplot(111)
    tSP1.boxplot([tempErr])

    fig2 = plt.figure(2, figsize=(8.5, 11), dpi=150)
    fig2.suptitle('error over true age')
    tSP2 = fig2.add_subplot(111, title=networkName)
    tSP2.plot(tempTrue, tempErr, 'co')

    fig3 = plt.figure(3, figsize=(8.5, 11), dpi=150)
    fig3.suptitle('absolute error over true age')
    tSP3 = fig3.add_subplot(111, title=networkName)
    tSP3.plot(tempTrue, tempAbs, 'co')

    fig4 = plt.figure(4, figsize=(8.5, 11), dpi=150)
    fig4.suptitle('predicted over true age')
    tSP4 = fig4.add_subplot(111, title=networkName)
    tSP4.plot(tempTrue, tempTrue)  # identity line as reference
    tSP4.plot(tempTrue, tempPred, 'co')

    # then start with the texts
    txtMae = ('MAE of ' + networkName + ' = ' + str(np.round(tempMae, 3))
              + '\n')
    tpCorr = st.pearsonr(tempTrue, tempPred)[0]
    txtCorr = ('Pearson\'s r for ' + networkName
               + ' = ' + str(np.round(tpCorr, 3)) + '\n')
    txtParm = ('Parameters for ' + networkName
               + ': C = ' + str(np.round(network.cValue, 3)) + ' E = '
               + str(np.round(network.eValue, 6)) + '\n')

    numberFolds = len(network.cvObject)
    trueAge = tempTrue

    # now create the text for the whole study
    txtName = ('The name of the current analysis is ' + analysis)
    txtKernel = ('Here, a ' + tempAnalysis.kernel + ' kernel was used')
    txtFeat = ('The feature selection was ' + str(tempAnalysis.featureSelect))
    txtFolds = (str(numberFolds) + ' folds were run while estimating age')
    txtAge = ('Their ages ranged from ' + str(np.round(trueAge.min(), 2))
              + ' to ' + str(np.round(trueAge.max(), 2))
              + ' years of age (SD = '
              + str(np.round(np.std(trueAge), 2)) + ')')

    statString = (txtName + '\n' + txtKernel + '\n' + txtFeat
                  + '\n' + txtFolds + '\n' + '\n' + txtAge)
    dynString = (txtMae + '\n\n' + txtCorr + '\n\n' + txtParm)

    fullString = (statString + '\n\n\n' + dynString)

    # let's build the text
    fig0.text(0.1, 0.2, fullString)

    # now save all that to a pdf
    pp = pdf((analysis + '_results.pdf'))
    try:
        for figure in (fig0, fig1, fig2, fig3, fig4):
            pp.savefig(figure)
    finally:
        pp.close()
        # The figures are addressed by fixed numbers; left open, a second
        # call would draw on top of this call's plots.
        for figId in range(5):
            plt.close(figId)

    print('\nDone saving. Have a nice day.')
Esempio n. 13
0
def Main(loadFile):
    """Summarize a pickled analysis object as a multi-page PDF report.

    The analysis is loaded from a gzip-compressed pickle. For every network
    in ``analysis.networks`` the raw error (true - predicted), absolute
    error, MAE, error standard deviation and Shapiro-Wilk p-value are
    computed. If the errors of *all* networks pass the normality check
    (p >= 0.05) a one-way ANOVA is run across the per-network errors.

    The report is written to ``<analysis.name>_results.pdf`` and contains:
        0) a text page with study-wide and per-network information
        1) boxplots of the per-network raw errors
        2) raw error over true age
        3) absolute error over true age
        4) predicted age over true age
        5) bar chart of the per-network MAE

    Parameters:
        loadFile: path to the gzip-compressed pickle of the analysis object.

    Raises:
        ValueError: if the analysis contains no networks.

    Note:
        Matplotlib figures numbered 0 to 5 are (re)used via ``plt.figure(n)``
        and are left open after the function returns.
    """
    # Loading may fail if the class of the analysis object is not importable
    # from the current PYTHONPATH.
    # NOTE(security): unpickling can execute arbitrary code - only load files
    # from a trusted source.
    with gzip.open(loadFile, "rb") as openFile:
        analysis = cPickle.load(openFile)
    aName = analysis.name

    networks = analysis.networks
    networkNames = sorted(networks.keys())
    numberNetworks = len(networkNames)
    if not networkNames:
        raise ValueError("analysis '%s' contains no networks" % aName)

    # collect the per-network statistics
    valueDict = {}
    errList = []  # raw errors, one array per network, input for the ANOVA
    maeList = []  # MAE per network, for the bar chart
    normCount = 0  # number of networks whose errors pass the normality check

    for networkName in networkNames:
        tempNet = networks[networkName]
        tempTrue = tempNet.trueData
        tempPred = tempNet.predictedData
        tempErr = tempTrue - tempPred
        tempAbs = np.absolute(tempErr)
        tempMae = np.mean(tempAbs)
        # p value of the shapiro-wilk test on the raw errors
        tempShapp = st.shapiro(tempErr)[1]
        if tempShapp >= 0.05:
            normCount += 1

        errList.append(tempErr)
        maeList.append(tempMae)
        valueDict[networkName] = {
            "true": tempTrue,
            "pred": tempPred,
            "error": tempErr,
            "abs": tempAbs,
            "std": np.std(tempErr),
            "shapp": tempShapp,
            "mae": tempMae,
        }

    # The ANOVA is only run when every network passed the normality check.
    # 'anova' stays None otherwise so the report can say so instead of
    # failing on an undefined name further down.
    anova = None
    if normCount == numberNetworks:
        print("All networks are nicely normally distributed")
        anova = st.f_oneway(*errList)
        print("\nANOVA has run")
        print("Behold the amazing F of " + str(round(anova[0], 4))
              + " and p " + str(round(anova[1], 4)))
    else:
        print("not all networks are normally distributed")
        print(str(normCount) + " out of " + str(numberNetworks)
              + " networks are normally distributed")

    # Subplot grid: the number of columns is fixed, the rows follow from it.
    # Both must be real integers - matplotlib rejects float grid sizes.
    cols = 2
    rows = int(np.ceil(numberNetworks / float(cols)))

    # figure for text displays
    fig0 = plt.figure(0, figsize=(8.5, 11), dpi=150)
    fig0.suptitle(aName)

    fig1 = plt.figure(1)
    fig1.suptitle("boxplots of error variance")

    fig2 = plt.figure(2, figsize=(8.5, 11), dpi=150)
    fig2.suptitle("error over true age")

    fig3 = plt.figure(3, figsize=(8.5, 11), dpi=150)
    fig3.suptitle("absolute error over true age")

    fig4 = plt.figure(4, figsize=(8.5, 11), dpi=150)
    fig4.suptitle("predicted over true age")

    fig5 = plt.figure(5)
    fig5.suptitle("mean absolute error of the networks")

    # Per-network text lines (MAE, correlation, parameters) are gathered
    # while the per-network subplots are drawn; the study-wide text is
    # assembled after the loop.
    maeLines = []
    corrLines = []
    parmLines = []
    errorVarList = []
    errorNameList = []

    for loc, networkName in enumerate(networkNames, 1):
        tD = valueDict[networkName]
        tempNet = networks[networkName]

        maeLines.append("MAE of " + networkName + " = "
                        + str(np.round(tD["mae"], 3)) + "\n")
        tpCorr = st.pearsonr(tD["true"], tD["pred"])[0]
        corrLines.append("Pearson's r for " + networkName + " = "
                         + str(np.round(tpCorr, 3)) + "\n")
        parmLines.append("Parameters for " + networkName
                         + ": C = " + str(np.round(tempNet.C, 3))
                         + " E = " + str(np.round(tempNet.E, 3)) + "\n")

        # for the boxplots, we have to append the data to a list
        errorVarList.append(tD["error"])
        errorNameList.append(networkName)

        tSP2 = fig2.add_subplot(rows, cols, loc, title=networkName)
        tSP2.plot(tD["true"], tD["error"], "co")

        tSP3 = fig3.add_subplot(rows, cols, loc, title=networkName)
        tSP3.plot(tD["true"], tD["abs"], "co")

        tSP4 = fig4.add_subplot(rows, cols, loc, title=networkName)
        # identity line as reference for a perfect prediction
        tSP4.plot(tD["true"], tD["true"])
        tSP4.plot(tD["true"], tD["pred"], "co")

    # Study-wide numbers are read from the last network (in sorted order),
    # exactly as the values left over from the loop were used before.
    lastNet = networks[networkNames[-1]]
    numberFolds = lastNet.numberFolds
    numberSubs = len(lastNet.subNames)
    trueAge = lastNet.trueData

    # now create the text for the whole study
    txtName = "The name of the current analysis is " + aName
    txtKernel = "Here, a " + analysis.kernel + " kernel was used"
    txtFeat = "The feature selection was " + str(analysis.fs)
    txtFolds = str(numberFolds) + " folds were run while estimating age"
    if anova is not None:
        txtAnova = (
            "ANOVA of Network effect on prediction error returned:\nF = "
            + str(np.round(anova[0], 3))
            + " p = "
            + str(np.round(anova[1], 3))
        )
    else:
        txtAnova = (
            "ANOVA of Network effect on prediction error was not run:\n"
            + str(normCount) + " out of " + str(numberNetworks)
            + " networks have normally distributed errors"
        )
    txtSubs = "There were " + str(numberSubs) + " subjects in this analysis"
    txtAge = (
        "Their ages ranged from "
        + str(np.round(trueAge.min(), 2))
        + " to "
        + str(np.round(trueAge.max(), 2))
        + " years of age (SD = "
        + str(np.round(np.std(trueAge), 2))
        + ")"
    )

    statString = "\n".join(
        [txtName, txtKernel, txtFeat, txtFolds, txtAnova, txtSubs, txtAge]
    )
    dynString = ("".join(maeLines) + "\n\n" + "".join(corrLines)
                 + "\n\n" + "".join(parmLines))
    fullString = statString + "\n\n\n" + dynString

    # let's build the text
    fig0.text(0.1, 0.2, fullString)

    # now we can build figure 1
    tSP1 = fig1.add_subplot(111)
    tSP1.boxplot(errorVarList)
    plt.setp(tSP1, xticklabels=errorNameList)

    # and now we build figure 5
    tSP5 = fig5.add_subplot(111)
    indMae = list(range(len(maeList)))
    tSP5.bar(indMae, maeList, facecolor="#99CCFF", align="center")
    tSP5.set_ylabel("MAE for network")
    tSP5.set_xticks(indMae)
    # set x-labels to the network names
    tSP5.set_xticklabels(networkNames)
    fig5.autofmt_xdate()

    # adjust the images
    for fig in (fig1, fig2, fig3, fig4, fig5):
        fig.subplots_adjust(hspace=0.5, wspace=0.5)

    # now save all that to a pdf; close it even if a page fails to render
    pp = pdf((aName + "_results.pdf"))
    try:
        for fig in (fig0, fig1, fig2, fig3, fig4, fig5):
            pp.savefig(fig)
    finally:
        pp.close()

    print("\nDone saving. Have a nice day.")
Esempio n. 14
0
def Main(inFile, outFile, pdfFile):
    '''
    Build a test network from a whitespace-delimited table, run it and plot it

    Every non-blank line of inFile becomes one subject ('case_1', 'case_2',
    ...). Counting columns from 1, columns 4 and 9 are ignored, column 14 is
    stored as the 'houseprice' phenotype and all remaining columns are used
    as features. The same features and labels are also handed to
    compareLogReg as a comparison model.

    inFile  -- path of the whitespace-delimited, all-numeric input table
    outFile -- path the gzip-compressed pickle of the network is written to
    pdfFile -- path of the PDF with the predicted-over-true plot

    Raises ValueError if a non-blank line has fewer than 14 columns or a
    different number of feature columns than the first data line.
    '''
    skipColumns = (4, 9)
    phenoColumn = 14

    with open(inFile, 'rb') as loadFile:
        fileLines = loadFile.readlines()

    subDir = {}
    # storage for comparing other models; rows are collected in lists and
    # turned into arrays once, instead of re-allocating an array per line
    featRows = []
    labels = []

    for lineNumber, line in enumerate(fileLines, 1):
        useLine = line.strip().split()
        if not useLine:
            # blank lines carry no subject
            continue
        if len(useLine) < phenoColumn:
            # without this check the label of the previous line would be
            # silently reused for this subject
            raise ValueError('line ' + str(lineNumber) + ' of ' + str(inFile)
                             + ' has ' + str(len(useLine))
                             + ' columns, expected at least '
                             + str(phenoColumn))

        # make a new subject
        subName = ('case_' + str(len(subDir) + 1))
        tempSub = pp.Subject(subName, 'test')

        featValues = []
        tempPheno = None
        for column, word in enumerate(useLine, 1):
            if column in skipColumns:
                continue
            if column == phenoColumn:
                tempPheno = float(word)
            else:
                featValues.append(float(word))
        tempFeat = np.array(featValues, dtype=float)

        if featRows and tempFeat.size != featRows[0].size:
            raise ValueError('line ' + str(lineNumber) + ' of ' + str(inFile)
                             + ' has ' + str(tempFeat.size)
                             + ' feature columns, expected '
                             + str(featRows[0].size))

        tempSub.pheno = {}
        tempSub.pheno['houseprice'] = tempPheno
        tempSub.feature = tempFeat
        subDir[subName] = tempSub

        # and add them also to the storage vars
        featRows.append(tempFeat)
        labels.append(tempPheno)
        # progress output: shapes of the feature matrix / label vector so far
        print(str((len(featRows), tempFeat.size)) + '/' + str((len(labels),)))

    featMat = np.vstack(featRows) if featRows else np.array([])
    labVec = np.array(labels, dtype=float)

    # now make a network of it and run that stuff
    numberSubjects = len(subDir)
    print(numberSubjects)
    # make a crossvalidation object
    cvObject = an.cv.KFold(numberSubjects, 10, shuffle=True)

    testNetwork = an.Network('test', cvObject)
    testNetwork.subjects = subDir
    testNetwork.pheno = 'houseprice'
    testNetwork.featureSelect = 'None'
    testNetwork.cValue = 1000
    testNetwork.gridCv = 5
    testNetwork.gridCores = 1
    testNetwork.eValue = 0.001
    testNetwork.kernel = 'rbf'
    # set number of parallel processes in Network
    testNetwork.numberCores = 10
    # make the runs
    print(len(testNetwork.subjects.keys()))
    print(len(testNetwork.cvObject))
    testNetwork.makeRuns()
    # now run the runs
    testNetwork.executeRuns()
    # and also run the other model quickly
    (modelPred, modelTrue) = compareLogReg(featMat, labVec)
    print('\nGot here')
    # now save the result; closing the handle explicitly makes sure the
    # gzip stream is completely written before we move on
    with gzip.open(outFile, 'wb') as outF:
        cPickle.dump(testNetwork, outF, protocol=2)

    # and display the rest
    pPheno = testNetwork.predictedPheno
    tPheno = testNetwork.truePheno

    fig4 = plt.figure(4, figsize=(8.5, 11), dpi=150)
    fig4.suptitle('predicted over true age')

    tSP4 = fig4.add_subplot(111, title=testNetwork.name)
    # identity line as reference for a perfect prediction
    tSP4.plot(tPheno, tPheno)
    tSP4.plot(tPheno, pPheno, 'co')
    tSP4.plot(modelTrue, modelPred, 'mo')

    fig4.subplots_adjust(hspace=0.5, wspace=0.5)

    pdfPages = pdf(pdfFile)
    try:
        pdfPages.savefig(fig4)
    finally:
        pdfPages.close()
    plt.close(4)
    print('Just created ' + pdfFile + '\nAll done here!')
Esempio n. 15
0
def main(inargs):
    """Run the program."""

    # specify source files from a model run, and year and month to plot

    dir_1 = inargs.dir_1
    tim_1 = inargs.mon_1
    if len(tim_1) != 7:
        print('INPUT ERROR: Date(s) must be in YYYY-MM format.')
        sys.exit()  # abort
    print('First source: ', dir_1)
    mod_1 = dir_1.split('/')[-4]
    run_1 = dir_1.split('/')[-3]
    sce_1 = dir_1.split('/')[-2]
    dir_1_var = glob.glob(dir_1 + '*/*/*',
                          recursive=True)  # identify all reported variables
    if dir_1_var == []:
        print(
            'INPUT ERROR: Directory specification error (no variables found).')
        print(dir_1)
        sys.exit()
    if mod_1[0:7] == 'GISS-E2':  # skip *fx* and *fy* variables
        dir_1_var = [i for i in dir_1_var if 'fx' not in i]
        dir_1_var = [i for i in dir_1_var if 'fy' not in i]
        print(
            'WARNING: Skipping *fx* and *fy* from E2 owing to potential dimensionality differences.'
        )
    if inargs.include != None:
        dir_1_var_incl = []
        for i_1, d_1 in enumerate(inargs.include.split(
                ',')):  # iterate over comma-separated list
            var_incl = [i for i in dir_1_var
                        if d_1 in i]  # include only specified variable(s)
            dir_1_var_incl.extend(var_incl)
        dir_1_var = list(dir_1_var_incl)
        if dir_1_var == []:
            print(
                'DATA ERROR: First directory output is missing specified variable(s).'
            )
            print(inargs.include)
            sys.exit()
    elif inargs.exclude != None:
        for i_1, d_1 in enumerate(inargs.exclude.split(
                ',')):  # iterate over comma-separated list
            dir_1_var = [i for i in dir_1_var
                         if d_1 not in i]  # exclude specified variable(s)
        if dir_1_var == []:
            print(
                'DATA ERROR: First directory output is empty beyond excluded variable(s).'
            )
            print(inargs.exclude)
            sys.exit()
    ncs_1 = []
    for i_1, d_1 in enumerate(dir_1_var):
        v_all = sorted(glob.glob(dir_1_var[i_1] + '/*',
                                 recursive=True))  # all versions
        if v_all != []:
            if inargs.first:  # optionally use first version in the output
                f_all = sorted(
                    glob.glob(v_all[0] +
                              '/*.nc'))  # all files in first version
            else:  # otherwise use default last (most recent version)
                f_all = sorted(glob.glob(v_all[-1] + '/*.nc'))
            f_tim = [i for i in f_all if '201412' in i]
            if f_tim == []:
                f_tim = [i for i in f_all if tim_1.replace('-', '') in i]
            if f_tim != []:
                ncs_1.append(f_tim[0])
    try:  # make sure that year and month output exist
        dat_1 = xr.open_dataset(ncs_1[0]).sel(time=tim_1)
    except:
        print(
            'DATA ERROR: First directory output is missing specified year and month.'
        )
        print(ncs_1)
        sys.exit()  # abort if date and month are not found

    # optionally specify source files from a second model run

    if inargs.compare:
        dir_2 = inargs.dir_2
        if inargs.mon_2 != None:
            tim_2 = inargs.mon_2  # optional different year and month from second run
        else:
            tim_2 = tim_1  # or same year and month from second run
        print('Second source: ', dir_2, tim_2)
        mod_2 = dir_2.split('/')[-4]
        run_2 = dir_2.split('/')[-3]
        sce_2 = dir_2.split('/')[-2]
        dir_2_var = glob.glob(dir_2 + '*/*/*', recursive=True)
        if dir_2_var == []:
            print(
                'INPUT ERROR: Directory specification error (no variables found).'
            )
            print(dir_2)
            sys.exit()
        if mod_2[0:7] == 'GISS-E2':  # skip *fx* and *fy* variables
            dir_2_var = [i for i in dir_2_var if 'fx' not in i]
            dir_2_var = [i for i in dir_2_var if 'fy' not in i]
            print(
                'WARNING: Skipping *fx* and *fy* from E2 owing to potential dimensionality differences.'
            )
        dir_2_var = [i for i in dir_2_var if 'fx' not in i]
        if inargs.include != None:
            dir_2_var_incl = []
            for i_2, d_2 in enumerate(inargs.include.split(
                    ',')):  # iterate over comma-separated list
                var_incl = [i for i in dir_2_var
                            if d_2 in i]  # include only specified variable(s)
                dir_2_var_incl.extend(var_incl)
            dir_2_var = list(dir_2_var_incl)
            if dir_2_var == []:
                print(
                    'DATA ERROR: Second directory output is missing specified variable(s).'
                )
                print(inargs.include)
                sys.exit()
        elif inargs.exclude != None:
            for i_2, d_2 in enumerate(inargs.exclude.split(
                    ',')):  # iterate over comma-separated list
                dir_2_var = [i for i in dir_2_var
                             if d_2 not in i]  # exclude specified variable(s)
            if dir_2_var == []:
                print(
                    'DATA ERROR: Second directory output is empty beyond excluded variable(s).'
                )
                print(inargs.exclude)
                sys.exit()
        ncs_2 = []
        for i_2, d_2 in enumerate(dir_2_var):
            v_all = sorted(glob.glob(dir_2_var[i_2] + '/*', recursive=True))
            if v_all != []:
                if inargs.first:
                    f_all = sorted(glob.glob(v_all[0] + '/*.nc'))
                else:
                    f_all = sorted(glob.glob(v_all[-1] + '/*.nc'))
                f_tim = [i for i in f_all if '201412' in i]
                if f_tim == []:
                    f_tim = [i for i in f_all if tim_1.replace('-', '') in i]
                if f_tim != []:
                    ncs_2.append(f_tim[0])
        try:
            dat_2 = xr.open_dataset(ncs_2[0]).sel(time=tim_2)
        except:
            print(
                'DATA ERROR: Second directory output is missing specified year and month.'
            )
            print(ncs_2[0])
            sys.exit()

    # specify local destination for output comparison plots

    if inargs.compare:
        out_pdf = mod_1 + '_' + run_1 + '_' + sce_1 + '_vs_' + mod_2 + '_' + run_2 + '_' + sce_2 + '.pdf'
    else:
        out_pdf = mod_1 + '_' + run_1 + '_' + sce_1 + '.pdf'

    # loop over source files in first model run

    pp = pdf('multipage.pdf'
             )  # initialize multipage package to receive sequential images

    print('Processing first source ...')
    for i_1, f_1 in enumerate(ncs_1):  # loop over variables identified above
        print(f_1)
        dat_1 = xr.open_dataset(f_1)
        var_1 = list(dat_1.data_vars.keys())[-1]  # identify the variable name
        ndims = len(dat_1[var_1].dims)  # determine dimensionality
        if ndims != 2:
            dat_1 = xr.open_dataset(f_1).sel(
                time=tim_1)  # most fields have time
        if ndims == 1:  # data is a scalar (dummy plot)
            fig = plt.figure(figsize=[8.5, 11])
            ax = fig.add_subplot(211)
            fld_1 = dat_1[var_1].isel(time=0)  # scalar value
            ax.annotate('SCALAR VALUE',
                        xy=(0.4, 0.5),
                        xycoords='axes fraction')
            path, fname = os.path.split(f_1)
            parr = path.split(mod_1)
            title = parr[0] + mod_1 + '\n' + parr[
                1] + '/\n' + fname + '\n' + fld_1.attrs['long_name']
            # parse directory name for title
            plt.title(title)
            val_str = ("value = " + "{:.5e}".format(fld_1.data))
            ax.annotate(tim_1 + ' ' + val_str,
                        xy=(0, -0.15),
                        xycoords='axes fraction')

            if inargs.compare:  # optionally search for matching variable in second directory
                search_str = '/' + fname.split('_')[0] + '_' + fname.split(
                    '_')[1]
                matching_file = [i for i in ncs_2 if search_str in i]
            else:
                matching_file = []
            if matching_file != []:
                f_2 = matching_file[0]
                print(f_2)
                dat_2 = xr.open_dataset(f_2)
                var_2 = list(dat_2.data_vars.keys())[-1]
                ax = fig.add_subplot(212)
                fld_2 = dat_2[var_2].isel(time=0)
                ax.annotate('SCALAR VALUE',
                            xy=(0.4, 0.5),
                            xycoords='axes fraction')
                path, fname = os.path.split(f_2)
                parr = path.split(mod_2)
                title = parr[0] + mod_2 + '\n' + parr[
                    1] + '/\n' + fname + '\n' + fld_2.attrs['long_name']
                plt.title(title)
                val_str = ("value = " + "{:.5e}".format(fld_2.data))
                ax.annotate(tim_2 + ' ' + val_str,
                            xy=(0, -0.15),
                            xycoords='axes fraction')
                fig.tight_layout(pad=6)

            pp.savefig()  # completed page
        elif ndims == 2:
            fig = plt.figure(figsize=[8.5, 11])
            ax = fig.add_subplot(
                211, projection=ccrs.PlateCarree(central_longitude=180))
            fld_1 = dat_1[var_1]
            fld_1.plot(ax=ax,
                       transform=ccrs.PlateCarree(),
                       cbar_kwargs={'label': fld_1.units},
                       rasterized=True)
            ax.coastlines()
            path, fname = os.path.split(f_1)
            parr = path.split(mod_1)
            title = parr[0] + mod_1 + '\n' + parr[
                1] + '/\n' + fname + '\n' + fld_1.attrs['long_name']
            plt.title(title)
            val_str = ("min, max, avg = " + "{:.5e}".format(fld_1.min().data) +
                       ", "
                       "{:.5e}".format(fld_1.max().data) + ", " +
                       "{:.5e}".format(fld_1.mean().data))
            ax.annotate(tim_1 + ' ' + val_str,
                        xy=(0, -0.25),
                        xycoords='axes fraction')

            if inargs.compare:  # optionally search for matching variable in second directory
                search_str = '/' + fname.split('_')[0] + '_' + fname.split(
                    '_')[1]
                matching_file = [i for i in ncs_2 if search_str in i]
            else:
                matching_file = []
            if matching_file != []:
                f_2 = matching_file[0]
                print(f_2)
                dat_2 = xr.open_dataset(f_2)
                var_2 = list(dat_2.data_vars.keys())[-1]
                ax = fig.add_subplot(
                    212, projection=ccrs.PlateCarree(central_longitude=180))
                fld_2 = dat_2[var_2]
                fld_2.plot(ax=ax,
                           transform=ccrs.PlateCarree(),
                           cbar_kwargs={'label': fld_2.units},
                           rasterized=True)
                ax.coastlines()
                path, fname = os.path.split(f_2)
                parr = path.split(mod_2)
                title = parr[0] + mod_2 + '\n' + parr[
                    1] + '/\n' + fname + '\n' + fld_2.attrs['long_name']
                plt.title(title)
                val_str = ("min, max, mean = " +
                           "{:.5e}".format(fld_2.min().data) + ", "
                           "{:.5e}".format(fld_2.max().data) + ", " +
                           "{:.5e}".format(fld_2.mean().data))
                ax.annotate(tim_2 + ' ' + val_str,
                            xy=(0, -0.25),
                            xycoords='axes fraction')
                fig.tight_layout(pad=6)

            pp.savefig()
        elif ndims == 3:  # data is lat/lon (simplest case to plot)
            fig = plt.figure(figsize=[8.5, 11])  # initialize letter-size page
            if dat_1[var_1].dims[1] == 'basin':
                # initialize top subplot with line plot
                ax = fig.add_subplot(211)
                fld_1 = dat_1[var_1].isel(basin=0, time=0)  # data to plot
                subtit = ' (basin=0)'
                fld_1.plot(ax=ax)
            else:
                # initialize top subplot with a mapping projection
                ax = fig.add_subplot(
                    211, projection=ccrs.PlateCarree(central_longitude=180))
                fld_1 = dat_1[var_1].isel(time=0)  # data to plot
                subtit = ''
                # plot on specified projection with default color bar, rasterize to reduce file size
                fld_1.plot(ax=ax,
                           transform=ccrs.PlateCarree(),
                           cbar_kwargs={'label': fld_1.units},
                           rasterized=True)
                ax.coastlines()
            # parse directory name for title
            path, fname = os.path.split(f_1)
            parr = path.split(mod_1)
            title = parr[0] + mod_1 + '\n' + parr[
                1] + '/\n' + fname + '\n' + fld_1.attrs['long_name'] + subtit
            plt.title(title)
            # calculate statistics and report below figure
            val_str = ("min, max, mean = " +
                       "{:.5e}".format(fld_1.min().data) + ", "
                       "{:.5e}".format(fld_1.max().data) + ", " +
                       "{:.5e}".format(fld_1.mean().data))
            ax.annotate(tim_1 + ' ' + val_str,
                        xy=(0, -0.25),
                        xycoords='axes fraction')

            if inargs.compare:  # optionally search for matching variable in second directory
                search_str = '/' + fname.split('_')[0] + '_' + fname.split(
                    '_')[1]
                matching_file = [i for i in ncs_2 if search_str in i]
            else:
                matching_file = []
            if matching_file != []:  # if it exists, execute same procedure for matching data
                f_2 = matching_file[0]
                print(f_2)
                dat_2 = xr.open_dataset(f_2).sel(time=tim_2)
                var_2 = list(dat_2.data_vars.keys())[-1]
                if dat_2[var_2].dims[1] == 'basin':
                    ax = fig.add_subplot(212)
                    fld_2 = dat_2[var_2].isel(basin=0, time=0)
                    subtit = ' (basin=0)'
                    fld_2.plot(ax=ax)
                else:
                    ax = fig.add_subplot(
                        212,
                        projection=ccrs.PlateCarree(central_longitude=180))
                    fld_2 = dat_2[var_2].isel(time=0)
                    subtit = ''
                    fld_2.plot(ax=ax,
                               transform=ccrs.PlateCarree(),
                               cbar_kwargs={'label': fld_2.units},
                               rasterized=True)
                    ax.coastlines()
                path, fname = os.path.split(f_2)
                parr = path.split(mod_2)
                title = parr[0] + mod_2 + '\n' + parr[
                    1] + '/\n' + fname + '\n' + fld_2.attrs[
                        'long_name'] + subtit
                plt.title(title)
                val_str = ("min, max, mean = " +
                           "{:.5e}".format(fld_2.min().data) + ", "
                           "{:.5e}".format(fld_2.max().data) + ", " +
                           "{:.5e}".format(fld_2.mean().data))
                ax.annotate(tim_2 + ' ' + val_str,
                            xy=(0, -0.25),
                            xycoords='axes fraction')
                fig.tight_layout(pad=6)

            pp.savefig()  # completed page
        elif ndims == 4:  # narrow down to either one basin or longitude for plotting
            fig = plt.figure(figsize=[8.5, 11])
            ax = fig.add_subplot(211)
            if dat_1[var_1].dims[1] == 'basin':
                fld_1 = dat_1[var_1].isel(basin=0, time=0)
                subtit = ' (basin=0)'
            else:
                fld_1 = dat_1[var_1].isel(lon=0, time=0)
                subtit = ' (lon=0)'
            fld_1.plot(ax=ax,
                       cbar_kwargs={'label': fld_1.units},
                       rasterized=True)
            if dat_1[var_1].dims[1] == ('lev') or dat_1[var_1].dims[1] == (
                    'plev'):
                ax.invert_yaxis()
            if dat_1[var_1].dims[2] == ('lev'):
                ax.invert_yaxis()  # ocean basin case
            path, fname = os.path.split(f_1)
            parr = path.split(mod_1)
            title = parr[0] + mod_1 + '\n' + parr[
                1] + '/\n' + fname + '\n' + fld_1.attrs['long_name'] + subtit
            plt.title(title)
            val_str = ("min, max, mean = " +
                       "{:.5e}".format(fld_1.min().data) + ", "
                       "{:.5e}".format(fld_1.max().data) + ", " +
                       "{:.5e}".format(fld_1.mean().data))
            ax.annotate(tim_1 + ' ' + val_str,
                        xy=(0, -0.25),
                        xycoords='axes fraction')

            if inargs.compare:
                search_str = '/' + fname.split('_')[0] + '_' + fname.split(
                    '_')[1]
                matching_file = [i for i in ncs_2 if search_str in i]
            else:
                matching_file = []
            if matching_file != []:
                f_2 = matching_file[0]
                print(f_2)
                dat_2 = xr.open_dataset(f_2).sel(time=tim_2)
                var_2 = list(dat_2.data_vars.keys())[-1]
                ax = fig.add_subplot(212)
                if dat_2[var_2].dims[1] == 'basin':
                    fld_2 = dat_2[var_2].isel(basin=0, time=0)
                    subtit = ' (basin=0)'
                else:
                    fld_2 = dat_2[var_2].isel(lon=0, time=0)
                    subtit = ' (lon=0)'
                fld_2.plot(ax=ax,
                           cbar_kwargs={'label': fld_2.units},
                           rasterized=True)
                if dat_2[var_2].dims[1] == ('lev') or dat_2[var_2].dims[1] == (
                        'plev'):
                    ax.invert_yaxis()
                if dat_2[var_2].dims[2] == ('lev'):
                    ax.invert_yaxis()  # ocean basin case
                path, fname = os.path.split(f_2)
                parr = path.split(mod_2)
                title = parr[0] + mod_2 + '\n' + parr[
                    1] + '/\n' + fname + '\n' + fld_2.attrs[
                        'long_name'] + subtit
                val_str = ("min, max, mean = " +
                           "{:.5e}".format(fld_2.min().data) + ", "
                           "{:.5e}".format(fld_2.max().data) + ", " +
                           "{:.5e}".format(fld_2.mean().data))
                ax.annotate(tim_2 + ' ' + val_str,
                            xy=(0, -0.25),
                            xycoords='axes fraction')
                plt.title(title)
                fig.tight_layout(pad=6)

            pp.savefig()
        else:  # more than 4 dimensions: also choose a latitude
            fig = plt.figure(figsize=[8.5, 11])
            ax = fig.add_subplot(211)
            fld_1 = dat_1[var_1].isel(lat=0, lon=0, time=0)
            subtit = ' (Lat/Lon=0/0)'
            fld_1.plot(ax=ax,
                       cbar_kwargs={'label': fld_1.units},
                       rasterized=True)
            path, fname = os.path.split(f_1)
            parr = path.split(mod_1)
            title = parr[0] + mod_1 + '\n' + parr[
                1] + '/\n' + fname + '\n' + fld_1.attrs['long_name'] + subtit
            plt.title(title)
            val_str = ("min, max, mean = " +
                       "{:.5e}".format(fld_1.min().data) + ", "
                       "{:.5e}".format(fld_1.max().data) + ", " +
                       "{:.5e}".format(fld_1.mean().data))
            ax.annotate(tim_1 + ' ' + val_str,
                        xy=(0, -0.2),
                        xycoords='axes fraction')

            if inargs.compare:
                search_str = '/' + fname.split('_')[0] + '_' + fname.split(
                    '_')[1]
                matching_file = [i for i in ncs_2 if search_str in i]
            else:
                matching_file = []
            if matching_file != []:
                f_2 = matching_file[0]
                print(f_2)
                dat_2 = xr.open_dataset(f_2)
                var_2 = list(dat_2.data_vars.keys())[-1]
                ax = fig.add_subplot(212)
                fld_2 = dat_1[var_2].isel(lat=0, lon=0, time=0)
                subtit = ' (Lat/Lon=0/0)'
                fld_2.plot(ax=ax,
                           cbar_kwargs={'label': fld_2.units},
                           rasterized=True)
                path, fname = os.path.split(f_2)
                parr = path.split(mod_2)
                title = parr[0] + mod_2 + '\n' + parr[
                    1] + '/\n' + fname + '\n' + fld_2.attrs[
                        'long_name'] + subtit
                val_str = ("min, max, mean = " +
                           "{:.5e}".format(fld_2.min().data) + ", "
                           "{:.5e}".format(fld_2.max().data) + ", " +
                           "{:.5e}".format(fld_2.mean().data))
                ax.annotate(tim_2 + ' ' + val_str,
                            xy=(0, -0.2),
                            xycoords='axes fraction')
                plt.title(title)
                fig.tight_layout(pad=6)

            pp.savefig()

        plt.close()  # clear matplotlib for next page (to avoid overflows)

    # loop over source files in second model run (plot only any missing from first run)

    if inargs.compare:
        print('Processing second source ...')
        for i_2, f_2 in enumerate(ncs_2):
            path, fname = os.path.split(f_2)
            matching_file = [
                i for i in ncs_1
                if fname.split('_')[0] + '_' + fname.split('_')[1] in i
            ]
            if matching_file == []:
                print(f_2)
                dat_2 = xr.open_dataset(f_2)
                var_2 = list(dat_2.data_vars.keys())[-1]
                ndims = len(dat_2[var_2].dims)
                if ndims != 2:
                    dat_2 = xr.open_dataset(f_2).sel(
                        time=tim_2)  # usually time is a dimension
                if ndims == 1:
                    fig = plt.figure(figsize=[8.5, 11])
                    ax = fig.add_subplot(212)
                    fld_2 = dat_2[var_2].isel(time=0)
                    ax.annotate('SCALAR VALUE',
                                xy=(0.4, 0.5),
                                xycoords='axes fraction')
                    path, fname = os.path.split(f_2)
                    parr = path.split(mod_2)
                    title = parr[0] + mod_2 + '\n' + parr[
                        1] + '/\n' + fname + '\n' + fld_2.attrs['long_name']
                    plt.title(title)
                    val_str = ("value = " + "{:.5e}".format(fld_2.data))
                    ax.annotate(tim_2 + ' ' + val_str,
                                xy=(0, -0.15),
                                xycoords='axes fraction')
                    pp.savefig()
                elif ndims == 2:
                    fig = plt.figure(figsize=[8.5, 11])
                    ax = fig.add_subplot(
                        212,
                        projection=ccrs.PlateCarree(central_longitude=180))
                    fld_2 = dat_2[var_2]
                    fld_2.plot(ax=ax,
                               transform=ccrs.PlateCarree(),
                               cbar_kwargs={'label': fld_2.units},
                               rasterized=True)
                    ax.coastlines()
                    path, fname = os.path.split(f_2)
                    parr = path.split(mod_2)
                    title = parr[0] + mod_2 + '\n' + parr[
                        1] + '/\n' + fname + '\n' + fld_2.attrs['long_name']
                    plt.title(title)
                    val_str = ("min, max, mean = " +
                               "{:.5e}".format(fld_2.min().data) + ", "
                               "{:.5e}".format(fld_2.max().data) + ", " +
                               "{:.5e}".format(fld_2.mean().data))
                    ax.annotate(tim_2 + ' ' + val_str,
                                xy=(0, -0.25),
                                xycoords='axes fraction')
                    plt.title(title)
                    pp.savefig()
                elif ndims == 3:
                    fig = plt.figure(figsize=[8.5, 11])
                    if dat_2[var_2].dims[1] == 'basin':
                        ax = fig.add_subplot(212)
                        fld_2 = dat_2[var_2].isel(basin=0, time=0)
                        subtit = ' (basin=0)'
                        fld_2.plot(ax=ax)
                    else:
                        ax = fig.add_subplot(
                            212,
                            projection=ccrs.PlateCarree(central_longitude=180))
                        fld_2 = dat_2[var_2].isel(time=0)
                        subtit = ''
                        fld_2.plot(ax=ax,
                                   transform=ccrs.PlateCarree(),
                                   cbar_kwargs={'label': fld_2.units},
                                   rasterized=True)
                        ax.coastlines()
                    path, fname = os.path.split(f_2)
                    parr = path.split(mod_2)
                    title = parr[0] + mod_2 + '\n' + parr[
                        1] + '/\n' + fname + '\n' + fld_2.attrs[
                            'long_name'] + subtit
                    plt.title(title)
                    val_str = ("min, max, mean = " +
                               "{:.5e}".format(fld_2.min().data) + ", "
                               "{:.5e}".format(fld_2.max().data) + ", " +
                               "{:.5e}".format(fld_2.mean().data))
                    ax.annotate(tim_2 + ' ' + val_str,
                                xy=(0, -0.25),
                                xycoords='axes fraction')
                    pp.savefig()
                elif ndims == 4:
                    fig = plt.figure(figsize=[8.5, 11])
                    ax = fig.add_subplot(212)
                    if dat_2[var_2].dims[1] == 'basin':
                        fld_2 = dat_2[var_2].isel(basin=0, time=0)
                        subtit = ' (basin=0)'
                    else:
                        fld_2 = dat_2[var_2].isel(lon=0, time=0)
                        subtit = ' (lon=0)'
                    fld_2.plot(ax=ax,
                               cbar_kwargs={'label': fld_2.units},
                               rasterized=True)
                    if dat_2[var_2].dims[1] == (
                            'lev') or dat_2[var_2].dims[1] == ('plev'):
                        ax.invert_yaxis()
                    if dat_2[var_2].dims[2] == ('lev'):
                        ax.invert_yaxis()  # ocean basin case
                    path, fname = os.path.split(f_2)
                    parr = path.split(mod_2)
                    title = parr[0] + mod_2 + '\n' + parr[
                        1] + '/\n' + fname + '\n' + fld_2.attrs[
                            'long_name'] + subtit
                    plt.title(title)
                    val_str = ("min, max, mean = " +
                               "{:.5e}".format(fld_2.min().data) + ", "
                               "{:.5e}".format(fld_2.max().data) + ", " +
                               "{:.5e}".format(fld_2.mean().data))
                    ax.annotate(tim_2 + ' ' + val_str,
                                xy=(0, -0.25),
                                xycoords='axes fraction')
                    pp.savefig()
                else:
                    fig = plt.figure(figsize=[8.5, 11])
                    ax = fig.add_subplot(212)
                    fld_2 = dat_2[var_2].isel(lat=0, lon=0, time=0)
                    subtit = ' (Lat/Lon=0/0)'
                    fld_2.plot(ax=ax,
                               cbar_kwargs={'label': fld_2.units},
                               rasterized=True)
                    path, fname = os.path.split(f_2)
                    parr = path.split(mod_2)
                    title = parr[0] + mod_2 + '\n' + parr[
                        1] + '/\n' + fname + '\n' + fld_2.attrs[
                            'long_name'] + subtit
                    plt.title(title)
                    val_str = ("min, max, mean = " +
                               "{:.5e}".format(fld_2.min().data) + ", "
                               "{:.5e}".format(fld_2.max().data) + ", " +
                               "{:.5e}".format(fld_2.mean().data))
                    ax.annotate(tim_2 + ' ' + val_str,
                                xy=(0, -0.2),
                                xycoords='axes fraction')
                    pp.savefig()

    pp.close()  # multipage document complete
    os.popen('mv multipage.pdf ' +
             out_pdf)  # save document to descriptive file name

    print('Output file: ', out_pdf)
Esempio n. 16
0
def Plot_splines():
    """Plot tabulated fluxes together with their spline fits and save them as PDFs.

    For every flavor, and for every entry of ``particles`` matching that
    flavor, energy-dependence and coszen-dependence figures are drawn and
    written to PDF files below ``dirname + '/plots/'``.

    NOTE(review): ``np``, ``dirname``, ``name`` and ``particles`` are not
    defined inside this function; presumably they are module-level names --
    confirm against the rest of the file.
    """
    # Plotting-only dependencies are imported locally.
    import seaborn as sb
    import matplotlib.pyplot as plt
    from matplotlib.lines import Line2D
    from matplotlib.backends.backend_pdf import PdfPages as pdf

    # Choose the magnification factor. In the energy-dependence plots the
    # fluxes are multiplied by E**mag (E being the energy axis in GeV) to
    # better bring out features in steeper regions (mag = 0 for "pure" fluxes):
    mag = 3

    def Load_data(savename):
        """Read back one plot-data file written by Save_data_for_plots().

        The file is ``dirname + '/plotdata/' + savename + '_plotdata.dat'``.

        Returns a tuple ``(xe, xc, datapoints, splines)``:
          * ``xe``         -- first column of the table (the energy axis),
          * ``xc``         -- list of floats parsed from the '# coszen' header line,
          * ``datapoints`` -- columns 1, 3, 5, ... of the table,
          * ``splines``    -- columns 2, 4, 6, ... of the table.

        Raises IndexError if the file contains no line starting with '# coszen'.
        """
        path = dirname + '/plotdata/' + savename + '_plotdata.dat'

        # The coszen axis is stored in a header comment line; the first two
        # whitespace-separated tokens ('#' and 'coszen') are not values.
        with open(path) as handle:
            coszen_headers = [row for row in handle if row.startswith('# coszen')]
        coszen_tokens = coszen_headers[0].split()[2:]
        xc = list(map(float, coszen_tokens))

        # unpack=True transposes the table, so indexing selects file columns.
        table = np.loadtxt(path, unpack=True)
        xe = table[0]
        datapoints = table[1::2]
        splines = table[2::2]
        return (xe, xc, datapoints, splines)

    def Create_title(particle, flavor):
        """Return ``(title, title_flavor)`` for the per-particle and per-flavor figures.

        ``particle[1]`` and ``flavor[1]`` supply the display text inserted into
        the respective title; ``name`` is taken from the surrounding scope.
        """
        # Both titles end identically; only the subject and singular/plural differ.
        tail = 'for ' + name + ', tabulated data and spline fits'
        title = 'atmospheric ' + particle[1] + ' flux ' + tail
        title_flavor = 'atmospheric ' + flavor[1] + ' fluxes ' + tail
        return title, title_flavor

    def Create_axlabels():
        """Return ``(xlabel, ylabel)`` for the energy-dependence plots.

        The GeV exponent shown in the y label is ``mag - 1``, with ``mag``
        read from the enclosing scope.
        """
        gev_exponent = str(mag-1)
        return (
            r'kinetic energy $E$ [GeV]',
            r'flux $\Phi$ [GeV$^{' + gev_exponent + r'}$cm$^{-2}$s$^{-1}$sr$^{-1}$]',
        )

    def Create_label(particle):
        """Derive a short legend label from the display text ``particle[1]``.

        If the text contains 'from', everything up to the first ')' is kept and
        any '(' in that part is removed; otherwise only the first
        space-separated word is used.
        """
        description = particle[1]
        if 'from' in description:
            before_closing_paren = description.split(')')[0]
            return before_closing_paren.replace('(', '')
        return description.split(' ')[0]

    # Global seaborn look applied to every figure created below.
    sb.set_context(context='notebook', font_scale=1.2, rc={"lines.linewidth": 2.0})
    sb.set_style('whitegrid')
    xlabel, ylabel = Create_axlabels()
    markers = '+'
    # The gray data markers are plotted without a label, so their legend
    # entries are built by hand from proxy artists.
    custom_legend = [Line2D([0], [0], color='gray', lw=0, marker=markers,
                            label=r'data for $\cos(\theta)=1$')]
    short_legend = [Line2D([0], [0], color='gray', lw=0, marker=markers, label=r'data')]
    numcols = 3
    # File-name suffix telling magnified plots apart from "pure" ones.
    mag_suffix = ('_mag' + str(mag)) if mag != 0 else ''

    flavors = [
        ('nue',      r'$\nu_{e}$'),
        ('nuebar',   r'$\bar{\nu}_{e}$'),
        ('numu',     r'$\nu_{\mu}$'),
        ('numubar',  r'$\bar{\nu}_{\mu}$'),
        ('nutau',    r'$\nu_{\tau}$'),
        ('nutaubar', r'$\bar{\nu}_{\tau}$')
    ]
    # Currently we have 9 different flux variants to display (total, conv, pi,
    # k, K0, K0L, K0S, prompt, mu). Adjust the length of this linspace
    # according to changes in number of variants (flavor_colors is indexed by
    # the running variant counter p below, so more than 9 matching variants
    # per flavor would raise an IndexError):
    flavor_colors  = plt.cm.jet(np.linspace(0,1,9))

    for flavor in flavors:
        # fig3: all variants of this flavor vs. energy;
        # fig4: all variants of this flavor vs. coszen (3 x numcols panels).
        fig3, ax3   = plt.subplots(1, 1, figsize=(9,5))
        fig3.subplots_adjust(bottom=0.14, top=0.91, left=0.12, right=0.95, wspace=0.2)
        fig4, axes4 = plt.subplots(3, numcols, figsize=(9,5), sharex='col')
        fig4.subplots_adjust(bottom=0.13, top=0.76, left=0.1, right=0.95, wspace=0.2, hspace=0.3)
        fig4.text(0.5, 0.03, r'cosine of zenith angle $\cos(\theta)$', ha='center')
        fig4.text(0.02, 0.5, r'flux $\Phi$ [GeV$^{-1}$cm$^{-2}$s$^{-1}$sr$^{-1}$]', va='center', rotation='vertical')

        # Reset per flavor so the per-flavor legend can never pick up handles
        # left over from a previous flavor (or be undefined altogether).
        handles, labels = [], []
        p = 0  # number of variants plotted so far for this flavor
        for particle in particles:
            # A neutrino flavor must not pick up its antineutrino variants
            # ('numu' is a substring of 'numubar'); an antineutrino flavor
            # name already contains 'bar' and needs no extra check.
            is_anti_flavor = 'bar' in flavor[0]
            matches = (flavor[0] in particle[0]
                       and (is_anti_flavor or 'bar' not in particle[0]))
            if not matches:
                continue

            savename = name + '_' + particle[0]
            xe, xc, datapoints, splines = Load_data(savename)
            # Transpose the data for the coszen dependence plots:
            datapointsT, splinesT = np.transpose(datapoints), np.transpose(splines)
            # Stride over the coszen rows (because we don't want hundreds of
            # fluxes in a plot). Clamped to 1: a zero step would make the
            # slices below raise when fewer than 9 rows are available.
            cit = max(1, len(datapoints) // 9)
            colors  = plt.cm.jet(np.linspace(0,1,len(splines[::cit])))
            title, title_flavor = Create_title(particle, flavor)
            variant_label = Create_label(particle)

            # fig1: this variant vs. energy; fig2: this variant vs. coszen.
            fig1, ax1 = plt.subplots(1, 1, figsize = (9, 5))
            fig1.subplots_adjust(bottom=0.14, top=0.91, left=0.12, right=0.95, wspace=0.2)
            fig2, axes2 = plt.subplots(3, numcols, figsize=(9, 5), sharex='col')
            fig2.subplots_adjust(bottom=0.13, top=0.87, left=0.1, right=0.95, wspace=0.2, hspace=0.3)
            fig2.suptitle(title, fontsize=14)
            fig2.text(0.5, 0.03, r'cosine of zenith angle $\cos(\theta)$', ha='center')
            fig2.text(0.02, 0.5, r'flux $\Phi$ [GeV$^{-1}$cm$^{-2}$s$^{-1}$sr$^{-1}$]', va='center', rotation='vertical')
            fig4.suptitle(title_flavor, fontsize=14)

            ##--- Energy dependence plots ----------------------------------
            # Plot the splines:
            for spline, coszen, color in zip(splines[::cit], xc[::cit], colors):
                ax1.loglog(xe, spline*xe**mag, label='%.2f' % coszen, color=color)
            # Plot the data points (last coszen row only):
            ax1.loglog(xe, datapoints[-1]*xe**mag, lw=0, marker=markers, color='gray', alpha=0.4)
            ax1.set_title(title)
            ax1.set_xlabel(xlabel)
            ax1.set_ylabel(ylabel)
            # A second ax.legend() call replaces the first legend, so the
            # first one is re-attached afterwards as a plain artist.
            leg11 = ax1.legend(handles=custom_legend, loc='upper right')
            ax1.legend(title=r'$\cos(\theta)$', loc='lower left')
            ax1.add_artist(leg11)

            # Per flavor:
            ax3.loglog(xe, datapoints[-1]*xe**mag, lw=0, marker=markers, color='gray', alpha=0.4)
            ax3.loglog(xe, splines[-1]*xe**mag, label=variant_label, ls=particle[2],
                       color=flavor_colors[p], alpha=0.8)
            ax3.set_title(title_flavor)
            ax3.set_xlabel(xlabel)
            ax3.set_ylabel(ylabel)
            # ax3.set_ylim(1e-18)
            leg31 = ax3.legend(handles=custom_legend, loc='upper right')
            ax3.legend(loc='lower left', ncol=2)
            ax3.add_artist(leg31)

            ##--- Coszen dependence plots ----------------------------------
            # One panel per selected energy: every 10th energy index starting
            # at index 21; zip() stops at the shortest input, i.e. at the
            # number of panels at the latest.
            for ax2, ax4, dataset, spline, energy, color in zip(
                axes2.flatten(),
                axes4.flatten(),
                datapointsT[21::10],
                splinesT[21::10],
                xe[21::10],
                colors
            ):
                ax2.tick_params(axis='both', which='major', labelsize=10)
                ax2.yaxis.offsetText.set_fontsize(10)
                ax2.set_title(r'at $E\approx$%.0e' % energy + ' GeV', fontsize=12, loc='right')
                ax2.plot(xc, dataset, lw=0, marker=markers, color='gray', alpha=0.4)
                ax2.plot(xc, spline, color=color)
                ax2.ticklabel_format(axis='y', style='sci', scilimits=(0,0))

                ax4.tick_params(axis='both', which='major', labelsize=10)
                ax4.yaxis.offsetText.set_fontsize(10)
                ax4.set_title(r'at $E\approx$%.0e' % energy + ' GeV', fontsize=12, loc='right')
                ax4.plot(xc, dataset, lw=0, marker=markers, color='gray', alpha=0.4)
                ax4.plot(xc, spline, color=flavor_colors[p], alpha=0.9,
                         ls=particle[2], label=variant_label)
                ax4.ticklabel_format(axis='y', style='sci', scilimits=(0,0))
                # Remember the entries of the most recently drawn panel for
                # the figure-level legend of fig4.
                handles, labels = ax4.get_legend_handles_labels()

            p+=1
            # Open the PDF only once the figures are complete and let the
            # context manager close it even if saving fails.
            with pdf(dirname+'/plots/'+savename+mag_suffix+'.pdf') as pdf_particle:
                pdf_particle.savefig(fig1)
                pdf_particle.savefig(fig2)
            # Release the per-particle figures; otherwise pyplot keeps every
            # one of them alive until the interpreter exits.
            plt.close(fig1)
            plt.close(fig2)

        if p:
            fig4.legend(handles=short_legend, loc='upper left', fontsize=12,
                        bbox_to_anchor=(0.05, 0.45, 0.86, 0.5))
            if handles:
                fig4.legend(handles, labels, loc='upper left', ncol=5,
                            mode='expand', bbox_to_anchor=(0.16, 0.45, 0.80, 0.5),
                            fontsize=12)
        with pdf(dirname+'/plots/perflavor_'+flavor[0]+mag_suffix+'.pdf') as pdf_flavor:
            pdf_flavor.savefig(fig3)
            pdf_flavor.savefig(fig4)
        # Release the per-flavor figures as well.
        plt.close(fig3)
        plt.close(fig4)