def processOutput(path):
    """Extract bracketed integers from a text file and print batch statistics.

    Each line of ``path`` is searched for its first ``[<digits>]`` token. The
    integers found are arranged into rows of three, the rows are walked in
    consecutive batches of ten (the last batch may be shorter), and for every
    batch the two values returned by ``gf.calcMeanandStd`` are printed on one
    line, separated by a space.

    Args:
        path: Path of the text file to scan.

    Raises:
        ValueError: Propagated from ``np.reshape`` when the number of
            extracted integers is not a multiple of three.
    """
    # Raw string: '\[' and '\d' are regex escapes, not Python string escapes.
    # Compiled once instead of being re-parsed for every line.
    pattern = re.compile(r'\[(\d+)\]')

    rawResult = []
    with open(path, 'r') as f:
        for content in f:
            m = pattern.search(content)
            if m is not None:
                rawResult.append(int(m.group(1)))

    rawArray = np.reshape(np.asarray(rawResult), (-1, 3))
    for i in range(0, rawArray.shape[0], 10):
        tmpMean, tmpStd = gf.calcMeanandStd(rawArray[i:i + 10])
        # A single pre-formatted argument prints identically on Python 2 and 3.
        print("%s %s" % (tmpMean, tmpStd))
def drawFig(datasetTitle,
            data=None,
            path=None,
            n_trails=1,
            type='f1',
            figSavedPath=None):
    """Draw a four-method comparison chart of one metric, then show or save it.

    The input matrix is loaded from ``path`` (comma-separated) or, when
    ``path`` is None, taken from ``data``. Column 0 supplies the x values and
    the metric columns are chosen by ``type``:

    * ``'f1'``           -> columns 3, 7, 11, 15
    * ``'accuracy'``     -> columns 4, 8, 12, 16
    * ``'misclassRate'`` -> 1 minus columns 4, 8, 12, 16
    * ``'precision'``    -> columns 1, 5, 9, 13
    * ``'recall'``       -> columns 2, 6, 10, 14

    The four selected columns are labelled, in order, PCA, DPDPCA, Wishart
    and DPPro. For any other ``type`` the ``data`` argument is used unsliced.

    Args:
        datasetTitle: Figure title; also part of the output file name.
        data: Matrix used when ``path`` is None.
        path: CSV file to load; takes precedence over ``data``.
        n_trails: Number of repeated trials stacked along the rows. When it
            is not 1 the rows are reshaped to
            ``(n_trails, rows / n_trails, columns)`` and reduced with
            ``gf.calcMeanandStd``; otherwise the std is all zeros.
        type: Metric name (see above). The empty string draws the
            line/error-bar variant; every other value draws grouped bars
            for rows 1, 3, 5, 7 and 9 only, so at least ten x positions are
            required in that mode.
        figSavedPath: Prefix to which ``"dppro_<datasetTitle>.pdf"`` is
            appended. When None the figure is shown interactively instead.

    Raises:
        ValueError: If both ``data`` and ``path`` are None.
    """
    import matplotlib
    import matplotlib.pyplot as plt
    from matplotlib.ticker import FuncFormatter

    def to_percent(y, position):
        # Tick formatter: render the fraction y as a percentage. The tick
        # position argument is required by FuncFormatter but not needed.
        s = str(100 * y)

        # The percent symbol needs escaping when LaTeX rendering is on.
        if matplotlib.rcParams['text.usetex'] is True:
            return s + r'$\%$'
        else:
            return s + '%'

    plt.clf()
    # Previously rawData was only bound when a path was given, so calling
    # with data= alone crashed with a NameError.
    if path is not None:
        rawData = np.loadtxt(path, delimiter=",")
    elif data is not None:
        rawData = np.asarray(data)
    else:
        raise ValueError("drawFig requires either `data` or `path`")

    metric_columns = {
        'f1': [3, 7, 11, 15],
        'accuracy': [4, 8, 12, 16],
        'misclassRate': [4, 8, 12, 16],
        'precision': [1, 5, 9, 13],
        'recall': [2, 6, 10, 14],
    }
    # Value comparison: `is` on string literals only works by accident of
    # interning and fails for strings built at run time.
    if type in metric_columns:
        data = rawData[:, metric_columns[type]]
        if type == 'misclassRate':
            data = 1 - data
    print(data.shape)

    n_dim = data.shape[0]
    if n_trails != 1:
        n_dim = int(data.shape[0] / n_trails)
        data = data.reshape(n_trails, n_dim, data.shape[1])
        data_mean, data_std = gf.calcMeanandStd(data)
        print(data_mean.shape)
        print(data_std.shape)
    else:
        data_mean = data
        data_std = np.zeros(data.shape)
    print("Number of points on x-axis: %d" % n_dim)
    x = rawData[:n_dim, 0]

    yMin = min(np.amin(data_mean, axis=0))
    yMax = max(np.amax(data_mean, axis=0))

    # Pad the y-range a little, clamped to [0, 1.05].
    yMin = (yMin - 0.05) if (yMin - 0.05) > 0 else 0
    yMax = (yMax * 1.25) if (yMax * 1.25) < 1 else 1.05

    if type == '':
        # Line chart with error bars: (legend label, column, colour).
        line_specs = [('PCA', 0, 'b'),
                      ('DPDPCA', 1, 'm'),
                      ('Wishart', 2, 'g'),
                      ('DPPro', 3, 'r')]
        lines = []
        for _, col, colour in line_specs:
            plt.errorbar(x,
                         data_mean[:, col],
                         yerr=data_std[:, col],
                         fmt=colour,
                         capsize=4)
            line, = plt.plot(x, data_mean[:, col], colour + '-')
            lines.append(line)

        plt.axis([0, 2, yMin, yMax])
        plt.legend(lines, [spec[0] for spec in line_specs], loc=1)

        plt.xlabel('Epsilon', fontsize=18)
        plt.ylabel('Classification Accuracy', fontsize=18)
        plt.title(datasetTitle, fontsize=18)
        plt.xticks(x)

        formatter = FuncFormatter(to_percent)
        plt.gca().yaxis.set_major_formatter(formatter)
        plt.gcf().subplots_adjust(left=0.15)
    else:
        # Grouped bars, drawn for every other x position only.
        theRange = [1, 3, 5, 7, 9]
        x = x[theRange]
        data_mean = data_mean[theRange, :]
        data_std = data_std[theRange, :]

        ax = plt.gca()
        width = 0.06
        # (legend label, column, horizontal offset, colour), in draw order.
        bar_specs = [('DPDPCA', 1, -0.1, 'm'),
                     ('Wishart', 2, -0.03, 'g'),
                     ('DPPro', 3, 0.03, 'r'),
                     ('PCA', 0, 0.1, 'b')]
        bars = {}
        for label, col, offset, colour in bar_specs:
            bars[label] = ax.bar(x + offset,
                                 data_mean[:, col],
                                 width,
                                 color=colour,
                                 yerr=data_std[:, col],
                                 capsize=2)
        plt.axis([0, 2.1, yMin, yMax])
        legend_order = ('PCA', 'DPDPCA', 'Wishart', 'DPPro')
        ax.legend(tuple(bars[label][0] for label in legend_order),
                  legend_order,
                  loc=1,
                  prop={'size': 8})
        plt.xlabel('Epsilon', fontsize=18)
        plt.ylabel(type, fontsize=18)
        plt.title(datasetTitle, fontsize=18)
        plt.xticks(x)

    if figSavedPath is None:
        plt.show()
    else:
        plt.savefig(figSavedPath + "dppro_" + datasetTitle + '.pdf',
                    format='pdf',
                    dpi=1000)
# Example #3
# 0
def drawF1Score(datasetTitle,
                data=None,
                path=None,
                n_trails=1,
                figSavedPath=None):
    """Plot F1-score against the number of principal components.

    Column 0 of the input supplies the x values; columns 3, 7 and 11 are
    drawn as the PCA, DPDPCA and Wishart curves, each with error bars.

    Args:
        datasetTitle: Figure title; also part of the output file name.
        data: Matrix used when ``path`` is None.
        path: Comma-separated file to load; overrides ``data`` when given.
        n_trails: Number of repeated trials stacked along the rows. When it
            is not 1 the rows are reshaped to ``(n_trails, -1, columns)`` and
            reduced with ``gf.calcMeanandStd``; otherwise the std is zero.
        figSavedPath: Prefix to which ``"numOfPC_<datasetTitle>.pdf"`` is
            appended. When None the figure is shown interactively instead.
    """
    import matplotlib.pyplot as plt

    plt.clf()
    if path is not None:
        data = np.loadtxt(path, delimiter=",")

    # The y-range comes from every non-x column of the raw (un-averaged)
    # rows, padded by 0.05 and clamped to [0, 1.05].
    yMin = min(np.amin(data[:, 1:], axis=0))
    yMax = max(np.amax(data[:, 1:], axis=0))
    yMin = (yMin - 0.05) if (yMin - 0.05) > 0 else 0
    yMax = (yMax + 0.05) if (yMax + 0.05) < 1 else 1.05

    n_dim = data.shape[0]
    # Value comparison: `is not 1` tests object identity, not equality.
    if n_trails != 1:
        n_dim = int(data.shape[0] / n_trails)
        data = data.reshape(n_trails, -1, data.shape[1])
        data_mean, data_std = gf.calcMeanandStd(data)
    else:
        data_mean = data
        data_std = np.zeros(data.shape)
    print("Number of points on x-axis: %d" % n_dim)
    x = data_mean[:, 0]

    # (legend label, column, colour), in draw order.
    curve_specs = [('PCA', 3, 'b'), ('DPDPCA', 7, 'm'), ('Wishart', 11, 'g')]
    curves = {}
    for label, col, colour in curve_specs:
        plt.errorbar(x,
                     data_mean[:, col],
                     yerr=data_std[:, col],
                     fmt=colour,
                     capsize=4)
        curves[label], = plt.plot(x, data_mean[:, col], colour + '-')

    plt.axis([0, x[-1] + 1, yMin, yMax])
    legend_order = ['DPDPCA', 'Wishart', 'PCA']
    plt.legend([curves[label] for label in legend_order], legend_order, loc=4)
    plt.xlabel('Number of Principal Components', fontsize=18)
    plt.ylabel('F1-Score', fontsize=18)
    plt.title(datasetTitle, fontsize=18)
    plt.xticks(x)

    if figSavedPath is None:
        plt.show()
    else:
        plt.savefig(figSavedPath + "numOfPC_" + datasetTitle + '.pdf',
                    format='pdf',
                    dpi=1000)
def drawScore(datasetTitle,
              data=None,
              path=None,
              n_trails=1,
              type='Accuracy',
              figSavedPath=None):
    """Draw grouped bars of F1 or accuracy per x position, then show or save.

    Column 0 of the input supplies the x values. For ``type`` equal to
    ``'f1'`` columns 3, 7 and 11 are used; for ``'accuracy'`` columns 4, 8
    and 12. The three columns are labelled PCA, DPDPCA and PrivateLocalPCA.
    The metric name is matched case-insensitively: the default
    ``'Accuracy'`` previously matched neither the column selection (which
    compared against ``'accuracy'``) nor consistently the axis label, so
    the x column ended up drawn as a bar. For any other ``type`` the
    columns are left unsliced and the y-axis is labelled F1-Score.

    Args:
        datasetTitle: Figure title; also part of the output file name.
            ``'GISETTE'`` gets a slightly lower upper y-limit.
        data: Matrix used when ``path`` is None.
        path: Comma-separated file to load; overrides ``data`` when given.
        n_trails: Number of repeated trials stacked along the rows. When it
            is not 1 the rows are reshaped to ``(n_trails, -1, columns)`` and
            reduced with ``gf.calcMeanandStd``; otherwise the std is zero.
        type: Metric name, ``'f1'`` or ``'accuracy'`` in any letter case.
        figSavedPath: Prefix to which ``"samples_<datasetTitle>_bar.pdf"``
            is appended. When None the figure is shown interactively.
    """
    import matplotlib
    import matplotlib.pyplot as plt
    from matplotlib.ticker import FuncFormatter

    def to_percent(y, position):
        # Tick formatter: render the fraction y as a percentage. The tick
        # position argument is required by FuncFormatter but not needed.
        s = str(100 * y)

        # The percent symbol needs escaping when LaTeX rendering is on.
        if matplotlib.rcParams['text.usetex'] is True:
            return s + r'$\%$'
        else:
            return s + '%'

    plt.clf()
    if path is not None:
        data = np.loadtxt(path, delimiter=",")
    n_dim = data.shape[0]
    # Value comparison: `is not 1` tests object identity, not equality.
    if n_trails != 1:
        n_dim = int(data.shape[0] / n_trails)
        data = data.reshape(n_trails, -1, data.shape[1])
        data_mean, data_std = gf.calcMeanandStd(data)
    else:
        data_mean = data
        data_std = np.zeros(data.shape)
    print("Number of points on x-axis: %d" % n_dim)
    x = data_mean[:, 0]

    # Normalise once so 'Accuracy' and 'accuracy' behave the same everywhere.
    metric = type.lower() if hasattr(type, 'lower') else type
    if metric == 'f1':
        data_mean = data_mean[:, [3, 7, 11]]
        data_std = data_std[:, [3, 7, 11]]
    elif metric == 'accuracy':
        data_mean = data_mean[:, [4, 8, 12]]
        data_std = data_std[:, [4, 8, 12]]

    ax = plt.gca()
    width = 2.3

    # (legend label, column, horizontal offset, colour), in draw order.
    bar_specs = [('PCA', 0, 2.5, 'b'),
                 ('DPDPCA', 1, -2.5, 'm'),
                 ('PrivateLocalPCA', 2, 0, 'c')]
    bars = {}
    for label, col, offset, colour in bar_specs:
        bars[label] = ax.bar(x + offset,
                             data_mean[:, col],
                             width,
                             color=colour,
                             yerr=data_std[:, col],
                             capsize=2)

    # The lower y-limit is derived from the third series only.
    yMin = round(np.amin(data_mean[:, 2]), 1) - 0.1
    if datasetTitle == 'GISETTE':
        plt.axis([5, 55, yMin, 1.05])
    else:
        plt.axis([5, 55, yMin, 1.13])

    legend_order = ('DPDPCA', 'PrivateLocalPCA', 'PCA')
    ax.legend(tuple(bars[label][0] for label in legend_order),
              legend_order,
              loc=1,
              prop={'size': 7})
    plt.xlabel('Samples at Each Data Owner', fontsize=18)
    plt.title(datasetTitle, fontsize=18)
    plt.xticks(x)

    if metric == 'accuracy':
        plt.ylabel('Accuracy', fontsize=18)
        formatter = FuncFormatter(to_percent)
        plt.gca().yaxis.set_major_formatter(formatter)
        plt.gcf().subplots_adjust(left=0.15)
    else:
        plt.ylabel('F1-Score', fontsize=18)
    plt.yticks(np.arange(yMin, 1.05, 0.1))

    if figSavedPath is None:
        plt.show()
    else:
        plt.savefig(figSavedPath + "samples_" + datasetTitle + '_bar.pdf',
                    format='pdf',
                    dpi=1000)
def drawF1Score(datasetTitle, data=None, path=None, figSavedPath=None,
                n_trails=9):
    """Plot mean F1-score with error bars against the number of components.

    The rows are treated as ``n_trails`` consecutive groups of equal length;
    row ``i`` of every group belongs to x position ``i``. Column 0 supplies
    the x values (read from the first group) and columns 1, 2 and 3 are the
    PCA, DPDPCA and PrivateLocalPCA scores. For each series the per-position
    values are stacked into an array of one row per trial and reduced with
    ``gf.calcMeanandStd``.

    Args:
        datasetTitle: Figure title; also part of the output file name. When
            it contains ``'p53'`` the legend goes to the upper left instead
            of the lower right.
        data: Matrix used when ``path`` is None.
        path: Comma-separated file to load; overrides ``data`` when given.
        figSavedPath: Prefix to which ``"dataOwner_<datasetTitle>.pdf"`` is
            appended. When None the figure is shown interactively instead.
        n_trails: Number of repeated trials stacked along the rows. Defaults
            to 9, the value that used to be hard-coded.
    """
    plt.clf()
    if path is not None:
        data = np.loadtxt(path, delimiter=",")
    # Floor division keeps this an int under true division as well; a float
    # here would break both the slice and range() below.
    numOfDim = data.shape[0] // n_trails
    x = data[:numOfDim, 0]
    xBound = x[-1] + 1

    # (legend label, column, colour), in draw order.
    curve_specs = [('PCA', 1, 'b'),
                   ('DPDPCA', 2, 'm'),
                   ('PrivateLocalPCA', 3, 'c')]
    curves = []
    for _, col, colour in curve_specs:
        # data[i::numOfDim] picks the i-th row of every trial group.
        per_position = [data[i::numOfDim, col] for i in range(numOfDim)]
        mean, std = gf.calcMeanandStd(np.asarray(per_position).T)
        plt.errorbar(x, mean, yerr=std, fmt=colour, capsize=4)
        curve, = plt.plot(x, mean, colour + '-')
        curves.append(curve)

    plt.axis([0, xBound, -0.05, 1.05])
    plt.legend(curves,
               [spec[0] for spec in curve_specs],
               loc=2 if 'p53' in datasetTitle else 4,
               fontsize='small')
    plt.xlabel('Number of Principal Components', fontsize=18)
    plt.ylabel('F1-Score', fontsize=18)
    plt.title(datasetTitle, fontsize=18)
    plt.xticks(x)

    # Thin out the major ticks; wider spacing for long x ranges.
    ax = plt.gca()
    if x[-1] > 100:
        majorLocator = MultipleLocator(8)
    else:
        majorLocator = MultipleLocator(2)
    ax.xaxis.set_major_locator(majorLocator)

    if figSavedPath is None:
        plt.show()
    else:
        plt.savefig(figSavedPath + "dataOwner_" + datasetTitle + '.pdf',
                    format='pdf',
                    dpi=1000)
# Example #6
# 0
def drawExplainedVariance(datasetTitle, data=None, path=None, figSavedPath=None):
    """Plot captured-energy curves with error bars for five row groups.

    Rows are sampled with a stride of 21 starting at offsets 0, 5, 9, 15 and
    19 (all indices below 210). Each group is reduced with
    ``gf.calcMeanandStd`` and drawn against the 1-based column index. The
    variable names in the original suggest offset 0 is the noise-free PCA
    result, 5 and 9 are Gaussian-noise runs and 15 and 19 are Wishart-noise
    runs -- NOTE(review): confirm against the code that writes the file.

    Args:
        datasetTitle: Figure title; also part of the output file name.
        data: Matrix used when ``path`` is None.
        path: Comma-separated file to load; overrides ``data`` when given.
        figSavedPath: Prefix to which
            ``"explainedVariance_<datasetTitle>.pdf"`` is appended. When None
            the figure is shown interactively instead.
    """
    plt.clf()
    if path is not None:
        data = np.loadtxt(path, delimiter=",")

    n_cols = data.shape[1]
    if n_cols < 20:
        x = np.arange(1, n_cols + 1)
    else:
        # Floor division keeps x an integer array; it is used as an index
        # below, which a float step would break under true division.
        x = np.arange(1, n_cols + 1, n_cols // 20)

    def plot_group(offset, fmt):
        # Draw mean +/- std of rows offset, offset + 21, ... (below 210).
        rows = np.arange(offset, 210, 21)
        mean, std = gf.calcMeanandStd(data[rows])
        return plt.errorbar(x, mean[x - 1], yerr=std[x - 1], fmt=fmt, capsize=4)

    print(np.arange(0, 210, 21))
    plot_group(0, 'b-')
    plot_group(5, 'r-')
    # The original stored this group's mean under a misspelt name, so the
    # curve silently reused the offset-5 mean with the offset-9 std.
    plot_group(9, 'ro-.')
    plot_group(15, 'y-')
    plot_group(19, 'yo-')

    plt.axis([0, n_cols + 1, 0, 1.1])
    plt.xlabel('Epsilon', fontsize=18)
    plt.ylabel('Captured Energy', fontsize=18)
    plt.title(datasetTitle, fontsize=18)
    plt.xticks(x)

    if figSavedPath is None:
        plt.show()
    else:
        plt.savefig(figSavedPath + "explainedVariance_" + datasetTitle + '.pdf',
                    format='pdf',
                    dpi=1000)
# Example #7
# 0
def drawVariance_x_epsilon(datasetTitle, data=None, path=None, figSavedPath=None):
    """Draw box plots of captured energy at ten epsilon positions.

    The column to plot is the first one whose value in row 0 exceeds 0.8
    (column 1 if there is none). Rows are read in blocks of 21 starting at
    rows 0, 21, ..., 168; within each block the ten rows after the first are
    collected, giving one sample per block for each of the ten x positions
    0.1 .. 1.0. NOTE(review): presumably the first row of each block is the
    noise-free PCA baseline and the next ten are one run per epsilon --
    confirm against the code that writes the file.

    Args:
        datasetTitle: Figure title; also part of the output file name.
        data: Matrix used when ``path`` is None.
        path: Comma-separated file to load; overrides ``data`` when given.
        figSavedPath: Prefix to which
            ``"explainedVariance_<datasetTitle>_box.pdf"`` is appended. When
            None the figure is shown interactively instead.
    """
    plt.clf()
    if path is not None:
        data = np.loadtxt(path, delimiter=",")
    x = np.arange(0.1, 1.1, 0.1)

    # First column of row 0 above the 0.8 threshold, defaulting to 1.
    above = np.flatnonzero(data[0] > 0.8)
    tmpDim = int(above[0]) if above.size else 1
    print("print dimension: %d" % tmpDim)

    block_starts = np.arange(0, 190, 21)
    pcaVal = data[block_starts, tmpDim]
    print(pcaVal)

    # Index arrays (not slices) so a too-short input still raises IndexError.
    gRes = [data[np.arange(start + 1, start + 11), tmpDim]
            for start in block_starts]
    print(gRes)

    gResArray = np.asarray(gRes)
    gMean, _ = gf.calcMeanandStd(gResArray)
    yMin = np.amin(gMean)
    yMax = np.amax(gMean)

    # One box per x position: column i holds that position's samples.
    toPlot = [gResArray[:, i] for i in range(gResArray.shape[1])]
    ax = plt.gca()
    ax.boxplot(toPlot,
               widths=0.05,
               positions=x,
               showfliers=False,
               boxprops={'color': 'indigo'})

    plt.axis([0.05, 1.05, yMin * 0.94, 1.06 * yMax])
    # Format explicitly; raw floats would show up as 0.30000000000000004.
    ax.set_xticklabels(['%.1f' % tick for tick in x])
    plt.xlabel('Epsilon', fontsize=18)
    plt.ylabel('Captured Energy', fontsize=18)
    plt.title(datasetTitle, fontsize=18)
    plt.gcf().subplots_adjust(left=0.15)
    if figSavedPath is None:
        plt.show()
    else:
        plt.savefig(figSavedPath + "explainedVariance_" + datasetTitle + '_box.pdf',
                    format='pdf',
                    dpi=1000)
def drawF1Score(datasetTitle, data=None, path=None, figSavedPath=None):
    """Plot PCA and DPDPCA F1-scores with error bars against training epochs.

    The PCA series is read from every 20th row starting at row 9 and every
    third column starting at column 3; the DPDPCA series from every 20th row
    starting at row 10 and every third column starting at column 4. Each
    series is reduced with ``gf.calcMeanandStd`` and drawn at the x positions
    100, 200, ..., 1000.

    Args:
        datasetTitle: Figure title; also part of the output file name.
        data: Matrix used when ``path`` is None.
        path: Comma-separated file to load; overrides ``data`` when given.
        figSavedPath: Prefix to which ``"NN_<datasetTitle>.pdf"`` is
            appended. When None the figure is shown interactively instead.
    """
    plt.clf()
    if path is not None:
        data = np.loadtxt(path, delimiter=",")
    numOfTrails = data.shape[0] // 10
    print("Number of points on x-axis: %d" % numOfTrails)

    x = np.arange(100, 1100, 100)
    targetDimension = 9
    pcaF1 = data[targetDimension::20, 3::3]
    dpdpcaF1 = data[targetDimension + 1::20, 4::3]

    # Span both series. The limits used to take the minimum from PCA only
    # and the maximum from DPDPCA only, which could clip either curve.
    yMin = min(np.amin(pcaF1), np.amin(dpdpcaF1))
    yMax = max(np.amax(pcaF1), np.amax(dpdpcaF1))
    yMin = (yMin - 0.05) if (yMin - 0.05) > 0 else 0
    yMax = (yMax + 0.05) if (yMax + 0.05) < 1 else 1.05

    curves = []
    for samples, colour in ((pcaF1, 'b'), (dpdpcaF1, 'r')):
        mean, std = gf.calcMeanandStd(np.asarray(samples))
        plt.errorbar(x, mean, yerr=std, fmt=colour, capsize=4)
        curve, = plt.plot(x, mean, colour + '-')
        curves.append(curve)

    plt.axis([0, x[-1] + 100, yMin, yMax])
    plt.legend(curves, ['PCA', 'DPDPCA'], loc=4)
    plt.xlabel('Number of Epochs', fontsize=18)
    plt.ylabel('F1-Score', fontsize=18)
    plt.title(datasetTitle, fontsize=18)
    plt.xticks(x)

    if figSavedPath is None:
        plt.show()
    else:
        plt.savefig(figSavedPath + "NN_" + datasetTitle + '.pdf',
                    format='pdf',
                    dpi=1000)