Ejemplo n.º 1
0
def create_biplot(sIn, lWanted):
    """Run a PCA on the data loaded from ``sIn`` and display its biplot.

    The flat array returned by ``csv2np(sIn)`` is reshaped to one column per
    entry of ``lWanted``, standardised column-wise (zero mean, unit standard
    deviation) and passed to statsmodels' ``pcasvd``.  The biplot is drawn on
    matplotlib figure 2 with the axes fixed to [-1.2, 1.2] in both directions.

    When ``csv2np`` reports a non-empty component name, the dot product
    between that variable's biplot coordinates and those of the
    ``"TotalSpend"`` variable is printed and shown in the figure titles.

    Args:
        sIn: Input handed unchanged to ``csv2np`` (presumably a CSV path --
            confirm against ``csv2np``).
        lWanted: Column names, in the order the values are laid out in the
            loaded array.  Must contain ``"TotalSpend"`` and the component
            reported by ``csv2np`` whenever that component is non-empty.

    Raises:
        ValueError: If the reported component or ``"TotalSpend"`` is not
            listed in ``lWanted``.
    """
    from statsmodels.sandbox.tools.tools_pca import pcasvd

    aData, sComponent, sValue = csv2np(sIn)

    # One column per wanted variable; transposing yields the columns in
    # lWanted order so they can be paired with their names.
    aColumns = aData.reshape((-1, len(lWanted)))
    df = pd.DataFrame(dict(zip(lWanted, aColumns.T)))
    data = df[lWanted]

    # Standardise here, so pcasvd is told not to demean again.
    data = (data - data.mean()) / data.std()
    pca = pcasvd(data, keepdim=0, demean=False)

    plt.figure(2)
    lX, lY = biplot(plt, pca, labels=data.index, xpc=0, ypc=1, bPoints=False)

    if sComponent != "":
        # Dot product for one variable against total spend -- an
        # investigative tool for categorical units.
        sTot = "TotalSpend"
        for sName in (sComponent, sTot):
            if sName not in lWanted:
                raise ValueError(
                    'column "{}" is not listed in lWanted'.format(sName))

        # lX / lY are indexed by the variable's position in lWanted.
        iIndex1 = lWanted.index(sComponent)
        xVar = lX[iIndex1]
        yVar = lY[iIndex1]
        print('{}:  {}, {}'.format(sComponent, xVar, yVar))

        iIndex2 = lWanted.index(sTot)
        xTot = lX[iIndex2]
        yTot = lY[iIndex2]
        print('{}:  {}, {}'.format(sTot, xTot, yTot))

        rDotProd = (xVar * xTot) + (yVar * yTot)
        print("Dot product = ", rDotProd)

        plt.suptitle('"{}"  for ({} . {})'.format(sValue, sComponent, sTot))
        plt.title("Dot product = {}".format(round(rDotProd, 4)))

    plt.axis([-1.2, 1.2, -1.2, 1.2])
    plt.show()
Ejemplo n.º 2
0
def create_biplot(sIn, lWanted):
    """Standardise the data read from ``sIn``, run a PCA and show the biplot.

    ``csv2np(sIn)`` supplies a flat array plus a component name and a value
    label.  The array is reshaped so that each entry of ``lWanted`` names one
    column, every column is scaled to zero mean and unit standard deviation,
    and the result is decomposed with statsmodels' ``pcasvd``.  The biplot is
    drawn on matplotlib figure 2 with both axes limited to [-1.2, 1.2].

    If the component name is not the empty string, the dot product of that
    variable's biplot coordinates with those of ``"TotalSpend"`` is printed
    and used for the figure's titles.

    Args:
        sIn: Passed straight to ``csv2np`` (presumably a CSV file path --
            confirm against ``csv2np``).
        lWanted: List of column names matching the layout of the loaded
            values.  It has to include ``"TotalSpend"`` and the component
            returned by ``csv2np`` when that component is non-empty.

    Raises:
        ValueError: When the component or ``"TotalSpend"`` is missing from
            ``lWanted``.
    """
    from statsmodels.sandbox.tools.tools_pca import pcasvd

    aData, sComponent, sValue = csv2np(sIn)

    # Reshape to (observations, variables) and name each column; iterating
    # the transpose walks the columns in lWanted order.
    aDataReshape = aData.reshape((-1, len(lWanted)))
    dDataReshaped = dict(zip(lWanted, aDataReshape.T))
    data = pd.DataFrame(dDataReshaped)[lWanted]

    # The data is centred and scaled here, hence demean=False below.
    data = (data - data.mean()) / data.std()
    pca = pcasvd(data, keepdim=0, demean=False)

    plt.figure(2)
    lX, lY = biplot(plt, pca, labels=data.index, xpc=0, ypc=1, bPoints=False)

    if sComponent != "":
        # Get the dot product for a variable -- investigative tool for
        # categorical units.
        sTot = "TotalSpend"
        lMissing = [s for s in (sComponent, sTot) if s not in lWanted]
        if lMissing:
            raise ValueError(
                'column "{}" is not listed in lWanted'.format(lMissing[0]))

        # Coordinates returned by biplot are looked up by column position.
        iIndex1 = lWanted.index(sComponent)
        xVar, yVar = lX[iIndex1], lY[iIndex1]
        print('{}:  {}, {}'.format(sComponent, xVar, yVar))

        iIndex2 = lWanted.index(sTot)
        xTot, yTot = lX[iIndex2], lY[iIndex2]
        print('{}:  {}, {}'.format(sTot, xTot, yTot))

        rDotProd = (xVar * xTot) + (yVar * yTot)
        print("Dot product = ", rDotProd)

        plt.suptitle('"{}"  for ({} . {})'.format(sValue, sComponent, sTot))
        plt.title("Dot product = {}".format(round(rDotProd, 4)))

    plt.axis([-1.2, 1.2, -1.2, 1.2])
    plt.show()
Ejemplo n.º 3
0
def main():
    """Run a PCA on state.x77 from R and generate its biplot.

    Reads ``data/state.x77`` as CSV, prints summary statistics and the first
    rows, standardises six selected columns, runs ``pcasvd`` on them and
    draws the biplot on matplotlib figure 1.  Observations are coloured by
    the k-means cluster (three clusters) they are assigned to.
    """
    df = pd.read_csv('data/state.x77')
    # Single-argument print calls behave the same on Python 2 and Python 3.
    print(df.describe())
    print(df.head())

    columns = ['Population', 'Income', 'Illiteracy',
               'Life Exp', 'Murder', 'HS Grad']

    # Standardise here, so pcasvd is told not to demean again.
    data = df[columns]
    data = (data - data.mean()) / data.std()
    pca = pcasvd(data, keepdim=0, demean=False)

    # One matplotlib colour code per cluster; the palette length and the
    # cluster count must stay in step.
    palette = 'gby'
    values = data.values
    centroids, _ = kmeans(values, len(palette))
    idx, _ = vq(values, centroids)

    colors = [palette[i] for i in idx]

    plt.figure(1)
    biplot(plt, pca, labels=data.index, colors=colors,
           xpc=1, ypc=2)
    plt.show()
Ejemplo n.º 4
0
def main():
    """Run a PCA on state.x77 from R and generate its biplot.

    The CSV file ``data/state.x77`` is loaded, its summary statistics and
    first rows are printed, and six of its columns are standardised and fed
    to ``pcasvd``.  The resulting biplot is drawn on matplotlib figure 1,
    with each observation coloured according to its k-means cluster (three
    clusters, one colour code each).
    """
    df = pd.read_csv('data/state.x77')
    # Parenthesised single-argument form: valid on both Python 2 and 3.
    print(df.describe())
    print(df.head())

    columns = [
        'Population', 'Income', 'Illiteracy', 'Life Exp', 'Murder', 'HS Grad'
    ]

    # Columns are centred and scaled here, hence demean=False below.
    selected = df[columns]
    data = (selected - selected.mean()) / selected.std()
    pca = pcasvd(data, keepdim=0, demean=False)

    # The number of clusters is tied to the number of available colour codes.
    cluster_colors = 'gby'
    values = data.values
    centroids, _ = kmeans(values, len(cluster_colors))
    idx, _ = vq(values, centroids)

    colors = [cluster_colors[i] for i in idx]

    plt.figure(1)
    biplot(plt, pca, labels=data.index, colors=colors, xpc=1, ypc=2)
    plt.show()
Ejemplo n.º 5
0
# f.write("];\n")
# f.write("criteria = {")
# for crit in criteria:
#     f.write("'" + crit + "' ")
# f.write('};')
# f.close()
#
# Dump the flows to CSV with the criteria names as header row, read them back
# as a DataFrame, run a PCA and show the biplot coloured by cluster.
#
# NOTE: this mutates `uninetflows` in place -- the header row stays at index 0
# after this point.
uninetflows.insert(0,criteria)
# NOTE(review): on Python 3 the csv module documentation recommends opening
# the file with newline='' -- confirm which interpreter this script targets.
with open("uninetflows.csv", "w") as f:
    writer = csv.writer(f)
    writer.writerows(uninetflows)

# The first CSV row (the criteria names written above) becomes the header, so
# the criteria can be selected by name.
df = pd.io.parsers.read_csv('uninetflows.csv')
data = df[criteria]
# Standardisation is currently disabled; the PCA runs on the values as read,
# and demean=False is passed to pcasvd.
# data = (data - data.mean()) / data.std()
pca = pcasvd(data, keepdim=0, demean=False)
# One colour code per entry of clust_repart, looked up by index in a literal
# that repeats 'kcgrbmy' (presumably so larger cluster ids cycle through the
# same seven colours); an index beyond the literal raises IndexError.
colors = ['kcgrbmykcgrbmykcgrbmykcgrbmykcgrbmykcgrbmykcgrbmykcgrbmykcgrbmykcgrbmykcgrbmykcgrbmy'[i] for i in clust_repart]
# Every point is labelled with '*'; the lookup is bounded by the literal's
# length in the same way as the colours.
labels = ['************************************************************************************'[i] for i in clust_repart]
plt.figure(1)
# Alternative kept for reference: label the points with the DataFrame index.
# biplot(plt, pca, labels=data.index, colors=colors, xpc=1, ypc=2)
biplot(plt, pca, labels=labels, colors=colors, xpc=1, ypc=2)
plt.show()

# iter = 0
# sols = []
# while iter < 5:
#     prob.solve(pulp.GLPK())
#     print(LpStatus[prob.status])
#     sols.append(prob.variables())
#
#     iter += 1