# Imports assumed by the functions below (they may already appear earlier in
# the module):
import datetime

import numpy as np
import pandas as pd
import seaborn
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
from sklearn import cluster, manifold
from sklearn.covariance import graphical_lasso

try:
    # quotes_historical_yahoo lived in matplotlib.finance, which has been
    # removed from modern matplotlib; only StockMarketOLD() needs it.
    from matplotlib.finance import quotes_historical_yahoo
except ImportError:
    quotes_historical_yahoo = None


def StockMarket():
    """Fit a graphical lasso to daily log returns and export the resulting
    sparse graph as Gephi edge and node lists."""
    X = pd.read_csv('data/TData9313_final5.csv', index_col=0)
    X.index = pd.to_datetime(X.index)
    X = X['20080101':'20091231']
    # Resample to business days, forward-fill gaps, and drop empty rows and
    # columns. (resample('B', how='last') is the removed legacy pandas API.)
    X = X.resample('B').last().ffill().dropna(axis=0, how='all').dropna(axis=1)
    X = np.log(X).diff()[1:]  # daily log returns

    cov_df = pd.DataFrame(np.round(np.cov(X.T), 5),
                          columns=X.columns, index=X.columns)
    cov_df.to_latex('covariancetable.tex')
    print(cov_df.max())
    print(cov_df)
    print(cov_df.idxmax())

    # Standardize so the graphical lasso effectively works on correlations.
    X /= X.std(axis=0)
    # sklearn's graphical_lasso expects the empirical covariance matrix,
    # not the raw observations.
    covariance_, precision_ = graphical_lasso(np.cov(X.T), 0.6)
    print(pd.DataFrame(precision_))

    gephi = pd.DataFrame()
    nodes = pd.DataFrame()
    nr = 0
    artdf = pd.DataFrame(precision_, columns=X.columns, index=X.columns)
    artdf2 = pd.DataFrame(covariance_, columns=X.columns, index=X.columns)
    print("Create Gephi Files")
    for nr1, i in enumerate(artdf.index):
        for nr2, j in enumerate(artdf.columns):
            # Lower triangle only, so each undirected edge is written once.
            if nr1 > nr2 and abs(artdf.loc[i, j]) > 0:
                gephi.loc[nr, 'Source'] = nr1 + 1
                gephi.loc[nr, 'Target'] = nr2 + 1
                gephi.loc[nr, 'Type'] = 'Undirected'
                gephi.loc[nr, 'Id'] = nr + 1
                gephi.loc[nr, 'Label'] = nr1 + 1
                # NOTE: the covariance-based weight immediately overwrites
                # the precision-based one, so only the covariance weight
                # actually reaches the file.
                gephi.loc[nr, 'Weight'] = abs(artdf.loc[i, j])
                gephi.loc[nr, 'Weight'] = abs(artdf2.loc[i, j])
                gephi.loc[nr, 'name'] = str(i)
                gephi.loc[nr, 'toname'] = str(j)
                nr += 1
        nodes.loc[nr1, 'Nodes'] = nr1 + 1
        nodes.loc[nr1, 'Id'] = nr1 + 1
        nodes.loc[nr1, 'Label'] = str(i).split('feat')[0]
    gephi.to_csv('gephi.csv')
    nodes.to_csv('nodes.csv')
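# A hypothetical helper (not part of the original pipeline) showing the
# standard rescaling from a precision matrix P to partial correlations,
# rho_ij = -P_ij / sqrt(P_ii * P_jj). StockMarketOLD() below applies the same
# normalization, minus the sign, before plotting edge weights; the Gephi
# export above instead uses raw matrix entries.
def precision_to_partial_corr(precision):
    """Convert a precision matrix to a partial-correlation matrix."""
    d = 1.0 / np.sqrt(np.diag(precision))
    partial = -precision * d * d[:, np.newaxis]
    np.fill_diagonal(partial, 1.0)  # convention: unit diagonal
    return partial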
def Main():
    """Sweep the graphical-lasso penalty on synthetic data and plot the
    estimated covariance at each alpha as a heatmap grid."""
    true_cov = np.array([[0.5, 0.2, 0.1],
                         [0.2, 0.6, 0.09],
                         [0.1, 0.09, 0.7]])
    data = np.random.multivariate_normal([0, 0, 0], true_cov, 5000)
    rows, cls = 5, 5
    fig1, axs = plt.subplots(rows, cls, figsize=(20, 16),
                             facecolor='grey', edgecolor='black')
    fig1.subplots_adjust(hspace=.5, wspace=.5)
    axs = axs.ravel()
    for cc in range(rows * cls):
        # graphical_lasso takes the empirical covariance; a larger alpha
        # drives more off-diagonal entries to exactly zero.
        est_cov, est_prec = graphical_lasso(np.cov(data.T), alpha=0.01 * cc)
        print(est_cov)
        seaborn.heatmap(pd.DataFrame(est_cov), ax=axs[cc])
        axs[cc].set_title(cc)
    plt.tight_layout()
    plt.show()
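# Main() sweeps alpha by hand; sklearn can also select the penalty by
# cross-validation. A minimal sketch, assuming a modern sklearn where
# GraphLassoCV has been renamed GraphicalLassoCV (MainCV is not part of the
# original script):
def MainCV():
    from sklearn.covariance import GraphicalLassoCV
    true_cov = np.array([[0.5, 0.2, 0.1],
                         [0.2, 0.6, 0.09],
                         [0.1, 0.09, 0.7]])
    data = np.random.multivariate_normal([0, 0, 0], true_cov, 5000)
    # Unlike the graphical_lasso function, the estimator is fit on raw samples.
    model = GraphicalLassoCV().fit(data)
    print('CV-selected alpha: %.4f' % model.alpha_)
    print(pd.DataFrame(model.precision_))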
def StockMarketOLD():
    ###########################################################################
    # Retrieve the data from the Internet.
    # Choose a reasonably calm time period (not too long ago, so that we get
    # high-tech firms, and before the 2008 crash).
    d1 = datetime.datetime(2005, 1, 1)
    d2 = datetime.datetime(2009, 12, 31)

    # Kraft's symbol has changed from KFT to MDLZ on Yahoo.
    symbol_dict = {
        'TOT': 'Total', 'XOM': 'Exxon', 'CVX': 'Chevron',
        'COP': 'ConocoPhillips', 'VLO': 'Valero Energy',
        'MSFT': 'Microsoft', 'IBM': 'IBM', 'TWX': 'Time Warner',
        'CMCSA': 'Comcast',
        # 'CVC': 'Cablevision', 'YHOO': 'Yahoo', 'DELL': 'Dell',
        'HPQ': 'HP', 'AMZN': 'Amazon', 'TM': 'Toyota', 'CAJ': 'Canon',
        'MTU': 'Mitsubishi', 'SNE': 'Sony',
        # 'F': 'Ford',
        'HMC': 'Honda',
        # 'NAV': 'Navistar',
        'NOC': 'Northrop Grumman', 'BA': 'Boeing', 'KO': 'Coca Cola',
        'MMM': '3M', 'MCD': 'Mc Donalds',
        # 'PEP': 'Pepsi',
        'MDLZ': 'Kraft Foods', 'K': 'Kellogg', 'UN': 'Unilever',
        'MAR': 'Marriott', 'PG': 'Procter Gamble',
        'CL': 'Colgate-Palmolive', 'GE': 'General Electrics',
        'WFC': 'Wells Fargo', 'JPM': 'JPMorgan Chase',
        # 'AIG': 'AIG',
        'AXP': 'American Express', 'BAC': 'Bank of America',
        'GS': 'Goldman Sachs', 'AAPL': 'Apple', 'SAP': 'SAP',
        'CSCO': 'Cisco', 'TXN': 'Texas Instruments', 'XRX': 'Xerox',
        # 'LMT': 'Lockheed Martin',
        'WMT': 'Wal-Mart', 'WBA': 'Walgreen', 'HD': 'Home Depot',
        'GSK': 'GlaxoSmithKline', 'PFE': 'Pfizer', 'SNY': 'Sanofi-Aventis',
        'NVS': 'Novartis', 'KMB': 'Kimberly-Clark', 'R': 'Ryder',
        'GD': 'General Dynamics', 'RTN': 'Raytheon', 'CVS': 'CVS',
        'CAT': 'Caterpillar', 'DD': 'DuPont de Nemours',
        # 'GM': 'General Motors', 'GOOG': 'Google',
        'ORCL': 'Oracle', 'NVO': 'Novo Nordisk',
        'LLY': 'Eli Lilly and Company',
        # 'FB': 'Facebook',
        'MRK': 'Merck Co',
    }
    # An alternative universe of Copenhagen-listed stocks, kept for reference:
    '''
    symbol_dict = {
        'Danske.CO': 'Danske Bank', 'Maersk-B.CO': 'Maersk', 'DSV.CO': 'DSV',
        'FLS.CO': 'FLS', 'Gen.CO': 'Genmab', 'TDC.CO': 'TDC',
        'CARL-B.CO': 'Carlsberg', 'CHR.CO': 'Chr Hansen',
        'COLO-B.CO': 'Coloplast', 'GN.CO': 'GN Store Nord',
        'NDA-DKK.co': 'Nordea', 'Novo-B.co': 'Novo Nordisk',
        'NZYM-B.CO': 'Novozymes', 'PNDORA.CO': 'Pandora', 'Tryg.co': 'Tryg',
        'VWS.CO': 'Vestas', 'WDH.CO': 'William Demant', 'G4s.co': 'G4S',
        'JYSK.CO': 'Jyske Bank', 'KBHL.CO': 'Kobenhavns Lufthavne',
        'RBREW.CO': 'Royal Unibrew', 'ROCK-B.CO': 'Rockwool',
        'SYDB.CO': 'Sydbank', 'TOP.CO': 'Topdanmark',
        #'ALMB.CO': 'Alm Brand',
        'AURI-B.CO': 'Auriga', 'Bava.CO': 'Bavarian Nordic',
        'BO.CO': 'Bang Olufsen', 'DFDS.CO': 'DFDS', 'DNORD.CO': 'DS Norden',
        'GES.CO': 'Greentech', 'IC.CO': 'IC Group', 'JDAN.CO': 'Jeudan',
        #'JUTBK.CO': 'Jutlander Bank', 'MATAS.CO': 'Matas',
        'NKT.CO': 'NKT',
        #'NNIT.CO': 'NNIT',
        'NORDJB.CO': 'Nordjyske Bank',
        #'ONXEO.CO': 'Onxeo', 'OSSR.CO': 'Ossur',
        'PAAL-B.CO': 'Per Aarslef', 'RILBA.CO': 'Ringkobing Landbobank',
        'SAS-DKK.CO': 'SAS', 'SCHO.CO': 'Schouw Co.', 'SIM.CO': 'SimCorp',
        'Solar-B.co': 'Solar B', 'SPNO.CO': 'Spar Nord', 'TIV.CO': 'Tivoli',
        'UIE.CO': 'UIE', 'VELO.CO': 'Veloxis', 'ZEAL.CO': 'Zealand Pharma',
    }
    '''

    symbols, names = np.array(list(symbol_dict.items())).T

    # Sanity check: flag any series whose length differs from the expected
    # 1259 trading days in the window.
    for symbol in symbols:
        print(symbol)
        n = len([q[5] for q in
                 quotes_historical_yahoo(symbol, d1, d2, True, False)])
        if n != 1259:
            print(symbol, n)

    # 'open' is renamed open_ to avoid shadowing the builtin.
    open_ = pd.DataFrame(np.array(
        [[q[5] for q in quotes_historical_yahoo(symbol, d1, d2, True, False)]
         for symbol in symbols]).T)
    close = pd.DataFrame(np.array(
        [[q[6] for q in quotes_historical_yahoo(symbol, d1, d2, True, False)]
         for symbol in symbols]).T)

    # The daily variations of the quotes are what carry most information.
    variation = np.array(close - open_)

    ###########################################################################
    # Learn a graphical structure from the correlations.
    # edge_model = covariance.GraphLassoCV()
    # Standardize the time series: using correlations rather than covariance
    # is more efficient for structure recovery.
    X = variation.copy()
    pd.DataFrame(np.round(np.cov(X.T), 3), columns=symbols,
                 index=symbols).to_latex('covariancetable.tex')
    print(np.max(np.round(np.cov(X.T), 3)))
    X /= X.std(axis=0)
    # As above, graphical_lasso wants the empirical covariance (here the
    # correlation matrix, since X has been standardized).
    covariance_, precision_ = graphical_lasso(np.cov(X.T), 0.3)
    print(pd.DataFrame(precision_))
    # edge_model.fit(X)

    ###########################################################################
    # Cluster using affinity propagation.
    _, labels = cluster.affinity_propagation(covariance_)
    n_labels = labels.max()
    for i in range(n_labels + 1):
        print('Cluster %i: %s' % ((i + 1), ', '.join(symbols[labels == i])))

    ###########################################################################
    # Find a low-dimensional embedding for visualization: find the best
    # position of the nodes (the stocks) on a 2D plane.
    # We use a dense eigen_solver to achieve reproducibility (arpack is
    # initiated with random vectors that we don't control). In addition, we
    # use a large number of neighbors to capture the large-scale structure.
    node_position_model = manifold.LocallyLinearEmbedding(
        n_components=2, eigen_solver='dense', n_neighbors=6)
    embedding = node_position_model.fit_transform(X.T).T

    ###########################################################################
    # Visualization
    plt.figure(1, facecolor='w', figsize=(20, 16))
    plt.clf()
    ax = plt.axes([0., 0., 1., 1.])
    plt.axis('off')
    plt.annotate('From %s to %s' % (d1.strftime('%Y-%m-%d'),
                                    d2.strftime('%Y-%m-%d')),
                 xy=(0.11, -0.37), size=25)
    print(X.shape)
    for i in range(n_labels + 1):
        plt.annotate('Cluster %i: %s' % ((i + 1),
                                         ', '.join(symbols[labels == i])),
                     xy=(-0.43, 0.02 - i * 0.02), size=18)

    # Display a graph of the partial correlations. The precision matrix is
    # rescaled by its diagonal (P_ij / sqrt(P_ii * P_jj)); the sign flip of
    # the usual partial-correlation formula is irrelevant here since only
    # edge magnitudes are plotted.
    # partial_correlations = edge_model.precision_.copy()
    partial_correlations = precision_.copy()
    d = 1 / np.sqrt(np.diag(partial_correlations))
    partial_correlations *= d
    partial_correlations *= d[:, np.newaxis]
    non_zero = (np.abs(np.triu(partial_correlations, k=1)) > 0.02)

    # Plot the nodes using the coordinates of our embedding.
    # (plt.cm.spectral was removed from matplotlib; nipy_spectral replaces it.)
    plt.scatter(embedding[0], embedding[1], s=200 * d ** 2, c=labels,
                cmap=plt.cm.nipy_spectral)

    # Plot the edges: a sequence of (line0, line1, line2, ...), where
    # linen = (x0, y0), (x1, y1), ..., (xm, ym).
    start_idx, end_idx = np.where(non_zero)
    segments = [[embedding[:, start], embedding[:, stop]]
                for start, stop in zip(start_idx, end_idx)]
    values = np.abs(partial_correlations[non_zero])
    lc = LineCollection(segments, zorder=0, cmap=plt.get_cmap('Greys'),
                        norm=plt.Normalize(0, .7 * values.max()))
    lc.set_array(values)
    lc.set_linewidths(15 * values)
    ax.add_collection(lc)

    # Add a label to each node. The challenge here is that we want to
    # position the labels to avoid overlap with other labels.
    for index, (name, label, (x, y)) in enumerate(
            zip(names, labels, embedding.T)):
        dx = x - embedding[0]
        dx[index] = 1
        dy = y - embedding[1]
        dy[index] = 1
        this_dx = dx[np.argmin(np.abs(dy))]
        this_dy = dy[np.argmin(np.abs(dx))]
        if this_dx > 0:
            horizontalalignment = 'left'
            x = x + .002
        else:
            horizontalalignment = 'right'
            x = x - .002
        if this_dy > 0:
            verticalalignment = 'bottom'
            y = y + .002
        else:
            verticalalignment = 'top'
            y = y - .002
        plt.text(x, y, name, size=22,
                 horizontalalignment=horizontalalignment,
                 verticalalignment=verticalalignment,
                 bbox=dict(facecolor='w',
                           edgecolor=plt.cm.nipy_spectral(
                               label / float(n_labels)),
                           alpha=.6))

    plt.xlim(embedding[0].min() - .25 * embedding[0].ptp(),
             embedding[0].max() + .20 * embedding[0].ptp())
    plt.ylim(embedding[1].min() - .20 * embedding[1].ptp(),
             embedding[1].max() + .20 * embedding[1].ptp())
    plt.savefig('Graphs/StockCluster.pdf', bbox_inches='tight')
    plt.savefig('Graphs/StockCluster.svg', bbox_inches='tight')
    plt.show()
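# A hypothetical entry point (the original script does not show how these
# functions are invoked). StockMarket() additionally requires
# data/TData9313_final5.csv, and StockMarketOLD() the removed Yahoo quotes API,
# so only the synthetic-data demo is run by default.
if __name__ == '__main__':
    Main()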