def tfIdf( articles, keywordsFilePath = settings.KEYWORDS_FILEPATH ):
    """Returns a sparse tf-idf matrix for *articles*.

    Counts keyword occurrences in each article and normalizes the
    resulting count matrix with tf-idf weighting.

    :param articles: An iterable of article strings.
        See :func:`cleaner.retrieve.getCleanArticles`
    :param keywordsFilePath: Path to *keywords.csv*
    :type keywordsFilePath: str
    :returns: The tf-idf-normalized keyword count matrix produced by
        :class:`normalize.TfIdf`.
    """
    # Stray trailing semicolon removed from the original line below.
    keywordMap = keywords.getKeywordToIndexMap( keywordsFilePath )
    counts = count.WordCounter( keywordMap )( articles )
    return normalize.TfIdf()( counts )
columns = columns, default_fill_value = 0 ) def _getCountRows( args ): timestamp, aggregator, wordCounter = args try: date = timestamp.date() except AttributeError: date = timestamp counts = wordCounter( retrieve.getDailyArticles( date ) ) try: return ( date, aggregator( counts ) ) except TypeError: return ( date, counts ) if __name__ == "__main__": begin = datetime.date( 2011, 1, 3 ) end = datetime.date( 2013, 11, 27 ) keywordsFile = join( settings.KEYWORDS_DIR, 'splist.csv' ) tickerList = keywords.getTickerList( keywordsFile ) keywordsMap = keywords.getKeywordToIndexMap( keywordsFile ) empiricalDf = getEmpiricalDataFrame( tickerList, begin, end, retrieve.adjustedClosesFilepath( filename = 'cleanSP.csv' ) ) countDf = getCountDataFrame( tickerList, count.WordCounter( keywordsMap ), empiricalDf.index ) tfidf = normalize.TfIdf()( countDf ) empiricalDf = empiricalDf.ix[ tfidf.index ] tfidf.corr().to_csv( join( settings.RESULTS_DIR, 'hft_CountCorr.csv' ) ) empiricalDf.corr().to_csv( join( settings.RESULTS_DIR, 'hft_EmpCorr.csv' ) ) # corr.to_csv( join( settings.RESULTS_DIR, 'corrtest_withSent_all.csv' ) )
# NOTE(review): the formatting of this chunk was lost (everything collapsed
# onto one physical line), and the trailing ``matrices = { ... }`` dict
# literal is cut off at the end of the visible chunk, so the code is left
# byte-identical below rather than reconstructed.
# NOTE(review): the assignment ``matrices = {`` at the end of this line
# shadows the ``matrices`` module used earlier on the same line
# (``matrices.getEmpiricalDataFrame`` / ``matrices.getCountDataFrame``);
# consider renaming the dict (e.g. ``results``) once the full file is in
# view.
# NOTE(review): ``GraphLassoCV`` was renamed ``GraphicalLassoCV`` in
# scikit-learn 0.22 -- TODO confirm the pinned sklearn version.
import sentiment from copy import deepcopy import numpy as np from sklearn.covariance import GraphLassoCV from sklearn import manifold, cluster from matplotlib.collections import LineCollection import pylab as pl ############################################################################## # Retrieve the data begin = datetime.date( 2011, 1, 3 ) end = datetime.date( 2013, 11, 27 ) tickerList = keywords.getTickerList() keywordsMap = keywords.getKeywordToIndexMap() sentCounter = count.SentimentWordCounter( keywordsMap, sentiment.classifier() ) mentionCounter = count.WordCounter( keywordsMap ) empiricalDf = matrices.getEmpiricalDataFrame( tickerList, begin, end )[ tickerList ] getTfIdf = lambda wordCounter, aggregator: normalize.TfIdf()( matrices.getCountDataFrame( tickerList, wordCounter, empiricalDf.index, aggregator = aggregator ) )[ tickerList ] tfIdfSentArticle = getTfIdf( sentCounter, None )[ tickerList ] tfIdfSentDay = getTfIdf( sentCounter, np.sum )[ tickerList ] tfIdfMentionArticle = getTfIdf( mentionCounter, None )[ tickerList ] tfIdfMentionDay = getTfIdf( mentionCounter, np.sum )[ tickerList ] matrices = { 'Empirical' : { 'By Day' : { 'Data' : empiricalDf } }, 'Signed Mentions' : { 'By Day' : { 'Data' : tfIdfSentDay },