Example #1
def _filter_terms(self):
    '''
    Removes tokens that appear either too often or too rarely. Returns the corpus of
    filtered token lists and updates the tokens and word_frequencies lists of each
    document to contain only the surviving tokens. Documents left with no tokens are
    dropped from document_dict.
    '''
    index = Index("kmeans_index")
    index.add_documents(self.db_documents)
    index.finalize()
    filtered_terms = index.get_filtered_terms(lowestf=0.1, highestf=0.2)
    corpus = []
    # Iterate over a copy of the items so that documents can be removed safely below.
    for id, document in self.document_dict.items():
        filtered_tokens = []
        for token in document.tokens:
            if token in filtered_terms:
                filtered_tokens.append(token)

        if len(filtered_tokens) == 0: # if all the tokens are removed then this document is worthless
            self.document_dict.pop(id)
        else: # if there are still tokens, amend the token and word frequency lists to include only the relevant tokens
            self.document_dict[id].tokens = filtered_tokens
            self.document_dict[id].word_frequencies = [item for item in self.document_dict[id].word_frequencies if item.word in filtered_tokens]
            corpus.append(filtered_tokens) # only surviving documents contribute a token list to the corpus
    return corpus
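
A minimal usage sketch of the method above, assuming a clusterer object that exposes document_dict and _filter_terms exactly as in the snippet (the clusterer variable and its setup are hypothetical, not part of the original code):

# Hypothetical usage: _filter_terms prunes the corpus and document_dict in step.
corpus = clusterer._filter_terms()
# One token list per surviving document; documents that lost all tokens were dropped.
assert len(corpus) == len(clusterer.document_dict)
for tokens in corpus:
    assert len(tokens) > 0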
Example #2
'''
@author: george

My playground!
'''
import unittest, os
import datetime
from analysis.index import Index
from database.warehouse import WarehouseServer
from database.model.tweets import TwoGroupsTweet, EgyptTweet
from crawlers.CrawlerFactory import CrawlerFactory  # module path assumed; adjust to the project layout

BASE_PATH = os.path.expanduser("~/virtualenvfyp/pythia/data/")
index_path = BASE_PATH + "test_index"
ws = WarehouseServer()
sample_docs = ws.get_n_documents(100, type=TwoGroupsTweet)

# Make sure the index directory exists before opening the index
if not os.path.exists(index_path):
    try:
        os.makedirs(index_path)
    except os.error:
        raise Exception(index_path + " could not be created.")

index = Index(index_path)
for doc in sample_docs:
    index.add_document(doc)
index.finalize()

class TestPlayground(unittest.TestCase):
  
    def test_searching(self):        
        results = index.search_by_term("sales")
        
        calculated = []
        for doc in results:
            calculated.append(doc.get('id'))
            
        expected = ['4f2d602780286c38a7000013', '4f2d603280286c38a700001e']
        self.assertEqual(expected, calculated)
    
#Save the tweets in the db
f = CrawlerFactory()
t = f.get_crawler("topsy")

search_hashtags = "#25jan OR #jan25 OR #egypt OR #tahrir OR #fuckmubarak OR #mubarak \
                   OR #suez OR #DownWithMubarak OR #NOSCAF OR #SCAF OR #cairo"
t.search_for(search_hashtags)
from_date = datetime.datetime(2011, 1, 27, 23, 55, 0)
to_date = datetime.datetime(2011, 1, 29, 0, 0, 0)
t.search_between(from_date=from_date, 
                 to_date=to_date, 
                 granularity_days=0, 
                 granularity_hours=0, 
                 granularity_mins=5)
t.retrieve_items_of_type(EgyptTweet)
t.crawl(only_english=True)

#Index all the documents
docs = ws.get_documents_by_date(from_date, to_date, type=EgyptTweet)
index = Index(index_path)
print 'Started indexing'
index.add_documents(docs)
index.finalize()
print 'Finished indexing'
for term in index.get_top_terms(limit=100):
    print term
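
As listed, the playground module defines TestPlayground but never invokes it; a standard unittest entry point (an assumption, not part of the scraped file) would let the script run its test after the module-level setup:

if __name__ == '__main__':
    unittest.main()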
Example #4
'''
@author: george

My playground!
'''
import unittest, os
import datetime
from analysis.index import Index
from database.warehouse import WarehouseServer
from database.model.tweets import TwoGroupsTweet, EgyptTweet
from crawlers.CrawlerFactory import CrawlerFactory  # module path assumed; adjust to the project layout

BASE_PATH = os.path.expanduser("~/virtualenvfyp/pythia/data/")
index_path = BASE_PATH + "test_index"
ws = WarehouseServer()
sample_docs = ws.get_n_documents(100, type=TwoGroupsTweet)

# Make sure the index directory exists before opening the index
if not os.path.exists(index_path):
    try:
        os.makedirs(index_path)
    except os.error:
        raise Exception(index_path + " could not be created.")

index = Index(index_path)
for doc in sample_docs:
    index.add_document(doc)
index.finalize()


class TestPlayground(unittest.TestCase):
    def test_searching(self):
        results = index.search_by_term("sales")

        calculated = []
        for doc in results:
            calculated.append(doc.get('id'))

        expected = ['4f2d602780286c38a7000013', '4f2d603280286c38a700001e']
        self.assertEqual(expected, calculated)

#Save the tweets in the db
f = CrawlerFactory()
t = f.get_crawler("topsy")

search_hashtags = "#25jan OR #jan25 OR #egypt OR #tahrir OR #fuckmubarak OR #mubarak \
                   OR #suez OR #DownWithMubarak OR #NOSCAF OR #SCAF OR #cairo"

t.search_for(search_hashtags)
##Last update ended at 2011-01-27 09:00:00
from_date = datetime.datetime(2011, 1, 24, 0, 0, 0)
to_date = datetime.datetime(2011, 1, 25, 0, 0, 0)
t.search_between(from_date=from_date,
                 to_date=to_date,
                 granularity_days=0,
                 granularity_hours=0,
                 granularity_mins=5)
t.retrieve_items_of_type(EgyptTweet)
t.crawl(only_english=True)

#Index all the documents
docs = ws.get_documents_by_date(from_date, to_date, type=EgyptTweet)
index = Index(index_path)
print 'Started indexing'
index.add_documents(docs)
index.finalize()
print 'Finished indexing'
for term in index.get_top_terms(limit=100):
    print term