def __init__(self, name, type, Model):
    """
    Bind the controller to one index directory and open its session server.

    Input :
        - name : 'ooshop', 'monoprix' ...
        - type : path segment placed between BASE_PATH and name
        - Model : database entity model, for example : MonoprixProduct
    """
    self.name = name
    self.type = type
    self.Model = Model
    # Index location on disk: <BASE_PATH>/<type>/<name>
    path_parts = [IndexController.BASE_PATH, type, name]
    self.dir_path = '/'.join(path_parts)
    self.service = SessionServer(self.dir_path)
class IndexController(object):
    """
    Index controller. This class can:
        - build a complete index from the database
        - add documents to an existing index
        - perform a query against the index
    """

    BASE_PATH = '/tmp/dalliz/matcher'
    METHOD = 'tfidf'
    MAX_RESULTS = 20

    # A token is a run of at least three ASCII letters. Compiled once here
    # instead of on every call to the document-building methods.
    _TOKEN_RE = re.compile(r'\b[a-zA-Z]{3,}\b')

    def __init__(self, name, type, Model):
        """
        Bind the controller to one index directory and open its session server.

        Input :
            - name : 'ooshop', 'monoprix' ...
            - type : path segment placed between BASE_PATH and name
            - Model : database entity model, for example : MonoprixProduct
        """
        self.name = name
        self.type = type
        self.Model = Model
        # Index location on disk: <BASE_PATH>/<type>/<name>
        self.dir_path = '/'.join([self.BASE_PATH, self.type, self.name])
        self.service = SessionServer(self.dir_path)

    def _to_document(self, entity):
        """
        Convert a database entity into an index document.

        Returns {'id': entity.id, 'tokens': [...]} where the tokens are taken
        from the lower-cased, unaccented entity name.
        """
        normalized_name = unaccent(entity.name.lower())
        return {'id': entity.id, 'tokens': self._TOKEN_RE.findall(normalized_name)}

    def build_all_index(self):
        """
        Building the index from scratch: train the model on every entity of
        self.Model, then index those same documents.
        """
        documents = self.get_documents()
        self.service.train(documents, method=self.METHOD)
        self.service.index(documents)

    def query(self, document):
        """
        Querying index with a document.

        Input :
            - document : {'tokens': [list of words]}

        Returns whatever the session server's find_similar returns, limited
        to MAX_RESULTS results.
        """
        return self.service.find_similar(document, max_results=self.MAX_RESULTS)

    def get_documents(self, datetime=None):
        """
        Return the entities of self.Model as index documents.

        Input :
            - datetime : when not None, only entities whose `created` field
              is greater than or equal to this value are returned; when None,
              every entity is returned.

        Returns a list of {'id': ..., 'tokens': [...]} dictionaries.
        """
        entities = self.Model.objects.all()
        if datetime is not None:
            entities = entities.filter(created__gte=datetime)
        return [self._to_document(entity) for entity in entities]

    def add_documents(self, documents):
        """
        Adding documents to the index, if document id already exists in
        index, it will override it.

        Input :
            - documents = [{'id':,'tokens':..}]

        If indexing raises AttributeError (presumably because no model has
        been trained yet -- confirm against the session server), a model is
        trained on these documents and the indexing is retried, so the
        documents are not silently dropped.
        """
        try:
            self.service.index(documents)
        except AttributeError:
            self.service.train(documents, method=self.METHOD)
            # Training alone does not add the documents to the index.
            self.service.index(documents)