Exemple #1
0
class TaxCatalogBuilder:
    '''
    Holds the state used while building a catalog against a Solr server:
    a pysolr client, record/byte counters, a buffer of records and a set of
    stopwords.
    '''

    def __init__(self, server=None):
        '''
           @param server: solr server http URL; Not solrhome -- this is not SolrEmbedded.
                          When None, the module-level SOLR_SERVER is used.
        '''

        # set_server() swaps this URL for a pysolr client when it succeeds.
        self.server = server
        if server is None:
            self.server = SOLR_SERVER

        # Plain ints: Python 2 promotes them to long on overflow, and the old
        # ``0l`` literal is a syntax error on Python 3.
        self._record_count = 0
        self._byte_count = 0
        self._add_byte_count = 0

        self._records = []
        self.count = 0
        self.set_server(self.server)

        # Kept as a function-level import, as in the original code.
        from CommonsUtils import ConfigUtility
        self.utility = ConfigUtility(None)
        self.stopwords = set()

    def add_stopwords(self, stopfile):
        '''
           Merge the entries returned by self.utility.loadListFromFile(stopfile)
           into self.stopwords.

           @param stopfile: path of the stopwords file
           @raise Exception: if stopfile does not exist
        '''

        if not os.path.exists(stopfile):
            raise Exception("No stopwords found at " + stopfile)

        print("Loading stopwords  %s" % (stopfile,))
        _stopwords_list = self.utility.loadListFromFile(stopfile)
        # update() adds each entry; add() would try to hash the list itself
        # and fail with "unhashable type: 'list'".
        self.stopwords.update(_stopwords_list)

    def get_starting_id(self, cat):
        '''
           Look up the value registered for a catalog in CATALOG_REGISTRY.

           @param cat: catalog key
           @return: the registered value for cat
           @raise Exception: if cat has no entry in CATALOG_REGISTRY
        '''
        offset = CATALOG_REGISTRY.get(cat)
        # Compare against None so that a registered value of 0 is still valid.
        if offset is None:
            raise Exception("Catalog is not registered: %s" % (cat,))

        return offset

    def set_server(self, server):
        '''
           Remember the server URL and, when one is given, create a pysolr
           client for it in self.server.

           @param server: solr server http URL; a false value only records the
                          URL and leaves self.server as it was.
        '''
        self.server_url = server
        if not self.server_url:
            return

        try:
            from pysolr import Solr
            self.server = Solr(self.server_url, timeout=600)
            print("SERVER  %s %s" % (self.server_url, self.server))

        except Exception as err:
            # Best effort: report the failure and keep going; self.server
            # keeps whatever value it had before this call.
            print("Problem with that server %s, ERR=%s" % (self.server_url,
                                                           err))
Exemple #2
0
class TaxCatalogBuilder:
    '''
    Holds the state used while building a catalog against a Solr server:
    a pysolr client, record/byte counters, a buffer of records and a set of
    stopwords.
    '''

    def __init__(self, server=None):
        '''
           @param server: solr server http URL; Not solrhome -- this is not SolrEmbedded.
                          When None, the module-level SOLR_SERVER is used.
        '''

        # On success set_server() replaces this URL with a pysolr client.
        self.server = server
        if server is None:
            self.server = SOLR_SERVER

        # ``0`` instead of ``0l``: Python 2 ints grow into longs on their own
        # and the ``l`` suffix does not parse on Python 3.
        self._record_count = 0
        self._byte_count = 0
        self._add_byte_count = 0

        self._records = []
        self.count = 0
        self.set_server(self.server)

        # Kept as a function-level import, as in the original code.
        from CommonsUtils import ConfigUtility
        self.utility = ConfigUtility(None)
        self.stopwords = set()

    def add_stopwords(self, stopfile):
        '''
           Merge the entries returned by self.utility.loadListFromFile(stopfile)
           into self.stopwords.

           @param stopfile: path of the stopwords file
           @raise Exception: if stopfile does not exist
        '''

        if not os.path.exists(stopfile):
            raise Exception("No stopwords found at " + stopfile)

        print("Loading stopwords  %s" % (stopfile,))
        _stopwords_list = self.utility.loadListFromFile(stopfile)
        # A list is unhashable, so set.add(list) raises TypeError; update()
        # inserts the individual entries instead.
        self.stopwords.update(_stopwords_list)

    def get_starting_id(self, cat):
        '''
           Look up the value registered for a catalog in CATALOG_REGISTRY.

           @param cat: catalog key
           @return: the registered value for cat
           @raise Exception: if cat has no entry in CATALOG_REGISTRY
        '''
        offset = CATALOG_REGISTRY.get(cat)
        # Only a missing entry is an error; a registered 0 must be returned.
        if offset is None:
            raise Exception("Catalog is not registered: %s" % (cat,))

        return offset

    def set_server(self, server):
        '''
           Remember the server URL and, when one is given, create a pysolr
           client for it in self.server.

           @param server: solr server http URL; a false value only records the
                          URL and leaves self.server as it was.
        '''
        self.server_url = server
        if not self.server_url:
            return

        try:
            from pysolr import Solr
            self.server = Solr(self.server_url, timeout=600)
            print("SERVER  %s %s" % (self.server_url, self.server))

        except Exception as err:
            # Best effort: the failure is reported, not raised, and
            # self.server is left unchanged.
            print("Problem with that server %s, ERR=%s" % (self.server_url, err))
Exemple #3
0
    def __init__(self, server=None):
        '''
           Set up counters, the record buffer, the stopword set and the
           server connection.

           @param server: solr server http URL; Not solrhome -- this is not SolrEmbedded.
                          When None, the module-level SOLR_SERVER is used.
        '''

        self.server = server
        if server is None:
            self.server = SOLR_SERVER

        # Plain ints: Python 2 promotes them to long on overflow, and the old
        # ``0l`` literal is a syntax error on Python 3.
        self._record_count = 0
        self._byte_count = 0
        self._add_byte_count = 0

        self._records = []
        self.count = 0
        self.set_server(self.server)

        # Kept as a function-level import, as in the original code.
        from CommonsUtils import ConfigUtility
        self.utility = ConfigUtility(None)
        # Starts empty; nothing is loaded into it here.
        self.stopwords = set()
Exemple #4
0
    def __init__(self, server=None):
        '''
           Set up the server connection, counters, the record buffer and the
           stopword set.

           @param server: solr server http URL; Not solrhome -- this is not SolrEmbedded.
        '''

        # Start with no client; set_server() is handed the URL as given.
        self.server = None
        self.set_server(server)

        # Plain ints: Python 2 promotes them to long on overflow, and the old
        # ``0l`` literal is a syntax error on Python 3.
        self._record_count = 0
        self._byte_count = 0
        self._add_byte_count = 0
        # NOTE(review): -1 presumably means "no periodic commit" -- confirm
        # against the code that reads commit_rate.
        self.commit_rate = -1

        self._records = []
        self.count = 0

        # Kept as a function-level import, as in the original code.
        from CommonsUtils import ConfigUtility
        self.utility = ConfigUtility(None)
        # Starts empty; nothing is loaded into it here.
        self.stopwords = set()