class TaxCatalogBuilder:
    '''
    Builder state around a Solr server: keeps a pysolr client, running
    counters, a list of records and a set of stopwords.
    '''

    def __init__(self, server=None):
        '''
        @param server: solr server http URL; Not solrhome -- this is not SolrEmbedded.
                       When None, the module-level SOLR_SERVER is used.
        '''
        self.server = SOLR_SERVER if server is None else server

        # Plain int literals are valid on Python 2 and 3 alike; Python 2
        # promotes them to long automatically when they grow large.
        self._record_count = 0
        self._byte_count = 0
        self._add_byte_count = 0
        self._records = []
        self.count = 0

        # self.server holds a URL at this point; set_server() swaps it for a
        # pysolr client when one can be created, otherwise the URL stays.
        self.set_server(self.server)

        from CommonsUtils import ConfigUtility
        ## Load file
        self.utility = ConfigUtility(None)
        self.stopwords = set()

    def add_stopwords(self, stopfile):
        '''
        Load the entries of a stopword file and merge them into self.stopwords.

        @param stopfile: path of the stopword file
        @raise Exception: if stopfile does not exist
        '''
        if not os.path.exists(stopfile):
            raise Exception("No stopwords found at " + stopfile)

        # Two spaces on purpose: same text the former print statement emitted
        # ("Loading stopwords " followed by the separator blank).
        print("Loading stopwords  %s" % stopfile)

        loaded = self.utility.loadListFromFile(stopfile)
        # update() merges the individual entries.  add() would try to insert
        # the list object itself and fail, because lists are unhashable.
        self.stopwords.update(loaded or ())

    def get_starting_id(self, cat):
        '''
        Look up the starting offset stored in CATALOG_REGISTRY for a catalog.

        @param cat: catalog name used as the registry key
        @return: the registered offset
        @raise Exception: if the registry has no entry for cat, or the entry is falsy
        '''
        offset = CATALOG_REGISTRY.get(cat)
        # NOTE(review): an offset of 0 is rejected here as well -- confirm that
        # 0 is never a legitimate starting id.
        if not offset:
            raise Exception("Catalog is not registered: " + cat)
        return offset

    def set_server(self, server):
        '''
        Remember the server URL and try to create a pysolr client for it.

        @param server: solr server http URL; a falsy value is stored in
                       server_url and nothing else happens
        '''
        self.server_url = server
        if not self.server_url:
            return

        try:
            from pysolr import Solr
            self.server = Solr(self.server_url, timeout=600)
            print("SERVER  %s %s" % (self.server_url, self.server))
        except Exception as err:
            # Best effort: report the problem and leave self.server untouched.
            print("Problem with that server %s, ERR=%s" % (self.server_url, err))
class TaxCatalogBuilder:
    '''
    Builder state around a Solr server: keeps a pysolr client, running
    counters, a list of records and a set of stopwords.
    '''

    def __init__(self, server=None):
        '''
        @param server: solr server http URL; Not solrhome -- this is not SolrEmbedded.
                       When None, the module-level SOLR_SERVER is used.
        '''
        self.server = SOLR_SERVER if server is None else server

        # Plain int literals are valid on Python 2 and 3 alike; Python 2
        # promotes them to long automatically when they grow large.
        self._record_count = 0
        self._byte_count = 0
        self._add_byte_count = 0
        self._records = []
        self.count = 0

        # self.server holds a URL at this point; set_server() swaps it for a
        # pysolr client when one can be created, otherwise the URL stays.
        self.set_server(self.server)

        from CommonsUtils import ConfigUtility
        ## Load file
        self.utility = ConfigUtility(None)
        self.stopwords = set()

    def add_stopwords(self, stopfile):
        '''
        Load the entries of a stopword file and merge them into self.stopwords.

        @param stopfile: path of the stopword file
        @raise Exception: if stopfile does not exist
        '''
        if not os.path.exists(stopfile):
            raise Exception("No stopwords found at " + stopfile)

        # Two spaces on purpose: same text the former print statement emitted
        # ("Loading stopwords " followed by the separator blank).
        print("Loading stopwords  %s" % stopfile)

        entries = self.utility.loadListFromFile(stopfile)
        # update() merges the individual entries.  add() would try to insert
        # the list object itself and fail, because lists are unhashable.
        self.stopwords.update(entries or ())

    def get_starting_id(self, cat):
        '''
        Look up the starting offset stored in CATALOG_REGISTRY for a catalog.

        @param cat: catalog name used as the registry key
        @return: the registered offset
        @raise Exception: if the registry has no entry for cat, or the entry is falsy
        '''
        offset = CATALOG_REGISTRY.get(cat)
        # NOTE(review): an offset of 0 is rejected here as well -- confirm that
        # 0 is never a legitimate starting id.
        if not offset:
            raise Exception("Catalog is not registered: " + cat)
        return offset

    def set_server(self, server):
        '''
        Remember the server URL and try to create a pysolr client for it.

        @param server: solr server http URL; a falsy value is stored in
                       server_url and nothing else happens
        '''
        self.server_url = server
        if not self.server_url:
            return

        try:
            from pysolr import Solr
            self.server = Solr(self.server_url, timeout=600)
            print("SERVER  %s %s" % (self.server_url, self.server))
        except Exception as err:
            # Best effort: report the problem and leave self.server untouched.
            print("Problem with that server %s, ERR=%s" % (self.server_url, err))
def __init__(self, server=None):
    '''
    Set up the counters, the record list, the Solr client and an empty
    stopword set.

    @param server: solr server http URL; Not solrhome -- this is not SolrEmbedded.
                   When None, the module-level SOLR_SERVER is used.
    '''
    self.server = SOLR_SERVER if server is None else server

    # Plain int literals are valid on Python 2 and 3 alike; Python 2
    # promotes them to long automatically when they grow large.
    self._record_count = 0
    self._byte_count = 0
    self._add_byte_count = 0
    self._records = []
    self.count = 0

    # self.server holds a URL here and is handed on to set_server().
    self.set_server(self.server)

    from CommonsUtils import ConfigUtility
    ## Load file
    self.utility = ConfigUtility(None)
    # Starts empty; there is no stopwords argument on this constructor.
    self.stopwords = set()
def __init__(self, server=None):
    '''
    Set up the Solr client, the counters, the record list and an empty
    stopword set.

    @param server: solr server http URL; Not solrhome -- this is not SolrEmbedded.
                   Passed to set_server() unchanged, including None.
    '''
    # Define the attribute up front so it exists whatever set_server() does.
    self.server = None
    self.set_server(server)

    # Plain int literals are valid on Python 2 and 3 alike; Python 2
    # promotes them to long automatically when they grow large.
    self._record_count = 0
    self._byte_count = 0
    self._add_byte_count = 0
    # NOTE(review): -1 presumably means "no periodic commit" -- confirm
    # against the code that reads commit_rate.
    self.commit_rate = -1

    self._records = []
    self.count = 0

    from CommonsUtils import ConfigUtility
    ## Load file
    self.utility = ConfigUtility(None)
    # Starts empty; there is no stopwords argument on this constructor.
    self.stopwords = set()