def store_terms(self, session, index, hash, record):
    """Store the terms extracted from ``record`` for ``index``.

    When ``self.tempChunks`` is set, every term in ``hash`` is appended as
    one line to a temporary file selected by the term's first character
    (lower-cased if alphanumeric, otherwise the bucket ``other``).  Files
    are opened on first use and cached in ``self.outFiles[index]``.  Each
    line holds the UTF-8 term, the document id, the store id, the
    occurrence count and, when present, the joined positions, separated
    by ``nonTextToken``.

    When ``self.tempChunks`` is not set, the call is delegated to
    ``BdbIndexStore.store_terms`` provided ``index`` is a key of
    ``self.outFiles``.

    :param session: session object; a truthy ``session.task`` is included
        in the names of newly created temp files.
    :param index: index object; used as the key into ``self.outFiles``
        and its ``id`` is used in temp file names.
    :param hash: mapping whose values are dicts with the keys ``text``,
        ``occurences`` and, optionally, ``positions``.
    :param record: object providing ``id`` and ``recordStore``.
    :raises NotImplementedError: if ``self.tempChunks`` is not set and
        ``index`` is not a key of ``self.outFiles``.
    """
    if not self.tempChunks:
        if index in self.outFiles:
            BdbIndexStore.store_terms(self, session, index, hash, record)
            return
        raise NotImplementedError()

    if not hash:
        return

    # Make sure you know what you're doing
    storeid = record.recordStore
    if not isinstance(storeid, types.IntType):
        # Non-integer store identifiers are translated via the reverse map
        storeid = self.storeHashReverse[storeid]
    docid = long(record.id)
    # These two fields are identical for every term of the record
    docField = str(docid)
    storeField = str(storeid)

    for k in hash.values():
        try:
            text = k['text'].encode('utf-8')
        except (KeyError, AttributeError, UnicodeError):
            # Term text is missing or cannot be encoded: skip this term.
            # (Previously this handler printed ``text``, which is unbound
            # when the very first term fails.)
            continue
        if not text:
            continue
        lineList = [text, docField, storeField, str(k['occurences'])]
        try:
            lineList.append(nonTextToken.join(map(str, k['positions'])))
        except KeyError:
            # non prox
            pass
        # text is known to be non-empty here, so text[0] is safe
        if text[0].isalnum():
            tf = text[0].lower()
        else:
            tf = "other"
        try:
            outh = self.outFiles[index][tf]
        except KeyError:
            # First term for this bucket: create its temp file on demand
            if session.task:
                parts = [self.id, index.id, tf, session.task, 'TEMP']
            else:
                parts = [self.id, index.id, tf, 'TEMP']
            fname = os.path.join(self.tempPath, '_'.join(parts))
            outh = open(fname, 'w')
            self.outFiles[index][tf] = outh
        outh.write(nonTextToken.join(lineList) + "\n")
def store_terms(self, session, index, hash, record):
    """Store the terms extracted from ``record`` for ``index``.

    When ``self.tempChunks`` is set, every term in ``hash`` is appended as
    one line to a temporary file selected by the term's first character
    (lower-cased if alphanumeric, otherwise the bucket ``other``).  Files
    are opened on first use and cached in ``self.outFiles[index]``.  Each
    line holds the UTF-8 term, the document id, the store id, the
    occurrence count and, when present, the joined positions, separated
    by ``nonTextToken``.

    When ``self.tempChunks`` is not set, the call is delegated to
    ``BdbIndexStore.store_terms`` provided ``index`` is a key of
    ``self.outFiles``.

    :param session: session object; a truthy ``session.task`` is included
        in the names of newly created temp files.
    :param index: index object; used as the key into ``self.outFiles``
        and its ``id`` is used in temp file names.
    :param hash: mapping whose values are dicts with the keys ``text``,
        ``occurences`` and, optionally, ``positions``.
    :param record: object providing ``id`` and ``recordStore``.
    :raises NotImplementedError: if ``self.tempChunks`` is not set and
        ``index`` is not a key of ``self.outFiles``.
    """
    if not self.tempChunks:
        if index in self.outFiles:
            BdbIndexStore.store_terms(self, session, index, hash, record)
            return
        raise NotImplementedError()

    if not hash:
        return

    # Make sure you know what you're doing
    storeid = record.recordStore
    if not isinstance(storeid, types.IntType):
        # Non-integer store identifiers are translated via the reverse map
        storeid = self.storeHashReverse[storeid]
    docid = long(record.id)
    # These two fields are identical for every term of the record
    docField = str(docid)
    storeField = str(storeid)

    for k in hash.values():
        try:
            text = k['text'].encode('utf-8')
        except (KeyError, AttributeError, UnicodeError):
            # Term text is missing or cannot be encoded: skip this term.
            # (Previously this handler printed ``text``, which is unbound
            # when the very first term fails.)
            continue
        if not text:
            continue
        lineList = [text, docField, storeField, str(k['occurences'])]
        try:
            lineList.append(nonTextToken.join(map(str, k['positions'])))
        except KeyError:
            # non prox
            pass
        # text is known to be non-empty here, so text[0] is safe
        if text[0].isalnum():
            tf = text[0].lower()
        else:
            tf = "other"
        try:
            outh = self.outFiles[index][tf]
        except KeyError:
            # First term for this bucket: create its temp file on demand
            if session.task:
                parts = [self.id, index.id, tf, session.task, 'TEMP']
            else:
                parts = [self.id, index.id, tf, 'TEMP']
            fname = os.path.join(self.tempPath, '_'.join(parts))
            outh = open(fname, 'w')
            self.outFiles[index][tf] = outh
        outh.write(nonTextToken.join(lineList) + "\n")
def __init__(self, session, config, parent):
    """Initialise the store and connect to the configured SRB server.

    Runs the ``BdbIndexStore`` initialiser, reads the ``tempChunks``
    setting and the ``srbServer`` path, copies every item returned by
    ``parseSrbUrl`` onto the instance as an attribute, derives
    ``self.subcollection`` and finally calls ``self._connect()``.

    :param session: session object passed to the configuration getters.
    :param config: configuration passed through to ``BdbIndexStore``.
    :param parent: parent object; when it is a ``Database`` its ``id``
        prefixes the subcollection name.
    :raises ConfigFileException: if the ``srbServer`` path is empty after
        encoding and stripping.
    """
    BdbIndexStore.__init__(self, session, config, parent)
    self.tempChunks = self.get_setting(session, 'tempChunks')

    server = self.get_path(session, 'srbServer').encode('utf-8').strip()
    if not server:
        raise ConfigFileException("No srbServer to connect to.")

    # Expose each parsed URL component as an instance attribute
    for name, value in parseSrbUrl(server).items():
        setattr(self, name, value)

    scope = self.id
    if isinstance(parent, Database):
        scope = parent.id + "/" + scope
    self.subcollection = "cheshire3/" + scope

    self.connection = None
    self._connect()
def __init__(self, session, config, parent):
    """Initialise the store from its iRODS settings and open iRODS.

    Runs the ``BdbIndexStore`` initialiser, selects
    ``IrodsSwitchingBdbConnection`` for both switching classes, resets
    the ``coll``/``cxn``/``env`` handles to ``None``, reads the iRODS
    settings (each with a default) and then calls ``self._open(session)``.

    :param session: session object passed to ``get_setting`` and ``_open``.
    :param config: configuration passed through to ``BdbIndexStore``.
    :param parent: parent object passed through to ``BdbIndexStore``.
    """
    BdbIndexStore.__init__(self, session, config, parent)
    self.switchingClass = self.vectorSwitchingClass = \
        IrodsSwitchingBdbConnection
    self.coll = self.cxn = self.env = None

    # (attribute name, setting name, default when the setting is absent)
    configured = (
        ('host', 'irodsHost', ''),
        ('port', 'irodsPort', 0),
        ('user', 'irodsUser', ''),
        ('zone', 'irodsZone', ''),
        ('passwd', 'irodsPassword', ''),
        ('resource', 'irodsResource', ''),
        ('allowStoreSubDirs', 'allowStoreSubDirs', 1),
    )
    for attribute, setting, default in configured:
        setattr(self, attribute, self.get_setting(session, setting, default))

    # And open irods
    self._open(session)
def commit_indexing(self, session, index):
    """Finish indexing for ``index``.

    Without ``self.tempChunks`` the call is delegated to
    ``BdbIndexStore.commit_indexing`` and ``None`` is returned.

    With ``self.tempChunks`` every temp file opened for ``index`` is
    flushed and closed and the entry is removed from ``self.outFiles``.
    Each ``*_TEMP`` file is then sorted into a ``*_SORT`` file by running
    the program configured as ``sortPath`` and the ``*_TEMP`` file is
    deleted.  If ``session`` has no ``phase`` attribute each sorted file
    is passed straight to ``self.commit_indexing2``; otherwise its path is
    collected for the caller.

    :param session: session object; ``task`` (optional) is part of the
        temp file names, and the presence of ``phase`` defers loading.
    :param index: index object; keys ``self.outFiles`` and supplies
        ``id`` for the temp file names.
    :returns: list of sorted file paths that were not loaded (empty when
        they were loaded immediately); ``None`` when not chunking.
    """
    if not self.tempChunks:
        BdbIndexStore.commit_indexing(self, session, index)
        return

    try:
        from shlex import quote
    except ImportError:
        # Python 2: shlex has no quote(); pipes provides the same helper
        from pipes import quote

    temp = self.tempPath
    handles = self.outFiles[index]
    keys = list(handles)
    for handle in handles.values():
        handle.flush()
        handle.close()
    del self.outFiles[index]

    sort = self.get_path(session, 'sortPath')
    task = getattr(session, 'task', None)
    # Load sorted files immediately unless the session is phased
    load = not hasattr(session, 'phase')

    sfiles = []
    for k in keys:
        parts = [self.id, index.id, k]
        if task:
            parts.append(task)
        fn = '_'.join(parts)
        tf = os.path.join(temp, fn + "_TEMP")
        sf = os.path.join(temp, fn + "_SORT")
        # Quote only the file arguments so paths containing whitespace or
        # shell metacharacters survive; ``sort`` is left untouched because
        # the configured value may carry its own options.
        cmd = "%s -f %s -o %s" % (sort, quote(tf), quote(sf))
        commands.getoutput(cmd)
        os.remove(tf)
        if load:
            self.commit_indexing2(session, index, sf)
        else:
            sfiles.append(sf)
    return sfiles
def __init__(self, session, config, parent):
    """Initialise the store from its iRODS settings and open iRODS.

    Runs the ``BdbIndexStore`` initialiser, refuses to continue when the
    module-level name ``irods`` is ``None``, selects
    ``IrodsSwitchingBdbConnection`` for both switching classes, resets
    the ``coll``/``cxn``/``env`` handles, reads the iRODS settings (each
    with a default) and then calls ``self._open(session)``.

    :param session: session object passed to ``get_setting`` and ``_open``.
    :param config: configuration passed through to ``BdbIndexStore``.
    :param parent: parent object passed through to ``BdbIndexStore``.
    :raises MissingDependencyException: if ``irods`` is ``None``.
    """
    BdbIndexStore.__init__(self, session, config, parent)
    if irods is None:
        raise MissingDependencyException(self.objectType, 'irods (PyRods)')

    def setting(name, default):
        # Shorthand for reading one setting of this object with a default
        return self.get_setting(session, name, default)

    self.switchingClass = IrodsSwitchingBdbConnection
    self.vectorSwitchingClass = IrodsSwitchingBdbConnection

    # Handles start out unset
    self.coll = None
    self.cxn = None
    self.env = None

    # Connection parameters
    self.host = setting('irodsHost', '')
    self.port = setting('irodsPort', 0)
    self.user = setting('irodsUser', '')
    self.zone = setting('irodsZone', '')
    self.passwd = setting('irodsPassword', '')
    self.resource = setting('irodsResource', '')
    self.allowStoreSubDirs = setting('allowStoreSubDirs', 1)

    # And open iRODS
    self._open(session)
def begin_indexing(self, session, index):
    """Prepare the store for indexing into ``index``.

    Without ``self.tempChunks`` the call is delegated to
    ``BdbIndexStore.begin_indexing`` and its result is returned.

    With ``self.tempChunks`` the ``tempPath`` path is resolved (relative
    values are joined onto ``defaultPath``), remembered as
    ``self.tempPath`` and created if it does not exist.  An empty mapping
    is then registered in ``self.outFiles[index]``.

    :param session: session object passed to ``get_path``.
    :param index: index object used as the key into ``self.outFiles``.
    :raises ConfigFileException: if the temp path cannot be created, or
        exists but is not a directory.
    """
    if not self.tempChunks:
        return BdbIndexStore.begin_indexing(self, session, index)

    temp = self.get_path(session, 'tempPath')
    if not os.path.isabs(temp):
        temp = os.path.join(self.get_path(session, 'defaultPath'), temp)
    self.tempPath = temp

    if not os.path.exists(temp):
        try:
            os.mkdir(temp)
        except OSError:
            # Only filesystem failures mean "not creatable"; a bare except
            # here would also swallow KeyboardInterrupt and SystemExit.
            raise ConfigFileException('TempPath does not exist and is '
                                      'not creatable.')
    elif not os.path.isdir(temp):
        raise ConfigFileException('TempPath is not a directory.')

    # Make temp files on demand, in hash
    self.outFiles[index] = {}
def begin_indexing(self, session, index):
    """Prepare the store for indexing into ``index``.

    Without ``self.tempChunks`` the call is delegated to
    ``BdbIndexStore.begin_indexing`` and its result is returned.

    With ``self.tempChunks`` the ``tempPath`` path is resolved (relative
    values are joined onto ``defaultPath``), remembered as
    ``self.tempPath`` and created if it does not exist.  An empty mapping
    is then registered in ``self.outFiles[index]``.

    :param session: session object passed to ``get_path``.
    :param index: index object used as the key into ``self.outFiles``.
    :raises ConfigFileException: if the temp path cannot be created, or
        exists but is not a directory.
    """
    if not self.tempChunks:
        return BdbIndexStore.begin_indexing(self, session, index)

    temp = self.get_path(session, 'tempPath')
    if not os.path.isabs(temp):
        temp = os.path.join(self.get_path(session, 'defaultPath'), temp)
    self.tempPath = temp

    if not os.path.exists(temp):
        try:
            os.mkdir(temp)
        except OSError:
            # Only filesystem failures mean "not creatable"; a bare except
            # here would also swallow KeyboardInterrupt and SystemExit.
            raise ConfigFileException('TempPath does not exist and is '
                                      'not creatable.')
    elif not os.path.isdir(temp):
        raise ConfigFileException('TempPath is not a directory.')

    # Make temp files on demand, in hash
    self.outFiles[index] = {}
def __init__(self, session, config, parent):
    """Run the base initialiser, then always refuse to construct.

    After ``BdbIndexStore.__init__`` has run, a
    ``MissingDependencyException`` naming ``irods (PyRods)`` is raised
    unconditionally.
    NOTE(review): presumably this variant is the fallback definition used
    when the irods bindings cannot be imported -- confirm against the
    surrounding module.

    :param session: session object passed through to ``BdbIndexStore``.
    :param config: configuration passed through to ``BdbIndexStore``.
    :param parent: parent object passed through to ``BdbIndexStore``.
    :raises MissingDependencyException: always.
    """
    BdbIndexStore.__init__(self, session, config, parent)
    dependency = 'irods (PyRods)'
    raise MissingDependencyException(self.objectType, dependency)