Ejemplo n.º 1
0
        def store_terms(self, session, index, hash, record):
            """Store the terms in ``hash`` extracted from ``record``.

            When ``self.tempChunks`` is set, one line per term is appended to
            a temporary file selected by the term's first character (terms
            that do not start with an alphanumeric character go to the
            "other" file).  Each line holds the term text, the document id,
            the store id, the occurrence count and, if the term carries
            them, its positions, all joined by ``nonTextToken``.

            When ``self.tempChunks`` is not set the call is delegated to
            ``BdbIndexStore.store_terms`` provided ``index`` is known to
            ``self.outFiles``; otherwise ``NotImplementedError`` is raised.
            """
            if self.tempChunks:
                if not hash:
                    return

                # Make sure you know what you're doing
                storeid = record.recordStore
                if not isinstance(storeid, types.IntType):
                    storeid = self.storeHashReverse[storeid]
                docid = long(record.id)

                for k in hash.values():
                    try:
                        text = k['text'].encode('utf-8')
                    except Exception:
                        # The term has no usable text.  The previous code
                        # printed ``text`` here, which is unbound on the
                        # first term and stale afterwards, so just skip it.
                        continue
                    if not text:
                        continue
                    lineList = [text,
                                str(docid),
                                str(storeid),
                                str(k['occurences'])
                                ]
                    try:
                        positions = k['positions']
                    except KeyError:
                        # non prox
                        pass
                    else:
                        lineList.append(nonTextToken.join(map(str,
                                                              positions)))
                    # text is known to be non-empty at this point
                    if text[0].isalnum():
                        tf = text[0].lower()
                    else:
                        tf = "other"

                    try:
                        outh = self.outFiles[index][tf]
                    except KeyError:
                        # First term for this chunk: create its temp file
                        nameParts = [self.id, index.id, tf]
                        if session.task:
                            nameParts.append(session.task)
                        nameParts.append('TEMP')
                        fname = os.path.join(self.tempPath,
                                             '_'.join(nameParts))
                        outh = open(fname, 'w')
                        self.outFiles[index][tf] = outh
                    outh.write(nonTextToken.join(lineList) + "\n")
                return
            if index in self.outFiles:
                BdbIndexStore.store_terms(self, session, index, hash, record)
            else:
                raise NotImplementedError()
Ejemplo n.º 2
0
        def store_terms(self, session, index, hash, record):
            """Append the terms of ``hash`` for ``record`` to temp chunks.

            With ``self.tempChunks`` enabled, every term with non-empty text
            becomes one ``nonTextToken``-separated line (text, document id,
            store id, occurrences and optional positions) in a temporary
            file keyed by the lower-cased first character of the term, or
            "other" when that character is not alphanumeric.

            With ``self.tempChunks`` disabled, the work is handed to
            ``BdbIndexStore.store_terms`` if ``index`` is present in
            ``self.outFiles``; otherwise ``NotImplementedError`` is raised.
            """
            if self.tempChunks:
                if not hash:
                    return

                # Make sure you know what you're doing
                storeid = record.recordStore
                if not isinstance(storeid, types.IntType):
                    storeid = self.storeHashReverse[storeid]
                docid = long(record.id)

                for k in hash.values():
                    try:
                        text = k['text'].encode('utf-8')
                    except Exception:
                        # Unusable term text: skip the term.  (Printing
                        # ``text`` here, as before, raised NameError on the
                        # first term and showed a stale value afterwards.)
                        continue
                    if not text:
                        continue
                    lineList = [
                        text,
                        str(docid),
                        str(storeid),
                        str(k['occurences'])
                    ]
                    try:
                        positions = k['positions']
                    except KeyError:
                        # non prox
                        pass
                    else:
                        lineList.append(
                            nonTextToken.join(map(str, positions)))
                    # text cannot be empty here, so indexing [0] is safe
                    if text[0].isalnum():
                        tf = text[0].lower()
                    else:
                        tf = "other"

                    try:
                        outh = self.outFiles[index][tf]
                    except KeyError:
                        # No temp file for this chunk yet: open one
                        if session.task:
                            fname = '_'.join(
                                [self.id, index.id, tf, session.task, 'TEMP'])
                        else:
                            fname = '_'.join([self.id, index.id, tf, 'TEMP'])
                        fname = os.path.join(self.tempPath, fname)
                        outh = open(fname, 'w')
                        self.outFiles[index][tf] = outh
                    outh.write(nonTextToken.join(lineList) + "\n")
                return
            if index in self.outFiles:
                BdbIndexStore.store_terms(self, session, index, hash, record)
            else:
                raise NotImplementedError()
Ejemplo n.º 3
0
        def __init__(self, session, config, parent):
            """Initialise the store and connect to the configured SRB server.

            Reads the 'tempChunks' setting and the 'srbServer' path, copies
            every item returned by ``parseSrbUrl`` onto the instance as an
            attribute, derives ``self.subcollection`` and calls
            ``self._connect()``.

            Raises ``ConfigFileException`` when the 'srbServer' path is
            empty after stripping whitespace.
            """
            BdbIndexStore.__init__(self, session, config, parent)
            self.tempChunks = self.get_setting(session, 'tempChunks')
            server = self.get_path(session, 'srbServer').encode('utf-8')
            server = server.strip()
            if not server:
                raise ConfigFileException("No srbServer to connect to.")

            # Expose each parsed URL component as an instance attribute
            for (name, value) in parseSrbUrl(server).items():
                setattr(self, name, value)

            # Stores owned by a Database live under that database's id
            if isinstance(parent, Database):
                location = parent.id + "/" + self.id
            else:
                location = self.id
            self.subcollection = "cheshire3/" + location
            self.connection = None
            self._connect()
Ejemplo n.º 4
0
        def __init__(self, session, config, parent):
            """Set up the index store against an SRB server.

            The 'srbServer' path is UTF-8 encoded, stripped and handed to
            ``parseSrbUrl``; each resulting key/value pair is set as an
            attribute on ``self``.  ``self.subcollection`` is then built
            from the parent's id (when the parent is a ``Database``) and
            this object's id, and ``self._connect()`` is called.

            Raises ``ConfigFileException`` if no server URI is configured.
            """
            BdbIndexStore.__init__(self, session, config, parent)
            self.tempChunks = self.get_setting(session, 'tempChunks')

            srbUri = self.get_path(session, 'srbServer')
            srbUri = srbUri.encode('utf-8').strip()
            if not srbUri:
                raise ConfigFileException("No srbServer to connect to.")

            parsed = parseSrbUrl(srbUri)
            for (field, val) in parsed.items():
                setattr(self, field, val)

            collection = self.id
            if isinstance(parent, Database):
                # Prefix with the owning database's id
                collection = parent.id + "/" + self.id
            self.subcollection = "cheshire3/" + collection
            self.connection = None
            self._connect()
Ejemplo n.º 5
0
    def __init__(self, session, config, parent):
        """Initialise the store from its iRODS settings and open it.

        Sets the switching connection classes, resets the collection,
        connection and environment handles to ``None``, reads the
        ``irods*`` and ``allowStoreSubDirs`` settings (with their defaults)
        and finally calls ``self._open(session)``.
        """
        BdbIndexStore.__init__(self, session, config, parent)

        self.switchingClass = IrodsSwitchingBdbConnection
        self.vectorSwitchingClass = IrodsSwitchingBdbConnection
        self.coll = self.cxn = self.env = None

        # (attribute name, setting name, default), read in this order
        settings = (
            ('host', 'irodsHost', ''),
            ('port', 'irodsPort', 0),
            ('user', 'irodsUser', ''),
            ('zone', 'irodsZone', ''),
            ('passwd', 'irodsPassword', ''),
            ('resource', 'irodsResource', ''),
            ('allowStoreSubDirs', 'allowStoreSubDirs', 1),
        )
        for (attr, key, default) in settings:
            setattr(self, attr, self.get_setting(session, key, default))

        # And open irods
        self._open(session)
0
        def commit_indexing(self, session, index):
            """Finish indexing for ``index``.

            Without ``self.tempChunks`` this simply calls
            ``BdbIndexStore.commit_indexing`` and returns ``None``.

            With ``self.tempChunks`` every open temp file for ``index`` is
            flushed and closed, each ``*_TEMP`` file is run through the
            external command configured as 'sortPath' to produce a
            ``*_SORT`` file, and the ``*_TEMP`` file is removed.  If the
            session has a ``phase`` attribute the sorted file names are
            collected and returned; otherwise each sorted file is passed to
            ``self.commit_indexing2`` and the returned list is empty.
            """
            if not self.tempChunks:
                BdbIndexStore.commit_indexing(self, session, index)
                return

            tempDir = self.tempPath
            handles = self.outFiles[index]
            chunkKeys = handles.keys()
            for handle in handles.values():
                handle.flush()
                handle.close()
            del self.outFiles[index]

            sortCmd = self.get_path(session, 'sortPath')
            task = session.task if hasattr(session, 'task') else None
            # A session with a 'phase' defers loading to the caller
            deferLoad = hasattr(session, 'phase')

            sortedFiles = []
            for key in chunkKeys:
                nameParts = [self.id, index.id, key]
                if task:
                    nameParts.append(task)
                stem = '_'.join(nameParts)
                unsortedPath = os.path.join(tempDir, stem + "_TEMP")
                sortedPath = os.path.join(tempDir, stem + "_SORT")
                commands.getoutput(
                    "%s -f %s -o %s" % (sortCmd, unsortedPath, sortedPath))
                os.remove(unsortedPath)
                if deferLoad:
                    sortedFiles.append(sortedPath)
                else:
                    self.commit_indexing2(session, index, sortedPath)
            return sortedFiles
Ejemplo n.º 7
0
        def commit_indexing(self, session, index):
            """Close, sort and optionally load the temp chunks of ``index``.

            If ``self.tempChunks`` is not set, the call is forwarded to
            ``BdbIndexStore.commit_indexing`` and nothing is returned.

            Otherwise the temp files opened for ``index`` are flushed and
            closed, then each one is sorted with the command found under
            the 'sortPath' path (``<sort> -f <name>_TEMP -o <name>_SORT``)
            and deleted.  Sorted files are loaded immediately through
            ``self.commit_indexing2`` unless the session has a ``phase``
            attribute, in which case their paths are returned instead.
            """
            if self.tempChunks:
                workDir = self.tempPath
                openFiles = self.outFiles[index]
                names = openFiles.keys()
                for fh in openFiles.values():
                    fh.flush()
                    fh.close()
                del self.outFiles[index]
                sorter = self.get_path(session, 'sortPath')

                task = None
                if hasattr(session, 'task'):
                    task = session.task
                # Load straight away unless the session is phased
                loadNow = not hasattr(session, 'phase')

                pending = []
                for name in names:
                    if task:
                        base = '_'.join([self.id, index.id, name, task])
                    else:
                        base = '_'.join([self.id, index.id, name])
                    rawFile = os.path.join(workDir, base + "_TEMP")
                    sortedFile = os.path.join(workDir, base + "_SORT")
                    commandLine = "%s -f %s -o %s" % (sorter,
                                                      rawFile,
                                                      sortedFile)
                    commands.getoutput(commandLine)
                    os.remove(rawFile)
                    if loadNow:
                        self.commit_indexing2(session, index, sortedFile)
                    else:
                        pending.append(sortedFile)
                return pending
            BdbIndexStore.commit_indexing(self, session, index)
Ejemplo n.º 8
0
    def __init__(self, session, config, parent):
        """Initialise the store, read its iRODS settings and open it.

        Raises ``MissingDependencyException`` when the module-level
        ``irods`` name is ``None``.  Otherwise sets the switching
        connection classes, clears the collection / connection /
        environment handles, reads the ``irods*`` and ``allowStoreSubDirs``
        settings and calls ``self._open(session)``.
        """
        BdbIndexStore.__init__(self, session, config, parent)
        if irods is None:
            raise MissingDependencyException(self.objectType,
                                             'irods (PyRods)')

        self.switchingClass = IrodsSwitchingBdbConnection
        self.vectorSwitchingClass = IrodsSwitchingBdbConnection
        self.coll = None
        self.cxn = None
        self.env = None

        # Shorthand for the repeated settings lookups below
        setting = self.get_setting
        self.host = setting(session, 'irodsHost', '')
        self.port = setting(session, 'irodsPort', 0)
        self.user = setting(session, 'irodsUser', '')
        self.zone = setting(session, 'irodsZone', '')
        self.passwd = setting(session, 'irodsPassword', '')
        self.resource = setting(session, 'irodsResource', '')
        self.allowStoreSubDirs = setting(session, 'allowStoreSubDirs', 1)

        # And open iRODS
        self._open(session)
Ejemplo n.º 9
0
        def begin_indexing(self, session, index):
            """Prepare to index into ``index``.

            Without ``self.tempChunks`` the call is delegated to
            ``BdbIndexStore.begin_indexing`` and its result returned.

            With ``self.tempChunks`` the 'tempPath' path is resolved
            (relative paths are taken relative to 'defaultPath'), stored in
            ``self.tempPath`` and created if it does not exist.  An empty
            mapping is then registered in ``self.outFiles`` for ``index``.

            Raises ``ConfigFileException`` if the temp path cannot be
            created or exists but is not a directory.
            """
            if not self.tempChunks:
                return BdbIndexStore.begin_indexing(self, session, index)
            temp = self.get_path(session, 'tempPath')
            if not os.path.isabs(temp):
                temp = os.path.join(self.get_path(session, 'defaultPath'),
                                    temp)
            self.tempPath = temp
            if not os.path.exists(temp):
                try:
                    os.mkdir(temp)
                except OSError:
                    # Only filesystem failures mean "not creatable"; a bare
                    # except would also swallow KeyboardInterrupt/SystemExit
                    raise ConfigFileException('TempPath does not exist and '
                                              'is not creatable.')
            elif not os.path.isdir(temp):
                raise ConfigFileException('TempPath is not a directory.')

            # Make temp files on demand, in hash
            self.outFiles[index] = {}
Ejemplo n.º 10
0
        def begin_indexing(self, session, index):
            """Set up temporary chunk storage for indexing into ``index``.

            If ``self.tempChunks`` is not set, returns whatever
            ``BdbIndexStore.begin_indexing`` returns.  Otherwise resolves
            the 'tempPath' path (joining it onto 'defaultPath' when it is
            not absolute), records it as ``self.tempPath``, creates the
            directory when missing and initialises ``self.outFiles[index]``
            to an empty dict.

            Raises ``ConfigFileException`` when the directory cannot be
            created, or when the path exists but is not a directory.
            """
            if not self.tempChunks:
                return BdbIndexStore.begin_indexing(self, session, index)
            temp = self.get_path(session, 'tempPath')
            if not os.path.isabs(temp):
                temp = os.path.join(self.get_path(session, 'defaultPath'),
                                    temp)
            self.tempPath = temp
            if not os.path.exists(temp):
                try:
                    os.mkdir(temp)
                except OSError:
                    # Catch filesystem errors only, so interrupts and
                    # interpreter exit are not reported as a config problem
                    raise ConfigFileException(
                        'TempPath does not exist and is '
                        'not creatable.')
            elif not os.path.isdir(temp):
                raise ConfigFileException('TempPath is not a directory.')

            # Make temp files on demand, in hash
            self.outFiles[index] = {}
 def __init__(self, session, config, parent):
     """Always fail: raise after the base initialiser has run.

     Raises ``MissingDependencyException`` naming 'irods (PyRods)'.
     """
     BdbIndexStore.__init__(self, session, config, parent)
     missing = MissingDependencyException(self.objectType, 'irods (PyRods)')
     raise missing