def fetch_metadata(self, session, id, mType):
    """Return one user-metadata value of the stored iRODS object `id`.

    The identifier is first normalized: through ``self.idNormalizer`` when
    one is configured, otherwise unicode is encoded as UTF-8 and anything
    else is passed through ``str``.  The store is then opened with
    ``self._open(session)``.

    When the identifier contains ``/`` and ``self.allowStoreSubDirs`` is
    true, every leading path component is entered as a sub-collection of
    ``self.coll``; otherwise any ``/`` is replaced by ``--``.

    Returns the value (converted by ``icatValToPy``) of the first metadata
    entry whose first element equals `mType`, or ``None`` if there is none.

    Raises ``ObjectDoesNotExistException`` when a path component is not an
    existing sub-collection.

    ``self.coll`` is always moved back up by as many levels as were entered,
    including when an exception is raised part-way through.
    """
    if self.idNormalizer is not None:
        id = self.idNormalizer.process_string(session, id)
    elif isinstance(id, unicode):
        id = id.encode('utf-8')
    else:
        id = str(id)
    self._open(session)
    # Number of sub-collections entered so far; undone in the finally clause.
    upwards = 0
    try:
        if id.find('/') > -1 and self.allowStoreSubDirs:
            idp = id.split('/')
            # The last component is the object name, the rest are collections.
            id = idp.pop()
            while idp:
                dn = idp.pop(0)
                if dn not in self.coll.getSubCollections():
                    raise ObjectDoesNotExistException(id)
                self.coll.openCollection(dn)
                upwards += 1
        else:
            id = id.replace('/', '--')
        collPath = self.coll.getCollName()
        # This is much more efficient than getting the file as it's simply
        # interacting with iCAT
        umd = irods.getFileUserMetadata(self.cxn,
                                        '{0}/{1}'.format(collPath, id))
        val = None
        for x in umd:
            if x[0] == mType:
                val = icatValToPy(x[1], x[2])
                break
        return val
    finally:
        # Restore the collection cursor on success and on failure alike.
        for x in range(upwards):
            self.coll.upCollection()
def find_documents(self, session, cache=0):
    """Yield documents found at and below ``self.coll``, deleting each file.

    Generator.  The objects of the current collection are handled first,
    then its sub-collections are visited breadth-first (first in, first
    out).  For every document produced by ``self._processFiles`` the file's
    iRODS user metadata is stored under ``metadata['iRODS']`` when there is
    any, the corresponding object is deleted from the collection, and the
    document is yielded.

    After a sub-collection has been handled, ``self.coll`` is moved back up
    by as many levels as were entered to reach it.

    `cache` is accepted for interface compatibility and is not used here.
    """
    c = self.coll

    def _consume_current():
        # Yield the documents of the collection `c` currently points at,
        # attaching their iRODS metadata and deleting each source object.
        files = c.getObjects()
        files.sort()
        fList = [x[0] for x in files]
        for i, f in enumerate(self._processFiles(session, fList)):
            irodsFilePath = '{0}/{1}'.format(c.getCollName(), f.filename)
            md = {}
            for x in irods.getFileUserMetadata(self.cxn, irodsFilePath):
                md[x[0]] = icatValToPy(x[1], x[2])
            if md:
                f.metadata['iRODS'] = md
            # Delete the file on its resource
            # NOTE(review): files[i] assumes _processFiles yields exactly one
            # document per entry of fList, in order -- confirm.
            c.delete(files[i][0], files[i][1])
            yield f

    for f in _consume_current():
        yield f
    dirs = c.getSubCollections()
    while dirs:
        d = dirs.pop(0)
        # `d` is a '/'-joined path relative to the starting collection.
        parts = d.split('/')
        for dx in parts:
            c.openCollection(dx)
        for f in _consume_current():
            yield f
        ndirs = c.getSubCollections()
        dirs.extend(["%s/%s" % (d, x) for x in ndirs])
        # Climb back to the starting collection.
        for dx in parts:
            c.upCollection()
def find_documents(self, session, cache=0):
    """Yield documents found at and below ``self.coll``, deleting each file.

    Generator.  The objects of the current collection are handled first,
    then its sub-collections are visited breadth-first (first in, first
    out).  For every document produced by ``self._processFiles`` the file's
    iRODS user metadata is stored under ``metadata['iRODS']`` when there is
    any, the corresponding object is deleted from the collection, and the
    document is yielded.

    After a sub-collection has been handled, ``self.coll`` is moved back up
    by as many levels as were entered to reach it.

    `cache` is accepted for interface compatibility and is not used here.
    """
    c = self.coll

    def _consume_current():
        # Yield the documents of the collection `c` currently points at,
        # attaching their iRODS metadata and deleting each source object.
        files = c.getObjects()
        files.sort()
        fList = [x[0] for x in files]
        for i, f in enumerate(self._processFiles(session, fList)):
            irodsFilePath = '{0}/{1}'.format(c.getCollName(), f.filename)
            md = {}
            for x in irods.getFileUserMetadata(self.cxn, irodsFilePath):
                md[x[0]] = icatValToPy(x[1], x[2])
            if md:
                f.metadata['iRODS'] = md
            # Delete the file on its resource
            # NOTE(review): files[i] assumes _processFiles yields exactly one
            # document per entry of fList, in order -- confirm.
            c.delete(files[i][0], files[i][1])
            yield f

    for f in _consume_current():
        yield f
    dirs = c.getSubCollections()
    while dirs:
        d = dirs.pop(0)
        # `d` is a '/'-joined path relative to the starting collection.
        parts = d.split('/')
        for dx in parts:
            c.openCollection(dx)
        for f in _consume_current():
            yield f
        ndirs = c.getSubCollections()
        dirs.extend(["%s/%s" % (d, x) for x in ndirs])
        # Climb back to the starting collection.
        for dx in parts:
            c.upCollection()
def find_documents(self, session, cache=0):
    """Yield documents found at and below ``self.coll``.

    Generator.  The objects of the current collection are handled first,
    then its sub-collections are visited breadth-first (first in, first
    out).  Object names are sorted before being handed to
    ``self._processFiles``; each resulting document gets the file's iRODS
    user metadata stored under ``metadata['iRODS']`` when there is any, and
    is then yielded.

    After a sub-collection has been handled, ``self.coll`` is moved back up
    by as many levels as were entered to reach it.

    `cache` is accepted for interface compatibility and is not used here.
    """
    c = self.coll

    def _documents_here():
        # Yield the documents of the collection `c` currently points at,
        # with their iRODS user metadata attached.
        files = [x[0] for x in c.getObjects()]
        files.sort()
        for f in self._processFiles(session, files):
            irodsFilePath = '{0}/{1}'.format(c.getCollName(), f.filename)
            md = {}
            for x in irods.getFileUserMetadata(self.cxn, irodsFilePath):
                md[x[0]] = icatValToPy(x[1], x[2])
            if md:
                f.metadata['iRODS'] = md
            yield f

    for f in _documents_here():
        yield f
    dirs = c.getSubCollections()
    while dirs:
        d = dirs.pop(0)
        # `d` is a '/'-joined path relative to the starting collection.
        parts = d.split('/')
        for dx in parts:
            c.openCollection(dx)
        for f in _documents_here():
            yield f
        ndirs = c.getSubCollections()
        dirs.extend(["%s/%s" % (d, x) for x in ndirs])
        # Climb back to the starting collection.
        for dx in parts:
            c.upCollection()
def find_documents(self, session, cache=0):
    """Read the single iRODS file behind ``self.stream`` as one document.

    Generator.  The stream's content and name become a ``StringDocument``;
    the stream's user metadata, converted with ``icatValToPy``, is stored
    under ``metadata['iRODS']`` when there is any.

    The stream is closed and the connection disconnected once reading is
    finished, whether it succeeded or raised.

    With ``cache == 0`` the document is yielded.  With ``cache == 2`` it is
    stored as ``self.documents`` and nothing is yielded.  Any other value
    neither yields nor stores it.  `session` is not used.
    """
    try:
        # read in single file
        doc = StringDocument(self.stream.read(),
                             filename=self.stream.getName())
        # attach any iRODS metadata
        umd = self.stream.getUserMetadata()
    finally:
        # Release both resources even if reading failed; disconnect even if
        # closing the stream itself raises.
        try:
            self.stream.close()
        finally:
            self.cxn.disconnect()
    md = {}
    for x in umd:
        md[x[0]] = icatValToPy(x[1], x[2])
    if md:
        doc.metadata['iRODS'] = md
    if cache == 0:
        yield doc
    elif cache == 2:
        self.documents = [doc]
def find_documents(self, session, cache=0):
    """Read the single iRODS file behind ``self.stream`` as one document.

    Generator.  The stream's content and name become a ``StringDocument``;
    the stream's user metadata, converted with ``icatValToPy``, is stored
    under ``metadata['iRODS']`` when there is any.

    The stream is closed and the connection disconnected once reading is
    finished, whether it succeeded or raised.

    With ``cache == 0`` the document is yielded.  With ``cache == 2`` it is
    stored as ``self.documents`` and nothing is yielded.  Any other value
    neither yields nor stores it.  `session` is not used.
    """
    try:
        # Read in single file
        doc = StringDocument(self.stream.read(),
                             filename=self.stream.getName())
        # Attach any iRODS metadata
        umd = self.stream.getUserMetadata()
    finally:
        # Release both resources even if reading failed; disconnect even if
        # closing the stream itself raises.
        try:
            self.stream.close()
        finally:
            self.cxn.disconnect()
    md = {}
    for x in umd:
        md[x[0]] = icatValToPy(x[1], x[2])
    if md:
        doc.metadata['iRODS'] = md
    if cache == 0:
        yield doc
    elif cache == 2:
        self.documents = [doc]
def _open(self, session):
    """Connect to iRODS if needed and position ``self.coll`` on this store.

    If ``self.cxn`` is None, a connection is made using ``self.host``,
    ``self.port``, ``self.user`` and ``self.zone``, each falling back to the
    iRODS environment when falsy.  Login uses ``self.passwd`` when set.  On
    success ``self.cxn``, ``self.env`` and ``self.resourceHash`` (resource
    name -> resource) are populated.

    If ``self.coll`` is already set, nothing further happens and None is
    returned.  Otherwise a collection is opened at the environment's home
    and moved into the configured 'irodsCollection' path (default
    'cheshire3').  When the 'createSubDir' setting is true it is also moved
    into the parent database's collection (only if the parent is a
    ``Database``) and then into this object's own collection.  Missing
    collections are created on the way.

    Finally the collection's user metadata is copied onto same-named
    attributes of this object for every name from
    ``self.get_metadataTypes(session)``, and ``meanWordCount`` and
    ``meanByteCount`` are recomputed (both 1 when ``totalItems`` is 0).

    Raises ``ConfigFileException`` when the login call returns a non-zero
    status.
    """
    if self.cxn is None:
        # connect to iRODS
        myEnv, status = irods.getRodsEnv()
        host = self.host or myEnv.getRodsHost()
        port = self.port or myEnv.getRodsPort()
        user = self.user or myEnv.getRodsUserName()
        zone = self.zone or myEnv.getRodsZone()
        # NOTE(review): the connection returned by rcConnect is not checked
        # before it is handed to the login call -- confirm what rcConnect
        # returns on failure.
        conn, errMsg = irods.rcConnect(host, port, user, zone)
        if self.passwd:
            status = irods.clientLoginWithPassword(conn, self.passwd)
        else:
            status = irods.clientLogin(conn)
        if status:
            raise ConfigFileException("Cannot connect to iRODS: (%s) %s"
                                      % (status, errMsg))
        self.cxn = conn
        self.env = myEnv
        resources = irods.getResources(self.cxn)
        self.resourceHash = {}
        for r in resources:
            self.resourceHash[r.getName()] = r

    if self.coll is not None:
        # already open, just skip
        return None

    c = irods.irodsCollection(self.cxn, self.env.getRodsHome())
    self.coll = c

    def _enter(name):
        # Move `c` into sub-collection `name`, creating it first if absent.
        if name not in c.getSubCollections():
            c.createCollection(name)
        c.openCollection(name)

    # move into cheshire3 section
    _enter(self.get_path(session, 'irodsCollection', 'cheshire3'))

    if self.get_setting(session, 'createSubDir', 1):
        # now look for object's storage area
        # maybe move into database collection
        if isinstance(self.parent, Database):
            _enter(self.parent.id)
        # move into store collection
        _enter(self.id)

    # Fetch user metadata
    myMetadata = self.get_metadataTypes(session)
    umd = c.getUserMetadata()
    umdHash = {}
    for u in umd:
        umdHash[u[0]] = icatValToPy(*u[1:])

    for md in myMetadata:
        try:
            setattr(self, md, umdHash[md])
        except KeyError:
            # hasn't been set yet
            pass

    if self.totalItems != 0:
        self.meanWordCount = self.totalWordCount / self.totalItems
        self.meanByteCount = self.totalByteCount / self.totalItems
    else:
        self.meanWordCount = 1
        self.meanByteCount = 1