Example #1
0
 def _handleLxmlFlow(self, node):
     """Generate workflow source lines for the element children of node.

     Every child element is dispatched on its local tag name (any
     '{namespace}' prefix is dropped). Structural elements emit a header
     line followed by the recursively generated lines of their own
     children indented by four spaces; other elements are delegated to
     the matching ``_handleLxml*`` method.

     Returns the list of generated source lines.

     Raises ConfigFileException when an ``assign`` element lacks its
     'to'/'from' attributes, or when the fallback handler for an
     otherwise unrecognised element cannot be found or fails.
     """
     # Elements that open an indented suite -> header line to emit
     suiteHeaders = {
         "try": "try:",
         "except": "except Exception as err:",
         "else": "else:",
     }
     # Elements that translate into one fixed statement
     fixedStatements = {
         "break": "break",
         "continue": "continue",
         "return": "return input",
         "raise": "raise",
     }
     lines = []
     for child in node.iterchildren(tag=etree.Element):
         local = child.tag[child.tag.find('}') + 1:]
         if local in suiteHeaders:
             lines.append(suiteHeaders[local])
             for inner in self._handleLxmlFlow(child):
                 lines.append("    " + inner)
         elif local in fixedStatements:
             lines.append(fixedStatements[local])
         elif local == "object":
             lines.extend(self._handleLxmlObject(child))
         elif local == "log":
             lines.extend(self._handleLxmlLog(child))
         elif local == "fork":
             lines.extend(self._handleLxmlFork(child))
         elif local == "assign":
             try:
                 source = child.attrib['from']
                 target = child.attrib['to']
             except:
                 raise ConfigFileException("Workflow element assign "
                                           "requires 'to' and 'from' "
                                           "attributes in %s" % self.id)
             lines.append("%s = %s" % (target, source))
         elif local == "for-each":
             lines.extend(self._handleForEach(child))
             body = self._handleLxmlFlow(child)
             if not body:
                 # An empty loop body still needs a statement
                 body = ["pass"]
             for inner in body:
                 lines.append("    " + inner)
         else:
             # Fall back to a handler named after the title-cased tag.
             # NOTE(review): the bare except also rewrites any error raised
             # inside the handler itself as "Unknown workflow element".
             try:
                 handler = getattr(self, "_handleLxml%s" % local.title())
                 lines.extend(handler(child))
             except:
                 raise ConfigFileException("Unknown workflow element: "
                                           "%s" % local)
     return lines
Example #2
0
 def __init__(self, session, node, parent):
     """Initialise OAI-PMH map state, then validate the configuration.

     The lookup tables and the contact list are created empty before
     delegating to ZeerexProtocolMap.__init__ (presumably the parent
     constructor fills them while reading the config -- confirm).
     Afterwards the base URL is derived and mandatory settings checked.

     Raises ConfigFileException if the base URL cannot be built from
     host, port and databaseName, if no 'oai_dc' metadataPrefix has been
     registered, or if the contact list is empty.
     """
     self.protocol = "http://www.openarchives.org/OAI/2.0/OAI-PMH"
     # metadataPrefix -> XML namespace
     self.recordNamespaces = {}
     # XML namespace -> schema location
     self.schemaLocations = {}
     # XML namespace -> Cheshire3 Transformer object
     self.transformerHash = {}
     self.contacts = []
     ZeerexProtocolMap.__init__(self, session, node, parent)

     # Validation: the base URL needs host, port and database name
     try:
         urlParts = (self.host, self.port, self.databaseName)
         self.baseURL = 'http://%s:%d/%s' % urlParts
     except:
         raise ConfigFileException(
             "Unable to derive baseURL from host, port, database")

     # Validation: metadataPrefix oai_dc is mandatory
     if 'oai_dc' not in self.recordNamespaces:
         raise ConfigFileException(
             "Schema configuration for mandatory metadataPrefix 'oai_dc' required in schemaInfo."
         )

     # Validation: Identify responses need at least one adminEmail
     if len(self.contacts) == 0:
         raise ConfigFileException(
             "Contact e-mail address of a database administrator required in databaseInfo."
         )
Example #3
0
 def _verifyDatabases(self, session):
     """Open the container, creating the Keyspace first when it is absent.

     If opening fails because the Keyspace does not exist, the Keyspace is
     defined through a connection to the 'system' keyspace and the open is
     attempted a second time. Any other failure of the first attempt is
     reported as a ConfigFileException.
     """
     try:
         self._openContainer(session)
     except pycassa.cassandra.ttypes.InvalidRequestException as err:
         if err.why != "Keyspace does not exist":
             raise ConfigFileException(
                 "Cannot connect to Cassandra: {0!r}".format(err.args))
         # Define the missing keyspace, then retry the open
         with pycassa.connect('system', servers=self.servers) as systemCxn:
             keyspaceDef = pycassa.cassandra.ttypes.KsDef(
                 self.keyspace,
                 strategy_class=('org.apache.cassandra.locator.'
                                 'RackUnawareStrategy'),
                 replication_factor=1,
                 cf_defs=[]
             )
             systemCxn.add_keyspace(keyspaceDef)
         self._openContainer(session)
     except Exception as err:
         raise ConfigFileException(
             "Cannot connect to Cassandra: {0!r}".format(err.args))
Example #4
0
    def process_record(self, session, record):
        """Collect metadata values from record for every configured selector.

        Each selector is a dict with a 'string' (name) and a 'type':

        - 'xpath' (old style): the record attribute of that name, else the
          current timestamp when the name is 'now', else ``None``.
        - 'attribute': the record attribute of that name; nothing is
          appended when the record lacks it.
        - 'function': only 'now' / 'now()' are defined (current timestamp).

        Returns a list whose entries are single-item lists (or ``None``).
        Raises ConfigFileException for an unknown function or selector type.
        """
        timestampFormat = "%Y-%m-%d %H:%M:%S"
        collected = []
        for source in self.sources:
            for selector in source:
                key = selector['string']
                kind = selector['type']
                if kind not in ('xpath', 'attribute', 'function'):
                    raise ConfigFileException("Unknown metadata selector type:"
                                              " %s" % kind)
                if kind == 'function':
                    if key not in ('now', 'now()'):
                        # No other functions are defined
                        raise ConfigFileException("Unknown function: "
                                                  "%s" % key)
                    collected.append([time.strftime(timestampFormat)])
                elif hasattr(record, key):
                    collected.append([getattr(record, key)])
                elif kind == 'xpath':
                    if key == 'now':
                        # e.g. for lastModified/created etc.
                        collected.append([time.strftime(timestampFormat)])
                    else:
                        collected.append(None)
        return collected
Example #5
0
 def _handleLxmlConfigNode(self, session, node):
     """Apply one lxml configuration element to this object.

     - An element whose tag is in ``self.simpleNodes`` is stored as an
       attribute named after the local (namespace-stripped) tag, holding
       the stripped, flattened text of the element.
     - A ``flags`` element is read as
       ``<flags> <flag> <object/> <value/> </flag> </flags>``; each object
       is recorded in ``self.flags`` under its flag value.
     - ``history`` and ``hostmask`` elements are recognised but ignored.

     Raises ConfigFileException for a flag value that is neither in
     ``self.allFlags`` nor prefixed with "c3fn", and for a ``flag``
     element that lacks its object or value child.
     """
     if node.tag in self.simpleNodes:
         setattr(self, node.tag[node.tag.find('}') + 1:],
                 flattenTexts(node).strip())
     elif node.tag in ["flags", '{%s}flags' % CONFIG_NS]:
         # Extract Rights info
         # <flags> <flag> <object> <value> </flag> </flags>
         for c in node.iterchildren(tag=etree.Element):
             if c.tag in ["flag", '{%s}flag' % CONFIG_NS]:
                 obj = None
                 flag = None
                 for c2 in c.iterchildren(tag=etree.Element):
                     if c2.tag in ["object", '{%s}object' % CONFIG_NS]:
                         obj = flattenTexts(c2).strip()
                     elif c2.tag in ["value", '{%s}value' % CONFIG_NS]:
                         flag = flattenTexts(c2).strip()
                         if (flag not in self.allFlags
                                 and flag[:4] != "c3fn"):
                             msg = "Unknown flag: %s" % flag
                             raise ConfigFileException(msg)
                 if obj is None or flag is None:
                     msg = ("Missing object or value element for flag for "
                            "user %s" % self.username)
                     # Pass the message along; it used to be built and
                     # then dropped, leaving an empty exception
                     raise ConfigFileException(msg)
                 # An empty object string registers the flag with no
                 # additional target
                 targets = self.flags.setdefault(flag, [])
                 if obj:
                     targets.append(obj)
     elif node.tag in ["history", '{%s}history' % CONFIG_NS]:
         # Extract user history (not implemented)
         pass
     elif node.tag in ["hostmask", '{%s}hostmask' % CONFIG_NS]:
         # Extract allowed hostmask list (not implemented)
         pass
Example #6
0
 def __init__(self, session, config, parent):
     """Initialise the selector and check it has one two-XPath source.

     Raises ConfigFileException when no source was configured, or when
     the first source does not hold exactly two entries.
     """
     self.sources = []
     SimpleSelector.__init__(self, session, config, parent)
     try:
         firstSource = self.sources[0]
     except IndexError:
         raise ConfigFileException("SpanXPathSelector '{0}' requires "
                                   "exactly 1 <source>".format(self.id))
     if len(firstSource) != 2:
         raise ConfigFileException("SpanXPathSelector '{0}' requires "
                                   "exactly two XPaths".format(self.id))
Example #7
0
    def __init__(self, session, server, config):
        """Configure the pre-parser and locate (or launch) a Multivalent server.

        When the 'mvServerPath' path is configured, a server is launched
        locally on 127.0.0.1 using a free port chosen by the operating
        system. Otherwise the 'host' and 'port' settings identify a remote
        server. A connection is attempted immediately; if that fails it is
        left to be retried at run time.

        Raises ConfigFileException when 'mvServerPath' does not exist on
        disk, when 'port' is not an integer, or when any of 'host', 'port'
        and 'returnPacking' is missing.
        """
        PreParser.__init__(self, session, server, config)
        self.source_re = re.compile("<open file '(.+?)', mode '.' at .*?>")

        # get settings from config
        # Az:  Check existence of settings and fail consistently rather than
        # die half way through execution
        self.mvServerPath = self.get_path(session, 'mvServerPath')
        if self.mvServerPath:
            # they've specified a local path to the server code
            # we should start a server locally with automatically generated
            # port, in local-only mode
            if not os.path.exists(self.mvServerPath):
                raise ConfigFileException('Path type="mvServerPath" does not exist')

            host = '127.0.0.1'
            # Binding to port 0 makes the OS pick a free port; this cannot
            # select port 0 or a privileged port and cannot spin forever
            s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                s.bind((host, 0))
                port = s.getsockname()[1]
            finally:
                s.close()
            # The handles are kept in locals so the pipes stay open for the
            # remainder of this constructor
            mvStdin, mvStdout = os.popen2('java -D64 -Djava.awt.headless=true -Xms40m -Xmx256m -jar %s %d -guess -out xml -link' % (self.mvServerPath, port), 't')

        else:
            # get settings for remote mv server
            host = self.get_setting(session, 'host')
            port = self.get_setting(session, 'port')
            # A missing port is reported by the combined check below
            if port and not port.isdigit():
                raise ConfigFileException("'port' setting for Multivalent preParser must be an integer.")

        pack = self.get_setting(session, 'returnPacking')
        if not (host and port and pack):
            raise ConfigFileException("'host', 'port' and 'returnPacking' settings must be set for Multivalent preParser '%s'" % self.id)

        self.mvHost = host
        self.mvPort = int(port)
        self.returnPacking = pack.lower()
        if (self.returnPacking == 'xml'):
            self.outMimeType = 'text/xml'
        else:
            self.outMimeType = 'text/plain'
        # initialise and connect to multivalent client
        self.mvClient = MultivalentClient()
        try:
            self.mvClient.connect(self.mvHost, self.mvPort)
        except Exception:
            # Deliberate best effort: (Try to connect at run time)
            pass
        atexit.register(self.close_mvServer)
Example #8
0
 def __init__(self, session, config, parent):
     """Create a stemmer for the configured language.

     The 'language' setting defaults to 'english'. Any failure while
     constructing the stemmer is reported as a ConfigFileException.
     """
     SimpleNormalizer.__init__(self, session, config, parent)
     language = self.get_setting(session, 'language', 'english')
     try:
         self.stemmer = Stemmer.Stemmer(language)
     except:
         message = "Unknown stemmer language: %s" % language
         raise ConfigFileException(message)
Example #9
0
    def _processPath(self, session, path):
        fp = self.get_path(session, path)
        if fp is None:
            raise ConfigFileException("No {0} file specified for object with id '{1}'.".format(path, self.id))

        if (not os.path.isabs(fp)):
            dfp = self.get_path(session, "defaultPath")
            fp = os.path.join(dfp, fp)
            
        try: fh = open(fp, 'r')
        except IOError as e:
            raise ConfigFileException("{0} for object with id '{1}'.".format(str(e), self.id))
            
        l = fh.readlines()
        fh.close()
        return l
Example #10
0
def makeObjectFromDom(session, topNode, parentObject):
    """Build the object described by a configuration node.

    The object type is read from the node's ``objectType`` child: via
    XPath (first without, then with the config namespace) when the node
    offers ``xpath``, otherwise by scanning ``childNodes``. When neither
    XPath matches, the serialized node is printed and the lookup fails.

    Returns whatever ``buildObject`` returns for the stripped type name.
    Raises ConfigFileException when no objectType could be found.
    """
    typeName = None
    try:
        typeName = topNode.xpath('./objectType/text()')[0]
    except IndexError:
        # The element may be namespaced
        try:
            matches = topNode.xpath('./c3:objectType/text()',
                                    namespaces={'c3': CONFIG_NS})
            typeName = matches[0]
        except IndexError:
            from lxml import etree
            print(etree.tostring(topNode))
    except AttributeError:
        # Not an lxml node: scan the children for the element instead
        for child in topNode.childNodes:
            isElement = child.nodeType == elementType
            if isElement and child.localName == "objectType":
                # Here's what we want to instantiate
                typeName = getFirstData(child)
                break
    if typeName is None:
        raise ConfigFileException('No objectType set in config file.')
    return buildObject(session, typeName.strip(), [topNode, parentObject])
Example #11
0
 def _walkZeeRex(self, session, node):
     if node.localName in ['indexInfo']:
         # Ignore
         return
     elif node.localName == 'serverInfo':
         self.version = node.getAttribute('version')
         for c in node.childNodes:
             self._walkZeeRex(session, c)
     elif node.localName == 'database':
         self.databaseName = str(flattenTexts(node))
     elif node.localName == 'host':
         self.host = str(flattenTexts(node))
     elif node.localName == 'port':
         self.port = int(flattenTexts(node))
     elif node.localName == 'title':
         self.title = str(flattenTexts(node))
     elif node.localName == 'contact':
         self.contacts.append(str(flattenTexts(node)))
     elif node.localName == 'schema':
         id = node.getAttribute('identifier')
         location = node.getAttribute('location')
         name = node.getAttribute('name')
         txrid = node.getAttributeNS(self.c3Namespace, 'transformer')
         if (txrid):
             txr = self.get_object(session, txrid)
             if (txr is None):
                 raise ConfigFileException(
                     "No transformer to map to for %s" % (txrid))
             self.transformerHash[id] = txr
         self.recordNamespaces[name] = id
         self.schemaLocations[id] = location
     else:
         for c in node.childNodes:
             if c.nodeType == elementType:
                 self._walkZeeRex(session, c)
Example #12
0
 def __init__(self, session, config, parent):
     """Read the 'sumType' setting (default 'md5') and check hashlib knows it.

     Raises ConfigFileException carrying hashlib's own message when the
     algorithm name is rejected.
     """
     PreParser.__init__(self, session, config, parent)
     self.sumType = self.get_setting(session, 'sumType', 'md5')
     # Probe once now so that a bad name fails at configuration time
     try:
         hashlib.new(self.sumType)
     except ValueError as err:
         reason = str(err)
         raise ConfigFileException(reason)
Example #13
0
 def _cacheIndexes(self, session):
     storeList = self.get_path(session, 'indexStoreList')
     if not storeList:
         indexStore = self.get_path(session, 'indexStore')
         if not indexStore:
             msg = ("No indexStore/indexStoreList associated with "
                    "database: %s" % self.id)
             raise ConfigFileException(msg)
         storeList = [indexStore.id]
     else:
         storeList = storeList.split(' ')
     for (id, dom) in self.indexConfigs.iteritems():
         # see if index should be built
         if hasattr(dom, 'childNodes'):
             for c in dom.childNodes:
                 if c.nodeType == 1 and c.localName == 'paths':
                     for c2 in c.childNodes:
                         if c2.nodeType == 1 and c2.localName == 'object':
                             istore = c2.getAttributeNS(None, 'ref')
                             if istore in storeList:
                                 o = self.get_object(session, id)
                                 self.indexes[id] = o
         else:
             for c in dom.iterchildren(tag=etree.Element):
                 if c.tag in ['paths', '{%s}paths' % CONFIG_NS]:
                     for c2 in c.iterchildren(tag=etree.Element):
                         if c2.tag in ['object', '{%s}object' % CONFIG_NS]:
                             istore = c2.attrib.get(
                                 'ref',
                                 c2.attrib.get('{%s}ref' % CONFIG_NS, ''))
                             if istore in storeList:
                                 o = self.get_object(session, id)
                                 self.indexes[id] = o
Example #14
0
    def __init__(self, session, node, parent):
        """Locate the tagger executable and start it as a subprocess.

        The directory comes from the 'executablePath' path or, when that
        is empty, from running ``which`` on the 'executable' name
        (default 'geniatagger'). The process is launched from that
        directory, with ``-nt`` appended unless the 'tokenize' setting is
        true, and this constructor blocks until the tagger reports on
        stderr that its named-entity models have finished loading.

        Raises ConfigFileException when no executable directory can be
        determined, or when the tagger closes stderr before reporting
        that it is ready.
        """
        self.unparsedOutput = self.get_setting(session, 'parseOutput', 0)
        tp = self.get_path(session, 'executablePath', '')
        exe = self.get_path(session, 'executable', 'geniatagger')
        if not tp:
            tp = getShellResult('which %s' % exe)
            tp = os.path.dirname(tp)
        if not tp:
            raise ConfigFileException("%s requires the path: "
                                      "executablePath" % self.id)
        o = os.getcwd()
        os.chdir(tp)
        try:
            if self.get_setting(session, 'tokenize', 0):
                cmd = exe
            else:
                cmd = "%s -nt" % exe
            self.pipe = Popen(cmd,
                              shell=True,
                              bufsize=1,
                              stdin=PIPE,
                              stdout=PIPE,
                              stderr=PIPE)

            l = ""
            while l != 'loading named_entity_models..done.\n':
                l = self.pipe.stderr.readline()
                if not l:
                    # EOF: the process ended (or closed stderr) before it
                    # was ready; without this check the loop never ends
                    raise ConfigFileException("%s could not start the "
                                              "tagger with command: %s"
                                              % (self.id, cmd))
        finally:
            # Always restore the caller's working directory
            os.chdir(o)
Example #15
0
    def _handleLocationNode(self, session, child):
        """Describe a location element as a dict.

        The result always holds 'string' (the first data of the element),
        'type' and 'maps'. The type is 'xpath' for ``xpath`` elements,
        otherwise the lower-cased 'type' attribute. For xpath locations
        every ``xmlns:prefix`` attribute is recorded in 'maps' and every
        other attribute is copied into the dict under its own name.

        Raises ConfigFileException when the type attribute cannot be read.
        """
        location = {'maps': {}, 'string': getFirstData(child), 'type': ''}

        if child.localName == 'xpath':
            location['type'] = 'xpath'
        else:
            try:
                location['type'] = child.getAttribute('type').lower()
            except:
                raise ConfigFileException("Location element in {0} must have "
                                          "'type' attribute".format(self.id))

        if location['type'] != 'xpath':
            return location

        for key in child.attributes.keys():
            # ConfigStore using 4Suite hands back tuple keys
            if type(key) == tuple:
                key = child.attributes[key].name
            if key.startswith("xmlns:"):
                prefix = key[6:]
                # Prefer the namespace-aware lookup, fall back to the plain
                # attribute
                uri = (child.getAttributeNS('http://www.w3.org/2000/xmlns/',
                                            prefix)
                       or child.getAttribute(key))
                location['maps'][prefix] = uri
            else:
                location[key] = child.getAttributeNS(None, key)
        return location
Example #16
0
 def __init__(self, session, node, parent):
     """Configure which entity types to extract and whether to keep POS.

     The whitespace-separated 'entityTypes' setting is matched
     case-insensitively by prefix: 'pe' selects PERSON, 'pl' or 'g'
     selects GPE, 'org' or 'co' selects ORGANIZATION. With no setting all
     three types are used.

     Raises MissingDependencyException when nltk is unavailable and
     ConfigFileException for an unrecognised entity type.
     """
     SimpleExtractor.__init__(self, session, node, parent)
     if nltk is None:
         raise MissingDependencyException(self.objectType, 'nltk')
     # Accepted prefixes -> entity label
     prefixLabels = (
         ('pe', 'PERSON'),
         (('pl', 'g'), 'GPE'),
         (('org', 'co'), 'ORGANIZATION'),
     )
     configured = self.get_setting(session, 'entityTypes')
     if not configured:
         # Default to all
         self.types = ['PERSON', 'GPE', 'ORGANIZATION']
     else:
         self.types = []
         for requested in configured.split():
             requested = requested.lower()
             for prefixes, label in prefixLabels:
                 if requested.startswith(prefixes):
                     self.types.append(label)
                     break
             else:
                 msg = ("Unknown entity type setting {0} on {1} {2}"
                        "".format(requested, self.__class__.__name__,
                                  self.id))
                 raise ConfigFileException(msg)
     # Should we keep the /POS tag or strip it
     self.keepPos = self.get_setting(session, 'pos', 0)
Example #17
0
    def __init__(self, session, config, parent):
        """Load and compile the stylesheet named by the 'xsltPath' path.

        A relative stylesheet path is resolved against "defaultPath". A
        ``now`` extension function is registered in the Cheshire3 XSL
        function namespace before the stylesheet is parsed. The optional
        'parameter' setting is a space-separated list of ``key:value``
        pairs, stored in ``self.params`` with each value wrapped in
        double quotes; without the setting ``self.params`` is ``None``.

        Raises ConfigFileException when 'xsltPath' is not configured.
        """
        Transformer.__init__(self, session, config, parent)
        stylesheet = self.get_path(session, "xsltPath")
        if stylesheet is None:
            raise ConfigFileException("Missing path 'xsltPath' for "
                                      "{0}.".format(self.id))

        if not os.path.isabs(stylesheet):
            base = self.get_path(session, "defaultPath")
            stylesheet = os.path.join(base, stylesheet)

        functions = etree.FunctionNamespace(
            'http://www.cheshire3.org/ns/function/xsl/')
        functions['now'] = myTimeFn
        self.functionNamespace = functions

        self.parsedXslt = etree.parse(stylesheet)
        self.txr = etree.XSLT(self.parsedXslt)

        self.params = None
        rawParams = self.get_setting(session, 'parameter', None)
        if rawParams:
            self.params = {}
            for item in rawParams.split(' '):
                (key, value) = item.split(':')
                self.params[key] = '"%s"' % value
Example #18
0
    def __init__(self, session, config, parent):
        """Read the 'minRules' and 'minItemsets' settings (default -1 each).

        Raises ConfigFileException when a positive minRules is combined
        with a confidence that is not positive.
        """
        Fimi1PreParser.__init__(self, session, config, parent)
        self.minRules = self.get_setting(session, 'minRules', -1)
        self.minFIS = self.get_setting(session, 'minItemsets', -1)

        # A rule minimum only makes sense alongside a confidence threshold
        if self.minRules > 0:
            if self.confidence <= 0:
                raise ConfigFileException("minRules setting not allowed "
                                          "without confidence setting on "
                                          "%s" % (self.id))
Example #19
0
 def __init__(self, session, config, parent):
     """Resolve every logger named in the 'loggerList' path.

     The path value is split on single spaces and each identifier is
     looked up through the parent's ``get_object``; the results are kept,
     in order, in ``self.loggers``.

     Raises ConfigFileException when 'loggerList' is not configured.
     """
     Logger.__init__(self, session, config, parent)
     names = self.get_path(session, 'loggerList')
     if names is None:
         raise ConfigFileException("Missing path 'loggerList' for "
                                   "{0}.".format(self.id))
     lookup = self.parent.get_object
     resolved = []
     for name in names.split(' '):
         resolved.append(lookup(session, name))
     self.loggers = resolved
Example #20
0
 def __init__(self, session, config, parent):
     """Compile the punctuation pattern and create the language stemmer.

     The 'language' setting defaults to 'english'. Any failure while
     constructing the stemmer is reported as a ConfigFileException.
     """
     SimpleNormalizer.__init__(self, session, config, parent)
     language = self.get_setting(session, 'language', 'english')
     punctuation = "((?<!s)'|[-.,]((?=\s)|$)|(^|(?<=\s))[-.,']|[~`!@+=\#\&\^*()\[\]{}\\\|\":;<>?/])"
     self.punctuationRe = re.compile(punctuation)
     try:
         self.stemmer = Stemmer.Stemmer(language)
     except:
         message = "Unknown stemmer language: %s" % language
         raise ConfigFileException(message)
Example #21
0
 def _handleLxmlObject(self, node):
     ref = node.attrib.get('ref', '')
     try:
         typ = node.attrib['type']
     except KeyError:
         raise ConfigFileException("Workflow element 'object' requires attribute 'type' in %s" % self.id)
     function = node.get('function', '')
     return self._handleAnonObject(ref, typ, function)
Example #22
0
 def _handleAnonObject(self, ref, typ, function):
     code = []
     if (ref):
         self.objrefs.add(ref)
         o = "self.objcache['%s']" % ref
     elif typ == 'database':
         o = "self.database"
     elif typ == 'input':
         o = "input"
     elif typ:
         code.append("obj = self.database.get_path(session, '%s')" % typ)
         o = "obj"
     else:
         raise ConfigFileException("Could not determine object")
     if not function:
         # Assume most common for object type
         try:
             function = self.fnHash[typ]
         except KeyError:
             raise ConfigFileException("No default function for "
                                       "objectType: %s" % typ)
     if (function in self.singleFunctions):
         code.append('%s.%s(session)' % (o, function))
     elif (function in self.singleInputFunctions):
         code.append('input = %s.%s(session)' % (o, function))
     elif (typ == 'index' and function == 'store_terms'):
         code.append('%s.store_terms(session, input, inRecord)' % o)
     elif typ == 'documentFactory' and function == 'load' and input is None:
         code.append('input = %s.load(session)' % o)
     elif typ == 'documentStore':
         # Check for normalizer output
         code.append('if type(input) == {}.__class__:')
         code.append('    for k in input.keys():')
         code.append('        %s.%s(session, k)' % (o, function))
         code.append('else:')
         code.append('    %s.%s(session, input)' % (o, function))
     elif typ == 'xpathProcessor':
         code.append('global inRecord')
         code.append('inRecord = input')
         code.append('input = %s.process_record(session, input)' % o)
     else:
         code.append('result = %s.%s(session, input)' % (o, function))
         code.append('if result is not None:')
         code.append('    input = result')
     return code
Example #23
0
 def __init__(self, session, config, parent):
     """Read the paths and settings needed to run the external tool.

     'filePath' is mandatory. 'javaPath' defaults to 'java' and the
     'memory' setting defaults to 1000.

     Raises ConfigFileException when 'filePath' is not configured.
     """
     ARMPreParser.__init__(self, session, config, parent)
     # Check we know where TFP is etc
     location = self.get_path(session, 'filePath', None)
     self.filePath = location
     if not location:
         raise ConfigFileException("%s requires the path: filePath"
                                   % self.id)
     self.java = self.get_path(session, 'javaPath', 'java')
     self.memory = self.get_setting(session, 'memory', 1000)
Example #24
0
 def resolvePrefix(self, name):
     """Resolve a context-set prefix to its identifier.

     Known prefixes come from ``self.prefixes``. An empty name resolves
     the default context set: through ``self.prefixes`` when it is itself
     a known prefix, otherwise it is returned unchanged. The prefix 'c3'
     always maps to the internal Cheshire3 context set.

     Raises ConfigFileException when an empty name is given but no
     default context set exists, or when the prefix is unknown.
     """
     known = self.prefixes
     if name in known:
         return known[name]
     if not name:
         # Look for default
         if not hasattr(self, 'defaultContextSet'):
             raise ConfigFileException('Zeerex does not have default '
                                       'context set.')
         fallback = self.defaultContextSet
         if fallback in self.prefixes:
             return self.prefixes[fallback]
         return fallback
     if name == 'c3':
         return 'http://www.cheshire3.org/cql-context-set/internal'
     # YYY: Should step up to other config objects?
     raise ConfigFileException("Unknown prefix: %s" % (name))
Example #25
0
 def __init__(self, session, config, parent):
     """Read the 'char' and 'keep' settings and compile the 'regexp' setting.

     'char' defaults to an empty string and 'keep' to 0.

     Raises ConfigFileException when no 'regexp' setting is present.
     """
     SimpleNormalizer.__init__(self, session, config, parent)
     self.char = self.get_setting(session, 'char', '')
     self.keep = self.get_setting(session, 'keep', 0)
     pattern = self.get_setting(session, 'regexp')
     if not pattern:
         raise ConfigFileException('Missing regexp setting for %s.' % (self.id))
     self.regexp = re.compile(pattern)
Example #26
0
 def _handleFlow(self, node):
     code = []
     for c in node.childNodes:
         if c.nodeType == elementType:
             n = c.localName
             if n == "try":
                 code.append("try:")
                 sub = self._handleFlow(c)
                 for s in sub:
                     code.append("    " + s)
             elif n == "except":
                 code.append("except Exception as err:")
                 sub = self._handleFlow(c)
                 for s in sub:
                     code.append("    " + s)
             elif n == "else":
                 code.append("else:")
                 sub = self._handleFlow(c)
                 for s in sub:
                     code.append("    " + s)
             elif n == "break":
                 code.append("break")
             elif n == "continue":
                 code.append("continue")
             elif n == "return":
                 code.append("return input")
             elif n == "raise":
                 code.append("raise")
             elif n == "assign":
                 fro = c.getAttributeNS(None, 'from')
                 to = c.getAttributeNS(None, 'to')
                 code.append("%s = %s" % (to, fro))
             elif n == "for-each":
                 fcode = self._handleForEach(c)
                 code.extend(fcode)
                 sub = self._handleFlow(c)
                 if sub:
                     for s in sub:
                         code.append("    " + s)
                 else:
                     code.append("    pass")
             elif n == "object":
                 code.extend(self._handleObject(c))
             elif n == "log":
                 code.extend(self._handleLog(c))
             elif n == "fork":
                 code.extend(self._handleFork(c))
             else:
                 try:
                     name = n.title()
                     fn = getattr(self, "_handle%s" % name)
                     code.extend(fn(c))
                 except:
                     raise ConfigFileException("Unknown workflow element: "
                                               "%s" % n)
     return code
Example #27
0
    def __init__(self, session, stream):
        """Connect to iRODS and open the collection path named by stream.

        ``stream`` is either an ``irods://`` / ``rods://`` URL, from which
        the connection parameters are parsed, or a collection path, in
        which case the parameters come from the iRODS environment. A path
        that starts with the user's home collection is made relative to
        it, and each path component is then opened in turn.

        Raises MissingDependencyException when irods is unavailable,
        ConfigFileException when login fails, and IOError when a path
        component cannot be opened and is not (as the final component) an
        object of the current collection.
        """
        # Check for dependency
        if irods is None:
            raise MissingDependencyException(
                '{0.__module__}.{0.__class__.__name__}'.format(self),
                'irods (PyRods)'
            )
        # Check for URL
        if stream.startswith(('irods://', 'rods://')):
            myEnv = parse_irodsUrl(stream)
            stream = myEnv.relpath
        else:
            # Get parameters from env
            status, myEnv = irods.getRodsEnv()
        # The environment object exposes either accessor methods or plain
        # attributes; try the methods first
        try:
            host = myEnv.getRodsHost()
            port = myEnv.getRodsPort()
            username = myEnv.getRodsUserName()
            zone = myEnv.getRodsZone()
            home = myEnv.getRodsHome()
        except AttributeError:
            host = myEnv.rodsHost
            port = myEnv.rodsPort
            username = myEnv.rodsUserName
            zone = myEnv.rodsZone
            home = myEnv.rodsHome
        conn, errMsg = irods.rcConnect(host, port, username, zone)
        status = irods.clientLogin(conn)
        if status:
            raise ConfigFileException("Cannot connect to iRODS: ({0}) {1}"
                                      "".format(status, errMsg)
                                      )

        c = irods.irodsCollection(conn)
        self.cxn = conn
        self.coll = c
        instream = stream
        # Check if abs path to home dir
        if stream.startswith(home):
            stream = stream[len(home):]
            # startswith is safe when nothing is left after the home
            # prefix; indexing stream[0] raised IndexError in that case
            if stream.startswith("/"):
                stream = stream[1:]
        colls = stream.split('/')
        for i, cln in enumerate(colls):
            exit_status = c.openCollection(cln)
            if exit_status < 0:
                # Only the last component may fail to open, and then only
                # if it is an object in the current collection
                if (
                    (i < len(colls) - 1) or
                    (cln not in [obj[0] for obj in c.getObjects()])
                ):
                    raise IOError("When opening {0}: {1} does not exists in "
                                  "collection {2}".format(instream,
                                                          cln,
                                                          c.getCollName()
                                                          )
                                  )
Example #28
0
    def _open(self, session):

        if self.cxn == None:
            # connect to iRODS
            myEnv, status = irods.getRodsEnv()
            host = self.host if self.host else myEnv.getRodsHost()
            port = self.port if self.port else myEnv.getRodsPort()
            user = self.user if self.user else myEnv.getRodsUserName()
            zone = self.zone if self.zone else myEnv.getRodsZone()
         
            conn, errMsg = irods.rcConnect(host, port, user, zone) 
            if self.passwd:
                status = irods.clientLoginWithPassword(conn, self.passwd)
            else:
                status = irods.clientLogin(conn)

            if status:
                raise ConfigFileException("Cannot connect to iRODS: (%s) %s" % (status, errMsg.getMsg()))
            self.cxn = conn
            self.env = myEnv

            resources = irods.getResources(self.cxn)
            self.resourceHash = {}
            for r in resources:
                self.resourceHash[r.getName()] = r

            
        if self.coll != None:
            # already open, just skip
            return None

        c = irods.irodsCollection(self.cxn, self.env.getRodsHome())
        self.coll = c

        # move into cheshire3 section
        path = self.get_path(session, 'irodsCollection', 'cheshire3')
        dirs = c.getSubCollections()
        if not path in dirs:
            c.createCollection(path)
        c.openCollection(path)


        if self.get_setting(session, 'createSubDir', 1):
            # now look for object's storage area
            # maybe move into database collection
            if (isinstance(self.parent, Database)):
                sc = self.parent.id
                dirs = c.getSubCollections()
                if not sc in dirs:
                    c.createCollection(sc)
                c.openCollection(sc)
            # move into store collection
            dirs = c.getSubCollections()
            if not self.id in dirs:
                c.createCollection(self.id)
            c.openCollection(self.id)
Example #29
0
 def _connect(self, session):
     """Yield an open Postgres connection to ``self.database``.

     After the consumer resumes this generator normally, pending
     transactions are committed. The connection is closed in every case,
     including when the consumer raises at the yield point, so a failed
     unit of work no longer leaks the connection (and is not committed).

     Raises ConfigFileException when the connection cannot be opened.
     """
     try:
         cxn = psycopg2.connect("dbname={0}".format(self.database))
     except psycopg2.OperationalError as e:
         # Wrap args in a tuple: formatting with the bare args tuple
         # raises TypeError unless it holds exactly one item
         raise ConfigFileException("Cannot connect to Postgres: %r" %
                                   (e.args,))
     try:
         yield cxn
         # Commit transactions
         cxn.commit()
     finally:
         cxn.close()
Example #30
0
        def begin_indexing(self, session, index):
            """Prepare temporary storage before indexing starts.

            Without ``tempChunks`` this defers entirely to
            BdbIndexStore.begin_indexing. Otherwise the 'tempPath' path is
            resolved (relative to 'defaultPath' when not absolute),
            remembered in ``self.tempPath`` and created if missing, and an
            empty file table is registered for the index.

            Raises ConfigFileException when the temp path cannot be
            created, or exists but is not a directory.
            """
            if not self.tempChunks:
                return BdbIndexStore.begin_indexing(self, session, index)

            tempDir = self.get_path(session, 'tempPath')
            if not os.path.isabs(tempDir):
                base = self.get_path(session, 'defaultPath')
                tempDir = os.path.join(base, tempDir)
            self.tempPath = tempDir

            if os.path.exists(tempDir):
                if not os.path.isdir(tempDir):
                    raise ConfigFileException('TempPath is not a directory.')
            else:
                try:
                    os.mkdir(tempDir)
                except:
                    raise ConfigFileException(
                        'TempPath does not exist and is not creatable.')

            # Make temp files on demand, in hash
            self.outFiles[index] = {}