def __init__(self, session, config, parent):
    """Load and compile the configured XSLT stylesheet.

    The stylesheet location comes from the 'xsltPath' path; a relative
    location is joined onto the 'defaultPath' path.  ``myTimeFn`` is
    registered as the ``now`` extension function in the Cheshire3 XSL
    function namespace before the stylesheet is parsed.

    The optional 'parameter' setting is a space separated list of
    ``name:value`` pairs.  Each value is wrapped in double quotes and
    stored in ``self.params``; ``self.params`` is None when the setting
    is absent or empty.

    Raises ConfigFileException when 'xsltPath' is not configured.
    """
    Transformer.__init__(self, session, config, parent)

    stylesheet = self.get_path(session, "xsltPath")
    if stylesheet is None:
        raise ConfigFileException("Missing path 'xsltPath' for "
                                  "{0}.".format(self.id))
    if not os.path.isabs(stylesheet):
        # Relative locations are resolved against the default path
        base = self.get_path(session, "defaultPath")
        stylesheet = os.path.join(base, stylesheet)

    functions = etree.FunctionNamespace(
        'http://www.cheshire3.org/ns/function/xsl/')
    functions['now'] = myTimeFn
    self.functionNamespace = functions

    self.parsedXslt = etree.parse(stylesheet)
    self.txr = etree.XSLT(self.parsedXslt)

    self.params = None
    spec = self.get_setting(session, 'parameter', None)
    if spec:
        self.params = {}
        for item in spec.split(' '):
            name, value = item.split(':')
            self.params[name] = '"%s"' % value
def __init__(self, session, config, parent):
    """Initialise the transformer from its configuration.

    Keeps a reference to the session and stores the results of the
    'extractor' and 'tokenizer' path lookups, a compiled regular
    expression matching runs of whitespace, and the 'eidXpath' setting.
    """
    Transformer.__init__(self, session, config, parent)
    self.session = session
    self.extractor = self.get_path(session, 'extractor')
    self.rfot = self.get_path(session, 'tokenizer')
    # Raw string: the pattern is unchanged, but the backslash no longer
    # relies on Python passing an unrecognised escape through untouched
    # (which newer interpreters warn about).
    self.regexp = re.compile(r'[\s]+')
    self.eidXpath = self.get_setting(session, 'eidXpath')
def __init__(self, session, config, parent):
    """Prepare an XSLT transform from the configured stylesheet.

    Resolves the 'xsltPath' path (joined onto 'defaultPath' unless it
    is already absolute), registers ``myTimeFn`` as ``now`` in the
    Cheshire3 XSL function namespace, then parses and compiles the
    stylesheet.

    ``self.params`` is None unless the 'parameter' setting is given,
    in which case it maps each name from the space separated
    ``name:value`` pairs to its value wrapped in double quotes.

    Raises ConfigFileException when 'xsltPath' is not configured.
    """
    Transformer.__init__(self, session, config, parent)

    xsltLocation = self.get_path(session, "xsltPath")
    if xsltLocation is None:
        raise ConfigFileException("Missing path 'xsltPath' for "
                                  "{0}.".format(self.id))
    if os.path.isabs(xsltLocation):
        resolved = xsltLocation
    else:
        resolved = os.path.join(
            self.get_path(session, "defaultPath"),
            xsltLocation
        )

    # Expose the extension function to stylesheets
    self.functionNamespace = etree.FunctionNamespace(
        'http://www.cheshire3.org/ns/function/xsl/'
    )
    self.functionNamespace['now'] = myTimeFn

    self.parsedXslt = etree.parse(resolved)
    self.txr = etree.XSLT(self.parsedXslt)

    self.params = None
    rawParams = self.get_setting(session, 'parameter', None)
    if rawParams:
        self.params = {}
        for token in rawParams.split(' '):
            (key, val) = token.split(':')
            self.params[key] = '"%s"' % val
def __init__(self, session, config, parent):
    """Initialise the transformer from its configuration.

    Keeps a reference to the session and stores the results of the
    "extractor" and "tokenizer" path lookups, a compiled regular
    expression matching runs of whitespace, and the "eidXpath" setting.
    """
    Transformer.__init__(self, session, config, parent)
    self.session = session
    self.extractor = self.get_path(session, "extractor")
    self.rfot = self.get_path(session, "tokenizer")
    # Raw string keeps the pattern identical while avoiding an invalid
    # escape sequence in an ordinary string literal.
    self.regexp = re.compile(r"[\s]+")
    self.eidXpath = self.get_setting(session, "eidXpath")
def __init__(self, session, config, parent):
    """Initialise the transformer and compile its regular expressions.

    Keeps a reference to the session, stores the results of the
    'tokenizer' and 'geniaNormalizer' path lookups, compiles three
    patterns used elsewhere in the class, and switches debugging off.
    """
    Transformer.__init__(self, session, config, parent)
    self.session = session
    self.rfot = self.get_path(session, 'tokenizer')
    self.tupg = self.get_path(session, 'geniaNormalizer')
    # A run of non-space characters ending in one of - / * = ( or a
    # backtick, followed by a single space.
    self.dashre = re.compile("""([^ ]+[-/*=(`] )""")
    # The two patterns below are raw strings: their values are unchanged,
    # but the backslashes no longer form invalid escape sequences in an
    # ordinary string literal.
    # Non-word characters, then one of - ( ' or a backtick, then optional
    # non-word characters up to the end of the string.
    self.enddashre = re.compile(r"""\W+[-('`]\W*$""")
    # Bracketed anchor markup: "[a ...[/a]" with optional spaces after
    # the opening bracket, matched non-greedily.
    self.urlre = re.compile(r'\[[ ]*a .*?\[/a\]')
    self.debug = 0
def __init__(self, session, config, parent):
    """Initialise the transformer and load its template text.

    Stores the results of the 'selector' and 'extractor' path lookups.
    The template is read from the file named by the 'templatePath' path
    (joined onto 'defaultPath') when that path is configured; otherwise
    it is taken from the 'template' setting.

    Raises ConfigFileException when neither 'templatePath' nor a
    non-empty 'template' setting is configured.
    """
    Transformer.__init__(self, session, config, parent)
    self.selector = self.get_path(session, 'selector')
    self.extractor = self.get_path(session, 'extractor')
    tmplPath = self.get_path(session, "templatePath")
    if tmplPath is not None:
        dfp = self.get_path(session, "defaultPath")
        path = os.path.join(dfp, tmplPath)
        with open(path, 'r') as fh:
            self.template = unicode(fh.read())
    else:
        tmpl = self.get_setting(session, 'template', '')
        if not tmpl:
            # Without this check a missing setting would be converted
            # to the literal text u'None' and used as the template.
            raise ConfigFileException("{0} requires either a "
                                      "'templatePath' path or a "
                                      "'template' setting."
                                      "".format(self.id))
        self.template = unicode(tmpl)
def __init__(self, session, config, parent):
    """Initialise label source, vector index and frequency thresholds.

    The label comes from the first of these that is configured: the
    'label' setting, the 'labelXPath' setting, or the object named by
    the 'labelXPathProcessor' setting (looked up on the session's
    database object).

    Each threshold setting is stored on an attribute, with -1 (counts)
    or -1.0 (percentages) as the default.  Index metadata and the
    database's ``totalRecords`` are cached, then ``self.clear`` is
    called.

    Raises ConfigFileException when no label source is configured.
    """
    Transformer.__init__(self, session, config, parent)

    # The original code used `db` in the labelXPathProcessor branch before
    # assigning it near the end of the method (UnboundLocalError).  It is
    # now fetched at first use and reused below.
    db = None
    self.label = self.get_setting(session, 'label', '')
    if not self.label:
        self.labelXPath = self.get_setting(session, 'labelXPath', '')
        if not self.labelXPath:
            lxpo = self.get_setting(session, 'labelXPathProcessor', '')
            if not lxpo:
                raise ConfigFileException("No label (class) source set for %s" % (self.id))
            db = session.server.get_object(session, session.database)
            # Will raise if not found
            self.labelXPathProcessor = db.get_object(session, lxpo)

    # And now get vector index
    self.vectorIndex = self.get_path(session, 'vectorIndex')

    # Settings stored on an attribute of the same name
    for name in ('minGlobalFreq', 'maxGlobalFreq',
                 'minGlobalOccs', 'maxGlobalOccs',
                 'minLocalFreq', 'maxLocalFreq'):
        setattr(self, name, self.get_setting(session, name, -1))
    for name in ('minGlobalFreqPct', 'maxGlobalFreqPct',
                 'minGlobalOccsPct', 'maxGlobalOccsPct',
                 'minPropGlobalFreqPct', 'maxPropGlobalFreqPct',
                 'minPropGlobalOccsPct', 'maxPropGlobalOccsPct',
                 'minLocalFreqPct', 'maxLocalFreqPct'):
        setattr(self, name, self.get_setting(session, name, -1.0))

    self.maxNGlobalFreq = self.get_setting(session, 'maxNGlobalFreq', -1)
    self.maxNGlobalOccs = self.get_setting(session, 'maxNGlobalOccs', -1)
    # NOTE(review): the next two lines overwrite the two attributes just
    # set with the '...Pct' settings.  This looks like a copy/paste slip
    # (probably meant maxNGlobalFreqPct / maxNGlobalOccsPct), but it is
    # preserved here because the code reading these attributes is not
    # visible -- confirm before changing.
    self.maxNGlobalFreq = self.get_setting(session, 'maxNGlobalFreqPct', -1.0)
    self.maxNGlobalOccs = self.get_setting(session, 'maxNGlobalOccsPct', -1.0)
    self.minNLocalFreq = self.get_setting(session, 'minNLocalFreq', -1)
    self.maxNLocalFreq = self.get_setting(session, 'maxNLocalFreq', -1)

    self.idxMetadata = self.vectorIndex.fetch_metadata(session)
    if db is None:
        db = session.server.get_object(session, session.database)
    self.totalRecords = db.totalRecords
    self.clear(session)
def __init__(self, session, config, parent):
    """Initialise the transformer and load its template text.

    Stores the results of the 'selector' and 'extractor' path lookups,
    then sets ``self.template`` from the file named by the
    'templatePath' path (joined onto 'defaultPath') or, when no such
    path is configured, from the 'template' setting.

    Raises ConfigFileException when neither source yields a template.
    """
    Transformer.__init__(self, session, config, parent)
    self.selector = self.get_path(session, 'selector')
    self.extractor = self.get_path(session, 'extractor')

    templateFile = self.get_path(session, "templatePath")
    if templateFile is None:
        # No file configured: fall back to the inline setting
        inline = self.get_setting(session, 'template', '')
        if not inline:
            raise ConfigFileException("{0} requires either a "
                                      "'templatePath' path or a "
                                      "'template' setting."
                                      "".format(self.id))
        self.template = unicode(inline)
    else:
        base = self.get_path(session, "defaultPath")
        with open(os.path.join(base, templateFile), 'r') as handle:
            self.template = unicode(handle.read())
def __init__(self, session, config, parent):
    """Initialise vector index, thresholds and term id filters.

    Threshold settings default to -1; 'proxElement', 'matchesOnly' and
    'stripMatch' default to 0.  The space separated words in the
    'stopwords' and 'reqdwords' settings are looked up in the vector
    index and their term ids stored in ``self.ignoreTermids`` and
    ``self.mandatoryTermids``.  Words whose lookup raises ValueError
    are skipped.  Finishes by calling ``self._clear``.
    """
    Transformer.__init__(self, session, config, parent)
    self.vectorIndex = self.get_path(session, 'vectorIndex')
    for name in ('minGlobalFreq', 'maxGlobalFreq',
                 'minGlobalOccs', 'maxGlobalOccs',
                 'minLocalFreq', 'maxLocalFreq'):
        setattr(self, name, self.get_setting(session, name, -1))
    self.prox = self.get_setting(session, 'proxElement', 0)
    self.matches = self.get_setting(session, 'matchesOnly', 0)
    self.stripMatch = self.get_setting(session, 'stripMatch', 0)

    def lookup_termids(settingName):
        # Resolve each non-empty word of the setting to its term id
        termids = []
        for word in self.get_setting(session, settingName, '').split(' '):
            if not word:
                continue
            try:
                (tid, _unused1, _unused2) = self.vectorIndex.fetch_term(
                    session, word, summary=True
                )
            except ValueError:
                # Term doesn't exist
                continue
            termids.append(tid)
        return termids

    self.ignoreTermids = lookup_termids('stopwords')
    self.mandatoryTermids = lookup_termids('reqdwords')
    self._clear(session)
def __init__(self, session, config, parent):
    """Initialise vector index, thresholds and term id filters.

    Reads the frequency/occurrence thresholds (default -1) and the
    'proxElement', 'matchesOnly' and 'stripMatch' settings (default 0).
    Term ids for the words listed in 'stopwords' go to
    ``self.ignoreTermids`` and those for 'reqdwords' go to
    ``self.mandatoryTermids``; a word is dropped when fetching it
    raises ValueError.  ``self._clear`` is called last.
    """
    Transformer.__init__(self, session, config, parent)
    self.vectorIndex = self.get_path(session, 'vectorIndex')

    self.minGlobalFreq = self.get_setting(session, 'minGlobalFreq', -1)
    self.maxGlobalFreq = self.get_setting(session, 'maxGlobalFreq', -1)
    self.minGlobalOccs = self.get_setting(session, 'minGlobalOccs', -1)
    self.maxGlobalOccs = self.get_setting(session, 'maxGlobalOccs', -1)
    self.minLocalFreq = self.get_setting(session, 'minLocalFreq', -1)
    self.maxLocalFreq = self.get_setting(session, 'maxLocalFreq', -1)

    self.prox = self.get_setting(session, 'proxElement', 0)
    self.matches = self.get_setting(session, 'matchesOnly', 0)
    self.stripMatch = self.get_setting(session, 'stripMatch', 0)

    # (setting holding the word list, attribute receiving the term ids)
    wordLists = (('stopwords', 'ignoreTermids'),
                 ('reqdwords', 'mandatoryTermids'))
    for settingName, attrName in wordLists:
        raw = self.get_setting(session, settingName, '')
        words = [w for w in raw.split(' ') if w]
        found = []
        for word in words:
            try:
                summary = self.vectorIndex.fetch_term(session, word,
                                                      summary=True)
                (termid, _ignored, _ignored2) = summary
                found.append(termid)
            except ValueError:
                # term doesn't exist
                pass
        setattr(self, attrName, found)

    self._clear(session)
def __init__(self, session, config, parent):
    """Configure the highlight element, its attributes and break list.

    Without a 'highlightTag' setting the tag is 'c3:highlight' and the
    attributes start with the c3 namespace declaration; with one, the
    configured tag is used and the attributes start empty.  The
    optional 'tagAttrList' setting holds space separated ``name="value"``
    items which are added to the attributes with the surrounding quote
    characters removed.  ``self.breakElements`` is the
    'breakElementsList' setting split on spaces.
    """
    Transformer.__init__(self, session, config, parent)

    configuredTag = self.get_setting(session, 'highlightTag', None)
    if configuredTag is not None:
        self.highlightTag = configuredTag
        self.attrs = {}
    else:
        self.highlightTag = 'c3:highlight'
        self.attrs = {'xmlns:c3': "http://www.cheshire3.org/schemas/highlight/"}

    attrSpec = self.get_setting(session, 'tagAttrList', None)
    if attrSpec is not None:
        for item in attrSpec.split(' '):
            parts = item.split('=', 1)
            # Drop the first and last character of the value (the "s)
            self.attrs[parts[0]] = parts[1][1:-1]

    breakSpec = self.get_setting(session, 'breakElementsList', '')
    self.breakElements = breakSpec.split(' ')
def __init__(self, session, config, parent):
    """Load and compile the configured XSLT stylesheet.

    The stylesheet location comes from the 'xsltPath' path.  An
    absolute location is used as given; a relative one is joined onto
    the 'defaultPath' path.  ``myTimeFn`` is registered as the ``now``
    extension function in the Cheshire3 XSL function namespace.

    The optional 'parameter' setting is a space separated list of
    ``name:value`` pairs; each value is wrapped in double quotes and
    stored in ``self.params`` (None when the setting is absent).

    Raises ConfigFileException when 'xsltPath' is not configured.
    """
    Transformer.__init__(self, session, config, parent)
    xfrPath = self.get_path(session, "xsltPath")
    if xfrPath is None:
        # Fail with a configuration error instead of an obscure error
        # from os.path.join further down.
        raise ConfigFileException("Missing path 'xsltPath' for "
                                  "{0}.".format(self.id))
    if os.path.isabs(xfrPath):
        # An absolute stylesheet location does not need a defaultPath
        path = xfrPath
    else:
        dfp = self.get_path(session, "defaultPath")
        path = os.path.join(dfp, xfrPath)
    ns = etree.FunctionNamespace('http://www.cheshire3.org/ns/function/xsl/')
    ns['now'] = myTimeFn
    self.functionNamespace = ns
    self.parsedXslt = etree.parse(path)
    self.txr = etree.XSLT(self.parsedXslt)
    self.params = None
    parameter = self.get_setting(session, 'parameter', None)
    if parameter:
        self.params = {}
        for pair in parameter.split(' '):
            (k, v) = pair.split(':')
            self.params[k] = '"%s"' % v
def __init__(self, session, config, parent):
    """Configure the highlight element, its attributes and break list.

    When 'highlightTag' is not set the tag defaults to 'c3:highlight'
    and the attributes start with an ``xmlns:c3`` declaration taken
    from ``self.HIGHLIGHT_NS``; otherwise the configured tag is used
    with no initial attributes.  Items in the optional 'tagAttrList'
    setting (space separated ``name="value"``) are added with the
    surrounding quote characters removed.  ``self.breakElements`` is
    the 'breakElementsList' setting split on spaces.
    """
    Transformer.__init__(self, session, config, parent)

    tag = self.get_setting(session, 'highlightTag', None)
    useDefault = tag is None
    self.highlightTag = 'c3:highlight' if useDefault else tag
    self.attrs = {'xmlns:c3': self.HIGHLIGHT_NS} if useDefault else {}

    extraAttrs = self.get_setting(session, 'tagAttrList', None)
    if extraAttrs is not None:
        for chunk in extraAttrs.split(' '):
            pieces = chunk.split('=', 1)
            name = pieces[0]
            # strip off "s
            value = pieces[1][1:-1]
            self.attrs[name] = value

    self.breakElements = self.get_setting(
        session, 'breakElementsList', ''
    ).split(' ')
def __init__(self, session, config, parent):
    """Load and compile the configured XSLT stylesheet.

    The stylesheet location comes from the 'xsltPath' path.  An
    absolute location is used as given; a relative one is joined onto
    the 'defaultPath' path.  ``myTimeFn`` is registered as the ``now``
    extension function in the Cheshire3 XSL function namespace.

    The optional 'parameter' setting is a space separated list of
    ``name:value`` pairs; each value is wrapped in double quotes and
    stored in ``self.params`` (None when the setting is absent).

    Raises ConfigFileException when 'xsltPath' is not configured.
    """
    Transformer.__init__(self, session, config, parent)
    xfrPath = self.get_path(session, "xsltPath")
    if xfrPath is None:
        # Report the missing configuration explicitly; os.path.join
        # would otherwise fail on the None value.
        raise ConfigFileException("Missing path 'xsltPath' for "
                                  "{0}.".format(self.id))
    if os.path.isabs(xfrPath):
        # Absolute locations are usable without a defaultPath
        path = xfrPath
    else:
        dfp = self.get_path(session, "defaultPath")
        path = os.path.join(dfp, xfrPath)
    ns = etree.FunctionNamespace(
        'http://www.cheshire3.org/ns/function/xsl/')
    ns['now'] = myTimeFn
    self.functionNamespace = ns
    self.parsedXslt = etree.parse(path)
    self.txr = etree.XSLT(self.parsedXslt)
    self.params = None
    parameter = self.get_setting(session, 'parameter', None)
    if parameter:
        self.params = {}
        for pair in parameter.split(' '):
            (k, v) = pair.split(':')
            self.params[k] = '"%s"' % v
def __init__(self, session, config, parent):
    """Create empty ``maps`` and ``tagset``, then run the base initialiser."""
    # NOTE(review): both attributes are assigned before the base
    # initialiser runs, presumably so that configuration handling inside
    # Transformer.__init__ can fill them in -- confirm before reordering.
    self.maps, self.tagset = [], ""
    Transformer.__init__(self, session, config, parent)
def __init__(self, session, config, parent):
    """Run the base initialiser and store the 'workflow' path lookup."""
    Transformer.__init__(self, session, config, parent)
    workflow = self.get_path(session, 'workflow')
    self.workflow = workflow
def __init__(self, session, config, parent):
    """Run the base initialiser and keep a reference to the session."""
    Transformer.__init__(self, session, config, parent)
    # Retained on the instance for use after construction
    self.session = session