def load_form(xform, instance=None, extensions=None, preload_data=None):
    """Parse an xform string into an initialized JavaRosa FormDef.

    Parameters
    ----------
    xform : string
        XML serialization of the form definition.
    instance : string or None
        Optional XML instance to load into the parsed form.
    extensions : list or None
        Extension modules forwarded to the custom handlers.
    preload_data : dict or None
        Preload values forwarded to the custom handlers.

    Returns the parsed FormDef, initialized as a new entry when no
    instance was supplied.
    """
    # Fix: the defaults were the mutable literals [] and {}, which are
    # shared across calls in Python; use None sentinels instead.
    if extensions is None:
        extensions = []
    if preload_data is None:
        preload_data = {}
    form = XFormParser(StringReader(xform)).parse()
    if instance is not None:
        XFormParser.loadXmlInstance(form, StringReader(instance))
    customhandlers.attach_handlers(form, preload_data, extensions)
    # newInstance=True only when we are not loading an existing instance
    form.initialize(instance is None)
    return form
def testPayloadCopy(self):
    """Verify payloads twice, over a fresh tokenizer each time."""
    text = "how now brown cow"
    # Two identical passes confirm the setter behaves the same on a
    # freshly constructed stream.
    for _ in range(2):
        stream = WhitespaceTokenizer(Version.LUCENE_CURRENT, StringReader(text))
        stream = PayloadSetter(stream)
        self._verifyPayload(stream)
def testPayloadCopy(self):
    """Verify payloads twice, over a fresh tokenizer each time."""
    text = "how now brown cow"
    # Two identical passes confirm the setter behaves the same on a
    # freshly constructed stream.
    for _ in range(2):
        tokenizer = WhitespaceTokenizer()
        tokenizer.setReader(StringReader(text))
        self._verifyPayload(PayloadSetter(tokenizer))
def load_form(xform, instance=None, extensions=None, session_data=None, api_auth=None, form_context=None, uses_sql_backend=False):
    """Returns an org.javarosa.core.model.FormDef

    Parameters
    ----------
    xform : string
        String representation of an xform
    instance : string or None
        Optional XML instance to load into the parsed form.
    session_data : dictionary or None
        Session context; keys read here: 'is_editing', 'function_context',
        'preloaders', 'additional_filters', 'case_id'.
    form_context : dictionary
        A hash that contains optional context for the form. Supported
        parameters are: 'all_case_ids' and 'case_model'. The XFormPlayer
        uses the context to avoid making redundant calls to CommcareHQ.
    """
    extensions = extensions or []
    session_data = session_data or {}
    is_editing = session_data.get("is_editing", False)
    form = XFormParser(StringReader(xform)).parse()
    if instance is not None:
        XFormParser(None).loadXmlInstance(form, StringReader(instance))
    # retrieve preloaders out of session_data (for backwards compatibility)
    customhandlers.attach_handlers(
        form,
        extensions,
        context=session_data.get('function_context', {}),
        preload_data=session_data.get('preloaders', {})
    )
    try:
        # NOTE(review): when 'additional_filters' is absent this updates a
        # throwaway default dict and is a no-op — presumably callers always
        # supply the key; confirm before relying on 'use_cache' being set.
        session_data.get('additional_filters', {}).update({
            'use_cache': 'true',
            'hsph_hack': session_data.get('case_id', None)
        })
        form.initialize(instance is None, is_editing,
                        CCInstances(session_data, api_auth,
                                    form_context=form_context,
                                    uses_sqlite=uses_sql_backend))
    except CaseNotFound:
        # Touchforms repeatedly makes a call to HQ to get all the case ids in its universe. We can optimize
        # this by caching that call to HQ. However, when someone adds a case to that case list, we want to ensure
        # that that case appears in the universe of cases. Therefore we first attempt to use the cached version
        # of the case id list, and in the event that we cannot find a case, we try again, but do not use the cache.
        session_data.get('additional_filters', {}).update({'use_cache': 'false'})
        form.initialize(instance is None, is_editing,
                        CCInstances(session_data, api_auth,
                                    form_context=form_context,
                                    uses_sqlite=uses_sql_backend))
    return form
def load_form(xform, instance=None, extensions=None, session_data=None, api_auth=None):
    """Parse an xform into an initialized FormDef backed by CommCare instances.

    Parameters
    ----------
    xform : string
        XML serialization of the form definition.
    instance : string or None
        Optional XML instance to load into the parsed form.
    extensions : list or None
        Extension modules forwarded to the custom handlers.
    session_data : dict or None
        Session context; 'preloaders' is read for backwards compatibility.
    api_auth : object or None
        Credentials passed through to CCInstances.
    """
    # Fix: mutable default arguments ([] and {}) are shared across calls;
    # use None sentinels and create fresh containers per call.
    if extensions is None:
        extensions = []
    if session_data is None:
        session_data = {}
    form = XFormParser(StringReader(xform)).parse()
    if instance is not None:
        XFormParser.loadXmlInstance(form, StringReader(instance))
    # retrieve preloaders out of session_data (for backwards compatibility)
    customhandlers.attach_handlers(form, extensions, session_data.get('preloaders', {}))
    form.initialize(instance is None, CCInstances(session_data, api_auth))
    return form
def parseServerNamesFromConfig(self, configContent):
    """Collect iAS server instance names from XML config content.

    Returns a list of names; empty or unparsable content yields [].
    """
    names = []
    if not configContent:
        return names
    try:
        document = SAXBuilder(0).build(StringReader(configContent))
        managers = document.getRootElement().getChildren().iterator()
        while managers.hasNext():
            manager = managers.next()
            if manager.getName() != 'process-manager':
                continue
            instances = manager.getChildren().iterator()
            while instances.hasNext():
                candidate = instances.next()
                if candidate.getName() == 'ias-instance':
                    # fall back from 'name' to 'id' to a generic label
                    names.append(candidate.getAttributeValue('name')
                                 or candidate.getAttributeValue('id')
                                 or 'Default Server')
    except:
        logger.error('Failed to parse iAS config file.')
        logger.debugException('')
    return names
def dependencyDistanceStats(self, parsed_essay): # converts from parse text to a Java Tree object needed by the Stnaformd Parser trees_text = '(' + parsed_essay + ')' trees_reader = PennTreeReader(StringReader(trees_text), LabeledScoredTreeFactory()) tree = trees_reader.readTree() if DEBUG: print "Reconstructed parse:\n", tree.toString(), "\n" # process sentences seperately; find sentences by matching "ROOT" nodes in the parse tree sentence_pattern = TregexPattern.compile("ROOT") tree_sentences = sentence_pattern.matcher(tree) dists_sum = 0 dists_num = 0 while tree_sentences.find(): # for each sentence... tree_sentence = tree_sentences.getMatch() if DEBUG: print "Next sentence found in tree:" print tree_sentence.toString() sdd = self.sentenceDependencyDistance(tree_sentence) dists_sum += sdd dists_num += 1 if DEBUG: print "Sentence dependency distance:", sdd, "\n" dists_avg = self.calcDistsAvg(dists_sum, dists_num) if DEBUG: print "<<Essay>> dependency distance:", dists_avg, "\n" v = {'dependencyDistance': dists_avg} return v
def get_db_datasources(self, content):
    """Parse connectionStrings XML and return the validated DB data sources."""
    from NTCMD_IIS import NamedDbDataSource, DbDataSource
    dataSources = []
    if content:
        try:
            root = SAXBuilder(0).build(StringReader(content)).getRootElement()
            sections = root.getChildren('connectionStrings')
            if sections:
                for section in sections:
                    for entry in section.getChildren('add'):
                        connection = entry.getAttributeValue('connectionString')
                        if not connection:
                            continue
                        # A DSN-style string names an ODBC source; otherwise
                        # the raw connection string defines the source.
                        dsnMatch = re.search(r"dsn\s*=\s*([a-zA-Z_0-9]+);?.*", connection, re.I)
                        if dsnMatch:
                            source = NamedDbDataSource(dsnMatch.group(1))
                        else:
                            source = DbDataSource(connection)
                        if source.isValidDataSource():
                            dataSources.append(source)
                        else:
                            logger.debug('DB Source did not validate')
        except:
            logger.warnException('Failed getting connection info.')
    return dataSources
def buscar(self, searcher, analyzer, palabra): global listanombres, listarutas query = QueryParser("contents", analyzer).parse(palabra) scoreDocs = searcher.search(query, 50).scoreDocs listanombres = [] listarutas = [] #PARA HIGHLIGHT print "%s total matching documents." % len(scoreDocs) HighlightFormatter = SimpleHTMLFormatter() highlighter = Highlighter(HighlightFormatter, QueryScorer(query)) for scoreDoc in scoreDocs: doc = searcher.doc(scoreDoc.doc) ### text = doc.get("contents") ts = analyzer.tokenStream("contents", StringReader(text)) print doc.get("path") #print highlighter.getBestFragments(ts, text, 3, "...") print "" ### # print 'path:', doc.get("path"), 'name:', doc.get("name") listanombres.append([ doc.get("name"), doc.get("path"), highlighter.getBestFragments(ts, text, 3, "...") ])
def parsePowershellOutputXml(outputXml):
    """Parse powershell CLIXML output into a list of CmdletResultItem objects.

    Bug fix: the accumulator was re-created inside the per-document loop,
    so only items from the LAST XML document were returned (and empty
    input raised NameError at the return). It is now initialized once,
    before the loop, and accumulates across all documents.
    """
    resultItems = []
    builder = SAXBuilder(0)
    for xml in getXmls(outputXml):
        objsElement = builder.build(StringReader(xml)).getRootElement()
        namespace = objsElement.getNamespace()
        for obj in objsElement.getChildren("Obj", namespace):
            # only the "Output" stream carries cmdlet result objects
            if obj.getAttributeValue("S") != "Output":
                continue
            ms = obj.getChild("MS", namespace)
            resultItem = CmdletResultItem()
            if ms:
                for property in ms.getChildren():
                    # "TN" children carry type names, not data properties
                    if property.getName() != "TN":
                        setattr(resultItem, property.getAttributeValue('N'),
                                property.getText())
            resultItems.append(resultItem)
    return resultItems
def _checkToken(self, transform, input, expected):
    """Assert the ICU transform maps `input` to the single token `expected`."""
    from lucene.ICUTransformFilter import ICUTransformFilter
    source = KeywordTokenizer()
    source.setReader(StringReader(input))
    filtered = ICUTransformFilter(source, transform)
    self._assertTokenStreamContents(filtered, [expected])
def search(self, q, page=1, duplicates=False):
    """Run a paged search; returns (totalPages, list of doc dicts with highlights)."""
    query = self.parser.parse(q)
    if not duplicates:
        query = self.addDuplicatesQuery(query)
    perPage = 10
    start = (page - 1) * perPage
    collector = TopScoreDocCollector.create(1000, True)
    self.searcher.search(query, collector)
    highlighter = Highlighter(QueryScorer(query))
    highlighter.setTextFragmenter(SimpleFragmenter(40))
    docs = []
    for scoreDoc in collector.topDocs(start, perPage).scoreDocs:
        hit = self.searcher.doc(scoreDoc.doc)
        stream = self.analyzer.tokenStream(
            "contents", StringReader(hit['contents']))
        snippet = highlighter.getBestFragments(stream, hit['contents'], 3, "...")
        docs.append({
            'title': hit['title'],
            'url': hit['url'],
            'duplicate': hit['duplicate'],
            'highlight': snippet
        })
    # searcher is consumed once per request in this app; drop the reference
    del self.searcher
    # float() keeps the ceil division exact under Python 2 integer division
    totalPages = int(math.ceil(collector.getTotalHits() / float(perPage)))
    return totalPages, docs
def disVMKernel(hostOsh, client, Framework=None, langBund=None):
    """Discover VMFS filesystems on an ESX host and build the disk topology."""
    topologyBuilder = DiskTopologyBuilder(hostOsh, client)
    xml = client.execCmd(
        'esxcfg-info -F xml | sed -n \'/<vmfs-filesystems>/,/<\/vmfs-filesystems>/p\' | sed -n \'1,/<\/vmfs-filesystems>/p\''
    )
    # Strip any debug noise the command sometimes adds around the XML.
    xml = xml[xml.find('<'):xml.rfind('>') + 1]
    document = SAXBuilder(0).build(StringReader(xml))
    for vm_filesystem in document.getRootElement().getChildren('vm-filesystem'):
        mountPoint = ''
        size = ''
        fileSystem = ''
        usage = None
        for value in vm_filesystem.getChildren('value'):
            attributeName = value.getAttributeValue('name')
            if attributeName == 'console-path':
                mountPoint = value.getText()
            elif attributeName == 'size':
                size = value.getText()
            elif attributeName == 'usage':
                usage = value.getText()
        # the backing device lives under extents/disk-lun-partition
        partitionValues = vm_filesystem.getChild('extents').getChild(
            'disk-lun-partition').getChildren('value')
        for value in partitionValues:
            if value.getAttributeValue('name') == 'console-device':
                fileSystem = value.getText()
        topologyBuilder.handleDiskRow(fileSystem, mountPoint, size, usage)
    return topologyBuilder.getResultVector()
def register_script(self):
    """Register the pig script after substituting its parameters.

    raises: IOException
        If a temp file containing the pig script could not be created.
    raises: ParseException
        If the pig script could not have all its variables substituted.

    todo: Refactor the call paths into this method — it currently runs
    twice per assert because every consumer assumes nobody else has
    invoked it (assertOutput() calls get_alias(), and both call here).
    """
    scriptIn = BufferedReader(StringReader(self.orig_pig_code))
    scriptOut = StringWriter()
    # 50 is the preprocessor's parameter limit; its origin is unclear.
    preprocessor = ParameterSubstitutionPreprocessor(50)
    preprocessor.genSubstitutedFile(scriptIn, scriptOut, self.args, self.arg_files)
    tempFile = File.createTempFile("tmp", "pigunit")
    writer = PrintWriter(tempFile)
    writer.println(scriptOut.toString())
    writer.close()
    substitutedPath = tempFile.getCanonicalPath()
    self._temp_pig_script = substitutedPath
    self.pig.registerScript(substitutedPath, self.alias_overrides)
def parseTextToParseTreeObj(self, parsed_essay):
    """Rebuild a Java Tree object from the textual parse of an essay."""
    # Wrap in parens so PennTreeReader sees a single rooted tree, as
    # required by the Stanford parser & Tregex utilities.
    reader = PennTreeReader(StringReader('(' + parsed_essay + ')'),
                            LabeledScoredTreeFactory())
    return reader.readTree()
def disVMKernel(hostId, shell, Framework=None, langBund=None):
    '''Discover physical memory on VMKernel
    str, Shell, Framework, Properties -> oshVector
    @raise ValueError: memory size is not a digit
    @command: esxcfg-info -F xml | sed -n \'/<memory-info>/,/<\/memory-info>/p\'
    '''
    resVec = ObjectStateHolderVector()
    hostOsh = modeling.createOshByCmdbIdString('host', hostId)
    xml = shell.execCmd(
        'esxcfg-info -F xml | sed -n \'/<memory-info>/,/<\/memory-info>/p\' | sed -n \'1,/<\/memory-info>/p\''
    )
    # Strip any debug noise around the XML payload before parsing.
    xml = xml[xml.find('<'):xml.rfind('>') + 1]
    document = SAXBuilder(0).build(StringReader(xml))
    statsElement = document.getRootElement().getChild('aux-source-memory-stats')
    for value in statsElement.getChildren('value'):
        if value.getAttributeValue('name') == 'physical-memory-est.':
            # int() raises ValueError for a non-numeric reading, per contract
            memorySizeInKilobytes = int(value.getText())
            memory.report(resVec, hostOsh, memorySizeInKilobytes)
    #TODO: Implement swap discovery for vmkernel
    resVec.add(hostOsh)
    return resVec
def authorizeFortress(self, user_name): responseData = self.getFortressResponse(user_name) print responseData dbf = DocumentBuilderFactory.newInstance() fortressAuthenticated = False db = None try: db = dbf.newDocumentBuilder() print responseData isource = InputSource() isource.setCharacterStream(StringReader(responseData)) try: doc = db.parse(isource) errorCode = doc.getElementsByTagName("errorCode") valueSet = doc.getElementsByTagName("valueSet") if (errorCode.item(0).getTextContent() == "0" and self.roleExists("GLUU_ADMIN", valueSet)): return True else: fortressAuthenticated = False except Exception, ex: print "Exception" except ParserConfigurationException, pce: print pce
def test_indexDocumentWithUnicodeText(self):
    """Index a document whose content field is read from a unicode string."""
    store = self.openStore()
    writer = None
    try:
        writer = self.getWriter(store, self.getAnalyzer(), True)
        doc = Document()
        doc.add(Field("title", "value of testing", TextField.TYPE_STORED))
        doc.add(Field("docid", str(1), StringField.TYPE_NOT_STORED))
        doc.add(Field("owner", "unittester", StringField.TYPE_STORED))
        doc.add(Field("search_name", "wisdom", StoredField.TYPE))
        doc.add(Field("meta_words", "rabbits are beautiful",
                      TextField.TYPE_NOT_STORED))
        # NOTE: a unicode body historically caused problems here, which is
        # odd since the python type is the same regardless after encoding.
        body_reader = StringReader(u"hello world" * 20)
        doc.add(Field("content", body_reader, TextField.TYPE_NOT_STORED))
        writer.addDocument(doc)
    finally:
        self.closeStore(store, writer)
def processXmlAndCreateGRLoaderXmlFiles(operation, xmlResult): fileName = "%s-%s.xml" % (TIMESTAMP, operation) doc = Document() saxBuilder = SAXBuilder() xmlData = saxBuilder.build(StringReader(xmlResult)) try: # Process CIs cisData = xmlData.getRootElement().getChild('data').getChild( 'objects').getChildren('Object') (mamIdToAttributesMap, itemsRootElement) = addCIs(Element('GRLoader'), cisData) # Process Relationships relationsData = xmlData.getRootElement().getChild('data').getChild( 'links').getChildren('link') itemsRootElementWithLinks = addRelations(itemsRootElement, relationsData, mamIdToAttributesMap) if itemsRootElementWithLinks != None: doc.setRootElement(itemsRootElementWithLinks) createGRLoaderXmlInputFile(fileName, doc) except: raise Exception, "Unable to process inbound XML" return fileName
def readConfig(self, fh=None, xml=None):
    """Parse configuration XML from a file handle or from a string.

    Exactly one of `fh` (path/handle for an XML file) or `xml` (XML
    string) must be supplied. Populates self.tree, self.doc and
    self.cellTree, walks the tree into self.treeDict, and returns it.

    raises ProcessConfigException when neither source is provided.

    Fixes: locals previously shadowed the builtins `input` and `str`;
    None comparisons used `!=`/`==` instead of identity tests.
    """
    self.fh = fh
    self.xml = xml
    self.tree = None
    self.doc = None
    self.cellTree = None
    if self.fh is not None:
        self.logger.info("readConfig: processing xml file")
        self.logger.debug("readConfig: file %s " % fh)
        fileStream = FileInputStream(self.fh)
        fhtree = builder.parse(fileStream)
        self.tree = fhtree.getDocumentElement()
        self.cellTree = fhtree.getElementById('Cell')
    elif self.xml is not None:
        self.logger.info("readConfig: processing xml String")
        xmlReader = StringReader(xml)
        xmlSource = InputSource(xmlReader)
        self.doc = builder.parse(xmlSource)
        self.tree = self.doc.getDocumentElement()
        self.cellTree = self.doc.getElementById('Cell')
    else:
        self.logger.error("readConfig: You did not supply a valid xml file handle or xml string to the readConfig method.")
        raise ProcessConfigException("readConfig: You did not supply a valid xml file handle or xml string to the readConfig method.")
    self.logger.debug("readConfig: self.tree = %s" % (self.tree))
    # treeDict is (despite the name) a list, filled by walkXMLTree
    self.treeDict = []
    self.logger.debug("readConfig: processing base tree elements")
    self.logger.debug("readConfig: self.tree: %s " % self.tree)
    self.walkXMLTree(self.tree, 0)
    self.logger.debug("readConfig: self.treeDict = %s" % (self.treeDict))
    return self.treeDict
def go( query ): lexer = QueryLexer( StringReader(query) ) parser = QueryParser( lexer ) try: parser.query() except ANTLRException, details: print details return
def tokenize_string(self, analyzer, string):
    """Return the list of token texts `analyzer` produces for `string`.

    Follows the Lucene TokenStream workflow: reset(), incrementToken()
    until exhausted, then end() and close().

    Fixes: end() was never called (the TokenStream contract requires it
    after the last incrementToken()), and close() could be skipped if
    iteration raised — it now runs in a finally block.
    """
    result = []
    stream = analyzer.tokenStream(None, StringReader(string))
    cattr = stream.addAttribute(CharTermAttribute)
    stream.reset()
    try:
        while stream.incrementToken():
            result.append(cattr.toString())
        # contract: end() after the final incrementToken(), before close()
        stream.end()
    finally:
        stream.close()
    return result
def parse(self, essay):
    """Parse an essay sentence-by-sentence.

    Returns (parsed_essay, parsed_essay_objs): the concatenated textual
    parses — each prefixed with a single space, as before — and the
    corresponding parse objects.

    Performance fix: the textual result is now assembled with ''.join()
    instead of repeated `+=` concatenation, which was quadratic in the
    number of sentences.
    """
    parse_texts = []
    parsed_essay_objs = []
    for sentence in process.DocumentPreprocessor(StringReader(essay)):
        parse = self.lxparser.apply(sentence)
        parse_texts.append(" " + parse.toString())
        parsed_essay_objs.append(parse)
    return ("".join(parse_texts), parsed_essay_objs)
def extractRootElement(self):
    """Parse the app's error XML data and return the document root element."""
    factory = DocumentBuilderFactory.newInstance()
    factory.setNamespaceAware(1)
    domParser = factory.newDocumentBuilder()
    source = InputSource(StringReader(self.app.errorsData))
    return domParser.parse(source).getDocumentElement()
def testPorter(): print 'lucene', lucene.VERSION, lucene.CLASSPATH input = 'this is a test string for Analyzer' analyzer = PorterStemmerAnalyzer() ts = analyzer.tokenStream("dummy", StringReader(input)) ts.reset(); ##Resets this stream to the beginning. (Required while ts.incrementToken(): #print ts.r #print ts.reflectAsString(True) print termAtt.toString(), offsetAtt.startOffset(), offsetAtt.endOffset()
def __init__(self, modstr="", ring=None):
    '''Solvable module constructor.
    '''
    if ring is None:
        # parse the module set from its string representation
        tokenizer = GenPolynomialTokenizer(StringReader(modstr))
        self.mset = tokenizer.nextSolvableSubModuleSet()
    else:
        self.mset = ModuleList(ring, None)
    self.ring = self.mset.ring
def _assertAnalyzesTo(self, a, input, output, startOffsets=None,
                      endOffsets=None, posIncrements=None):
    """Analyze `input` with `a` and verify the produced token stream."""
    stream = a.tokenStream("dummy", StringReader(input))
    self._assertTokenStreamContents(stream, output, startOffsets,
                                    endOffsets, None, posIncrements)
def __init__(self, ring, ringstr="", list=None):
    '''Constructor for an ideal in a solvable polynomial ring.
    '''
    self.ring = ring
    if list is None:
        # parse the polynomial list from its string representation
        tokenizer = GenPolynomialTokenizer(ring.ring, StringReader(ringstr))
        self.list = tokenizer.nextSolvablePolynomialList()
    else:
        self.list = pylist2arraylist(list)
    self.pset = OrderedPolynomialList(ring.ring, self.list)
def __init__(self, ringstr="", ring=None):
    '''Ring constructor.
    '''
    if ring is None:
        # parse the polynomial set from its string representation
        tokenizer = GenPolynomialTokenizer(StringReader(ringstr))
        self.pset = tokenizer.nextPolynomialSet()
        self.ring = self.pset.ring
    else:
        self.ring = ring
    # gcd engine bound to this ring's coefficient factory
    self.engine = GCDFactory.getProxy(self.ring.coFac)
def __init__(self, ring, polystr="", list=None):
    '''Ideal constructor.
    '''
    self.ring = ring
    if list is None:
        # parse the polynomial list from its string representation
        tokenizer = GenPolynomialTokenizer(ring.pset.ring, StringReader(polystr))
        self.list = tokenizer.nextPolynomialList()
    else:
        self.list = pylist2arraylist(list)
    self.pset = OrderedPolynomialList(ring.ring, self.list)