def cql_search(request):
    """Run a CQL query over all stored sentences and render the matches.

    The request body is JSON; its ``title`` entry holds the CQL expression.
    A FoLiA document is rebuilt from the ``Sentence`` and ``Word`` records,
    the expression is converted to FQL and executed against that document,
    and the sentences containing a match are looked up again by id.

    Returns the rendered ``cabinet/cql_results.html`` template with the raw
    query results (``texts``) and the matching sentence records (``sens``).
    """
    from pynlpl.formats import fql, cql

    # Parse the incoming parameters.
    payload = json.loads(request.body.decode('utf-8'))

    # Rebuild the FoLiA document from the current data.
    doc = folia.Document(id='doc')
    text = folia.Text(doc, id='doc.text')
    for sentence_row in Sentence.objects.all():
        sentence_id = doc.id + '.s.' + str(sentence_row.id)
        folia_sentence = text.append(folia.Sentence(doc, id=sentence_id))
        for word_row in Word.objects.filter(Sentence_id=sentence_row.id):
            word_id = sentence_id + '.w.' + str(word_row.id)
            folia_sentence.append(folia.Word(doc, id=word_id, text=word_row.value))
    doc.append(text)

    # Search for words in the document.
    run_query = fql.Query(cql.cql2fql(payload['title']))
    texts = run_query(doc)

    # The parent of the first matched word is its sentence; the part of the
    # sentence id after 's.' is the database id it was built from.
    matched_ids = [match[0].parent.id.split('s.')[1] for match in texts]
    sens = Sentence.objects.filter(id__in=matched_ids)

    # Output the results.
    context = {
        'texts': texts,
        'sens': sens
    }
    return render(request, 'cabinet/cql_results.html', context)
def test06_context(self):
    """Run the ``Qcql_context6`` CQL query and check the tag of every match.

    The CQL expression is converted to FQL and executed against ``self.doc``.
    At least one result is expected; every result must be an ``fql.SpanSet``
    of length one whose first element has a PoS tag starting with ``VZ`` or
    ``VG``.
    """
    q = fql.Query(cql.cql2fql(Qcql_context6))
    results = q(self.doc)
    self.assertGreater(len(results), 0)
    for result in results:
        self.assertIsInstance(result, fql.SpanSet)
        self.assertEqual(len(result), 1)
        # assertIn reports the unexpected tag on failure, which a bare
        # assertTrue over an ``or`` expression does not.
        self.assertIn(result[0].pos()[:2], ("VZ", "VG"))
def test01_context(self):
    """Run the ``Qcql_context`` CQL query and verify each three-word match.

    Every result must be an ``fql.SpanSet`` of three ``folia.Word`` elements:
    the first with text ``de``, the second with a PoS tag starting with
    ``ADJ(`` and the third with a PoS tag starting with ``N(``.
    """
    run_query = fql.Query(cql.cql2fql(Qcql_context))
    matches = run_query(self.doc)
    self.assertTrue(len(matches) > 0)
    for match in matches:
        self.assertIsInstance(match, fql.SpanSet)
        self.assertEqual(len(match), 3)
        # All three positions must hold word elements.
        for position in range(3):
            self.assertIsInstance(match[position], folia.Word)
        self.assertEqual(match[0].text(), "de")
        adjective_tag = match[1].pos()
        self.assertEqual(adjective_tag[:4], "ADJ(")
        noun_tag = match[2].pos()
        self.assertEqual(noun_tag[:2], "N(")
def test04_context(self):
    """Run the ``Qcql_context4`` CQL query and check its two matches.

    Exactly two results are expected, each an ``fql.SpanSet``. The word texts
    of each result are collected as a tuple, and both expected word sequences
    must be among them.
    """
    q = fql.Query(cql.cql2fql(Qcql_context4))
    results = q(self.doc)
    self.assertEqual(len(results), 2)
    textresults = []
    for result in results:
        self.assertIsInstance(result, fql.SpanSet)
        textresults.append(tuple(w.text() for w in result))
    # assertIn prints the collected sequences on failure, unlike assertTrue(x in y).
    self.assertIn(('genummerd', 'en', 'gedateerd'), textresults)
    self.assertIn(('opgenomen', 'en', 'worden', 'weergegeven'), textresults)
def test01_context(self):
    """Check the ``Qcql_context`` query: three-word spans of the form de + ADJ + N.

    The CQL expression is converted to FQL and run on ``self.doc``. There must
    be at least one result, and each result must be an ``fql.SpanSet`` holding
    three ``folia.Word`` elements with the expected text and PoS tag prefixes.
    """
    compiled = fql.Query(cql.cql2fql(Qcql_context))
    found = compiled(self.doc)
    self.assertTrue(len(found) > 0)
    for span in found:
        self.assertIsInstance(span, fql.SpanSet)
        self.assertEqual(len(span), 3)
        for index in (0, 1, 2):
            self.assertIsInstance(span[index], folia.Word)
        # Text of the first word, then the tag prefixes of the other two.
        first_text = span[0].text()
        self.assertEqual(first_text, "de")
        self.assertEqual(span[1].pos()[:4], "ADJ(")
        self.assertEqual(span[2].pos()[:2], "N(")
def test03_context(self):
    """Run the ``Qcql_context3`` CQL query and check its two two-word matches.

    Exactly two results are expected, each an ``fql.SpanSet`` of length two.
    The word texts of each result are collected as a tuple, and both expected
    pairs must be among them.
    """
    q = fql.Query(cql.cql2fql(Qcql_context3))
    results = q(self.doc)
    self.assertEqual(len(results), 2)
    textresults = []
    for result in results:
        self.assertIsInstance(result, fql.SpanSet)
        self.assertEqual(len(result), 2)
        textresults.append(tuple(w.text() for w in result))
    # assertIn prints the collected pairs on failure, unlike assertTrue(x in y).
    self.assertIn(('naam', 'stemma'), textresults)
    self.assertIn(('stemma', 'codicum'), textresults)
def test02_context(self):
    """Run the ``Qcql_context2`` CQL query and check a sample of its matches.

    At least one result is expected, each an ``fql.SpanSet``. The word texts
    of each result are collected as a tuple, and every expected word sequence
    listed below must be among them.
    """
    q = fql.Query(cql.cql2fql(Qcql_context2))
    results = q(self.doc)
    self.assertGreater(len(results), 0)
    textresults = []
    for result in results:
        self.assertIsInstance(result, fql.SpanSet)
        textresults.append(tuple(w.text() for w in result))
    # assertIn prints the collected sequences on failure, unlike assertTrue(x in y).
    self.assertIn(('het', 'alfabet'), textresults)
    self.assertIn(('vierkante', 'haken'), textresults)
    self.assertIn(('plaats',), textresults)
    self.assertIn(('het', 'originele', 'handschrift'), textresults)
    self.assertIn(('Een', 'volle', 'lijn'), textresults)
def test03_context(self):
    """Check that the ``Qcql_context3`` query yields exactly the two expected pairs.

    The CQL expression is converted to FQL and run on ``self.doc``. Each of
    the two results must be an ``fql.SpanSet`` with two elements; their word
    texts are gathered as tuples and compared against the expected pairs.
    """
    q = fql.Query(cql.cql2fql(Qcql_context3))
    results = q(self.doc)
    self.assertEqual(len(results), 2)
    textresults = []
    for result in results:
        self.assertIsInstance(result, fql.SpanSet)
        self.assertEqual(len(result), 2)
        textresults.append(tuple(w.text() for w in result))
    # assertIn shows the actual contents of textresults when a pair is missing.
    self.assertIn(('naam', 'stemma'), textresults)
    self.assertIn(('stemma', 'codicum'), textresults)
def test02_context(self):
    """Check that the ``Qcql_context2`` query finds the expected word sequences.

    The CQL expression is converted to FQL and run on ``self.doc``. There
    must be at least one result, every result must be an ``fql.SpanSet``, and
    the tuples of word texts must include each sequence asserted below.
    """
    q = fql.Query(cql.cql2fql(Qcql_context2))
    results = q(self.doc)
    self.assertGreater(len(results), 0)
    textresults = []
    for result in results:
        self.assertIsInstance(result, fql.SpanSet)
        textresults.append(tuple(w.text() for w in result))
    # assertIn shows the actual contents of textresults when a sequence is missing.
    self.assertIn(('het', 'alfabet'), textresults)
    self.assertIn(('vierkante', 'haken'), textresults)
    self.assertIn(('plaats',), textresults)
    self.assertIn(('het', 'originele', 'handschrift'), textresults)
    self.assertIn(('Een', 'volle', 'lijn'), textresults)
def test05_context(self):
    """Run the ``Qcql_context5`` CQL query and check a sample of its matches.

    At least one result is expected, each an ``fql.SpanSet``. The word texts
    of each result are collected as a tuple, and every expected word sequence
    listed below must be among them.
    """
    q = fql.Query(cql.cql2fql(Qcql_context5))
    results = q(self.doc)
    self.assertGreater(len(results), 0)
    textresults = []
    for result in results:
        self.assertIsInstance(result, fql.SpanSet)
        textresults.append(tuple(w.text() for w in result))
    # assertIn prints the collected sequences on failure, unlike assertTrue(x in y).
    self.assertIn(('en', 'gedateerd', 'zodat'), textresults)
    self.assertIn(('en', 'worden', 'weergegeven', 'door'), textresults)
    self.assertIn(('zodat', 'ze'), textresults)
    self.assertIn(('en', 'worden', 'tussen'), textresults)
    self.assertIn(('terweil', 'een'), textresults)
def test05_context(self):
    """Check that the ``Qcql_context5`` query finds the expected word sequences.

    The CQL expression is converted to FQL and run on ``self.doc``. There
    must be at least one result, every result must be an ``fql.SpanSet``, and
    the tuples of word texts must include each sequence asserted below.
    """
    q = fql.Query(cql.cql2fql(Qcql_context5))
    results = q(self.doc)
    self.assertGreater(len(results), 0)
    textresults = []
    for result in results:
        self.assertIsInstance(result, fql.SpanSet)
        textresults.append(tuple(w.text() for w in result))
    # assertIn shows the actual contents of textresults when a sequence is missing.
    self.assertIn(('en', 'gedateerd', 'zodat'), textresults)
    self.assertIn(('en', 'worden', 'weergegeven', 'door'), textresults)
    self.assertIn(('zodat', 'ze'), textresults)
    self.assertIn(('en', 'worden', 'tussen'), textresults)
    self.assertIn(('terweil', 'een'), textresults)
def query(self, **kwargs):
    """Query method, all FQL queries arrive here.

    One or more newline-separated queries are read from the ``query`` keyword
    argument or, when that is absent, from the raw request body. Each line is
    first parsed (document selector, then ``GET`` / ``PROBE`` / ``CQL ...`` /
    ``META key=value`` / plain FQL), then metadata changes are applied, then
    the parsed queries are executed and the results serialised according to
    the format of the last executed query.

    Returns:
        The response body. String output is encoded to UTF-8 bytes; the
        early return for requests that only carried ``META`` statements
        returns a plain ``str``.

    Raises:
        cherrypy.HTTPError: 404 for syntax errors, query errors, missing
            documents, FoLiA errors, non-native metadata, empty requests and
            multiple results in a single-result format; 403 when the document
            store is in lockdown after an earlier failure.
    """
    if 'X-Sessionid' in cherrypy.request.headers:
        sid = cherrypy.request.headers['X-Sessionid']
    else:
        sid = 'NOSID'

    if 'query' in kwargs:
        rawqueries = kwargs['query'].split("\n")
    else:
        cl = cherrypy.request.headers['Content-Length']
        body = cherrypy.request.body.read(int(cl))
        # The raw body is read as bytes; splitting bytes on a str separator
        # raises TypeError, so decode before splitting.
        if isinstance(body, bytes):
            body = body.decode('utf-8')
        rawqueries = body.split("\n")

    if self.debug:
        for i,rawquery in enumerate(rawqueries):
            log("[QUERY INCOMING #" + str(i+1) + ", SID=" +sid + "] " + rawquery)

    #Get parameters for FLAT-specific return format
    flatargs = getflatargs(cherrypy.request.params)
    flatargs['debug'] = self.debug
    flatargs['logfunction'] = log
    flatargs['version'] = VERSION

    prevdocsel = None
    sessiondocsel = None
    queries = []
    metachanges = {}
    for rawquery in rawqueries:
        try:
            docsel, rawquery = getdocumentselector(rawquery)
            if not docsel:
                docsel = prevdocsel
            # Label used for logging only; a missing selector must not make
            # the syntax-error path below fail on "/".join(None).
            docname = "/".join(docsel) if docsel else ""
            self.docstore.use(docsel)
            if self.debug >= 2:
                log("[acquired lock " + docname+"]")
            if not sessiondocsel:
                sessiondocsel = docsel
            if rawquery == "GET":
                query = "GET"
            elif rawquery == "PROBE":
                query = "PROBE" #gets no content data at all, but allows returning associated metadata used by FLAT, forces FLAT format
            else:
                if rawquery[:4] == "CQL ":
                    if rawquery.find('FORMAT') != -1:
                        end = rawquery.find('FORMAT')
                        format = rawquery[end+7:]
                    else:
                        end = 9999
                        format = 'xml'
                    try:
                        query = fql.Query(cql.cql2fql(rawquery[4:end]))
                        query.format = format
                    except cql.SyntaxError as e :
                        raise fql.SyntaxError("Error in CQL query: " + str(e))
                elif rawquery[:5] == "META ":
                    try:
                        key, value = rawquery[5:].split('=',maxsplit=1)
                    except ValueError:
                        raise fql.SyntaxError("Expected key=value after META keyword")
                    key = key.strip()
                    value = value.strip()
                    metachanges[key] = value
                    query = None
                else:
                    query = fql.Query(rawquery)
                if query and query.format == "python":
                    query.format = "xml"
                if query and query.action and not docsel:
                    raise fql.SyntaxError("Document Server requires USE statement prior to FQL query")
        except fql.SyntaxError as e:
            log("[QUERY ON " + docname + "] " + str(rawquery))
            log("[QUERY FAILED] FQL Syntax Error: " + str(e))
            raise cherrypy.HTTPError(404, "FQL syntax error: " + str(e))
        finally:
            if self.debug >= 2:
                log("[releasing lock " + docname)
            self.docstore.done(docsel)

        if query:
            queries.append( (query, rawquery))
        prevdocsel = docsel

    if metachanges:
        try:
            doc = self.docstore[docsel]
        except NoSuchDocument:
            log("[QUERY FAILED] No such document")
            raise cherrypy.HTTPError(404, "Document not found: " + docsel[0] + "/" + docsel[1])
        except Exception as e:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_tb(exc_traceback, limit=50, file=sys.stderr)
            print("[QUERY FAILED] FoLiA Error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e), file=sys.stderr)
            log("[QUERY FAILED] FoLiA Error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e))
            if logfile:
                traceback.print_tb(exc_traceback, limit=50, file=logfile)
            raise cherrypy.HTTPError(404, "FoLiA error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e) + "\n\nQuery was: " + rawquery)

        if doc.metadatatype == folia.MetaDataType.NATIVE:
            doc.changed = True
            self.docstore.lastaccess[docsel][sid] = time.time()
            log("[METADATA EDIT ON " + "/".join(docsel) + "]")
            for key, value in metachanges.items():
                # The literal value NONE removes the metadata key.
                if value == 'NONE':
                    del doc.metadata[key]
                else:
                    doc.metadata[key] = value
        else:
            raise cherrypy.HTTPError(404, "Unable to edit metadata on document with non-native metadata type (" + "/".join(docsel)+")")
    else:
        doc = None #initialize document only if not already initialized by metadata changes

    results = [] #stores all results
    xresults = [] #stores results that should be transferred to other sessions as well, i.e. results of adds/edits
    prevdocid = None
    multidoc = False #are the queries over multiple distinct documents?
    format = None
    # NOTE(review): `queries` stores no selector per entry, so every query is
    # resolved against the last `docsel` seen while parsing - confirm this is intended.
    for query, rawquery in queries:
        try:
            doc = self.docstore[docsel]
            self.docstore.lastaccess[docsel][sid] = time.time()
            log("[QUERY ON " + "/".join(docsel) + "] " + str(rawquery))
            if isinstance(query, fql.Query):
                if prevdocid and doc.id != prevdocid:
                    multidoc = True
                result = query(doc,False,self.debug >= 2)
                results.append(result) #False = nowrap
                if query.action and query.action.action in ('EDIT','ADD','DELETE', 'SUBSTITUTE','PREPEND','APPEND'):
                    #results of edits should be transferred to other open sessions
                    xresults.append(result)
                if self.debug:
                    log("[QUERY RESULT] " + repr(result))
                format = query.format
                if query.action and query.action.action != "SELECT":
                    doc.changed = True
                    self.addtochangelog(doc, query, docsel)
            elif query == "GET":
                results.append(doc.xmlstring())
                format = "single-xml"
            elif query == "PROBE":
                #no queries to perform
                format = "flat"
            else:
                raise Exception("Invalid query")
        except NoSuchDocument:
            if self.docstore.fail and not self.docstore.ignorefail:
                log("[QUERY FAILED] Document server is in lockdown due to earlier failure. Restart required!")
                raise cherrypy.HTTPError(403, "Document server is in lockdown due to earlier failure. Contact your FLAT administrator")
            else:
                log("[QUERY FAILED] No such document")
                raise cherrypy.HTTPError(404, "Document not found: " + docsel[0] + "/" + docsel[1])
        except fql.QueryError as e:
            log("[QUERY FAILED] FQL Query Error: " + str(e))
            raise cherrypy.HTTPError(404, "FQL query error: " + str(e))
        except Exception as e:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_tb(exc_traceback, limit=50, file=sys.stderr)
            log("[QUERY FAILED] FoLiA Error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e))
            print("[QUERY FAILED] FoLiA Error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e), file=sys.stderr)
            if logfile:
                traceback.print_tb(exc_traceback, limit=50, file=logfile)
            raise cherrypy.HTTPError(404, "FoLiA error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e) + "\n\nQuery was: " + rawquery)
        prevdocid = doc.id

    if not format:
        # No query set a format: either only META statements were given or nothing at all.
        if metachanges:
            return "{\"version\":\"" + VERSION + "\"}"
        else:
            raise cherrypy.HTTPError(404, "No queries given")

    if format.endswith('xml'):
        cherrypy.response.headers['Content-Type']= 'text/xml'
    elif format.endswith('json'):
        cherrypy.response.headers['Content-Type']= 'application/json'

    if format == "xml":
        out = "<results>" + "\n".join(results) + "</results>"
    elif format == "json":
        out = "[" + ",".join(results) + "]"
    elif format == "flat":
        if sid != 'NOSID' and sessiondocsel:
            self.setsession(sessiondocsel[0],sessiondocsel[1],sid, xresults)
        cherrypy.response.headers['Content-Type']= 'application/json'
        if multidoc:
            # A plain string cannot be raised (TypeError in Python 3); hand the
            # placeholder back as the response body instead.
            out = "{\"version\":\""+VERSION +"\"} //multidoc response, not producing results"
        elif doc:
            log("[Parsing results for FLAT]")
            out = parseresults(results, doc, **flatargs)
    else:
        if len(results) > 1:
            raise cherrypy.HTTPError(404, "Multiple results were obtained but format dictates only one can be returned!")
        out = results[0]

    if docsel[0] == "testflat":
        testresult = self.docstore.save(docsel) #won't save, will run tests instead
        log("Test result: " +str(repr(testresult)))

        if format == "flat":
            out = json.loads(str(out,'utf-8'))
            out['testresult'] = testresult[0]
            out['testmessage'] = testresult[1]
            out['queries'] = rawqueries
            out = json.dumps(out)

        #unload the document, we want a fresh copy every time
        del self.docstore.data[('testflat','testflat')]

    if self.debug:
        if isinstance(out,bytes):
            log("[FINAL RESULTS] " + str(out,'utf-8'))
        else:
            log("[FINAL RESULTS] " + out)

    if isinstance(out,str):
        return out.encode('utf-8')
    else:
        return out
def query(self, **kwargs):
    """Query method, all FQL queries arrive here.

    One or more newline-separated queries are read from the ``query`` keyword
    argument or, when that is absent, from the raw request body. Each line is
    first parsed (document selector, then ``GET`` / ``PROBE`` / ``CQL ...`` /
    ``META key=value`` / plain FQL), then metadata changes are applied, then
    the parsed queries are executed and the results serialised according to
    the format of the last executed query.

    Returns:
        The response body. String output is encoded to UTF-8 bytes; the
        early return for requests that only carried ``META`` statements
        returns the plain ``str`` ``"{}"``.

    Raises:
        cherrypy.HTTPError: 404 for syntax errors, query errors, missing
            documents, FoLiA errors, non-native metadata, empty requests and
            multiple results in a single-result format.
    """
    if 'X-sessionid' in cherrypy.request.headers:
        sid = cherrypy.request.headers['X-sessionid']
    else:
        sid = 'NOSID'

    if 'query' in kwargs:
        rawqueries = kwargs['query'].split("\n")
    else:
        cl = cherrypy.request.headers['Content-Length']
        body = cherrypy.request.body.read(int(cl))
        # The raw body is read as bytes; splitting bytes on a str separator
        # raises TypeError, so decode before splitting.
        if isinstance(body, bytes):
            body = body.decode('utf-8')
        rawqueries = body.split("\n")

    if self.debug:
        for i,rawquery in enumerate(rawqueries):
            log("[QUERY INCOMING #" + str(i+1) + "] " + rawquery)

    #Get parameters for FLAT-specific return format
    flatargs = getflatargs(cherrypy.request.params)
    flatargs['debug'] = self.debug
    flatargs['logfunction'] = log

    prevdocsel = None
    sessiondocsel = None
    queries = []
    metachanges = {}
    for rawquery in rawqueries:
        try:
            docsel, rawquery = getdocumentselector(rawquery)
            if not docsel:
                docsel = prevdocsel
            # Label used for logging only; a missing selector must not make
            # the syntax-error path below fail on "/".join(None).
            docname = "/".join(docsel) if docsel else ""
            self.docstore.use(docsel)
            if self.debug >= 2:
                log("[acquired lock " + docname+"]")
            if not sessiondocsel:
                sessiondocsel = docsel
            if rawquery == "GET":
                query = "GET"
            elif rawquery == "PROBE":
                query = "PROBE" #gets no content data at all, but allows returning associated metadata used by FLAT, forces FLAT format
            else:
                if rawquery[:4] == "CQL ":
                    if rawquery.find('FORMAT') != -1:
                        end = rawquery.find('FORMAT')
                        format = rawquery[end+7:]
                    else:
                        end = 9999
                        format = 'xml'
                    try:
                        query = fql.Query(cql.cql2fql(rawquery[4:end]))
                        query.format = format
                    except cql.SyntaxError as e :
                        raise fql.SyntaxError("Error in CQL query: " + str(e))
                elif rawquery[:5] == "META ":
                    try:
                        key, value = rawquery[5:].split('=',maxsplit=1)
                    except ValueError:
                        raise fql.SyntaxError("Expected key=value after META keyword")
                    key = key.strip()
                    value = value.strip()
                    metachanges[key] = value
                    query = None
                else:
                    query = fql.Query(rawquery)
                if query and query.format == "python":
                    query.format = "xml"
                if query and query.action and not docsel:
                    raise fql.SyntaxError("Document Server requires USE statement prior to FQL query")
        except fql.SyntaxError as e:
            log("[QUERY ON " + docname + "] " + str(rawquery))
            log("[QUERY FAILED] FQL Syntax Error: " + str(e))
            raise cherrypy.HTTPError(404, "FQL syntax error: " + str(e))
        finally:
            if self.debug >= 2:
                log("[releasing lock " + docname)
            self.docstore.done(docsel)

        if query:
            queries.append( (query, rawquery))
        prevdocsel = docsel

    if metachanges:
        try:
            doc = self.docstore[docsel]
        except NoSuchDocument:
            log("[QUERY FAILED] No such document")
            raise cherrypy.HTTPError(404, "Document not found: " + docsel[0] + "/" + docsel[1])
        except Exception as e:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_tb(exc_traceback, limit=50, file=sys.stderr)
            print("[QUERY FAILED] FoLiA Error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e), file=sys.stderr)
            log("[QUERY FAILED] FoLiA Error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e))
            if logfile:
                traceback.print_tb(exc_traceback, limit=50, file=logfile)
            raise cherrypy.HTTPError(404, "FoLiA error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e) + "\n\nQuery was: " + rawquery)

        if doc.metadatatype == folia.MetaDataType.NATIVE:
            self.docstore.lastaccess[docsel][sid] = time.time()
            log("[METADATA EDIT ON " + "/".join(docsel) + "]")
            for key, value in metachanges.items():
                # The literal value NONE removes the metadata key.
                if value == 'NONE':
                    del doc.metadata[key]
                else:
                    doc.metadata[key] = value
        else:
            raise cherrypy.HTTPError(404, "Unable to edit metadata on document with non-native metadata type (" + "/".join(docsel)+")")

    results = []
    doc = None
    prevdocid = None
    multidoc = False #are the queries over multiple distinct documents?
    format = None
    # NOTE(review): `queries` stores no selector per entry, so every query is
    # resolved against the last `docsel` seen while parsing - confirm this is intended.
    for query, rawquery in queries:
        try:
            doc = self.docstore[docsel]
            self.docstore.lastaccess[docsel][sid] = time.time()
            log("[QUERY ON " + "/".join(docsel) + "] " + str(rawquery))
            if isinstance(query, fql.Query):
                if prevdocid and doc.id != prevdocid:
                    multidoc = True
                result = query(doc,False,self.debug >= 2)
                results.append(result) #False = nowrap
                if self.debug:
                    log("[QUERY RESULT] " + repr(result))
                format = query.format
                if query.action and query.action.action != "SELECT":
                    doc.changed = True
                    self.addtochangelog(doc, query, docsel)
            elif query == "GET":
                results.append(doc.xmlstring())
                format = "single-xml"
            elif query == "PROBE":
                #no queries to perform
                format = "flat"
            else:
                raise Exception("Invalid query")
        except NoSuchDocument:
            log("[QUERY FAILED] No such document")
            raise cherrypy.HTTPError(404, "Document not found: " + docsel[0] + "/" + docsel[1])
        except fql.QueryError as e:
            log("[QUERY FAILED] FQL Query Error: " + str(e))
            raise cherrypy.HTTPError(404, "FQL query error: " + str(e))
        except Exception as e:
            exc_type, exc_value, exc_traceback = sys.exc_info()
            traceback.print_tb(exc_traceback, limit=50, file=sys.stderr)
            log("[QUERY FAILED] FoLiA Error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e))
            print("[QUERY FAILED] FoLiA Error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e), file=sys.stderr)
            if logfile:
                traceback.print_tb(exc_traceback, limit=50, file=logfile)
            raise cherrypy.HTTPError(404, "FoLiA error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e) + "\n\nQuery was: " + rawquery)
        prevdocid = doc.id

    if not format:
        # No query set a format: either only META statements were given or nothing at all.
        if metachanges:
            return "{}"
        else:
            raise cherrypy.HTTPError(404, "No queries given")

    if format.endswith('xml'):
        cherrypy.response.headers['Content-Type']= 'text/xml'
    elif format.endswith('json'):
        cherrypy.response.headers['Content-Type']= 'application/json'

    if format == "xml":
        out = "<results>" + "\n".join(results) + "</results>"
    elif format == "json":
        out = "[" + ",".join(results) + "]"
    elif format == "flat":
        if sid != 'NOSID' and sessiondocsel and not multidoc:
            self.createsession(sessiondocsel[0],sessiondocsel[1],sid, results)
        cherrypy.response.headers['Content-Type']= 'application/json'
        if multidoc:
            # A plain string cannot be raised (TypeError in Python 3); hand the
            # placeholder back as the response body instead.
            out = "{} //multidoc response, not producing results"
        elif doc:
            log("[Parsing results for FLAT]")
            out = parseresults(results, doc, **flatargs)
    else:
        if len(results) > 1:
            raise cherrypy.HTTPError(404, "Multiple results were obtained but format dictates only one can be returned!")
        out = results[0]

    if docsel[0] == "testflat":
        testresult = self.docstore.save(docsel) #won't save, will run tests instead
        log("Test result: " +str(repr(testresult)))

        if format == "flat":
            out = json.loads(str(out,'utf-8'))
            out['testresult'] = testresult[0]
            out['testmessage'] = testresult[1]
            out['queries'] = rawqueries
            out = json.dumps(out)

        #unload the document, we want a fresh copy every time
        del self.docstore.data[('testflat','testflat')]

    if self.debug:
        if isinstance(out,bytes):
            log("[FINAL RESULTS] " + str(out,'utf-8'))
        else:
            log("[FINAL RESULTS] " + out)

    if isinstance(out,str):
        return out.encode('utf-8')
    else:
        return out