Beispiel #1
0
def cql_search(request):
    """Run a CQL query over all stored sentences and render the matches.

    The JSON request body must carry the CQL expression under the ``title``
    key. A fresh in-memory FoLiA document is built from every ``Sentence``
    row and its ``Word`` rows, the CQL expression is converted to FQL and
    evaluated against that document, and the sentences containing the first
    word of each match are looked up again for rendering.
    """
    from pynlpl.formats import fql, cql

    # Parse the incoming parameters.
    payload = json.loads(request.body.decode('utf-8'))

    # Rebuild the FoLiA document from the current database contents.
    document = folia.Document(id='doc')
    body = folia.Text(document, id='doc.text')
    for sentence_row in Sentence.objects.all():
        sentence_id = document.id + '.s.' + str(sentence_row.id)
        folia_sentence = body.append(folia.Sentence(document, id=sentence_id))
        for word_row in Word.objects.filter(Sentence_id=sentence_row.id):
            word_id = sentence_id + '.w.' + str(word_row.id)
            folia_sentence.append(
                folia.Word(document, id=word_id, text=word_row.value))
    document.append(body)

    # Search the document.
    texts = fql.Query(cql.cql2fql(payload['title']))(document)

    # The sentence primary key is the part of the FoLiA sentence id that
    # follows the 's.' marker (ids look like 'doc.s.<pk>').
    matched_ids = [match[0].parent.id.split('s.')[1] for match in texts]
    sens = Sentence.objects.filter(id__in=matched_ids)

    # Render the results.
    context = {'texts': texts, 'sens': sens}
    return render(request, 'cabinet/cql_results.html', context)
Beispiel #2
0
    def test06_context(self):
        """Check the matches of the CQL query ``Qcql_context6``.

        The query must yield at least one result; every result is a
        one-element ``fql.SpanSet`` whose PoS tag starts with ``VZ`` or ``VG``.
        """
        q = fql.Query(cql.cql2fql(Qcql_context6))
        results = q(self.doc)
        self.assertGreater(len(results), 0)

        for result in results:
            self.assertIsInstance(result, fql.SpanSet)
            self.assertEqual(len(result), 1)
            # assertIn reports the offending tag prefix on failure and
            # evaluates pos() only once.
            self.assertIn(result[0].pos()[:2], ("VZ", "VG"))
Beispiel #3
0
 def test01_context(self):
     """Check the matches of the CQL query ``Qcql_context``.

     Every result must be a three-word ``fql.SpanSet``: the word "de",
     followed by a word whose PoS tag starts with ``ADJ(`` and a word
     whose PoS tag starts with ``N(``.
     """
     q = fql.Query(cql.cql2fql(Qcql_context))
     results = q(self.doc)
     # assertGreater reports the actual count when the query finds nothing.
     self.assertGreater(len(results), 0)
     for result in results:
         self.assertIsInstance(result, fql.SpanSet)
         self.assertEqual(len(result), 3)
         self.assertIsInstance(result[0], folia.Word)
         self.assertIsInstance(result[1], folia.Word)
         self.assertIsInstance(result[2], folia.Word)
         self.assertEqual(result[0].text(), "de")
         self.assertEqual(result[1].pos()[:4], "ADJ(")
         self.assertEqual(result[2].pos()[:2], "N(")
Beispiel #4
0
    def test04_context(self):
        """Check the matches of the CQL query ``Qcql_context4``.

        Exactly two results are expected, and the word sequences
        "genummerd en gedateerd" and "opgenomen en worden weergegeven"
        must both be among them.
        """
        q = fql.Query(cql.cql2fql(Qcql_context4))
        results = q(self.doc)
        self.assertEqual(len(results), 2)

        textresults = []
        for result in results:
            self.assertIsInstance(result, fql.SpanSet)
            textresults.append(tuple(w.text() for w in result))

        # assertIn prints the collected tuples when an expected one is missing.
        self.assertIn(('genummerd', 'en', 'gedateerd'), textresults)
        self.assertIn(('opgenomen', 'en', 'worden', 'weergegeven'), textresults)
Beispiel #5
0
 def test01_context(self):
     """Check the matches of the CQL query ``Qcql_context``.

     Every result must be a three-word ``fql.SpanSet``: the word "de",
     followed by a word whose PoS tag starts with ``ADJ(`` and a word
     whose PoS tag starts with ``N(``.
     """
     q = fql.Query(cql.cql2fql(Qcql_context))
     results = q(self.doc)
     # assertGreater reports the actual count when the query finds nothing.
     self.assertGreater(len(results), 0)
     for result in results:
         self.assertIsInstance(result, fql.SpanSet)
         self.assertEqual(len(result), 3)
         self.assertIsInstance(result[0], folia.Word)
         self.assertIsInstance(result[1], folia.Word)
         self.assertIsInstance(result[2], folia.Word)
         self.assertEqual(result[0].text(), "de")
         self.assertEqual(result[1].pos()[:4], "ADJ(")
         self.assertEqual(result[2].pos()[:2], "N(")
Beispiel #6
0
    def test03_context(self):
        """Check the matches of the CQL query ``Qcql_context3``.

        Exactly two results of two words each are expected: "naam stemma"
        and "stemma codicum".
        """
        q = fql.Query(cql.cql2fql(Qcql_context3))
        results = q(self.doc)
        self.assertEqual(len(results), 2)

        textresults = []
        for result in results:
            self.assertIsInstance(result, fql.SpanSet)
            self.assertEqual(len(result), 2)
            textresults.append(tuple(w.text() for w in result))

        # assertIn prints the collected tuples when an expected one is missing.
        self.assertIn(('naam', 'stemma'), textresults)
        self.assertIn(('stemma', 'codicum'), textresults)
Beispiel #7
0
    def test02_context(self):
        """Check the matches of the CQL query ``Qcql_context2``.

        The query must yield at least one result, and a fixed set of word
        sequences of varying length must be among the matches.
        """
        q = fql.Query(cql.cql2fql(Qcql_context2))
        results = q(self.doc)
        self.assertGreater(len(results), 0)

        textresults = []
        for result in results:
            self.assertIsInstance(result, fql.SpanSet)
            textresults.append(tuple(w.text() for w in result))

        # assertIn prints the collected tuples when an expected one is missing.
        self.assertIn(('het', 'alfabet'), textresults)
        self.assertIn(('vierkante', 'haken'), textresults)
        self.assertIn(('plaats', ), textresults)
        self.assertIn(('het', 'originele', 'handschrift'), textresults)
        self.assertIn(('Een', 'volle', 'lijn'), textresults)
Beispiel #8
0
    def test03_context(self):
        """Check the matches of the CQL query ``Qcql_context3``.

        Exactly two results of two words each are expected: "naam stemma"
        and "stemma codicum".
        """
        q = fql.Query(cql.cql2fql(Qcql_context3))
        results = q(self.doc)
        self.assertEqual(len(results), 2)

        textresults = []
        for result in results:
            self.assertIsInstance(result, fql.SpanSet)
            self.assertEqual(len(result), 2)
            textresults.append(tuple(w.text() for w in result))

        # assertIn prints the collected tuples when an expected one is missing.
        self.assertIn(('naam', 'stemma'), textresults)
        self.assertIn(('stemma', 'codicum'), textresults)
Beispiel #9
0
    def test02_context(self):
        """Check the matches of the CQL query ``Qcql_context2``.

        The query must yield at least one result, and a fixed set of word
        sequences of varying length must be among the matches.
        """
        q = fql.Query(cql.cql2fql(Qcql_context2))
        results = q(self.doc)
        self.assertGreater(len(results), 0)

        textresults = []
        for result in results:
            self.assertIsInstance(result, fql.SpanSet)
            textresults.append(tuple(w.text() for w in result))

        # assertIn prints the collected tuples when an expected one is missing.
        self.assertIn(('het', 'alfabet'), textresults)
        self.assertIn(('vierkante', 'haken'), textresults)
        self.assertIn(('plaats',), textresults)
        self.assertIn(('het', 'originele', 'handschrift'), textresults)
        self.assertIn(('Een', 'volle', 'lijn'), textresults)
Beispiel #10
0
    def test05_context(self):
        """Check the matches of the CQL query ``Qcql_context5``.

        The query must yield at least one result, and a fixed set of word
        sequences of varying length must be among the matches.
        """
        q = fql.Query(cql.cql2fql(Qcql_context5))
        results = q(self.doc)
        self.assertGreater(len(results), 0)

        textresults = []
        for result in results:
            self.assertIsInstance(result, fql.SpanSet)
            textresults.append(tuple(w.text() for w in result))

        # assertIn prints the collected tuples when an expected one is missing.
        self.assertIn(('en', 'gedateerd', 'zodat'), textresults)
        self.assertIn(('en', 'worden', 'weergegeven', 'door'), textresults)
        self.assertIn(('zodat', 'ze'), textresults)
        self.assertIn(('en', 'worden', 'tussen'), textresults)
        self.assertIn(('terweil', 'een'), textresults)
Beispiel #11
0
    def test05_context(self):
        """Check the matches of the CQL query ``Qcql_context5``.

        The query must yield at least one result, and a fixed set of word
        sequences of varying length must be among the matches.
        """
        q = fql.Query(cql.cql2fql(Qcql_context5))
        results = q(self.doc)
        self.assertGreater(len(results), 0)

        textresults = []
        for result in results:
            self.assertIsInstance(result, fql.SpanSet)
            textresults.append(tuple(w.text() for w in result))

        # assertIn prints the collected tuples when an expected one is missing.
        self.assertIn(('en', 'gedateerd', 'zodat'), textresults)
        self.assertIn(('en', 'worden', 'weergegeven', 'door'), textresults)
        self.assertIn(('zodat', 'ze'), textresults)
        self.assertIn(('en', 'worden', 'tussen'), textresults)
        self.assertIn(('terweil', 'een'), textresults)
    def query(self, **kwargs):
        """Query method, all FQL queries arrive here.

        Queries are newline-separated and are taken from the ``query``
        keyword argument or, when that is absent, from the request body
        (``Content-Length`` bytes). Each line is first passed through
        ``getdocumentselector()``; a line that yields no selector inherits
        the one of the previous line. What remains is the literal ``GET`` or
        ``PROBE``, a ``CQL `` query (converted to FQL, optionally followed by
        ``FORMAT <name>``), a ``META key=value`` metadata edit, or plain FQL.

        The method runs in three phases: parse all lines, apply pending
        metadata edits, then execute the parsed queries and serialise the
        results according to the format of the last executed query.

        Returns:
            The response body. ``str`` output is UTF-8 encoded to ``bytes``;
            the metadata-only early return hands back a ``str``.

        Raises:
            cherrypy.HTTPError: 404 for syntax errors, query errors, missing
                documents, non-native metadata, no queries, or multiple
                results in a single-result format; 403 when the document
                store is in lockdown after an earlier failure.
        """

        def docselname(selector):
            #render a document selector for log output; tolerates a missing selector
            return "/".join(selector) if selector else "(no document selected)"

        if 'X-Sessionid' in cherrypy.request.headers:
            sid = cherrypy.request.headers['X-Sessionid']
        else:
            sid = 'NOSID'

        if 'query' in kwargs:
            rawqueries = kwargs['query'].split("\n")
        else:
            cl = cherrypy.request.headers['Content-Length']
            body = cherrypy.request.body.read(int(cl))
            #read() may hand back bytes, which cannot be split on a str separator
            if isinstance(body, bytes):
                body = str(body, 'utf-8')
            rawqueries = body.split("\n")

        if self.debug:
            for i,rawquery in enumerate(rawqueries):
                log("[QUERY INCOMING #" + str(i+1) + ", SID=" +sid + "] " + rawquery)

        #Get parameters for FLAT-specific return format
        flatargs = getflatargs(cherrypy.request.params)
        flatargs['debug'] = self.debug
        flatargs['logfunction'] = log
        flatargs['version'] = VERSION

        prevdocsel = None
        sessiondocsel = None
        docsel = None #keeps the error/cleanup paths below well-defined if the first selector fails to parse
        queries = []
        metachanges = {}
        for rawquery in rawqueries:
            try:
                docsel, rawquery = getdocumentselector(rawquery)
                if not docsel: docsel = prevdocsel
                self.docstore.use(docsel)
                if self.debug >= 2: log("[acquired lock " + docselname(docsel)+"]")
                if not sessiondocsel: sessiondocsel = docsel
                if rawquery == "GET":
                    query = "GET"
                elif rawquery == "PROBE":
                    query = "PROBE" #gets no content data at all, but allows returning associated metadata used by FLAT, forces FLAT format
                else:
                    if rawquery[:4] == "CQL ":
                        if rawquery.find('FORMAT') != -1:
                            end = rawquery.find('FORMAT')
                            format = rawquery[end+7:]
                        else:
                            end = None #no FORMAT clause: the CQL expression runs to the end of the line
                            format = 'xml'
                        try:
                            query = fql.Query(cql.cql2fql(rawquery[4:end]))
                            query.format = format
                        except cql.SyntaxError as e :
                            raise fql.SyntaxError("Error in CQL query: " + str(e))
                    elif rawquery[:5] == "META ":
                        try:
                            key, value = rawquery[5:].split('=',maxsplit=1)
                        except ValueError:
                            raise fql.SyntaxError("Expected key=value after META keyword")
                        key = key.strip()
                        value = value.strip()
                        metachanges[key] = value
                        query = None
                    else:
                        query = fql.Query(rawquery)
                    if query and query.format == "python":
                        query.format = "xml"
                    if query and query.action and not docsel:
                        raise fql.SyntaxError("Document Server requires USE statement prior to FQL query")
            except fql.SyntaxError as e:
                #docsel may be None here (e.g. the missing-USE error above), so it is rendered defensively
                log("[QUERY ON " + docselname(docsel)  + "] " + str(rawquery))
                log("[QUERY FAILED] FQL Syntax Error: " + str(e))
                raise cherrypy.HTTPError(404, "FQL syntax error: " + str(e))
            finally:
                if self.debug >= 2: log("[releasing lock " + docselname(docsel))
                self.docstore.done(docsel)

            if query:
                queries.append( (query, rawquery))
            prevdocsel = docsel


        #NOTE(review): from here on 'docsel' is the selector of the LAST raw query line;
        #metadata edits and all queries below are run against that one document - confirm this is intended
        if metachanges:
            try:
                doc = self.docstore[docsel]
            except NoSuchDocument:
                log("[QUERY FAILED] No such document")
                raise cherrypy.HTTPError(404, "Document not found: " + docsel[0] + "/" + docsel[1])
            except Exception as e:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                traceback.print_tb(exc_traceback, limit=50, file=sys.stderr)
                print("[QUERY FAILED] FoLiA Error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e), file=sys.stderr)
                log("[QUERY FAILED] FoLiA Error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e))
                if logfile: traceback.print_tb(exc_traceback, limit=50, file=logfile)
                raise cherrypy.HTTPError(404, "FoLiA error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e) + "\n\nQuery was: " + rawquery)

            if doc.metadatatype == folia.MetaDataType.NATIVE:
                doc.changed = True
                self.docstore.lastaccess[docsel][sid] = time.time()
                log("[METADATA EDIT ON " + "/".join(docsel)  + "]")
                for key, value in metachanges.items():
                    if value == 'NONE':
                        #the literal value NONE deletes the metadata key
                        del doc.metadata[key]
                    else:
                        doc.metadata[key] = value
            else:
                raise cherrypy.HTTPError(404, "Unable to edit metadata on document with non-native metadata type (" + "/".join(docsel)+")")
        else:
            doc = None #initialize document only if not already initialized by metadata changes


        results = [] #stores all results
        xresults = [] #stores results that should be transferred to other sessions as well, i.e. results of adds/edits
        prevdocid = None
        multidoc = False #are the queries over multiple distinct documents?
        format = None
        for query, rawquery in queries:
            try:
                doc = self.docstore[docsel]
                self.docstore.lastaccess[docsel][sid] = time.time()
                log("[QUERY ON " + "/".join(docsel)  + "] " + str(rawquery))
                if isinstance(query, fql.Query):
                    if prevdocid and doc.id != prevdocid:
                        multidoc = True
                    result =  query(doc,False,self.debug >= 2) #False = nowrap
                    results.append(result)
                    if query.action and query.action.action in ('EDIT','ADD','DELETE', 'SUBSTITUTE','PREPEND','APPEND'):
                        #results of edits should be transferred to other open sessions
                        xresults.append(result)
                    if self.debug:
                        log("[QUERY RESULT] " + repr(result))
                    format = query.format
                    if query.action and query.action.action != "SELECT":
                        doc.changed = True
                        self.addtochangelog(doc, query, docsel)
                elif query == "GET":
                    results.append(doc.xmlstring())
                    format = "single-xml"
                elif query == "PROBE":
                    #no queries to perform
                    format = "flat"
                else:
                    raise Exception("Invalid query")
            except NoSuchDocument:
                if self.docstore.fail and not self.docstore.ignorefail:
                    log("[QUERY FAILED] Document server is in lockdown due to earlier failure. Restart required!")
                    raise cherrypy.HTTPError(403, "Document server is in lockdown due to earlier failure. Contact your FLAT administrator")
                else:
                    log("[QUERY FAILED] No such document")
                    raise cherrypy.HTTPError(404, "Document not found: " + docsel[0] + "/" + docsel[1])
            except fql.QueryError as e:
                log("[QUERY FAILED] FQL Query Error: " + str(e))
                raise cherrypy.HTTPError(404, "FQL query error: " + str(e))
            except Exception as e:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                traceback.print_tb(exc_traceback, limit=50, file=sys.stderr)
                log("[QUERY FAILED] FoLiA Error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e))
                print("[QUERY FAILED] FoLiA Error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e), file=sys.stderr)
                if logfile: traceback.print_tb(exc_traceback, limit=50, file=logfile)
                raise cherrypy.HTTPError(404, "FoLiA error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e) + "\n\nQuery was: " + rawquery)
            prevdocid = doc.id

        if not format:
            #no query was executed; that is only acceptable when metadata was edited
            if metachanges:
                return "{\"version\":\"" + VERSION + "\"}"
            else:
                raise cherrypy.HTTPError(404, "No queries given")
        if format.endswith('xml'):
            cherrypy.response.headers['Content-Type']= 'text/xml'
        elif format.endswith('json'):
            cherrypy.response.headers['Content-Type']= 'application/json'


        if format == "xml":
            out = "<results>" + "\n".join(results) + "</results>"
        elif format == "json":
            out = "[" + ",".join(results) + "]"
        elif format == "flat":
            if sid != 'NOSID' and sessiondocsel:
                self.setsession(sessiondocsel[0],sessiondocsel[1],sid, xresults)
            cherrypy.response.headers['Content-Type']= 'application/json'
            if multidoc:
                #multidoc response, not producing results: only the version is reported.
                #(This used to 'raise' a plain string, which is a TypeError in Python 3.)
                out = "{\"version\":\""+VERSION +"\"}"
            elif doc:
                log("[Parsing results for FLAT]")
                out =  parseresults(results, doc, **flatargs)
        else:
            if len(results) > 1:
                raise cherrypy.HTTPError(404, "Multiple results were obtained but format dictates only one can be returned!")
            out = results[0]


        if docsel[0] == "testflat":
            testresult = self.docstore.save(docsel) #won't save, will run tests instead
            log("Test result: " +str(repr(testresult)))


            if format == "flat":
                #the FLAT output may be bytes or str; json.loads needs text
                if isinstance(out,bytes):
                    out = str(out,'utf-8')
                out = json.loads(out)
                out['testresult'] = testresult[0]
                out['testmessage'] = testresult[1]
                out['queries'] = rawqueries
                out = json.dumps(out)

            #unload the document, we want a fresh copy every time
            del self.docstore.data[('testflat','testflat')]

        if self.debug:
            if isinstance(out,bytes):
                log("[FINAL RESULTS] " + str(out,'utf-8'))
            else:
                log("[FINAL RESULTS] " + out)

        if isinstance(out,str):
            return out.encode('utf-8')
        else:
            return out
    def query(self, **kwargs):
        """Query method, all FQL queries arrive here.

        Queries are newline-separated and are taken from the ``query``
        keyword argument or, when that is absent, from the request body
        (``Content-Length`` bytes). Each line is first passed through
        ``getdocumentselector()``; a line that yields no selector inherits
        the one of the previous line. What remains is the literal ``GET`` or
        ``PROBE``, a ``CQL `` query (converted to FQL, optionally followed by
        ``FORMAT <name>``), a ``META key=value`` metadata edit, or plain FQL.

        The method runs in three phases: parse all lines, apply pending
        metadata edits, then execute the parsed queries and serialise the
        results according to the format of the last executed query.

        Returns:
            The response body. ``str`` output is UTF-8 encoded to ``bytes``;
            the metadata-only early return hands back the ``str`` ``"{}"``.

        Raises:
            cherrypy.HTTPError: 404 for syntax errors, query errors, missing
                documents, non-native metadata, no queries, or multiple
                results in a single-result format.
        """

        def docselname(selector):
            #render a document selector for log output; tolerates a missing selector
            return "/".join(selector) if selector else "(no document selected)"

        if 'X-sessionid' in cherrypy.request.headers:
            sid = cherrypy.request.headers['X-sessionid']
        else:
            sid = 'NOSID'

        if 'query' in kwargs:
            rawqueries = kwargs['query'].split("\n")
        else:
            cl = cherrypy.request.headers['Content-Length']
            body = cherrypy.request.body.read(int(cl))
            #read() may hand back bytes, which cannot be split on a str separator
            if isinstance(body, bytes):
                body = str(body, 'utf-8')
            rawqueries = body.split("\n")

        if self.debug:
            for i,rawquery in enumerate(rawqueries):
                log("[QUERY INCOMING #" + str(i+1) + "] " + rawquery)

        #Get parameters for FLAT-specific return format
        flatargs = getflatargs(cherrypy.request.params)
        flatargs['debug'] = self.debug
        flatargs['logfunction'] = log

        prevdocsel = None
        sessiondocsel = None
        docsel = None #keeps the error/cleanup paths below well-defined if the first selector fails to parse
        queries = []
        metachanges = {}
        for rawquery in rawqueries:
            try:
                docsel, rawquery = getdocumentselector(rawquery)
                if not docsel: docsel = prevdocsel
                self.docstore.use(docsel)
                if self.debug >= 2: log("[acquired lock " + docselname(docsel)+"]")
                if not sessiondocsel: sessiondocsel = docsel
                if rawquery == "GET":
                    query = "GET"
                elif rawquery == "PROBE":
                    query = "PROBE" #gets no content data at all, but allows returning associated metadata used by FLAT, forces FLAT format
                else:
                    if rawquery[:4] == "CQL ":
                        if rawquery.find('FORMAT') != -1:
                            end = rawquery.find('FORMAT')
                            format = rawquery[end+7:]
                        else:
                            end = None #no FORMAT clause: the CQL expression runs to the end of the line
                            format = 'xml'
                        try:
                            query = fql.Query(cql.cql2fql(rawquery[4:end]))
                            query.format = format
                        except cql.SyntaxError as e :
                            raise fql.SyntaxError("Error in CQL query: " + str(e))
                    elif rawquery[:5] == "META ":
                        try:
                            key, value = rawquery[5:].split('=',maxsplit=1)
                        except ValueError:
                            raise fql.SyntaxError("Expected key=value after META keyword")
                        key = key.strip()
                        value = value.strip()
                        metachanges[key] = value
                        query = None
                    else:
                        query = fql.Query(rawquery)
                    if query and query.format == "python":
                        query.format = "xml"
                    if query and query.action and not docsel:
                        raise fql.SyntaxError("Document Server requires USE statement prior to FQL query")
            except fql.SyntaxError as e:
                #docsel may be None here (e.g. the missing-USE error above), so it is rendered defensively
                log("[QUERY ON " + docselname(docsel)  + "] " + str(rawquery))
                log("[QUERY FAILED] FQL Syntax Error: " + str(e))
                raise cherrypy.HTTPError(404, "FQL syntax error: " + str(e))
            finally:
                if self.debug >= 2: log("[releasing lock " + docselname(docsel))
                self.docstore.done(docsel)

            if query:
                queries.append( (query, rawquery))
            prevdocsel = docsel


        #NOTE(review): from here on 'docsel' is the selector of the LAST raw query line;
        #metadata edits and all queries below are run against that one document - confirm this is intended
        if metachanges:
            try:
                doc = self.docstore[docsel]
            except NoSuchDocument:
                log("[QUERY FAILED] No such document")
                raise cherrypy.HTTPError(404, "Document not found: " + docsel[0] + "/" + docsel[1])
            except Exception as e:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                traceback.print_tb(exc_traceback, limit=50, file=sys.stderr)
                print("[QUERY FAILED] FoLiA Error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e), file=sys.stderr)
                log("[QUERY FAILED] FoLiA Error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e))
                if logfile: traceback.print_tb(exc_traceback, limit=50, file=logfile)
                raise cherrypy.HTTPError(404, "FoLiA error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e) + "\n\nQuery was: " + rawquery)

            if doc.metadatatype == folia.MetaDataType.NATIVE:
                self.docstore.lastaccess[docsel][sid] = time.time()
                log("[METADATA EDIT ON " + "/".join(docsel)  + "]")
                for key, value in metachanges.items():
                    if value == 'NONE':
                        #the literal value NONE deletes the metadata key
                        del doc.metadata[key]
                    else:
                        doc.metadata[key] = value
            else:
                raise cherrypy.HTTPError(404, "Unable to edit metadata on document with non-native metadata type (" + "/".join(docsel)+")")


        results = []
        doc = None
        prevdocid = None
        multidoc = False #are the queries over multiple distinct documents?
        format = None
        for query, rawquery in queries:
            try:
                doc = self.docstore[docsel]
                self.docstore.lastaccess[docsel][sid] = time.time()
                log("[QUERY ON " + "/".join(docsel)  + "] " + str(rawquery))
                if isinstance(query, fql.Query):
                    if prevdocid and doc.id != prevdocid:
                        multidoc = True
                    result =  query(doc,False,self.debug >= 2) #False = nowrap
                    results.append(result)
                    if self.debug:
                        log("[QUERY RESULT] " + repr(result))
                    format = query.format
                    if query.action and query.action.action != "SELECT":
                        doc.changed = True
                        self.addtochangelog(doc, query, docsel)
                elif query == "GET":
                    results.append(doc.xmlstring())
                    format = "single-xml"
                elif query == "PROBE":
                    #no queries to perform
                    format = "flat"
                else:
                    raise Exception("Invalid query")
            except NoSuchDocument:
                log("[QUERY FAILED] No such document")
                raise cherrypy.HTTPError(404, "Document not found: " + docsel[0] + "/" + docsel[1])
            except fql.QueryError as e:
                log("[QUERY FAILED] FQL Query Error: " + str(e))
                raise cherrypy.HTTPError(404, "FQL query error: " + str(e))
            except Exception as e:
                exc_type, exc_value, exc_traceback = sys.exc_info()
                traceback.print_tb(exc_traceback, limit=50, file=sys.stderr)
                log("[QUERY FAILED] FoLiA Error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e))
                print("[QUERY FAILED] FoLiA Error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e), file=sys.stderr)
                if logfile: traceback.print_tb(exc_traceback, limit=50, file=logfile)
                raise cherrypy.HTTPError(404, "FoLiA error in " + "/".join(docsel) + ": [" + e.__class__.__name__ + "] " + str(e) + "\n\nQuery was: " + rawquery)
            prevdocid = doc.id

        if not format:
            #no query was executed; that is only acceptable when metadata was edited
            if metachanges:
                return "{}"
            else:
                raise cherrypy.HTTPError(404, "No queries given")
        if format.endswith('xml'):
            cherrypy.response.headers['Content-Type']= 'text/xml'
        elif format.endswith('json'):
            cherrypy.response.headers['Content-Type']= 'application/json'


        if format == "xml":
            out = "<results>" + "\n".join(results) + "</results>"
        elif format == "json":
            out = "[" + ",".join(results) + "]"
        elif format == "flat":
            if sid != 'NOSID' and sessiondocsel and not multidoc:
                self.createsession(sessiondocsel[0],sessiondocsel[1],sid, results)
            cherrypy.response.headers['Content-Type']= 'application/json'
            if multidoc:
                #multidoc response, not producing results: an empty JSON object is returned.
                #(This used to 'raise' a plain string, which is a TypeError in Python 3.)
                out = "{}"
            elif doc:
                log("[Parsing results for FLAT]")
                out =  parseresults(results, doc, **flatargs)
        else:
            if len(results) > 1:
                raise cherrypy.HTTPError(404, "Multiple results were obtained but format dictates only one can be returned!")
            out = results[0]


        if docsel[0] == "testflat":
            testresult = self.docstore.save(docsel) #won't save, will run tests instead
            log("Test result: " +str(repr(testresult)))


            if format == "flat":
                #the FLAT output may be bytes or str; json.loads needs text
                if isinstance(out,bytes):
                    out = str(out,'utf-8')
                out = json.loads(out)
                out['testresult'] = testresult[0]
                out['testmessage'] = testresult[1]
                out['queries'] = rawqueries
                out = json.dumps(out)

            #unload the document, we want a fresh copy every time
            del self.docstore.data[('testflat','testflat')]

        if self.debug:
            if isinstance(out,bytes):
                log("[FINAL RESULTS] " + str(out,'utf-8'))
            else:
                log("[FINAL RESULTS] " + out)

        if isinstance(out,str):
            return out.encode('utf-8')
        else:
            return out