def obj_get(self, bundle, **kwargs):
    """Return a single document node with its sentences and related translations.

    Looks up the Neo4j node by primary key, attaches a (sorted, unless the
    document is very long) list of sentence dicts, and a map of translation
    documents keyed by language code.
    """
    gdb = GraphDatabase(GRAPH_DATABASE_REST_URL)
    document = gdb.nodes.get(GRAPH_DATABASE_REST_URL + "node/" + kwargs['pk'] + '/')

    new_obj = DataObject(kwargs['pk'])
    # Hoist the repeated new_obj.__dict__['_data'] lookup into a local alias.
    data = new_obj.__dict__['_data'] = document.properties
    data['id'] = kwargs['pk']

    # SECURITY/robustness: the CTS property is spliced directly into the
    # Cypher text, so escape single quotes to keep a quote in the value from
    # breaking out of the string literal.
    cts = document.properties['CTS'].replace("'", "\\'")
    sentences = gdb.query(
        """MATCH (d:`Document`)-[:sentences]->(s:`Sentence`) WHERE d.CTS='"""
        + cts + """' RETURN DISTINCT s ORDER BY ID(s)""")

    sentenceArray = []
    for s in sentences:
        sent = s[0]
        # This might seem a little hacky, but API resources are very
        # decoupled, which gives us great performance instead of creating
        # relations amongst objects and dereferencing foreign-keyed fields.
        node_id = sent['self'].split('/')[-1]
        sent['data']['resource_uri'] = API_PATH + 'sentence/' + node_id + '/'
        sentenceArray.append(sent['data'])

    # Only skip sorting when long-document display is enabled AND the
    # document is very large (>500 sentences); the two original nested
    # branches both sorted otherwise, so they collapse to one condition.
    if ENABLE_DISPLAYING_LONG_DOCUMENTS and len(sentenceArray) > 500:
        data['sentences'] = sentenceArray
    else:
        data['sentences'] = sort_sentences(sentenceArray)

    # Get a dictionary of related translations of this document.
    relatedDocuments = gdb.query(
        """MATCH (d:`Document`)-[:sentences]->(s:`Sentence`)-[:words]->(w:`Word`)-[:translation]->(t:`Word`)<-[:words]-(s1:`Sentence`)<-[:sentences]-(d1:`Document`) WHERE HAS (d.CTS) AND d.CTS='"""
        + cts + """' RETURN DISTINCT d1 ORDER BY ID(d1)""")

    data['translations'] = {}
    for rd in relatedDocuments:
        doc = rd[0]
        doc_id = doc['self'].split('/')[-1]
        lang = doc['data']['lang']
        if lang in CTS_LANG:
            data['translations'][lang] = doc['data']
            data['translations'][lang]['resource_uri'] = API_PATH + 'document/' + doc_id + '/'
    return new_obj
def obj_get(self, bundle, **kwargs):
    """Fetch one document node, decorated with its sentences and translations."""
    gdb = GraphDatabase(GRAPH_DATABASE_REST_URL)
    document = gdb.nodes.get(GRAPH_DATABASE_REST_URL + "node/" + kwargs['pk'] + '/')

    obj = DataObject(kwargs['pk'])
    obj.__dict__['_data'] = document.properties
    obj.__dict__['_data']['id'] = kwargs['pk']

    cts = document.properties['CTS']
    rows = gdb.query(
        """MATCH (d:`Document`)-[:sentences]->(s:`Sentence`) WHERE d.CTS='"""
        + cts + """' RETURN DISTINCT s ORDER BY ID(s)""")

    collected = []
    for row in rows:
        node = row[0]
        # Resource URIs are assembled by hand from the node's self-URL; the
        # API keeps resources decoupled instead of dereferencing relations,
        # which is much cheaper.
        pieces = node['self'].split('/')
        node['data']['resource_uri'] = API_PATH + 'sentence/' + pieces[-1] + '/'
        collected.append(node['data'])
    obj.__dict__['_data']['sentences'] = sort_sentences(collected)

    # Get a dictionary of related translations of this document,
    # keyed by language code.
    related = gdb.query(
        """MATCH (d:`Document`)-[:sentences]->(s:`Sentence`)-[:words]->(w:`Word`)-[:translation]->(t:`Word`)<-[:words]-(s1:`Sentence`)<-[:sentences]-(d1:`Document`) WHERE HAS (d.CTS) AND d.CTS='"""
        + cts + """' RETURN DISTINCT d1 ORDER BY ID(d1)""")
    obj.__dict__['_data']['translations'] = {}
    for row in related:
        node = row[0]
        pieces = node['self'].split('/')
        lang = node['data']['lang']
        if lang in CTS_LANG:
            obj.__dict__['_data']['translations'][lang] = node['data']
            obj.__dict__['_data']['translations'][lang]['resource_uri'] = API_PATH + 'document/' + pieces[-1] + '/'
    return obj
def analyze(self, text, lower=False, source='no_filter', sim_func=utils.get_similarity, pagerank_config=None):
    """Segment *text* and rank its sentences with TextRank.

    Args:
        text: raw text to analyze.
        lower: whether to lowercase before segmentation.
        source: which word list to rank with (e.g. 'no_filter').
        sim_func: sentence-similarity function weighting the graph edges.
        pagerank_config: pagerank options dict; defaults to {'alpha': 0.85}.
    """
    # Avoid the original mutable default argument for pagerank_config.
    if pagerank_config is None:
        pagerank_config = {'alpha': 0.85}
    self.key_sentences = []
    result = self.seg.segment(text=text, lower=lower)
    self.sentences = result.sentences
    self.words_no_filter = result.words_no_filter
    _source = result['words_' + source]
    # BUG FIX: sim_func and pagerank_config were accepted but silently
    # ignored; forward them so callers' customizations take effect
    # (the sibling analyze() variant forwards its equivalents).
    self.key_sentences = utils.sort_sentences(
        sentences=self.sentences,
        words=_source,
        sim_func=sim_func,
        pagerank_config=pagerank_config)
def analyze(self, text, lower=False, source='no_stop_words', sim_func=utils.get_similarity, damping_coeffi=0.85):
    """Segment *text* and compute its key sentences via TextRank.

    *source* selects which segmented word list drives the ranking; an
    unrecognized value falls back to the stop-word-filtered list.
    """
    self.key_sentences = []
    result = self.seg.segment(text=text, lower=lower)
    self.sentences = result.sentences
    self.words_no_filter = result.words_no_filter
    self.words_no_stop_words = result.words_no_stop_words
    self.words_all_filters = result.words_all_filters
    if source in ('no_filter', 'no_stop_words', 'all_filters'):
        chosen = result['words_' + source]
    else:
        chosen = result['words_no_stop_words']
    self.key_sentences = utils.sort_sentences(
        sentences=self.sentences,
        words=chosen,
        sim_func=sim_func,
        damping_coeffi=damping_coeffi)
def get_object_list(self, request):
    """List sentence nodes, optionally filtered by CTS/length/sentence params.

    Filter keys support Django-style suffixes: __contains, __startswith,
    __endswith, __gt, __lt, __isnot. 'length' is treated as numeric
    (unquoted in Cypher); everything else as a string. When
    ENABLE_DISPLAYING_LONG_DOCUMENTS is on, result sets over 500 sentences
    are returned unsorted because sorting them is slow.
    """
    gdb = GraphDatabase(GRAPH_DATABASE_REST_URL)
    attrlist = ['CTS', 'length', 'sentence']
    sentences = []

    # Collect recognized filter parameters from the query string.
    # SECURITY: values are spliced into the Cypher text below, so escape
    # single quotes to keep a quote in a value from breaking out of (or
    # injecting into) the string literal.
    query_params = {}
    for obj in request.GET.keys():
        if (obj in attrlist or obj.split('__')[0] in attrlist) and request.GET.get(obj) is not None:
            query_params[obj] = request.GET.get(obj).replace("'", "\\'")

    # implement filtering
    if len(query_params) > 0:
        # Build the WHERE clause; every condition ends with " AND " and the
        # final dangling "AND " is trimmed before appending RETURN.
        q = """MATCH (d:`Document`)-[:sentences]->(s:`Sentence`) WHERE """
        for key, value in query_params.items():
            parts = key.split('__')   # hoisted: was re-split on every use
            field = parts[0]
            op = parts[1] if len(parts) > 1 else None
            prefix = """HAS (s.""" + field + """) AND s.""" + field
            if op == 'contains':
                q += prefix + """=~'.*""" + value + """.*' AND """
            elif op == 'startswith':
                q += prefix + """=~'""" + value + """.*' AND """
            elif op == 'endswith':
                q += prefix + """=~'.*""" + value + """' AND """
            elif op == 'gt':
                q += prefix + """>""" + value + """ AND """
            elif op == 'lt':
                q += prefix + """<""" + value + """ AND """
            elif op == 'isnot':
                if field == 'length':
                    q += prefix + """<>""" + value + """ AND """
                else:
                    q += prefix + """<>'""" + value + """' AND """
            elif op is None:
                if field == 'length':
                    q += prefix + """=""" + value + """ AND """
                else:
                    q += prefix + """='""" + value + """' AND """
            # Unrecognized suffixes contribute nothing (original behavior).
        q = q[:len(q) - 4]
        q += """RETURN s, d ORDER BY ID(s)"""
        table = gdb.query(q)
    # default querying
    else:
        table = gdb.query("""MATCH (d:`Document`)-[:sentences]->(s:`Sentence`) WHERE HAS (s.CTS) RETURN s, d ORDER BY ID(s)""")

    # Create the objects which were queried for and set all necessary attributes.
    for t in table:
        sentence = t[0]
        document = t[1]
        sent_id = sentence['self'].split('/')[-1]
        doc_id = document['self'].split('/')[-1]
        new_obj = DataObject(sent_id)
        new_obj.__dict__['_data'] = sentence['data']
        new_obj.__dict__['_data']['id'] = sent_id
        new_obj.__dict__['_data']['document_resource_uri'] = API_PATH + 'document/' + doc_id + '/'
        sentences.append(new_obj)

    # Collapsed from a nested if/else whose trailing duplicate return was
    # unreachable dead code.
    if ENABLE_DISPLAYING_LONG_DOCUMENTS and len(sentences) > 500:
        return sentences
    return sort_sentences(sentences)
def get_object_list(self, request):
    """List document nodes, optionally filtered by document metadata params.

    Filter keys support __contains, __startswith, __endswith and __isnot
    suffixes. Each document carries a lightweight sentence list (only
    resource_uri and CTS per sentence); sentence lists over 500 entries are
    left unsorted when ENABLE_DISPLAYING_LONG_DOCUMENTS is on.
    """
    gdb = GraphDatabase(GRAPH_DATABASE_REST_URL)
    attrlist = ['CTS', 'name', 'name_eng', 'lang', 'author']
    documents = []

    # SECURITY: filter values are interpolated into Cypher below, so escape
    # single quotes to prevent breaking out of the string literal.
    query_params = {}
    for obj in request.GET.keys():
        if (obj in attrlist or obj.split('__')[0] in attrlist) and request.GET.get(obj) is not None:
            query_params[obj] = request.GET.get(obj).replace("'", "\\'")

    # implement filtering
    if len(query_params) > 0:
        q = """MATCH (d:`Document`)-[:sentences]->(s:`Sentence`) WHERE """
        for key, value in query_params.items():
            parts = key.split('__')   # hoisted: was re-split on every use
            field = parts[0]
            op = parts[1] if len(parts) > 1 else None
            prefix = """HAS (d.""" + field + """) AND d.""" + field
            if op == 'contains':
                q += prefix + """=~'.*""" + value + """.*' AND """
            elif op == 'startswith':
                q += prefix + """=~'""" + value + """.*' AND """
            elif op == 'endswith':
                q += prefix + """=~'.*""" + value + """' AND """
            elif op == 'isnot':
                q += prefix + """<>'""" + value + """' AND """
            elif op is None:
                q += prefix + """='""" + value + """' AND """
            # Unrecognized suffixes contribute nothing (original behavior).
        q = q[:len(q) - 4]  # drop the dangling "AND "
        q += """RETURN DISTINCT d ORDER BY ID(d)"""
        table = gdb.query(q)
    # default querying
    else:
        table = gdb.query("""MATCH (d:`Document`) RETURN DISTINCT d ORDER BY ID(d)""")

    # Create the objects which were queried for and set all necessary attributes.
    for t in table:
        document = t[0]
        doc_id = document['self'].split('/')[-1]
        new_obj = DataObject(doc_id)
        new_obj.__dict__['_data'] = document['data']
        new_obj.__dict__['_data']['id'] = doc_id
        # Escape the CTS read back from the database too before splicing it
        # into the follow-up sentence query.
        cts = document['data']['CTS'].replace("'", "\\'")
        rows = gdb.query("""MATCH (d:`Document`)-[:sentences]->(s:`Sentence`) WHERE d.CTS='""" + cts + """' RETURN DISTINCT s ORDER BY ID(s)""")
        sentenceArray = []
        for s in rows:
            sent = s[0]
            sent_id = sent['self'].split('/')[-1]
            # Keep only a lightweight stub per sentence.
            sent['data'] = {
                'resource_uri': API_PATH + 'sentence/' + sent_id + '/',
                'CTS': sent['data']['CTS'],
            }
            sentenceArray.append(sent['data'])
        if ENABLE_DISPLAYING_LONG_DOCUMENTS and len(sentenceArray) > 500:
            new_obj.__dict__['_data']['sentences'] = sentenceArray
        else:
            new_obj.__dict__['_data']['sentences'] = sort_sentences(sentenceArray)
        documents.append(new_obj)
    return documents
def get_object_list(self, request):
    """List document nodes matching optional metadata filters.

    Supports the __contains, __startswith, __endswith and __isnot suffixes
    on filter keys. Each returned document gets a slim sentence list
    (resource_uri + CTS only); lists over 500 sentences stay unsorted when
    ENABLE_DISPLAYING_LONG_DOCUMENTS is on.
    """
    gdb = GraphDatabase(GRAPH_DATABASE_REST_URL)
    attrlist = ['CTS', 'name', 'name_eng', 'lang', 'author']
    documents = []

    # SECURITY: request values end up inside a string-built Cypher query;
    # escape single quotes so they cannot terminate the literal early.
    query_params = {}
    for param in request.GET.keys():
        if (param in attrlist or param.split('__')[0] in attrlist) and request.GET.get(param) is not None:
            query_params[param] = request.GET.get(param).replace("'", "\\'")

    # implement filtering
    if len(query_params) > 0:
        q = """MATCH (d:`Document`)-[:sentences]->(s:`Sentence`) WHERE """
        for key, value in query_params.items():
            segments = key.split('__')   # split once instead of per-use
            prop = segments[0]
            suffix = segments[1] if len(segments) > 1 else None
            clause = """HAS (d.""" + prop + """) AND d.""" + prop
            if suffix == 'contains':
                q += clause + """=~'.*""" + value + """.*' AND """
            elif suffix == 'startswith':
                q += clause + """=~'""" + value + """.*' AND """
            elif suffix == 'endswith':
                q += clause + """=~'.*""" + value + """' AND """
            elif suffix == 'isnot':
                q += clause + """<>'""" + value + """' AND """
            elif suffix is None:
                q += clause + """='""" + value + """' AND """
            # unknown suffixes add no condition (matches original behavior)
        q = q[:len(q) - 4]  # trim the trailing "AND "
        q += """RETURN DISTINCT d ORDER BY ID(d)"""
        table = gdb.query(q)
    # default querying
    else:
        table = gdb.query("""MATCH (d:`Document`) RETURN DISTINCT d ORDER BY ID(d)""")

    # Create the objects which were queried for and set all necessary attributes.
    for row in table:
        document = row[0]
        doc_id = document['self'].split('/')[-1]
        new_obj = DataObject(doc_id)
        new_obj.__dict__['_data'] = document['data']
        new_obj.__dict__['_data']['id'] = doc_id
        # Also escape the stored CTS before reusing it in the next query.
        cts = document['data']['CTS'].replace("'", "\\'")
        rows = gdb.query("""MATCH (d:`Document`)-[:sentences]->(s:`Sentence`) WHERE d.CTS='""" + cts + """' RETURN DISTINCT s ORDER BY ID(s)""")
        sentenceArray = []
        for s in rows:
            sent = s[0]
            sent_id = sent['self'].split('/')[-1]
            # Strip sentence data down to a resource stub.
            sent['data'] = {
                'resource_uri': API_PATH + 'sentence/' + sent_id + '/',
                'CTS': sent['data']['CTS'],
            }
            sentenceArray.append(sent['data'])
        if ENABLE_DISPLAYING_LONG_DOCUMENTS and len(sentenceArray) > 500:
            new_obj.__dict__['_data']['sentences'] = sentenceArray
        else:
            new_obj.__dict__['_data']['sentences'] = sort_sentences(sentenceArray)
        documents.append(new_obj)
    return documents
def get_object_list(self, request):
    """List sentence nodes matching optional CTS/length/sentence filters.

    Filter keys support __contains, __startswith, __endswith, __gt, __lt
    and __isnot suffixes; 'length' comparisons are numeric (unquoted in
    Cypher), all other properties are quoted strings. Results are always
    returned sorted.
    """
    gdb = GraphDatabase(GRAPH_DATABASE_REST_URL)
    attrlist = ['CTS', 'length', 'sentence']
    sentences = []

    # SECURITY: request values are concatenated into Cypher below; escape
    # single quotes so a value cannot terminate the string literal early.
    query_params = {}
    for param in request.GET.keys():
        if (param in attrlist or param.split('__')[0] in attrlist) and request.GET.get(param) is not None:
            query_params[param] = request.GET.get(param).replace("'", "\\'")

    # implement filtering
    if len(query_params) > 0:
        q = """MATCH (d:`Document`)-[:sentences]->(s:`Sentence`) WHERE """
        for key, value in query_params.items():
            segments = key.split('__')   # split once instead of per-use
            prop = segments[0]
            suffix = segments[1] if len(segments) > 1 else None
            clause = """HAS (s.""" + prop + """) AND s.""" + prop
            if suffix == 'contains':
                q += clause + """=~'.*""" + value + """.*' AND """
            elif suffix == 'startswith':
                q += clause + """=~'""" + value + """.*' AND """
            elif suffix == 'endswith':
                q += clause + """=~'.*""" + value + """' AND """
            elif suffix == 'gt':
                q += clause + """>""" + value + """ AND """
            elif suffix == 'lt':
                q += clause + """<""" + value + """ AND """
            elif suffix == 'isnot':
                if prop == 'length':
                    q += clause + """<>""" + value + """ AND """
                else:
                    q += clause + """<>'""" + value + """' AND """
            elif suffix is None:
                if prop == 'length':
                    q += clause + """=""" + value + """ AND """
                else:
                    q += clause + """='""" + value + """' AND """
            # unknown suffixes add no condition (matches original behavior)
        q = q[:len(q) - 4]  # trim the trailing "AND "
        q += """RETURN s, d ORDER BY ID(s)"""
        table = gdb.query(q)
    # default querying
    else:
        table = gdb.query("""MATCH (d:`Document`)-[:sentences]->(s:`Sentence`) WHERE HAS (s.CTS) RETURN s, d ORDER BY ID(s)""")

    # Create the objects which were queried for and set all necessary attributes.
    for row in table:
        sentence = row[0]
        document = row[1]
        sent_id = sentence['self'].split('/')[-1]
        doc_id = document['self'].split('/')[-1]
        new_obj = DataObject(sent_id)
        new_obj.__dict__['_data'] = sentence['data']
        new_obj.__dict__['_data']['id'] = sent_id
        new_obj.__dict__['_data']['document_resource_uri'] = API_PATH + 'document/' + doc_id + '/'
        sentences.append(new_obj)
    return sort_sentences(sentences)