def nodeToExtSearch(self, node, schema):
    """Build the extended search index entry for *node*.

    Collects the values of the node's search fields, then either updates
    the existing ``searchmeta`` row for the node or inserts a new one.

    @param node: core.tree node whose search fields are indexed
    @param schema: String, name of the schema
    @return: True on success (also when the schema defines no search
        fields), False when the SQL statement failed
    """
    if len(node.getSearchFields()) == 0:
        # stop if schema has no searchfields
        return True
    self.nodeToSchemaDef(node, schema)  # save definition
    keyvalue = []
    for i, field in enumerate(node.getSearchFields(), start=1):
        key = "field%d" % i
        if field.getFieldtype() == "union":
            # union fields concatenate several attribute values
            value = ""
            for item in field.get("valuelist").split(";"):
                value += node.get(item) + '|'
        else:
            value = node.get(field.getName())
        keyvalue.append((key, modify_tex(u(protect(value)), 'strip')))
    # NOTE(review): SQL is assembled via string concatenation; values are
    # only sanitised by protect()/normalize_utf8().  The execute() wrapper
    # takes a raw statement string, so switching to parameterised queries
    # would require an interface change -- flagged, not changed here.
    sql0 = 'SELECT id FROM searchmeta where id=\'{}\''.format(node.id)
    sql1 = 'UPDATE searchmeta SET '
    sql2 = 'INSERT INTO searchmeta (id, type, schema, updatetime'
    for key, value in keyvalue:
        sql1 += key + "='" + normalize_utf8(value) + "', "
        sql2 += ", " + key
    sql1 += "type='" + node.getContentType() + "', schema='" + schema + \
        "', updatetime='" + node.get("updatetime") + "'"
    sql2 += ") VALUES("
    sql2 += '\'{}\', "{}", "{}", "{}"'.format(node.id, node.getContentType(),
                                              schema, node.get("updatetime"))
    for key, value in keyvalue:
        sql2 += ", '" + normalize_utf8(value) + "'"
    sql1 += " WHERE id='{}'".format(node.id)
    sql2 += ")"
    sql = ""
    try:
        sql = sql0
        if self.execute(sql0, schema, 'ext'):  # row already present?
            sql = sql1
            self.execute(sql1, schema, 'ext')  # do update
        else:
            sql = sql2
            self.execute(sql2, schema, 'ext')  # do insert
        return True
    except Exception:  # was a bare except; keep best-effort semantics
        logException('error in sqlite insert/update: ' + sql)
        return False
def nodeToExtSearch(self, node, schema):
    """Build the extended search index entry for *node*.

    Collects the values of the node's search fields, then either updates
    the existing ``searchmeta`` row for the node or inserts a new one.

    @param node: core.tree node whose search fields are indexed
    @param schema: String, name of the schema
    @return: True on success (also when the schema defines no search
        fields), False when the SQL statement failed
    """
    if len(node.getSearchFields()) == 0:
        # stop if schema has no searchfields
        return True
    self.nodeToSchemaDef(node, schema)  # save definition
    keyvalue = []
    for i, field in enumerate(node.getSearchFields(), start=1):
        key = "field%d" % i
        if field.getFieldtype() == "union":
            # union fields concatenate several attribute values
            value = ""
            for item in field.get("valuelist").split(";"):
                value += node.get(item) + '|'
        else:
            value = node.get(field.getName())
        keyvalue.append((key, modify_tex(u(protect(value)), 'strip')))
    # NOTE(review): SQL is assembled via string concatenation; values are
    # only sanitised by protect()/normalize_utf8().  The execute() wrapper
    # takes a raw statement string, so switching to parameterised queries
    # would require an interface change -- flagged, not changed here.
    sql0 = 'SELECT id FROM searchmeta where id=\'{}\''.format(node.id)
    sql1 = 'UPDATE searchmeta SET '
    sql2 = 'INSERT INTO searchmeta (id, type, schema, updatetime'
    for key, value in keyvalue:
        sql1 += key + "='" + normalize_utf8(value) + "', "
        sql2 += ", " + key
    sql1 += "type='" + node.getContentType() + "', schema='" + schema + \
        "', updatetime='" + node.get("updatetime") + "'"
    sql2 += ") VALUES("
    sql2 += '\'{}\', "{}", "{}", "{}"'.format(node.id, node.getContentType(),
                                              schema, node.get("updatetime"))
    for key, value in keyvalue:
        sql2 += ", '" + normalize_utf8(value) + "'"
    sql1 += " WHERE id='{}'".format(node.id)
    sql2 += ")"
    sql = ""
    try:
        sql = sql0
        if self.execute(sql0, schema, 'ext'):  # row already present?
            sql = sql1
            self.execute(sql1, schema, 'ext')  # do update
        else:
            sql = sql2
            self.execute(sql2, schema, 'ext')  # do insert
        return True
    except Exception:  # was a bare except; keep best-effort semantics
        logException('error in sqlite insert/update: ' + sql)
        return False
def getSQL(type, value, spc=None):  # deliver sql for given type
    """Return the SQL statement for the given search query *type*.

    @param type: String selecting the statement to build (the name
        shadows the ``type`` builtin but is kept for backward
        compatibility with keyword callers)
    @param value: search value; normalised/escaped before embedding
    @param spc: optional dict with special parameters ('op' operator,
        'pos' (position, schema) tuple); the former mutable default
        ``{}`` is replaced by None to avoid a shared instance
    @return: SQL string, or None for an unknown *type*
    """
    if spc is None:
        spc = {}
    value = normalize_utf8(protect(u(value)))
    if type == "full":  # all metadata
        return 'select distinct(id) from fullsearchmeta where fullsearchmeta match \'value:' + value + '\' and type <>\'directory\''
    elif type == "fulltext":  # fulltext
        return 'select distinct(id) from textsearchmeta where textsearchmeta match \'value:' + value + '\' and type<>\'directory\''
    elif type == "schema":  # schemadef
        return 'select distinct(id) from fullsearchmeta where schema="' + value.replace("'", "") + '"'
    elif type == "objtype":  # object type
        return 'select distinct(id) from fullsearchmeta where type="' + value.replace("'", "") + '"'
    elif type == "updatetime":  # update time with operator <|>|=
        if len(value) == 10:
            # date only -> expand to full ISO timestamp
            value += "T00:00:00"
        return 'select distinct(id) from searchmeta where updatetime ' + spc['op'] + ' "' + value.replace("t", "T") + '"'
    elif type == "field":
        return 'select position, name from searchmeta_def where attrname=\'' + value + '\''
    elif type == "spcompare":
        return 'select distinct(id) from searchmeta where schema="' + \
            str(spc['pos'][1]) + '" and field' + str(spc['pos'][0]) + ' ' + spc['op'] + ' "' + value + '"'
    elif type == "spfield":
        return 'select distinct(id) from searchmeta where field' + str(spc['pos'][0]) + '=""'
    elif type == "spmatch":
        return 'select distinct(id) from searchmeta where schema=\'' + \
            str(spc['pos'][1]) + '\' and field' + str(spc['pos'][0]) + ' match \'' + value + '\''
    elif type == "content_full":
        return 'select * from fullsearchmeta where id=\'' + value + '\''
    elif type == "content_text":
        return 'select * from textsearchmeta where id=\'' + value + '\''
    elif type == "content_ext":
        return 'select * from searchmeta where id=\'' + value + '\''
def getSQL(type, value, spc=None):  # deliver sql for given type
    """Return the SQL statement for the given search query *type*.

    @param type: String selecting the statement to build (the name
        shadows the ``type`` builtin but is kept for backward
        compatibility with keyword callers)
    @param value: search value; normalised/escaped before embedding
    @param spc: optional dict with special parameters ('op' operator,
        'pos' (position, schema) tuple); the former mutable default
        ``{}`` is replaced by None to avoid a shared instance
    @return: SQL string, or None for an unknown *type*
    """
    if spc is None:
        spc = {}
    value = normalize_utf8(protect(u(value)))
    if type == "full":  # all metadata
        return 'select distinct(id) from fullsearchmeta where fullsearchmeta match \'value:' + value + '\' and type <>\'directory\''
    elif type == "fulltext":  # fulltext
        return 'select distinct(id) from textsearchmeta where textsearchmeta match \'value:' + value + '\' and type<>\'directory\''
    elif type == "schema":  # schemadef
        return 'select distinct(id) from fullsearchmeta where schema="' + value.replace("'", "") + '"'
    elif type == "objtype":  # object type
        return 'select distinct(id) from fullsearchmeta where type="' + value.replace("'", "") + '"'
    elif type == "updatetime":  # update time with operator <|>|=
        if len(value) == 10:
            # date only -> expand to full ISO timestamp
            value += "T00:00:00"
        return 'select distinct(id) from searchmeta where updatetime ' + spc['op'] + ' "' + value.replace("t", "T") + '"'
    elif type == "field":
        return 'select position, name from searchmeta_def where attrname=\'' + value + '\''
    elif type == "spcompare":
        return 'select distinct(id) from searchmeta where schema="' + \
            str(spc['pos'][1]) + '" and field' + str(spc['pos'][0]) + ' ' + spc['op'] + ' "' + value + '"'
    elif type == "spfield":
        return 'select distinct(id) from searchmeta where field' + str(spc['pos'][0]) + '=""'
    elif type == "spmatch":
        return 'select distinct(id) from searchmeta where schema=\'' + \
            str(spc['pos'][1]) + '\' and field' + str(spc['pos'][0]) + ' match \'' + value + '\''
    elif type == "content_full":
        return 'select * from fullsearchmeta where id=\'' + value + '\''
    elif type == "content_text":
        return 'select * from textsearchmeta where id=\'' + value + '\''
    elif type == "content_ext":
        return 'select * from searchmeta where id=\'' + value + '\''
def nodeToExtSearch(self, node):
    """Build the extended search index entry for *node* (insert only).

    @param node: core.tree node
    @return: True on success (also when the schema defines no search
        fields), False when the insert failed
    """
    if len(node.getSearchFields()) == 0:
        # stop if schema has no searchfields
        return True
    v_list = {}
    for i, field in enumerate(node.getSearchFields(), start=1):
        v_list[str(i)] = node.get(field.getName())
    # save definition
    self.nodeToSchemaDef(node)
    try:
        # v_list is never empty here (guarded by the early return above),
        # so the former empty-dict branch was unreachable and is dropped.
        # Iterating the same unmutated dict twice yields the same order,
        # so columns and values stay paired.
        cols = ', '.join('field' + key for key in v_list)
        vals = ', '.join('"' + normalize_utf8(u(v_list[key])) + '"'
                         for key in v_list)
        sql = 'INSERT INTO searchmeta (id, type, schema, {}) VALUES("{}", "{}", "{}", {})'.format(
            cols, node.id, node.getContentType(), node.getSchema(), vals)
        self.db.execute(sql)
        return True
    except Exception:  # was a bare except; still best-effort, returns False
        return False
def nodeToSimpleSearch(self, node, schema, type=""):
    """Build the simple (full) search index entry for *node*.

    Concatenates the node name, all non-system attributes and file
    information into one value column, then updates the existing
    ``fullsearchmeta`` row or inserts a new one.

    @param node: core.tree node
    @param schema: String, name of the schema
    @param type: unused, kept for caller compatibility
    @return: True on success, False when the statement failed
    """
    sql_upd = "UPDATE fullsearchmeta SET type='{}', schema='{}', value='{}| ".format(
        node.getContentType(), node.getSchema(), node.name)
    sql_ins = "INSERT INTO fullsearchmeta (id, type, schema, value) VALUES('{}', '{}', '{}', '{}| ".format(
        node.id, node.getContentType(), node.getSchema(), node.name)
    # attributes
    val = ''
    for key, value in node.items():
        if key not in SYSTEMATTRS:  # ignore system attributes
            val += protect(u(value)) + '| '
    # additionally index the normalised spelling of words that differ
    # from their plain lowercased form (umlauts etc.); split() snapshots
    # the list first, so appending to val inside the loop is safe
    for v in val.split(" "):
        v = u(v)
        if normalize_utf8(v) != v.lower():
            val += ' ' + normalize_utf8(v)
    val = val.replace(chr(0), "") + ' '
    # remove tex markup
    val = modify_tex(val, 'strip')
    # files
    for nfile in node.getFiles():  # renamed from ``file`` (builtin shadow)
        val += protect(
            u(nfile.getName() + '| ' + nfile.getType() + '| ' + nfile.getMimeType()) + '| ')
    sql_upd += val + '\' WHERE id=\'{}\''.format(node.id)
    sql_ins += val + '\')'
    sql = ""
    try:
        sql = 'SELECT id from fullsearchmeta WHERE id=\'{}\''.format(node.id)
        if self.execute(sql, schema, 'full'):  # check existance
            sql = sql_upd  # do update
        else:
            sql = sql_ins  # do insert
        self.execute(sql, schema, 'full')
        return True
    except Exception:  # was a bare except; keep best-effort semantics
        logException('error in sqlite insert/update: ' + sql)
        return False
def nodeToSimpleSearch(self, node, schema, type=""):
    """Build the simple (full) search index entry for *node*.

    Concatenates the node name, all non-system attributes and file
    information into one value column, then updates the existing
    ``fullsearchmeta`` row or inserts a new one.

    @param node: core.tree node
    @param schema: String, name of the schema
    @param type: unused, kept for caller compatibility
    @return: True on success, False when the statement failed
    """
    sql_upd = "UPDATE fullsearchmeta SET type='{}', schema='{}', value='{}| ".format(
        node.getContentType(), node.getSchema(), node.name)
    sql_ins = "INSERT INTO fullsearchmeta (id, type, schema, value) VALUES('{}', '{}', '{}', '{}| ".format(
        node.id, node.getContentType(), node.getSchema(), node.name)
    # attributes
    val = ''
    for key, value in node.items():
        if key not in SYSTEMATTRS:  # ignore system attributes
            val += protect(u(value)) + '| '
    # additionally index the normalised spelling of words that differ
    # from their plain lowercased form (umlauts etc.); split() snapshots
    # the list first, so appending to val inside the loop is safe
    for v in val.split(" "):
        v = u(v)
        if normalize_utf8(v) != v.lower():
            val += ' ' + normalize_utf8(v)
    val = val.replace(chr(0), "") + ' '
    # remove tex markup
    val = modify_tex(val, 'strip')
    # files
    for nfile in node.getFiles():  # renamed from ``file`` (builtin shadow)
        val += protect(
            u(nfile.getName() + '| ' + nfile.getType() + '| ' + nfile.getMimeType()) + '| ')
    sql_upd += val + '\' WHERE id=\'{}\''.format(node.id)
    sql_ins += val + '\')'
    sql = ""
    try:
        sql = 'SELECT id from fullsearchmeta WHERE id=\'{}\''.format(node.id)
        if self.execute(sql, schema, 'full'):  # check existance
            sql = sql_upd  # do update
        else:
            sql = sql_ins  # do insert
        self.execute(sql, schema, 'full')
        return True
    except Exception:  # was a bare except; keep best-effort semantics
        logException('error in sqlite insert/update: ' + sql)
        return False
def get_extended_field_ratio(schema, node, db_content):
    """
    Compares the values in the ext search db and the values in the node
    instance and returns a ratio of likeness between the two values.
    @param schema: String, name of the schema
    @param node: Node, an core.tree node instance
    @param db_content: searchmeta row, indexable by field position
    @return: Float in [0.0, 1.0]
    """
    ratios = []
    field_names = get_zero_index_schema_fields(schema)
    for field in field_names:
        # node value gets the same normalisation pipeline used at
        # indexing time so both sides are comparable
        node_value = normalize_utf8(
            modify_tex(u(protect(node.get(field.name))), 'strip'))
        db_value = str(db_content[field.position])
        ratios.append(
            difflib.SequenceMatcher(None, db_value, node_value).ratio())
    if not ratios:
        # schema without search fields: nothing can differ, report full
        # likeness (previously raised ZeroDivisionError)
        return 1.0
    return sum(ratios) / len(ratios)
def get_extended_field_ratio(schema, node, db_content):
    """
    Compares the values in the ext search db and the values in the node
    instance and returns a ratio of likeness between the two values.
    @param schema: String, name of the schema
    @param node: Node, an core.tree node instance
    @param db_content: searchmeta row, indexable by field position
    @return: Float in [0.0, 1.0]
    """
    ratios = []
    field_names = get_zero_index_schema_fields(schema)
    for field in field_names:
        # node value gets the same normalisation pipeline used at
        # indexing time so both sides are comparable
        node_value = normalize_utf8(
            modify_tex(u(protect(node.get(field.name))), 'strip'))
        db_value = str(db_content[field.position])
        ratios.append(
            difflib.SequenceMatcher(None, db_value, node_value).ratio())
    if not ratios:
        # schema without search fields: nothing can differ, report full
        # likeness (previously raised ZeroDivisionError)
        return 1.0
    return sum(ratios) / len(ratios)
def nodeToExtSearch(self, node):
    """Build the extended search index entry for *node* (insert only).

    @param node: core.tree node
    @return: True on success (also when the schema defines no search
        fields), False when the insert failed
    """
    if len(node.getSearchFields()) == 0:
        # stop if schema has no searchfields
        return True
    v_list = {}
    for i, field in enumerate(node.getSearchFields(), start=1):
        v_list[str(i)] = node.get(field.getName())
    # save definition
    self.nodeToSchemaDef(node)
    try:
        # v_list is never empty here (guarded by the early return above),
        # so the former empty-dict branch was unreachable and is dropped.
        # Iterating the same unmutated dict twice yields the same order,
        # so columns and values stay paired.
        cols = ', '.join('field' + key for key in v_list)
        vals = ', '.join('"' + normalize_utf8(u(v_list[key])) + '"'
                         for key in v_list)
        sql = 'INSERT INTO searchmeta (id, type, schema, {}) VALUES("{}", "{}", "{}", {})'.format(
            cols, node.id, node.getContentType(), node.getSchema(), vals)
        self.db.execute(sql)
        return True
    except Exception:  # was a bare except; still best-effort, returns False
        return False
def nodeToSimpleSearch(self, node):
    """Build the simple search index entry for *node* (insert only).

    Concatenates the node name, all non-system attributes and file
    information into the ``fullsearchmeta`` value column.

    @param node: core.tree node
    @return: True on success, False when the insert failed
    """
    try:
        sql = 'INSERT INTO fullsearchmeta (id, type, schema, value) VALUES(\'{}\', \'{}\', \'{}\', \'{}| '.format(
            node.id, node.getContentType(), node.getSchema(), node.name)
        # attributes
        a = ''
        for key, value in node.items():
            if key not in SYSTEMATTRS:  # ignore system attributes
                a += protect(u(value)) + '| '
        a = normalize_utf8(a)
        sql += a
        # files
        for nfile in node.getFiles():  # renamed from ``file`` (builtin shadow)
            sql += protect(
                u(nfile.getName() + '| ' + nfile.getType() + '| ' + nfile.getMimeType()) + '| ')
        sql += '\')'
        self.db.execute(sql)
        return True
    except Exception:  # was a bare except; still best-effort, returns False
        return False
def nodeToSimpleSearch(self, node):
    """Build the simple search index entry for *node* (insert only).

    Concatenates the node name, all non-system attributes and file
    information into the ``fullsearchmeta`` value column.

    @param node: core.tree node
    @return: True on success, False when the insert failed
    """
    try:
        sql = 'INSERT INTO fullsearchmeta (id, type, schema, value) VALUES(\'{}\', \'{}\', \'{}\', \'{}| '.format(
            node.id, node.getContentType(), node.getSchema(), node.name)
        # attributes
        a = ''
        for key, value in node.items():
            if key not in SYSTEMATTRS:  # ignore system attributes
                a += protect(u(value)) + '| '
        a = normalize_utf8(a)
        sql += a
        # files
        for nfile in node.getFiles():  # renamed from ``file`` (builtin shadow)
            sql += protect(
                u(nfile.getName() + '| ' + nfile.getType() + '| ' + nfile.getMimeType()) + '| ')
        sql += '\')'
        self.db.execute(sql)
        return True
    except Exception:  # was a bare except; still best-effort, returns False
        return False
def nodeToFulltextSearch(self, node, schema):
    """Build the fulltext index entry for *node* from its fulltext files.

    Only nodes whose category is "document" are indexed.  Depending on
    FULLTEXT_INDEX_MODE the raw text (0), the distinct words (1) or the
    words with their counts (2) are stored in ``textsearchmeta`` in
    chunks of 500000 characters.

    @param node: core.tree node
    @param schema: String, name of the schema
    @return: True on success or when nothing had to be done, False when
        an insert failed
    """
    if not hasattr(node, "getCategoryName") or not node.getCategoryName() == "document":
        # only build fulltext of document nodes
        return True
    word_re = re.compile("[a-zA-Z0-9]+")
    if self.execute('SELECT id from textsearchmeta where id=\'{}\''.format(node.id), schema, 'text'):
        # FIXME: we should not delete the old textdata from this node, and insert
        # the new files. Only problem is, DELETE from a FTS3 table is prohibitively
        # slow.
        return True  # was a bare ``return`` (None); success paths return True
    for nodefile in node.getFiles():  # renamed from ``file`` (builtin shadow)
        if nodefile.getType() == "fulltext" and os.path.exists(nodefile.retrieveFile()):
            data = {}
            content = ''
            f = open(nodefile.retrieveFile())
            try:
                for line in f:
                    if FULLTEXT_INDEX_MODE == 0:
                        content += u(line)
                    else:
                        for w in word_re.findall(line):
                            # plain occurrence count; the former pre-seeding
                            # of unseen words made every count one too high
                            try:
                                data[w] += 1
                            except KeyError:
                                data[w] = 1
            finally:
                f.close()
            if FULLTEXT_INDEX_MODE == 1:
                for key in data.keys():
                    content += key + " "
            elif FULLTEXT_INDEX_MODE == 2:
                for key in data.keys():
                    content += key + " [" + str(data[key]) + "] "
            content = u(content.replace("'", "").replace('"', ""))
            if len(content) > 0:
                # store in chunks of 500000 characters; the former slice
                # ``(p + 1) * 500000 - 1`` silently dropped one character
                # per chunk, and ``while p in range(...)`` did an O(n)
                # membership test each pass
                for p in range(int(ceil(len(content) / 500000.0))):
                    sql = 'INSERT INTO textsearchmeta (id, type, schema, value) VALUES("{}", "{}", "{}", "{}")'.format(
                        node.id, node.getContentType(), schema,
                        normalize_utf8(content[p * 500000:(p + 1) * 500000]))
                    try:
                        self.execute(sql, schema, 'text')
                    except Exception:  # was a bare except
                        # version-agnostic form of the old Py2 print statement
                        print("\nerror in fulltext of node {}".format(node.id))
                        return False
                return True
    return True
def nodeToFulltextSearch(self, node, schema):
    """Build the fulltext index entry for *node* from its fulltext files.

    Only nodes whose category is "document" are indexed.  Depending on
    FULLTEXT_INDEX_MODE the raw text (0), the distinct words (1) or the
    words with their counts (2) are stored in ``textsearchmeta`` in
    chunks of 500000 characters.

    @param node: core.tree node
    @param schema: String, name of the schema
    @return: True on success or when nothing had to be done, False when
        an insert failed
    """
    if not hasattr(node, "getCategoryName") or not node.getCategoryName() == "document":
        # only build fulltext of document nodes
        return True
    word_re = re.compile("[a-zA-Z0-9]+")
    if self.execute('SELECT id from textsearchmeta where id=\'{}\''.format(node.id), schema, 'text'):
        # FIXME: we should not delete the old textdata from this node, and insert
        # the new files. Only problem is, DELETE from a FTS3 table is prohibitively
        # slow.
        return True  # was a bare ``return`` (None); success paths return True
    for nodefile in node.getFiles():  # renamed from ``file`` (builtin shadow)
        if nodefile.getType() == "fulltext" and os.path.exists(nodefile.retrieveFile()):
            data = {}
            content = ''
            f = open(nodefile.retrieveFile())
            try:
                for line in f:
                    if FULLTEXT_INDEX_MODE == 0:
                        content += u(line)
                    else:
                        for w in word_re.findall(line):
                            # plain occurrence count; the former pre-seeding
                            # of unseen words made every count one too high
                            try:
                                data[w] += 1
                            except KeyError:
                                data[w] = 1
            finally:
                f.close()
            if FULLTEXT_INDEX_MODE == 1:
                for key in data.keys():
                    content += key + " "
            elif FULLTEXT_INDEX_MODE == 2:
                for key in data.keys():
                    content += key + " [" + str(data[key]) + "] "
            content = u(content.replace("'", "").replace('"', ""))
            if len(content) > 0:
                # store in chunks of 500000 characters; the former slice
                # ``(p + 1) * 500000 - 1`` silently dropped one character
                # per chunk, and ``while p in range(...)`` did an O(n)
                # membership test each pass
                for p in range(int(ceil(len(content) / 500000.0))):
                    sql = 'INSERT INTO textsearchmeta (id, type, schema, value) VALUES("{}", "{}", "{}", "{}")'.format(
                        node.id, node.getContentType(), schema,
                        normalize_utf8(content[p * 500000:(p + 1) * 500000]))
                    try:
                        self.execute(sql, schema, 'text')
                    except Exception:  # was a bare except
                        # version-agnostic form of the old Py2 print statement
                        print("\nerror in fulltext of node {}".format(node.id))
                        return False
                return True
    return True