def testIndexContent2(self):
    """Index two documents and check that a field query and a path query
    each return both of them.

    The two documents differ only in the letter case of their meta values
    and in the last path segment. The test expects ``tags:tEst `` and the
    path query ``/one/TWo/*`` to match both documents.
    """
    index = Index(fs, testIndexPath)
    index.create()
    content = ""
    meta = {
        "one": "oNe",
        "tags": ["one", "tWo", "teST", "this"],
        "path": "/one/TWo/three",
    }
    index.indexContent(content, id="testId", metaItems=meta,
                       comments="Comments One")
    meta = {
        "one": "ONE",
        "tags": ["one", "twO", "Test", "this"],
        "path": "/one/TWo/four",
    }
    index.indexContent(content, id="testId2", metaItems=meta,
                       comments="Comments Two")

    # NOTE: keywordQueries do not support wildcards and are case-sensitive
    #   (note that the storing analyzer may already have lower-cased the
    #   content).
    #   pathQueryStr is also case-sensitive and may only contain an
    #   optional trailing '*'.
    searchResults = index.search(queryStr="tags:tEst ",
                                 keywordQueryStr=None, pathQueryStr=None)
    # try/finally so the results are closed even when an assertion fails.
    try:
        # Walk every hit; the entries themselves are not inspected.
        for docId, doc in searchResults.iteritems():
            pass
        self.assertEqual(len(searchResults), 2)
    finally:
        searchResults.close()

    searchResults = index.search(queryStr=None, keywordQueryStr=None,
                                 pathQueryStr="/one/TWo/*")
    try:
        for docId, doc in searchResults.iteritems():
            pass
        self.assertEqual(len(searchResults), 2)
    finally:
        searchResults.close()
def insert(table_name, values, if_str_command):
    """Insert one record into ``table_name`` after checking unique columns.

    ``values`` is indexed by attribute name. For every attribute that the
    catalog reports as unique, the value is looked up first (through the
    index when the attribute has one, otherwise through a record scan);
    if it is already present the insert is rejected. On success the record
    is stored and every indexed attribute gets a new index entry.

    Returns a message string when ``if_str_command`` is truthy and the
    insert is rejected or the scan fails; in every other case the message
    (if any) is printed and ``None`` is returned.
    """

    def reject_duplicate():
        # Report the uniqueness violation the way the caller asked for.
        if if_str_command:
            return "Unique value has already exists\n"
        print("Unique value has already exists\n")
        return None

    columns = catalog_manager.get_attribute(table_name)
    row = [values[column[0]] for column in columns]

    # These catalog lookups are kept from the original flow; the resulting
    # index id is not used further down.
    primary = catalog_manager.get_primary(table_name)
    catalog_manager.get_index_id(table_name, primary)

    for column in columns:
        name = column[0]
        if catalog_manager.check_unique(table_name, name) != 1:
            continue

        if catalog_manager.check_index(table_name, name) == 1:
            try:
                index.search(catalog_manager.get_index_id(table_name, name),
                             values[name],
                             is_range=False, is_not_equal=False)
            except Exception:
                # A failing search is treated as "value not present".
                continue
            # The search succeeded, so the value already exists.
            return reject_duplicate()

        probe = Condition_2(
            catalog_manager.get_attribute_cnt(table_name, name),
            0, values[name])
        # fixme: record manager should throw proper exception on empty
        # file or empty table; a small stub is used for now
        try:
            found = record_manager.select_record_with_Index(
                table_name, 0, [probe])
        except Exception as e:
            # fixme: this will print the exception, reminding us to fix this
            if if_str_command:
                return str(e) + "\n"
            print(e)
            found = []
        if len(found) != 0:
            return reject_duplicate()

    line = record_manager.insert_record(table_name, row)
    for column in columns:
        name = column[0]
        if catalog_manager.check_index(table_name, name) == 1:
            index.insert(catalog_manager.get_index_id(table_name, name),
                         values[name], line)
def test_search(pip):
    """Send an XML-RPC ``search`` call and compare the full response.

    ``pip`` is interpolated as both the ``name`` and the ``summary`` search
    term. The S3 paginator is stubbed to yield ``S3_INDEX_RESPONSE``.
    """
    index.S3_PAGINATOR.paginate.return_value = iter(S3_INDEX_RESPONSE)

    request = textwrap.dedent(f'''\
        <?xml version='1.0'?>
        <methodCall>
        <methodName>search</methodName>
        <params>
        <param>
        <value><struct>
        <member>
        <name>name</name>
        <value><array><data>
        <value><string>{pip}</string></value>
        </data></array></value>
        </member>
        <member>
        <name>summary</name>
        <value><array><data>
        <value><string>{pip}</string></value>
        </data></array></value>
        </member>
        </struct></value>
        </param>
        <param>
        <value><string>or</string></value>
        </param>
        </params>
        </methodCall>\
        ''')

    # Expected XML-RPC response: a single hit for the "fizz" package.
    body = (
        "<?xml version='1.0'?><methodResponse><params><param><value><array>"
        "<data><struct><member><name>name</name><value><string>fizz</string>"
        "</value></member><member><name>summary</name><value>"
        "<string>s3://serverless-pypi/simple/fizz/fizz-1.2.3.tar.gz</string>"
        "</value></member><member><name>version</name><value>"
        "<string>1.2.3</string></value></member><member>"
        "<name>_pypi_ordering</name><value><boolean>0</boolean></value>"
        "</member></struct></data></array></value></param></params>"
        "</methodResponse>"
    )
    expected_headers = {
        'Content-Length': len(body),
        'Content-Type': 'text/xml; charset=UTF-8',
    }
    exp = {'body': body, 'statusCode': 200, 'headers': expected_headers}

    ret = index.search(request)

    assert ret == exp
def testIndexContent3(self):
    """Index one document with list-valued meta items, then walk every
    field of every hit returned for ``ids:1234``.

    The test makes no assertions; it only checks that indexing, searching
    and iterating the fields run without raising.
    """
    index = Index(fs, testIndexPath)
    index.create()
    content = ""
    meta = {"ids": ["1234"], "1234": ["tag1 tag2", "tag3"]}
    index.indexContent(content, id="one", metaItems=meta)

    searchResults = index.search("ids:1234")
    # try/finally so the results are closed even if the iteration raises.
    try:
        for doc in searchResults.itervalues():
            # Touch each field; the values are not inspected.
            for field in doc.getFields():
                pass
    finally:
        searchResults.close()
#!/usr/bin/python3
import re
import sys

import index

# Raw string so ``\W`` and ``\d`` reach the regex engine instead of being
# treated as (invalid) string escapes. Compiled once at import time.
_URL_TOKEN_SPLIT = re.compile(r"[-_/.?+&:\W]+|(\d+)")


class UrlIndex(index.Index):
    """``index.Index`` whose records are tokenized on URL separators."""

    def tokenize(self, record):
        """Split ``record`` into a list of non-empty tokens.

        Runs of ``- _ / . ? + & :`` and other non-word characters act as
        separators and are dropped. Runs of digits also split the text but
        are kept as tokens, because they sit in a capturing group. Empty
        strings and the ``None`` placeholders that ``split`` produces for
        the unused group are filtered out.
        """
        return [w for w in _URL_TOKEN_SPLIT.split(record) if w]


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("wrong sys.argv. len:", len(sys.argv))
        sys.exit(1)

    # One record per stdin line, surrounding whitespace removed.
    records = [line.strip() for line in sys.stdin]

    # Named url_index so the imported ``index`` module is not shadowed.
    url_index = UrlIndex(records)
    for score, doc in url_index.search(sys.argv[1]):
        print(score, doc)
def delete(table_name, conditions, if_str_command):
    """Delete the records of ``table_name`` that satisfy ``conditions``.

    If the condition's attribute has an index (``check_index`` returns 1),
    the matching entries are found and removed through that index, and the
    corresponding records are selected and deleted through the record
    manager. Otherwise a ``Condition_2`` is built and the record manager
    selects and deletes by scanning. In both cases the keys of the deleted
    rows are then removed from every other indexed attribute.

    Args:
        table_name: name of the table to delete from.
        conditions: object exposing ``attribute_name``, ``op`` and
            ``operand``.
        if_str_command: accepted for signature compatibility; this
            function does not read it.

    Returns:
        The string ``"number of records deleted:<n>\\n"``.

    Raises:
        ValueError: the attribute is indexed and ``conditions.op`` is not
            one of ``=``, ``<``, ``>``, ``<>``, ``<=``, ``>=``. (This used
            to surface as an ``UnboundLocalError``.)
    """
    # Keyword arguments passed to index.search / index.delete per operator.
    index_flags = {
        '=': dict(is_range=False, is_not_equal=False),
        '<': dict(is_greater=False, is_current=False,
                  is_range=True, is_not_equal=False),
        '>': dict(is_greater=True, is_current=False,
                  is_range=True, is_not_equal=False),
        '<>': dict(is_range=True, is_not_equal=True),
        '<=': dict(is_greater=False, is_current=True,
                   is_range=True, is_not_equal=False),
        '>=': dict(is_greater=True, is_current=True,
                   is_range=True, is_not_equal=False),
    }
    condition_attr = conditions.attribute_name

    if catalog_manager.check_index(table_name, condition_attr) == 1:
        index_id = catalog_manager.get_index_id(table_name, condition_attr)
        if conditions.op not in index_flags:
            raise ValueError(
                "unsupported operator for indexed delete: %r"
                % (conditions.op,))
        flags = index_flags[conditions.op]

        # Look the entries up first, then drop them from this index.
        matches = index.search(index_id, conditions.operand, **flags)
        index.delete(index_id, conditions.operand, **flags)

        # index.search may return a bare int instead of a list of hits.
        if isinstance(matches, int):
            matches = [matches]
        num_changed = len(matches)

        ret = record_manager.select_record_with_Index(
            table_name, [matches], [])
        record_manager.delete_record_with_Index(table_name, [matches], [])
    else:
        cond = Condition_2()
        cond.value = conditions.operand
        cond.attribute = catalog_manager.get_attribute_cnt(
            table_name, condition_attr)
        # NOTE(review): every operator other than '=', '<', '>' (including
        # '<=' and '>=') becomes type 3 -- confirm against Condition_2.
        cond.type = {'=': 0, '<': 1, '>': 2}.get(conditions.op, 3)
        conditionlist = [cond]

        ret = record_manager.select_record_with_Index(
            table_name, 0, conditionlist)
        record_manager.delete_record_with_Index(
            table_name, 0, conditionlist)
        num_changed = len(ret)

    # Remove the deleted rows from every remaining index. The condition's
    # own index was either handled above or does not exist.
    for attribute in catalog_manager.get_attribute(table_name):
        attr_name = attribute[0]
        if attr_name == condition_attr:
            continue
        if catalog_manager.check_index(table_name, attr_name) != 1:
            continue
        index_id = catalog_manager.get_index_id(table_name, attr_name)
        column = catalog_manager.get_attribute_cnt(table_name, attr_name)
        for row in ret:
            index.delete(index_id, row[column],
                         is_range=False, is_not_equal=False)

    return "number of records deleted:" + str(num_changed) + "\n"
def select_index(table_name, not_index_conditions, index_conditions,
                 if_str_command):
    """Select records using index lookups plus remaining scan conditions.

    Each entry of ``index_conditions`` is resolved through ``index.search``
    and each entry of ``not_index_conditions`` is turned into a
    ``Condition_2``. Both are then handed to
    ``record_manager.select_record_with_Index``.

    Args:
        table_name: name of the table to query.
        not_index_conditions: conditions on attributes without an index
            (objects with ``attribute_name``, ``op``, ``operand``).
        index_conditions: conditions on indexed attributes (same shape).
        if_str_command: when truthy, results and errors are returned as
            text; otherwise they are printed and ``None`` is returned.

    Returns:
        With ``if_str_command`` truthy: ``"empty"`` when nothing matches,
        the error text followed by a newline when an index lookup fails,
        or a formatted table. Otherwise ``None``.
    """
    # Keyword arguments passed to index.search per operator.
    index_flags = {
        '=': dict(is_range=False, is_not_equal=False),
        '<': dict(is_greater=False, is_current=False,
                  is_range=True, is_not_equal=False),
        '>': dict(is_greater=True, is_current=False,
                  is_range=True, is_not_equal=False),
        '<>': dict(is_range=True, is_not_equal=True),
        '<=': dict(is_greater=False, is_current=True,
                   is_range=True, is_not_equal=False),
        '>=': dict(is_greater=True, is_current=True,
                   is_range=True, is_not_equal=False),
    }

    templist = []
    try:
        for index_condition in index_conditions:
            index_id = catalog_manager.get_index_id(
                table_name, index_condition.attribute_name)
            if index_condition.op not in index_flags:
                # Previously this either raised UnboundLocalError or
                # silently reused the previous condition's search result.
                raise ValueError(
                    "unsupported operator for indexed select: %r"
                    % (index_condition.op,))
            temp = index.search(index_id, index_condition.operand,
                                **index_flags[index_condition.op])
            # index.search may return a bare int instead of a list of hits.
            templist.append([temp] if isinstance(temp, int) else temp)
    except Exception as e:
        if if_str_command:
            return str(e) + "\n"
        print(e)
        return None

    conditionlist = []
    for condi in not_index_conditions:
        cond = Condition_2()
        cond.value = condi.operand
        cond.attribute = catalog_manager.get_attribute_cnt(
            table_name, condi.attribute_name)
        # NOTE(review): every operator other than '=', '<', '>' (including
        # '<=' and '>=') becomes type 3 -- confirm against Condition_2.
        cond.type = {'=': 0, '<': 1, '>': 2}.get(condi.op, 3)
        conditionlist.append(cond)

    ret = record_manager.select_record_with_Index(
        table_name, templist, conditionlist)
    if not if_str_command:
        print(ret)
        return None

    if len(ret) == 0:
        return "empty"

    parts = ["number of record selected:" + str(len(ret)) + "\n"]
    attr = catalog_manager.get_attribute(table_name)
    parts.append("".join(attribute[0] + "\t\t" for attribute in attr))
    parts.append("\n")
    for info in ret:
        # NOTE(review): only the first three columns are printed, and the
        # 1080101000 threshold looks like a column-alignment tweak for
        # wide first values -- confirm the intent.
        gap = "\t" if info[0] >= 1080101000 else "\t\t"
        parts.append(
            str(info[0]) + "\t" + str(info[1]) + gap + str(info[2]) + "\n")
    return "".join(parts)
def test_search(pip):
    """Send an XML-RPC ``search`` call and compare the full response.

    ``pip`` is interpolated as both the ``name`` and the ``summary`` search
    term. The S3 paginator is stubbed to yield ``S3_INDEX_RESPONSE``.
    """
    index.S3_PAGINATOR.paginate.return_value = iter(S3_INDEX_RESPONSE)

    # One element per line for readability. The re.sub below removes every
    # newline together with the spaces that follow it, so the request is
    # sent as a single line.
    readable_request = f'''
        <?xml version='1.0'?>
        <methodCall>
          <methodName>search</methodName>
          <params>
            <param>
              <value>
                <struct>
                  <member>
                    <name>name</name>
                    <value>
                      <array>
                        <data>
                          <value>
                            <string>{pip}</string>
                          </value>
                        </data>
                      </array>
                    </value>
                  </member>
                  <member>
                    <name>summary</name>
                    <value>
                      <array>
                        <data>
                          <value>
                            <string>{pip}</string>
                          </value>
                        </data>
                      </array>
                    </value>
                  </member>
                </struct>
              </value>
            </param>
            <param>
              <value>
                <string>or</string>
              </value>
            </param>
          </params>
        </methodCall>
    '''
    request = re.sub(r'\n *', '', readable_request)

    # Expected response body: the SEARCH template wrapped around one entry.
    entry = SEARCH_VALUE.safe_substitute(
        name='fizz',
        summary='s3://serverless-pypi/fizz/fizz-1.2.3.tar.gz',
        version='1.2.3',
    )
    body = SEARCH.safe_substitute(data=entry)
    expected_headers = {
        'content-length': len(body),
        'content-type': 'text/xml; charset=utf-8',
    }
    exp = {'body': body, 'statusCode': 200, 'headers': expected_headers}

    ret = index.search(request)

    assert ret == exp
import index
import argparse

# Command-line front end: takes one positional query and prints the hits.
parser = argparse.ArgumentParser(description="Searches your index.")
parser.add_argument("query", help="Search query")
args = parser.parse_args()

results = index.search(args.query)
if len(results) != 0:
    # Hits are numbered from 1 and shown as "<n>) <title> (<url>)".
    for i, res in enumerate(results, 1):
        print("{!s}) {} ({})".format(i, res["title"], res["url"]))
else:
    print("No results found.")