Beispiel #1
0
    def testIndexContent2(self):
        """Index two documents, then check a term query and a path query.

        Both documents carry the same tags (up to letter case) and share the
        "/one/TWo/" path prefix, so each of the two searches below is
        expected to return exactly two hits.
        """
        index = Index(fs, testIndexPath)
        index.create()
        content = ""
        meta = {
            "one": "oNe",
            "tags": ["one", "tWo", "teST", "this"],
            "path": "/one/TWo/three"
        }
        index.indexContent(content,
                           id="testId",
                           metaItems=meta,
                           comments="Comments One")

        meta = {
            "one": "ONE",
            "tags": ["one", "twO", "Test", "this"],
            "path": "/one/TWo/four"
        }
        index.indexContent(content,
                           id="testId2",
                           metaItems=meta,
                           comments="Comments Two")

        # NOTE: keyword queries take NO wildcards and are case-sensitive
        #   (note that the storing analyzer may already have lower-cased the
        #   content). pathQueryStr is also case-sensitive and may only
        #   contain an optional trailing '*'.
        searchResults = index.search(queryStr="tags:tEst ",
                                     keywordQueryStr=None,
                                     pathQueryStr=None)
        try:
            # Walk the whole result set so that iteration itself is exercised.
            for docId, doc in searchResults.iteritems():
                pass
            self.assertEqual(len(searchResults), 2)
        finally:
            # Close the results even when the assertion above fails.
            searchResults.close()

        searchResults = index.search(queryStr=None,
                                     keywordQueryStr=None,
                                     pathQueryStr="/one/TWo/*")
        try:
            for docId, doc in searchResults.iteritems():
                pass
            self.assertEqual(len(searchResults), 2)
        finally:
            searchResults.close()
Beispiel #2
0
def insert(table_name, values, if_str_command):
    """Insert one record into ``table_name`` after checking unique columns.

    ``values`` is indexed by attribute name. For every attribute the catalog
    reports as unique, the new value is looked up first: through the index
    when the attribute has one, otherwise through a record scan with an
    equality condition. If the value is already present nothing is inserted.

    When ``if_str_command`` is truthy, problems are returned as a message
    string; otherwise they are printed and ``None`` is returned. A successful
    insert returns ``None``.
    """

    def duplicate_found():
        # Report a uniqueness violation in the mode the caller asked for.
        if if_str_command:
            return "Unique value has already exists\n"
        print("Unique value has already exists\n")
        return None

    columns = catalog_manager.get_attribute(table_name)
    row = [values[column[0]] for column in columns]

    # The id looked up here is not used below; the two calls are kept so that
    # whatever they do (including raising) still happens before the checks.
    primary_key = catalog_manager.get_primary(table_name)
    catalog_manager.get_index_id(table_name, primary_key)

    for column in columns:
        name = column[0]
        if catalog_manager.check_unique(table_name, name) != 1:
            continue

        if catalog_manager.check_index(table_name, name) == 1:
            try:
                index.search(catalog_manager.get_index_id(table_name, name),
                             values[name],
                             is_range=False,
                             is_not_equal=False)
            except Exception:
                # Any failure of the lookup is taken to mean that the value
                # is not in the index yet, so this column passes the check.
                continue
            return duplicate_found()

        equals_new_value = Condition_2(
            catalog_manager.get_attribute_cnt(table_name, name), 0,
            values[name])
        # TODO: the record manager should raise a proper exception on an
        # empty file or empty table; until then the handler below is a stub
        # that surfaces the exception as a reminder to fix this.
        try:
            matches = record_manager.select_record_with_Index(
                table_name, 0, [equals_new_value])
        except Exception as e:
            if if_str_command:
                return str(e) + "\n"
            print(e)
            matches = []
        if len(matches) != 0:
            return duplicate_found()

    line = record_manager.insert_record(table_name, row)

    # Register the new record in every index defined on the table.
    for column in columns:
        name = column[0]
        if catalog_manager.check_index(table_name, name) == 1:
            index.insert(catalog_manager.get_index_id(table_name, name),
                         values[name], line)
Beispiel #3
0
def test_search(pip):
    """Check the response dict ``index.search`` builds for an XML-RPC search.

    ``pip`` is interpolated into the request as the search term for both the
    ``name`` and ``summary`` fields.
    """
    # Feed the canned S3 listing pages to the paginator used by the index.
    index.S3_PAGINATOR.paginate.return_value = iter(S3_INDEX_RESPONSE)
    request = textwrap.dedent(f'''\
        <?xml version='1.0'?>
        <methodCall>
        <methodName>search</methodName>
        <params>
        <param>
        <value><struct>
        <member>
        <name>name</name>
        <value><array><data>
        <value><string>{pip}</string></value>
        </data></array></value>
        </member>
        <member>
        <name>summary</name>
        <value><array><data>
        <value><string>{pip}</string></value>
        </data></array></value>
        </member>
        </struct></value>
        </param>
        <param>
        <value><string>or</string></value>
        </param>
        </params>
        </methodCall>\
    ''')
    # Expected XML-RPC payload: a single hit for package "fizz" 1.2.3.
    body = (
        "<?xml version='1.0'?><methodResponse><params><param><value><array>"
        "<data><struct><member><name>name</name><value><string>fizz</string>"
        "</value></member><member><name>summary</name><value>"
        "<string>s3://serverless-pypi/simple/fizz/fizz-1.2.3.tar.gz</string>"
        "</value></member><member><name>version</name><value>"
        "<string>1.2.3</string></value></member><member>"
        "<name>_pypi_ordering</name><value><boolean>0</boolean></value>"
        "</member></struct></data></array></value></param></params>"
        "</methodResponse>"
    )
    response = index.search(request)
    expected_headers = {
        'Content-Length': len(body),
        'Content-Type': 'text/xml; charset=UTF-8',
    }
    expected = {
        'body': body,
        'statusCode': 200,
        'headers': expected_headers,
    }
    assert response == expected
Beispiel #4
0
    def testIndexContent3(self):
        """Index one document with list-valued meta items and walk its fields.

        This is a smoke test: it makes no assertions and only checks that
        searching for "ids:1234" and iterating the fields of every hit runs
        without raising.
        """
        index = Index(fs, testIndexPath)
        index.create()
        content = ""
        meta = {"ids": ["1234"], "1234": ["tag1 tag2", "tag3"]}
        index.indexContent(content, id="one", metaItems=meta)

        searchResults = index.search("ids:1234")
        try:
            for doc in searchResults.itervalues():
                # Touch every field of the hit; the values are not inspected.
                for field in doc.getFields():
                    pass
        finally:
            # Close the results even if the iteration above raises.
            searchResults.close()
Beispiel #5
0
#!/usr/bin/python3

import index
import re
import sys


class UrlIndex(index.Index):
    """An ``index.Index`` whose records are split on URL punctuation and digits."""

    def tokenize(self, record):
        """Return the non-empty tokens of ``record``.

        Runs of separator characters are dropped. Runs of digits also split
        the record but are kept as tokens of their own, because they are
        matched by the capturing group of the pattern.
        """
        # Raw string: in a plain literal "\W" and "\d" are invalid escape
        # sequences, which newer Python versions warn about.
        # re.split yields None for the group whenever the separator
        # alternative matched, and "" at the edges; both are filtered out.
        return [w for w in re.split(r"[-_/.?+&:\W]+|(\d+)", record) if w]


if __name__ == "__main__":
    # Usage: exactly one command-line argument, the query. The records to
    # index are read from standard input, one per line.
    if len(sys.argv) != 2:
        print("wrong sys.argv. len:", len(sys.argv))
        sys.exit(1)

    records = [line.strip() for line in sys.stdin]

    # Named url_index (not index) so the imported `index` module is not
    # rebound to an instance.
    url_index = UrlIndex(records)
    for score, doc in url_index.search(sys.argv[1]):
        print(score, doc)
Beispiel #6
0
def _delete_rows_from_indexes(table_name, rows, skip_attribute=None):
    """Remove the values of ``rows`` from every index on ``table_name``.

    ``rows`` are the records that were just deleted. ``skip_attribute`` names
    an attribute whose index must be left alone (it was already updated by
    the caller).
    """
    for attribute in catalog_manager.get_attribute(table_name):
        name = attribute[0]
        if (catalog_manager.check_index(table_name, name) != 1
                or name == skip_attribute):
            continue
        index_id = catalog_manager.get_index_id(table_name, name)
        # Position of this attribute inside a record; same for every row.
        column = catalog_manager.get_attribute_cnt(table_name, name)
        for row in rows:
            index.delete(index_id,
                         row[column],
                         is_range=False,
                         is_not_equal=False)


def delete(table_name, conditions, if_str_command):
    """Delete the records of ``table_name`` that satisfy ``conditions``.

    ``conditions`` is a single condition object providing ``attribute_name``,
    ``op`` and ``operand``. When the attribute is indexed, the matching
    records are located and removed through the index; otherwise the record
    manager filters the table with an equivalent ``Condition_2``. In both
    cases the deleted values are then removed from the remaining indexes.

    ``if_str_command`` is accepted but not used by this function.

    Returns:
        The string "number of records deleted:<n>\\n".

    Raises:
        ValueError: if the attribute is indexed and ``conditions.op`` is not
            one of '=', '<', '>', '<>', '<=', '>='. (This case used to fail
            with an UnboundLocalError.)
    """
    attribute_name = conditions.attribute_name

    if catalog_manager.check_index(table_name, attribute_name) == 1:
        # Keyword flags handed to index.search / index.delete per operator.
        # '=' and '<>' deliberately leave is_greater / is_current at the
        # defaults of those functions.
        index_flags = {
            '=': dict(is_range=False, is_not_equal=False),
            '<': dict(is_greater=False, is_current=False,
                      is_range=True, is_not_equal=False),
            '>': dict(is_greater=True, is_current=False,
                      is_range=True, is_not_equal=False),
            '<>': dict(is_range=True, is_not_equal=True),
            '<=': dict(is_greater=False, is_current=True,
                       is_range=True, is_not_equal=False),
            '>=': dict(is_greater=True, is_current=True,
                       is_range=True, is_not_equal=False),
        }
        index_id = catalog_manager.get_index_id(table_name, attribute_name)
        if conditions.op not in index_flags:
            raise ValueError(
                "unsupported operator: %r" % (conditions.op,))
        flags = index_flags[conditions.op]

        # Look the positions up first, then drop them from this index.
        positions = index.search(index_id, conditions.operand, **flags)
        index.delete(index_id, conditions.operand, **flags)

        # A lookup may return a bare int for a single hit; normalise it to a
        # one-element list so both cases are handled the same way.
        if isinstance(positions, int):
            positions = [positions]
        num_changed = len(positions)

        ret = record_manager.select_record_with_Index(
            table_name, [positions], [])
        record_manager.delete_record_with_Index(table_name, [positions], [])
        _delete_rows_from_indexes(table_name, ret,
                                  skip_attribute=attribute_name)
    else:
        cond = Condition_2()
        cond.value = conditions.operand
        cond.attribute = catalog_manager.get_attribute_cnt(
            table_name, attribute_name)
        # NOTE(review): every operator other than '=', '<' and '>' maps to
        # type 3, exactly as before; confirm against Condition_2 that this is
        # what '<>', '<=' and '>=' should use.
        cond.type = {'=': 0, '<': 1, '>': 2}.get(conditions.op, 3)
        conditionlist = [cond]

        ret = record_manager.select_record_with_Index(table_name, 0,
                                                      conditionlist)
        record_manager.delete_record_with_Index(table_name, 0, conditionlist)
        num_changed = len(ret)
        _delete_rows_from_indexes(table_name, ret)

    return "number of records deleted:" + str(num_changed) + "\n"
Beispiel #7
0
def select_index(table_name, not_index_conditions, index_conditions,
                 if_str_command):
    """Select records of ``table_name`` using indexed and plain conditions.

    Each condition in ``index_conditions`` is resolved through the index of
    its attribute; each condition in ``not_index_conditions`` is translated
    into a ``Condition_2`` and handed to the record manager as a filter.
    Condition objects provide ``attribute_name``, ``op`` and ``operand``.

    When ``if_str_command`` is truthy the result is returned as text:
    "empty" when nothing matched, otherwise a count line, a header line with
    the attribute names and one line per record (only the first three fields
    of each record are printed). An error during the index lookups is
    returned as its message followed by a newline.

    When ``if_str_command`` is falsy, the selected records (or the lookup
    error) are printed and ``None`` is returned.

    An operator other than '=', '<', '>', '<>', '<=', '>=' in an index
    condition is reported through the same error path. Previously it either
    failed with an UnboundLocalError or silently reused the result of the
    preceding condition.
    """
    # Keyword flags handed to index.search per operator. '=' and '<>'
    # deliberately leave is_greater / is_current at that function's defaults.
    index_flags = {
        '=': dict(is_range=False, is_not_equal=False),
        '<': dict(is_greater=False, is_current=False,
                  is_range=True, is_not_equal=False),
        '>': dict(is_greater=True, is_current=False,
                  is_range=True, is_not_equal=False),
        '<>': dict(is_range=True, is_not_equal=True),
        '<=': dict(is_greater=False, is_current=True,
                   is_range=True, is_not_equal=False),
        '>=': dict(is_greater=True, is_current=True,
                   is_range=True, is_not_equal=False),
    }

    templist = []
    try:
        for index_condition in index_conditions:
            index_id = catalog_manager.get_index_id(
                table_name, index_condition.attribute_name)
            if index_condition.op not in index_flags:
                raise ValueError(
                    "unsupported operator: %r" % (index_condition.op,))
            temp = index.search(index_id,
                                index_condition.operand,
                                **index_flags[index_condition.op])
            # A lookup may return a bare int for a single hit; wrap it so
            # every entry of templist is a list of positions.
            templist.append([temp] if isinstance(temp, int) else temp)
    except Exception as e:
        if if_str_command:
            return str(e) + "\n"
        print(e)
        return None

    conditionlist = []
    for condi in not_index_conditions:
        cond = Condition_2()
        cond.value = condi.operand
        cond.attribute = catalog_manager.get_attribute_cnt(
            table_name, condi.attribute_name)
        # NOTE(review): every operator other than '=', '<' and '>' maps to
        # type 3, exactly as before; confirm against Condition_2.
        cond.type = {'=': 0, '<': 1, '>': 2}.get(condi.op, 3)
        conditionlist.append(cond)

    if not if_str_command:
        print(
            record_manager.select_record_with_Index(table_name, templist,
                                                    conditionlist))
        return None

    ret = record_manager.select_record_with_Index(table_name, templist,
                                                  conditionlist)
    if len(ret) == 0:
        return "empty"

    parts = ["number of record selected:" + str(len(ret)) + "\n"]
    for attribute in catalog_manager.get_attribute(table_name):
        parts.append(attribute[0] + "\t\t")
    parts.append("\n")
    for info in ret:
        # Values of at least 1080101000 in the first field get one tab less
        # before the third field; presumably this keeps the columns aligned
        # (TODO confirm what the threshold stands for).
        gap = "\t" if info[0] >= 1080101000 else "\t\t"
        parts.append(
            str(info[0]) + "\t" + str(info[1]) + gap + str(info[2]) + "\n")
    return "".join(parts)
def test_search(pip):
    """Check the response dict ``index.search`` builds for an XML-RPC search.

    ``pip`` is interpolated into the request as the search term for both the
    ``name`` and ``summary`` fields. The request is written indented for
    readability and collapsed to a single line before it is sent.
    """
    # Feed the canned S3 listing pages to the paginator used by the index.
    index.S3_PAGINATOR.paginate.return_value = iter(S3_INDEX_RESPONSE)
    readable_request = f'''
        <?xml version='1.0'?>
        <methodCall>
          <methodName>search</methodName>
          <params>
            <param>
              <value>
                <struct>
                  <member>
                    <name>name</name>
                    <value>
                      <array>
                        <data>
                          <value>
                            <string>{pip}</string>
                          </value>
                        </data>
                      </array>
                    </value>
                  </member>
                  <member>
                    <name>summary</name>
                    <value>
                      <array>
                        <data>
                          <value>
                            <string>{pip}</string>
                          </value>
                        </data>
                      </array>
                    </value>
                  </member>
                </struct>
              </value>
            </param>
            <param>
              <value>
                <string>or</string>
              </value>
            </param>
          </params>
        </methodCall>
    '''
    # Drop every newline together with the indentation that follows it.
    request = re.sub(r'\n *', '', readable_request)

    # Expected payload: the search template filled with a single hit.
    hit = SEARCH_VALUE.safe_substitute(
        name='fizz',
        summary='s3://serverless-pypi/fizz/fizz-1.2.3.tar.gz',
        version='1.2.3',
    )
    body = SEARCH.safe_substitute(data=hit)

    response = index.search(request)
    expected_headers = {
        'content-length': len(body),
        'content-type': 'text/xml; charset=utf-8',
    }
    expected = {
        'body': body,
        'statusCode': 200,
        'headers': expected_headers,
    }
    assert response == expected
Beispiel #9
0
import index, argparse

# Command-line front end: run one query against the index and list the hits.
parser = argparse.ArgumentParser(description="Searches your index.")
parser.add_argument("query", help="Search query")
args = parser.parse_args()

results = index.search(args.query)
if len(results) != 0:
    # Hits are numbered from 1 and shown as "<n>) <title> (<url>)".
    for i, res in enumerate(results, start=1):
        print("{!s}) {} ({})".format(i, res["title"], res["url"]))
else:
    print("No results found.")