Example #1
    def query_filter(self, query, min_relevance=0, docID_start=0, docID_stop=0):
        # Send the filter command, the parsed query, and the parameters.
        self.socket.send(config.COMMAND_QUERY_FILTER_RELEVANCE_ID)
        parser.parse_query(query).send(self.socket)
        self.socket.send(struct.pack('f', min_relevance / 100.))
        self.socket.send(config.pack_docID(docID_start))
        self.socket.send(config.pack_docID(docID_stop))

        # Stream matching docIDs back until the server sends 0 as a terminator.
        while True:
            docID = struct.unpack('I', self.socket.recv(struct.calcsize('I')))[0]
            if docID == 0:
                break
            yield docID
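A hypothetical usage sketch: `client` stands for an instance of the (unshown) class that owns this method, and the query string is invented.

# Hypothetical driver for the generator above; `client` is assumed to be an
# instance of the class this method belongs to, which the example does not show.
for docID in client.query_filter('information retrieval', min_relevance=50):
    print(docID)  # IDs arrive one at a time until the 0 terminator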
Example #2
    def _wildcards(self, response, match, sent=''):
        # Replace each %N wildcard in the response template with the
        # (substituted) Nth group of the regex match.
        pos = response.find('%')
        while pos >= 0:
            num = int(response[pos + 1:pos + 2])
            response = response[:pos] + \
                self._substitute(match.group(num)) + \
                response[pos + 2:]
            pos = response.find('%')
        if len(self._data_arr) > 0:
            response = response.replace('##username##', self._data_arr[0])
            if parser.is_question(sent) > 0:
                try:
                    sql_statment = parser.parse_query(sent)
                    response = response.replace('##sql_statment##',
                                                sql_statment)
                    response = response.replace(
                        '##sql_result##', self.db.query_pretty(sql_statment))

                except Exception as e:
                    # response = str('That seems off topic! Please type help to see some questions that I can help with.')
                    response = str(e)

        else:
            response = "Please Enter your name using the format name {your name}. Example: name John Due"
        return response
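A standalone illustration of the %N splice performed above, with an invented pattern and template (self._substitute is skipped here, so the matched group is inserted verbatim):

import re

match = re.match(r'my name is (\w+)', 'my name is alice')
response = 'Hello %1!'
pos = response.find('%')
num = int(response[pos + 1:pos + 2])
print(response[:pos] + match.group(num) + response[pos + 2:])  # Hello alice!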
Example #3
    def query_top(self, query, size, start=0, docID_start=0, docID_stop=0):
        # Send the top-k command, the parsed query, the docID window, and
        # the paging parameters.
        self.socket.send(config.COMMAND_QUERY_TOP)
        parser.parse_query(query).send(self.socket)
        self.socket.send(config.pack_docID(docID_start))
        self.socket.send(config.pack_docID(docID_stop))
        self.socket.send(struct.pack('I', start))
        self.socket.send(struct.pack('I', size))
        self.query_total = struct.unpack('I', self.socket.recv(struct.calcsize('I')))[0]

        # Collect docIDs until the server sends 0 as a terminator.
        docIDs = []
        while True:
            docID = struct.unpack('I', self.socket.recv(struct.calcsize('I')))[0]
            if docID == 0:
                break
            docIDs.append(docID)
        return docIDs
Example #4
def test_parser_simple_query_two_relations(self):
    relationR = RelationNode("/Users/Lena/Documents/Study/Parallel_DB/R.txt")
    relationS = RelationNode("/Users/Lena/Documents/Study/Parallel_DB/S.txt")
    query = "R.A,R.B,S.D;R,S;R.A = S.A,R.A = 4"
    root = parse_query(query, [relationR, relationS])
    result = root.run()
    self.assertEqual(result, [{"R.A": 4, "R.B": 5, "S.D": 9}])
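The test queries follow a three-field format, projections;relations;predicates, with commas separating the items inside each field (Example #7 additionally nests a sub-query after a newline). A minimal sketch of that decomposition, inferred from the test strings rather than taken from the actual parse_query implementation:

query = "R.A,R.B,S.D;R,S;R.A = S.A,R.A = 4"
projections, relations, predicates = query.split(';')
print(projections.split(','))  # ['R.A', 'R.B', 'S.D']
print(relations.split(','))    # ['R', 'S']
print(predicates.split(','))   # ['R.A = S.A', 'R.A = 4']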
Example #5
def main():
    """
    Read database files and parse into a table dictionary with tableName, table key/value pair
    """
    filenames = ['table_file_3.txt', 'table_file_1.txt', 'table_file_2.txt']
    table_dict = {}
    for dbfile in filenames:
        t = parser.pdbTable('./db/' + dbfile)
        table_dict[t.table_name] = t

    parsed_query = parser.parse_query('./db/query.txt')
    cnf = CNF()
    for q in parsed_query:
        cnf.addClause(Clause(q, table_dict))
    cnf1 = CNF()
    cnf1.addClause(Clause(parsed_query[0], table_dict))

    # print(cnf1.clauses[0].is_independent(cnf1.clauses[1]))
    # var = cnf1.clauses[0].getUCNF()
    cnf2 = CNF()
    #cnf1.addClause(Clause(parsed_query[1], table_dict))

    # Build two single-atom clauses from the first parsed clause and check
    # whether they are independent of each other.
    clause1 = Clause()
    clause2 = Clause()

    clause1.addAtom(cnf1.clauses[0].atoms[0])
    clause2.addAtom(cnf1.clauses[0].atoms[1])

    clause1.variables = set()
    clause2.variables = set()

    if clause1.is_independent(clause2):
        print("independent")
Example #6
def test_parallel_simple_selection2(self):
    relationS = RelationNode("/Users/Lena/Documents/Study/Parallel_DB/S.txt")
    query = "S.D;S;S.A > 5"
    root = parse_query(query, [relationS, ])
    result = root.parallel_run(3)
    self.assertEqual(compare(result, [{"S.D": 3},
                                      {"S.D": 2}]), True)
Example #7
def test_parser_nested_query(self):
    relationR = RelationNode("/Users/Lena/Documents/Study/Parallel_DB/R.txt")
    relationS = RelationNode("/Users/Lena/Documents/Study/Parallel_DB/S.txt")
    query = "*;R;R.B in\nS.D;S;S.D < 5"
    root = parse_query(query, [relationR, relationS])
    result = root.run()
    self.assertEqual(result, [{"R.A": 1, "R.B": 2, "R.C": 3}, ])
Example #8
def main():
    """
    Read database files and parse into a table dictionary with tableName, table key/value pair
    """
    filenames = ['table_file_3.txt', 'table_file_1.txt', 'table_file_2.txt']
    table_dict = {}
    for dbfile in filenames:
        t = parser.pdbTable('./db/' + dbfile)
        table_dict[t.table_name] = t

    parsed_query = parser.parse_query('./db/query.txt')
    cnf = CNF()
    for q in parsed_query:
        cnf.addClause(Clause(q, table_dict))
    # cnf1 = CNF()

    # cnf1.addClause(Clause(parsed_query[0], table_dict))

    # cnf2 = CNF()
    # cnf2.addClause(Clause(parsed_query[1], table_dict))
    shared_mem = dict()
    # Time the single-process baseline first.
    start = time.time()
    res = lifted_inference_single(cnf)

    end = time.time()
    print(end - start)

    print("Running multi-processing")
    start = time.time()
    res = lifted_inference(cnf, 0, shared_mem, 1)
    end = time.time()
    print(end - start)
Example #9
def test_parallel_two_relations_fragmentation_is_not_send_func2(self):
    relationR = RelationNode("/Users/Lena/Documents/Study/Parallel_DB/R.txt")
    relationP = RelationNode("/Users/Lena/Documents/Study/Parallel_DB/P.txt")
    query = "R.C,P.D;R,P;R.A > 1,P.D = 9"
    root = parse_query(query, [relationP, relationR])
    result = root.parallel_run(2)
    self.assertEqual(compare(result, [{"R.C": 6, "P.D": 9},
                                      {"R.C": 9, "P.D": 9}]), True)
Example #10
def test_parallel_two_relations_fragmentation_is_send_func(self):
    relationR = RelationNode("/Users/Lena/Documents/Study/Parallel_DB/R.txt")
    relationS = RelationNode("/Users/Lena/Documents/Study/Parallel_DB/S.txt")
    query = "R.A,R.B,S.D;R,S;R.A = S.A"
    root = parse_query(query, [relationS, relationR])
    result = root.parallel_run(2)
    self.assertEqual(compare(result, [{"R.A": 4, "R.B": 5, "S.D": 9},
                                      {"R.A": 7, "R.B": 8, "S.D": 3}]), True)
Example #11
def test_parallel_simple_selection(self):
    relationR = RelationNode("/Users/Lena/Documents/Study/Parallel_DB/R.txt")
    query = "*;R;R.B != 5"
    root = parse_query(query, [relationR, ])
    result = root.parallel_run(2)
    self.assertEqual(compare(result, [{"R.A": 1, "R.B": 2, "R.C": 3},
                                      {"R.A": 7, "R.B": 8, "R.C": 9},
                                      {"R.A": 1, "R.B": 10, "R.C": 11}]), True)
Example #12
def main():
    """
    Read database files and parse into a table dictionary with tableName, table key/value pair
    """
    argparser = argparse.ArgumentParser()

    argparser.add_argument("--table",
                           nargs='+',
                           action='append',
                           help="input the file name of table")
    argparser.add_argument("--query", help="input the file name of query")
    argparser.add_argument("-d", help="database mode", action='store_true')
    argparser.add_argument("-p",
                           help="Apply Lifted Inference Rule in Parallel",
                           action='store_true')
    args = argparser.parse_args()
    query_name = args.query

    filenames = []
    table_dict = {}

    for table_arg in args.table:
        if len(table_arg) != 1:
            print(
                "Illegal argument, table option takes exactly one file at a time"
            )
            sys.exit(1)
        filenames.append(table_arg[0])

    for dbfile in filenames:
        t = parser.pdbTable(dbfile)
        table_dict[t.table_name] = t
    parsed_query = parser.parse_query(query_name)
    db = None
    if args.d:
        db_file = 'prob.db'
        db = SQL_DB(filenames, db_file)
    shared_mem = None
    p_id = None
    if args.p:
        manager = multiprocessing.Manager()
        shared_mem = manager.dict()
        p_id = "start"
    cnf = CNF()
    for q in parsed_query:
        cnf.addClause(Clause(q, table_dict))
    start = time.time()
    res = 1 - lifted_inference(cnf, db, shared_mem, p_id)
    end = time.time()
    print("####\t Total time taken is \t{0:0.2f} seconds".format(end - start))
    # Reuse the result instead of running lifted inference a second time.
    print("####\t Probability is:\t{0:0.4f}".format(res))
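A hypothetical invocation (the script and file names are invented; each --table option takes exactly one file and may be repeated):

python main.py --table R.txt --table S.txt --query query.txt -d -p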
Example #13
def query_loop(text):
    step = 0
    input = ""
    # last accepted query; re-used when an escape sequence (arrow key) is read
    lastinput = ""
    while not (input == "quit" or input == "exit"):
        print "\033[93m"
        print "enter a query:\033[0m"
        input = raw_input()
        if not input.strip() == "":
            if '\x1b' in input:
                input = lastinput
            input = input.lower()

            if not simple_query(input, text, step):
                query_type = parser.parse_query(input)
                answerer.answer_question(query_type, text)
            lastinput = input
Example #14
def main(index_path):
    #with open(index_path, 'rb') as index_data:
    #    vocabulary = cPickle.load(index_data)
    with open('id2url.pkl', 'rb') as id2url:
        url_from_id = cPickle.load(id2url)
    with open('offsets.pkl', 'rb') as offsets_file:
        offset = [int(line) for line in offsets_file]
    while True:
        try:
            initial_query = raw_input()
            query = initial_query.decode('utf-8').lower()
            urls = parse_query(query).evaluate(index_path, len(url_from_id),
                                               offset)
            answer = map(url_from_id.__getitem__, urls)
            print(initial_query)
            print(len(answer))
            print('\n'.join(map(str, answer)))
        except EOFError:
            return
Example #15
def calculate(data, q='', k=None, f=len, gb=None):
    # Filter `data` with query q, optionally group the rows by field `gb`,
    # and aggregate field `k` with function `f`.
    q = parser.parse_query(q) if not callable(q) else q
    f = funcs[f] if not callable(f) else f
    results = [d for d in data if q(d)]
    groups = {}
    if gb not in [None, '']:
        for r in results:
            if gb not in r:
                continue
            gbk = r[gb]
            groups.setdefault(gbk, []).append(r[k] if k is not None else r)
        for g in groups:
            groups[g] = (len(groups[g]), f(groups[g]))
        items = groups.items()
        return (len(items), items)
    results = [r[k] if k not in ['', None] else r for r in results]
    return (len(results), f(results))
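A minimal usage sketch with callable q and f, which bypass parser.parse_query and the funcs lookup table entirely (the sample rows are invented):

rows = [{'city': 'NYC', 'age': 31}, {'city': 'NYC', 'age': 45},
        {'city': 'SF', 'age': 28}]
count, groups = calculate(rows, q=lambda d: d['age'] > 25,
                          k='age', f=sum, gb='city')
# count == 2 groups; groups contains ('NYC', (2, 76)) and ('SF', (1, 28))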
Example #16

def exec_tree(query_tree, Term_dict, InvIndexEncoded, num_docs):
    # NOTE: the opening of this function was missing from the example; the
    # `if` branch below is a hypothetical reconstruction (the node attribute
    # and operator names are assumptions), while the `else` branch is original.
    if not query_tree.is_term:
        left = exec_tree(query_tree.left, Term_dict, InvIndexEncoded, num_docs)
        if query_tree.value == '!':
            return set(range(num_docs)) - left
        right = exec_tree(query_tree.right, Term_dict, InvIndexEncoded, num_docs)
        return left & right if query_tree.value == '&' else left | right
    else:
        # query_tree.is_term == True: look up the term's posting list.
        term_hash = mmh3.hash64(query_tree.value)[0]
        if term_hash in Term_dict:
            start, length = Term_dict[term_hash][0], Term_dict[term_hash][1]
            return set(encoder.decode(InvIndexEncoded[start:start + length]))
        return set()


URLs = load_obj("urls")
Term_dict = load_obj("dict")
with open("encoder.txt", "r") as fd:
    # strip the trailing newline so the comparison below can match
    encoder_type = fd.readline().strip()
if encoder_type == "varbyte":
    encoder = encoding.Varbyte()
else:
    encoder = encoding.Simple9()
with open("InvIndexEncoded.txt", "r") as fd:
    InvIndexEncoded = fd.read()
for line in sys.stdin:
    line = re.sub("\n", "", line, flags=re.UNICODE)
    query_tree = parser.parse_query(line.decode('UTF-8').lower())
    result = exec_tree(query_tree, Term_dict, InvIndexEncoded, len(URLs))
    print line
    print len(result)
    for docID in sorted(result):
        print URLs[docID]
Example #17
    # NOTE: the opening of this example is missing; the enclosing `with`
    # statement below is a reconstruction and its file name is an assumption.
    with open('url_file.txt', 'rb') as url_file:
        urls = cPickle.load(url_file)

    with open('dict_file.txt', 'rb') as dict_file:
        dict = cPickle.load(dict_file)

    index_file = open("index.txt", "rb")

    while True:
        try:
            line = raw_input()

            #print("input raw line: {}".format(line))
            print(line)
            question = line.decode('utf-8').lower()

            q = parser.parse_query(question)
            #print("query: {}".format(q))

            number_of_urls = 0
            result, flag = parser.get_q_list_urls(q, dict, index_file)

            print(len(result))
            for doc_id in result:
                try:
                    print(urls[doc_id])
                except (KeyError, IndexError):
                    # fall back to the raw id when no URL is known for it
                    print(doc_id)
        except EOFError:
            index_file.close()
            break
Example #18
def test_parser_simple_query(self):
    relationR = RelationNode("/Users/Lena/Documents/Study/Parallel_DB/R.txt")
    query = "R.B;R;R.A = 1"
    root = parse_query(query, [relationR, ])
    result = root.run()
    self.assertEqual(result, [{"R.B": 2}, {"R.B": 10}])
Example #19
def generate_param_file(index_path, query, res_num, query_terms):
    # Write the parameters to a named temp file, one per line; with
    # delete=True the file is removed as soon as the caller closes it.
    f = tempfile.NamedTemporaryFile(delete=True)
    f.write(index_path + '\n')
    f.write(query + '\n')
    f.write(str(res_num) + '\n')
    for query_term in query_terms:
        f.write(query_term + '\n')
    f.flush()
    return f

if __name__ == '__main__':
    option = sys.argv[1]
    argv = sys.argv[2:]
    if option == '--example':
        indri_query, query_terms = generate_indri_query(str_to_parsed_query("[ (NE, ['Mexican', 'Food']), (Non-NE, ['little', 'wonder']), (None, ['strong']) ]"), 50, 20)
        f = generate_param_file('../data/index', indri_query, 3, query_terms)
        subprocess.call(['cpp/Search', f.name])
        f.close()
    elif option == '--search' or option == '--search-with-parsed-query':
        index_path, search_file, query_or_parsed, passage_len, passage_inc, res_num = argv
        if option == '--search':
            query = parse_query(query_or_parsed)
        else:
            query = str_to_parsed_query(query_or_parsed)
        indri_query, query_terms = generate_indri_query(query, passage_len, passage_inc)
        f = generate_param_file(index_path, indri_query, res_num, query_terms)
        subprocess.call(['%s' % search_file, f.name])
        f.close()
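A hypothetical --search invocation matching the positional argument order above (the script name and paths are invented):

python search_cli.py --search ../data/index cpp/Search "strong little wonder" 50 20 3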