def query_filter(self, query, min_relevance=0, docID_start=0, docID_stop=0):
    self.socket.send(config.COMMAND_QUERY_FILTER_RELEVANCE_ID)
    parser.parse_query(query).send(self.socket)
    self.socket.send(struct.pack('f', min_relevance / 100.))
    self.socket.send(config.pack_docID(docID_start))
    self.socket.send(config.pack_docID(docID_stop))
    while True:
        docID = struct.unpack('I', self.socket.recv(struct.calcsize('I')))[0]
        if docID == 0:
            break
        yield docID
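# A hedged usage sketch for query_filter() above (the client class name and
# query string are illustrative assumptions, not taken from the original
# source): the generator streams matching document IDs until the server sends
# the 0 sentinel.
#
#   client = SearchClient(host, port)   # hypothetical client exposing .socket
#   for doc_id in client.query_filter('database systems', min_relevance=75):
#       print(doc_id)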
def _wildcards(self, response, match, sent=''):
    # Substitute %N placeholders in the response template with matched groups.
    pos = response.find('%')
    while pos >= 0:
        num = int(response[pos + 1:pos + 2])
        response = response[:pos] + \
            self._substitute(match.group(num)) + \
            response[pos + 2:]
        pos = response.find('%')
    if len(self._data_arr) > 0:
        response = response.replace('##username##', self._data_arr[0])
    if parser.is_question(sent) > 0:
        try:
            # Placeholder spelling below matches the response templates.
            sql_statement = parser.parse_query(sent)
            response = response.replace('##sql_statment##', sql_statement)
            response = response.replace(
                '##sql_result##', self.db.query_pretty(sql_statement))
        except Exception as e:
            # response = str('That seems off topic! Please type help to see some questions that I can help with.')
            response = str(e)
    else:
        response = "Please enter your name using the format name {your name}. Example: name John Doe"
    return response
def query_top(self, query, size, start=0, docID_start=0, docID_stop=0):
    self.socket.send(config.COMMAND_QUERY_TOP)
    parser.parse_query(query).send(self.socket)
    self.socket.send(config.pack_docID(docID_start))
    self.socket.send(config.pack_docID(docID_stop))
    self.socket.send(struct.pack('I', start))
    self.socket.send(struct.pack('I', size))
    self.query_total = struct.unpack('I', self.socket.recv(struct.calcsize('I')))[0]
    doc_ids = []
    while True:
        docID = struct.unpack('I', self.socket.recv(struct.calcsize('I')))[0]
        if docID == 0:
            break
        doc_ids.append(docID)
    return doc_ids
def test_parser_simple_query_two_relations(self):
    relationR = RelationNode("/Users/Lena/Documents/Study/Parallel_DB/R.txt")
    relationS = RelationNode("/Users/Lena/Documents/Study/Parallel_DB/S.txt")
    query = "R.A,R.B,S.D;R,S;R.A = S.A,R.A = 4"
    root = parse_query(query, [relationR, relationS])
    result = root.run()
    self.assertEqual(result, [{"R.A": 4, "R.B": 5, "S.D": 9}])
def main():
    """
    Read database files and parse them into a table dictionary keyed by table name.
    """
    filenames = ['table_file_3.txt', 'table_file_1.txt', 'table_file_2.txt']
    table_dict = {}
    for dbfile in filenames:
        t = parser.pdbTable('./db/' + dbfile)
        table_dict[t.table_name] = t
    parsed_query = parser.parse_query('./db/query.txt')
    cnf = CNF()
    for q in parsed_query:
        cnf.addClause(Clause(q, table_dict))
    cnf1 = CNF()
    cnf1.addClause(Clause(parsed_query[0], table_dict))
    # print(cnf1.clauses[0].is_independent(cnf1.clauses[1]))
    # var = cnf1.clauses[0].getUCNF()
    cnf2 = CNF()
    # cnf1.addClause(Clause(parsed_query[1], table_dict))
    clause1 = Clause()
    clause2 = Clause()
    clause1.addAtom(cnf1.clauses[0].atoms[0])
    clause2.addAtom(cnf1.clauses[0].atoms[1])
    clause1.variables = set()
    clause2.variables = set()
    if clause1.is_independent(clause2):
        print("independent")
def test_parallel_simple_selection2(self):
    relationS = RelationNode("/Users/Lena/Documents/Study/Parallel_DB/S.txt")
    query = "S.D;S;S.A > 5"
    root = parse_query(query, [relationS, ])
    result = root.parallel_run(3)
    self.assertEqual(compare(result, [{"S.D": 3}, {"S.D": 2}]), True)
def test_parser_nested_query(self):
    relationR = RelationNode("/Users/Lena/Documents/Study/Parallel_DB/R.txt")
    relationS = RelationNode("/Users/Lena/Documents/Study/Parallel_DB/S.txt")
    query = "*;R;R.B in\nS.D;S;S.D < 5"
    root = parse_query(query, [relationR, relationS])
    result = root.run()
    self.assertEqual(result, [{"R.A": 1, "R.B": 2, "R.C": 3}, ])
def main():
    """
    Read database files and parse them into a table dictionary keyed by table name.
    """
    filenames = ['table_file_3.txt', 'table_file_1.txt', 'table_file_2.txt']
    table_dict = {}
    for dbfile in filenames:
        t = parser.pdbTable('./db/' + dbfile)
        table_dict[t.table_name] = t
    parsed_query = parser.parse_query('./db/query.txt')
    cnf = CNF()
    for q in parsed_query:
        cnf.addClause(Clause(q, table_dict))
    # cnf1 = CNF()
    # cnf1.addClause(Clause(parsed_query[0], table_dict))
    # cnf2 = CNF()
    # cnf2.addClause(Clause(parsed_query[1], table_dict))
    shared_mem = dict()
    # Time the single-process lifted inference, then the multi-processing version.
    start = time.time()
    res = lifted_inference_single(cnf)
    end = time.time()
    print(end - start)
    print("Running multi-processing")
    start = time.time()
    res = lifted_inference(cnf, 0, shared_mem, 1)
    end = time.time()
    print(end - start)
def test_parallel_two_relations_fragmentation_is_not_send_func2(self):
    relationR = RelationNode("/Users/Lena/Documents/Study/Parallel_DB/R.txt")
    relationP = RelationNode("/Users/Lena/Documents/Study/Parallel_DB/P.txt")
    query = "R.C,P.D;R,P;R.A > 1,P.D = 9"
    root = parse_query(query, [relationP, relationR])
    result = root.parallel_run(2)
    self.assertEqual(compare(result, [{"R.C": 6, "P.D": 9}, {"R.C": 9, "P.D": 9}]), True)
def test_parallel_two_relations_fragmentation_is_send_func(self):
    relationR = RelationNode("/Users/Lena/Documents/Study/Parallel_DB/R.txt")
    relationS = RelationNode("/Users/Lena/Documents/Study/Parallel_DB/S.txt")
    query = "R.A,R.B,S.D;R,S;R.A = S.A"
    root = parse_query(query, [relationS, relationR])
    result = root.parallel_run(2)
    self.assertEqual(compare(result, [{"R.A": 4, "R.B": 5, "S.D": 9},
                                      {"R.A": 7, "R.B": 8, "S.D": 3}]), True)
def test_parallel_simple_selection(self):
    relationR = RelationNode("/Users/Lena/Documents/Study/Parallel_DB/R.txt")
    query = "*;R;R.B != 5"
    root = parse_query(query, [relationR, ])
    result = root.parallel_run(2)
    self.assertEqual(compare(result, [{"R.A": 1, "R.B": 2, "R.C": 3},
                                      {"R.A": 7, "R.B": 8, "R.C": 9},
                                      {"R.A": 1, "R.B": 10, "R.C": 11}]), True)
def main():
    """
    Read database files and parse them into a table dictionary keyed by table name.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument("--table", nargs='+', action='append',
                           help="input the file name of a table")
    argparser.add_argument("--query", help="input the file name of the query")
    argparser.add_argument("-d", help="database mode", action='store_true')
    argparser.add_argument("-p", help="apply the lifted inference rules in parallel",
                           action='store_true')
    args = argparser.parse_args()
    query_name = args.query
    filenames = []
    table_dict = {}
    for table_arg in args.table:
        if len(table_arg) != 1:
            print("Illegal argument: the --table option takes exactly one file at a time")
            sys.exit(1)
        filenames.append(table_arg[0])
    for dbfile in filenames:
        t = parser.pdbTable(dbfile)
        table_dict[t.table_name] = t
    parsed_query = parser.parse_query(query_name)
    db = None
    if args.d:
        db_file = 'prob.db'
        db = SQL_DB(filenames, db_file)
    shared_mem = None
    p_id = None
    if args.p:
        manager = multiprocessing.Manager()
        shared_mem = manager.dict()
        p_id = "start"
    cnf = CNF()
    for q in parsed_query:
        cnf.addClause(Clause(q, table_dict))
    start = time.time()
    res = 1 - lifted_inference(cnf, db, shared_mem, p_id)
    end = time.time()
    print("####\t Total time taken is \t{0:0.2f} seconds".format(end - start))
    # Reuse the result computed above instead of running the inference a second time.
    print("####\t Probability is:\t{0:0.4f}".format(res))
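# A hedged command-line sketch for the entry point above; the script name and
# the table/query file names are assumptions made only for illustration:
#
#   python main.py --table table_file_1.txt --table table_file_2.txt \
#                  --query query.txt -d -p
#
# Each --table flag names one table file and --query names the query file;
# -d builds a SQL_DB backed by 'prob.db', and -p runs the lifted inference
# with a multiprocessing shared dictionary.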
def query_loop(text):
    step = 0
    input = ""  # quantity name
    lastinput = ""
    while not (input == "quit" or input == "exit"):
        print "\033[93m"
        print "enter a query:\033[0m"
        input = raw_input()
        if not input.strip() == "":
            if '\x1b' in input:
                input = lastinput
            input = input.lower()
            if not simple_query(input, text, step):
                query_type = parser.parse_query(input)
                answerer.answer_question(query_type, text)
            lastinput = input
def main(index_path):
    # with open(index_path, 'rb') as index_data:
    #     vocabulary = cPickle.load(index_data)
    with open('id2url.pkl', 'rb') as id2url:
        url_from_id = cPickle.load(id2url)
    with open('offsets.pkl', 'rb') as offsets_file:
        lines = offsets_file.readlines()
    offset = []
    for line in lines:
        offset.append(int(line))
    while True:
        try:
            initial_query = raw_input()
            query = initial_query.decode('utf-8').lower()
            urls = parse_query(query).evaluate(index_path, len(url_from_id), offset)
            answer = map(url_from_id.__getitem__, urls)
            print(initial_query)
            print(len(answer))
            print('\n'.join(map(str, answer)))
        except EOFError:
            return
def calculate(data, q='', k=None, f=len, gb=None):
    """
    Filter `data` with query `q`, optionally group the matching rows by field
    `gb`, and aggregate (the values of key `k` in) each group with `f`.
    """
    q = parser.parse_query(q) if not callable(q) else q
    f = funcs[f] if not callable(f) else f
    results = []
    for d in data:
        if not q(d):
            continue
        results += [d]
    groups = {}
    if gb not in [None, '']:
        for r in results:
            if gb not in r:
                continue
            gbk = r[gb]
            if gbk not in groups:
                groups[gbk] = []
            groups[gbk] += [r[k] if k is not None else r]
        for g in groups:
            groups[g] = (len(groups[g]), f(groups[g]))
        items = groups.items()
        return (len(items), items)
    results = [r[k] if k not in ['', None] else r for r in results]
    return (len(results), f(results))
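# A hedged usage sketch for calculate() above. Passing callables for `q` and
# `f` sidesteps the query-string syntax of parser.parse_query and the names
# registered in the `funcs` table, neither of which is shown here; the row
# data below is invented purely for illustration.
rows = [
    {'city': 'Oslo', 'price': 10},
    {'city': 'Oslo', 'price': 30},
    {'city': 'Bergen', 'price': 20},
]
# Keep rows with price > 5, group them by 'city', and aggregate each group's
# 'price' values with the built-in sum.
group_count, groups = calculate(rows, q=lambda d: d['price'] > 5,
                                k='price', f=sum, gb='city')
# group_count == 2; groups holds ('Oslo', (2, 40)) and ('Bergen', (1, 20)),
# i.e. (number of rows in the group, aggregated value).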
else:  # query_tree.is_term == True
    term_hash = mmh3.hash64(query_tree.value)[0]
    if term_hash in Term_dict:
        substr = InvIndexEncoded[Term_dict[term_hash][0]:
                                 Term_dict[term_hash][0] + Term_dict[term_hash][1]]
        return set(encoder.decode(substr))
    else:
        return set()


URLs = load_obj("urls")
Term_dict = load_obj("dict")
fd = open("encoder.txt", "r")
encoder_type = fd.readline().strip()  # drop the trailing newline so the comparison below matches
fd.close()
if encoder_type == "varbyte":
    encoder = encoding.Varbyte()
else:
    encoder = encoding.Simple9()
fd = open("InvIndexEncoded.txt", "r")
InvIndexEncoded = fd.read()
fd.close()
for line in sys.stdin:
    line = re.sub("\n", "", line, flags=re.UNICODE)
    query_tree = parser.parse_query(line.decode('UTF-8').lower())
    result = exec_tree(query_tree, Term_dict, InvIndexEncoded, len(URLs))
    print line
    print len(result)
    for docID in sorted(result):
        print URLs[docID]
urls = cPickle.load(url_file)
with open('dict_file.txt', 'rb') as dict_file:
    dict = cPickle.load(dict_file)
index_file = open("index.txt", "rb")
while True:
    try:
        line = raw_input()
        # print("input raw line: {}".format(line))
        print(line)
        question = line.decode('utf-8').lower()
        q = parser.parse_query(question)
        # print("query: {}".format(q))
        number_of_urls = 0
        result, flag = parser.get_q_list_urls(q, dict, index_file)
        print(len(result))
        for doc_id in result:
            try:
                print(urls[doc_id])
            except:
                print(doc_id)
    except:
        index_file.close()
        break
def test_parser_simple_query(self):
    relationR = RelationNode("/Users/Lena/Documents/Study/Parallel_DB/R.txt")
    query = "R.B;R;R.A = 1"
    root = parse_query(query, [relationR, ])
    result = root.run()
    self.assertEqual(result, [{"R.B": 2}, {"R.B": 10}])
def generate_param_file(index_path, query, res_num, query_terms):
    f = tempfile.NamedTemporaryFile(delete=True)
    f.write(index_path + '\n')
    f.write(query + '\n')
    f.write(str(res_num) + '\n')
    for query_term in query_terms:
        f.write(query_term + '\n')
    f.flush()
    return f


if __name__ == '__main__':
    option = sys.argv[1]
    argv = sys.argv[2:]
    if option == '--example':
        indri_query, query_terms = generate_indri_query(
            str_to_parsed_query(
                "[ (NE, ['Mexican', 'Food']), (Non-NE, ['little', 'wonder']), (None, ['strong']) ]"),
            50, 20)
        f = generate_param_file('../data/index', indri_query, 3, query_terms)
        subprocess.call(['cpp/Search', f.name])
        f.close()
    elif option == '--search' or option == '--search-with-parsed-query':
        index_path, search_file, query_or_parsed, passage_len, passage_inc, res_num = argv
        if option == '--search':
            query = parse_query(query_or_parsed)
        else:
            query = str_to_parsed_query(query_or_parsed)
        indri_query, query_terms = generate_indri_query(query, passage_len, passage_inc)
        f = generate_param_file(index_path, indri_query, res_num, query_terms)
        subprocess.call(['%s' % search_file, f.name])
        f.close()
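# A hedged invocation sketch for the __main__ block above (the script name and
# all paths are illustrative assumptions, not taken from the original source):
#
#   python search.py --search ../data/index cpp/Search "mexican food" 50 20 10
#
# With --search the raw query string is run through parse_query first; with
# --search-with-parsed-query the query argument must already be in the
# "[ (NE, [...]), ... ]" form accepted by str_to_parsed_query. In both cases a
# temporary Indri parameter file is generated and handed to the search binary.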