def clean_db():
    """Empty every publication table and restart the pub_id sequences.

    Deletes all rows from the four publication tables and their
    ``*_author`` link tables, then restarts each ``*_pub_id_seq`` at 1.
    The ``author`` table is deliberately left untouched (see the
    commented-out SQL kept below for reference).

    Fix: the original batch ran ``DELETE FROM article_author`` twice;
    the duplicate statement has been removed.
    """
    execute("""
        DELETE FROM article WHERE TRUE ;
        DELETE FROM book WHERE TRUE ;
        DELETE FROM incollection WHERE TRUE ;
        DELETE FROM inproceedings WHERE TRUE ;
        DELETE FROM article_author WHERE TRUE ;
        DELETE FROM book_author WHERE TRUE ;
        DELETE FROM incollection_author WHERE TRUE ;
        DELETE FROM inproceedings_author WHERE TRUE ;
        ALTER SEQUENCE article_pub_id_seq RESTART 1;
        ALTER SEQUENCE book_pub_id_seq RESTART 1;
        ALTER SEQUENCE incollection_pub_id_seq RESTART 1;
        ALTER SEQUENCE inproceedings_pub_id_seq RESTART 1;
        --DELETE FROM author WHERE TRUE ;
        --ALTER SEQUENCE author_author_id_seq RESTART 1;
        --
        --INSERT INTO author (
        --    SELECT DISTINCT ON (y.v)
        --        NEXTVAL('author_author_id_seq'),
        --        y.v,
        --        result.v
        --    FROM Pub x, Field y, Field result
        --    WHERE x.k = y.k AND y.k = result.k AND x.p = 'www' AND y.p = 'author' AND result.p = 'url'
        --);
    """)
    print("Done Cleaning DB...")
def query(server, dataset, query, file, measure, format, output, display):
    """Evaluate a SPARQL query and emit its results and metrics.

    :param server: endpoint the engine should query
    :param dataset: dataset name on that endpoint
    :param query: inline SPARQL query string (ignored when *file* is given)
    :param file: path of a file containing the query, or ``None``
    :param measure: path to write ``time,calls,transfer`` CSV metrics, or ``None``
    :param format: one of ``'w3c/json'``, ``'w3c/xml'``, ``'json'`` (anything
        else falls back to plain JSON, matching the original behaviour)
    :param output: path to write the serialized result, or ``None``
    :param display: when truthy, log the serialized result
    """
    # Extracts the query from the arguments.
    if file is not None:
        # Fix: the original leaked the file handle (open().read()).
        with open(file, 'r') as query_file:
            query = query_file.read()
    if query is None:
        logger.error('You have to input a SPARQL query')
        sys.exit(1)

    # Evaluates the query.
    result, stats = engine.execute(server, dataset, query)

    # Writes the evaluation metrics, if requested.
    if measure is not None:
        with open(measure, 'w') as out_file:
            out_file.write(
                f'{stats.execution_time()},{stats.nb_calls()},{stats.data_transfer()}'
            )

    # Serialize the query result; 'json' and unknown formats both produced
    # json.dumps in the original, so they share one branch here.
    if format == 'w3c/json':
        data = sparql_json(result.get())
    elif format == 'w3c/xml':
        data = sparql_xml(result.get())
    else:
        data = json.dumps(result.get())

    if display:
        logger.info(data)
    if output is not None:
        with open(output, 'w') as out_file:
            out_file.write(data)

    logger.info(f'Evaluation metrics: \
\ntime: {stats.execution_time()} sec \
\ntransfer: {stats.data_transfer()} bytes \
\ncalls: {stats.nb_calls()} \
\nresult: {stats.nb_result()}')
def extract_row(row, columns):
    """Serialize *row* into a token string, expanding foreign keys.

    For each (name, type) pair in *columns*, the column value is tokenized
    with ``tokenize3``; columns listed in ``foreign_keys`` trigger recursive
    lookups in the referenced tables and their rows are serialized inline.

    Fix: the original duplicated the foreign-key lookup code for the
    list/tuple and scalar cases; the scalar key is now wrapped in a
    one-element list so both share a single path.
    """
    foreign_keys = {
        'know_for_titles': [('title_basics', 'tconst'),
                            ('title_ratings', 'tconst')]
    }
    line = ""
    for column_name, column_type in columns:
        if column_name not in row or column_name in ignorable_columns:
            continue
        if column_name in foreign_keys:
            value = row[column_name]
            keys = value if isinstance(value, (list, tuple)) else [value]
            for key in keys:
                for table, column_id in foreign_keys[column_name]:
                    # NOTE(review): string-built SQL is injection-prone if
                    # keys can contain quotes — switch to parameterized
                    # queries if the execute() API supports them.
                    res = execute(
                        "SELECT * FROM {} WHERE {} = '{}'".format(
                            table, column_id, key))
                    cols = execute(get_columns(table)).fetchall()
                    for linked_row in res:
                        line += extract_row(linked_row, cols)
        elif row[column_name] is None:
            line += tokenize3(column_name, "NULL")
        elif column_type == 'ARRAY':
            line += " ".join(
                tokenize3(column_name, str(val))
                for val in row[column_name]
            ) + " "
        else:
            line += tokenize3(column_name, (str(row[column_name])))
    return line
def insert_a(db_index, table, execute, val):
    """Link author *val* to publication ``db_index[0]`` in ``{table}_author``.

    Resolves the author's id from the in-memory ``author_dict`` cache when
    possible; otherwise selects it from the database, inserting the author
    row first if it does not exist yet.

    Fix: the original used bare ``except:`` clauses, which also swallow
    SystemExit/KeyboardInterrupt — narrowed to ``except Exception``.
    """
    author = str(val).replace("'", "")
    # The same SELECT is issued up to three times; build it once.
    select_author = (
        "SELECT author_id FROM author "
        "WHERE author.author_name = '{}'".format(author))
    if author in author_dict:
        author_id = [author_dict[author]]
    else:
        try:
            (author_id,) = execute(select_author)
        except Exception:
            # Author not present yet: insert it, then re-select.
            try:
                execute("INSERT INTO author (author_name, author_homepage) VALUES ('{}',NULL)".format(author))
                (author_id,) = execute(select_author)
            except Exception:
                # Insert lost a race with a concurrent writer; the row
                # must exist now, so the select should succeed.
                (author_id,) = execute(select_author)
    execute("INSERT INTO {}_author (pub_id,author_id) values ({},{})".format(table, db_index[0], author_id[0]))
def main():
    """Entry point: start the maze engine, then run either the interactive
    controller or the automatic solver, and finally kill the engine process.

    Returns 0 on success, 1 on argument error or engine start failure.
    """
    # Controller mode requires --no_stats (no statistics can be collected
    # while the user drives the maze by hand).
    if args.controller and not args.no_stats:
        print(
            "ERROR: controller mode does not work"
            " without --no_stats argument..."
        )
        return 1
    # Launch the external engine; -1 signals a failed start.
    pid = execute(args.file_path)
    if pid == -1:
        return 1
    if args.controller:
        print(
            "Controller Mode\n"
            " - Use WASD keys to move around the maze\n"
            " - Press F to inspect\n"
            " - Press E to exit"
        )
        Controller(args.debug).explore_maze()
    else:
        print(
            "Classic maze solver\n"
            "Waiting for engine..."
        )
        # Give the engine time to come up before solving.
        sleep(3)
        maze = Maze()
        if args.no_stats:
            print(f"Total cells visited: {len(maze.visited)}")
        else:
            old_data = get_pickle_obj(maze.colors_xy)
            print("Show statistics!")
            plt_colors_dist(maze.colors_count)
            plt_xy_dist(maze.colors_xy, old_data)
            plt_map(maze.visited)
    # NOTE(review): source indentation was lost; kill(pid) is assumed to run
    # after BOTH modes (function level) — confirm against the original file.
    kill(pid)
    return 0
def POST(self, options):
    """ Use the options selected by the user to execute all algorithms
    :param options: {
        is_default_dataset: bool,
        dataset: str,
        providers: []
        algorithms: []
        target: str }
    if is_default_dataset is true, dataset will contain the name of the default_dataset"""
    opts = json.loads(options)
    try:
        result = engine.execute(
            opts['is_default_dataset'],
            opts['dataset'],
            opts['providers'],
            opts['algorithms'],
            opts['target'],
        )
    except Exception as exception:
        # Surface any engine failure to the client as an HTTP 500.
        raise cherrypy.HTTPError(500, message=f"{str(exception)}")
    return result
from engine import execute

# Preload every author into a name -> id map so later inserts can resolve
# author ids without a database round-trip.
authors = execute('SELECT author_id, author_name FROM author')
author_dict = {}
for _author in authors:
    author_dict[_author.author_name] = _author.author_id


def insert(execute, tn, fields, table, current_dict, current_pub_id, db_index):
    """Spool an INSERT statement for one publication row to the file "file".

    Builds a row of [pub_id, publication key, field values...] from
    *current_dict*, quoting stripped, missing fields as empty strings.
    The statement is appended to disk instead of being executed directly
    (the execute(s) call is deliberately disabled below).
    """
    values = [db_index[0], current_pub_id]
    # Fields beyond the first two (id + key) come from the parsed record.
    for field in fields[tn][2:]:
        if field in current_dict:
            values.append(current_dict[field].replace('"', '').replace("'", ""))
        else:
            values.append('')
    s = "INSERT INTO {} {} VALUES {}".format(table, str(tuple(fields[tn])).replace("'", ""), tuple(values))
    # Escape % so a later %-style interpolation pass cannot consume it.
    s = s.replace("%", "%%")
    # execute(s)
    open("file", "a").write(s + ";\n")


def insert_a(db_index, table, execute, val):
    # NOTE(review): this is a truncated duplicate of insert_a from another
    # chunk of the file — this copy ends after the cache lookup. Confirm
    # against the complete source before relying on it.
    author = str(val).replace("'", "")
    if author in author_dict:
        author_id = [author_dict[author]]
class InsertA(threading.Thread):
    """Thread wrapper that runs insert_a(...) with the captured arguments."""

    def __init__(self, db_index, table, execute, val):
        threading.Thread.__init__(self)
        self.db_index = db_index
        self.table = table
        self.execute = execute
        self.val = val

    def run(self):
        # Delegate to the module-level insert_a with the stored arguments.
        insert_a(self.db_index, self.table, self.execute, self.val)


threads = []
# For each publication table, fetch all its field rows (grouped by key)
# and walk the cursor row by row.
for tn, table in enumerate(tables):
    result = execute(
        "SELECT field.* FROM field, pub WHERE pub.k=field.k AND pub.p= '{}' ORDER BY field.k"
        .format(table))
    print("Query done")
    length = result.rowcount
    print("Read result for", table, "length")
    # Per-table cursor-walk state.
    current_pub_id = ''
    db_index = 0
    current_dict = {}
    values = []
    r = 1
    i = 0
    store = []
    oldR = None
    found_last = False
    # NOTE(review): this chunk is truncated — the body of the while loop
    # continues outside the visible source.
    while r is not None:
def create_corpus(tokenize, file_name):
    """Dump the author/publication joins as a tokenized text corpus.

    Runs four join statements, reads each result in chunks of ``chunk_size``
    rows (LIMIT/OFFSET paging), tokenizes every value with
    ``tokenize(column_name, value)`` and appends the lines to the file
    ``corpus + file_name`` (truncated first).

    Fixes vs. the original:
    - ``length``/``chunk`` are now reset per statement; previously the
      ``length == 0`` left over from statement 0 made the ``while`` loop
      skip statements 1-3 entirely.
    - the paging ``ORDER BY`` used alias ``aa``, which only exists in the
      first statement; all four alias the author table as ``au``, so
      ``au.author_id`` is used instead.
    - column names are refreshed for every result (each statement returns a
      different column set; the original captured them only once).
    - ``chunk_size / 2`` (float) replaced by integer division in the modulo.
    """
    file_name = corpus + file_name
    chunk_size = 6000000
    # Truncate the output file before appending chunks to it.
    with open(file_name, 'w') as file:
        file.write("")
    statements = [
        """ SELECT au.author_name, au.author_homepage, ar.title, ar.year, ar.month, ar.volume, ar.journal, ar.number FROM author au JOIN article_author aa ON aa.author_id = au.author_id JOIN article ar ON ar.pub_id = aa.pub_id """,
        """ SELECT au.author_name, au.author_homepage, b.title,b.year,b.isbn,b.publisher FROM author au JOIN book_author ba ON ba.author_id = au.author_id JOIN book b ON ba.pub_id = b.pub_id """,
        """ SELECT au.author_name, au.author_homepage, i.title,i.year,i.isbn,i.booktitle FROM author au JOIN inproceedings_author ia ON ia.author_id = au.author_id JOIN inproceedings i ON ia.pub_id = i.pub_id """,
        """ SELECT au.author_name, au.author_homepage, i.title,i.year,i.isbn,i.booktitle,i.editor FROM author au JOIN inproceedings_author ia ON ia.author_id = au.author_id JOIN inproceedings i ON i.pub_id = ia.pub_id """,
    ]
    out = ""
    for s, statement in enumerate(statements):
        print("Executing statement ", s)
        # Reset paging state for this statement (see docstring).
        length = 1
        chunk = 0
        # Select entries in chunks to speed things up.
        while length != 0:
            result = execute(
                (statement + """ ORDER BY au.author_id LIMIT {} OFFSET {}""").format(
                    chunk_size, chunk))
            # Column names differ per statement; refresh them every time.
            columns = [col[0] for col in result.cursor.description]
            print("Done reading")
            length = result.rowcount
            print("Rowcount ", length)
            for i, row in enumerate(result):
                if i % (chunk_size // 2) == 0:
                    print(i, 'of', length)
                    # Flush the accumulated output buffer periodically.
                    with open(file_name, 'a') as file:
                        file.write(out)
                    out = ""
                line = ""
                row = [item for item in row]
                # Arbitrary selection of columns (currently: all of them).
                cols_of_interest = range(len(row))
                for index in cols_of_interest:
                    item = row[index]
                    if isinstance(item, list):
                        item = [tokenize(columns[index], str(e)) for e in item]
                        line += " ".join(item) + " "
                    elif item is None:
                        line += tokenize(columns[index], "NULL") + " "
                    else:
                        line += tokenize(columns[index], str(item)) + " "
                out += delete_double_spaces(line + "\n")
            chunk = chunk + chunk_size
            print('Inserted entries', chunk)
    # Flush whatever is left in the buffer.
    if len(out) > 0:
        with open(file_name, 'a') as file:
            file.write(out)
        out = ""
import sys

import engine

# Script entry point: hand the raw argv to the engine and use whatever it
# returns as the process exit status.
if __name__ == '__main__':
    sys.exit(engine.execute(sys.argv))