Example #1
0
def clean_db():
    """Empty the publication tables and restart their id sequences.

    Sends one multi-statement SQL script through ``execute`` that deletes
    every row from the four publication tables and their ``*_author`` link
    tables, then restarts each ``*_pub_id_seq`` sequence at 1.

    The ``author`` table and its sequence are deliberately left untouched
    (the SQL that used to rebuild them was already disabled).
    """
    # Link tables are emptied first so that, should they reference the
    # publication rows through foreign keys, the deletes below cannot be
    # blocked by them.  Each table is deleted from exactly once.
    execute("""
DELETE FROM article_author WHERE TRUE ;
DELETE FROM book_author WHERE TRUE ;
DELETE FROM incollection_author WHERE TRUE ;
DELETE FROM inproceedings_author WHERE TRUE ;

DELETE FROM article WHERE TRUE ;
DELETE FROM book WHERE TRUE ;
DELETE FROM incollection WHERE TRUE ;
DELETE FROM inproceedings WHERE TRUE ;

ALTER SEQUENCE article_pub_id_seq RESTART 1;
ALTER SEQUENCE book_pub_id_seq RESTART 1;
ALTER SEQUENCE incollection_pub_id_seq RESTART 1;
ALTER SEQUENCE inproceedings_pub_id_seq RESTART 1;
    """)

    print("Done Cleaning DB...")
Example #2
0
def query(server, dataset, query, file, measure, format, output, display):
    """Evaluate a SPARQL query and report its results and metrics.

    :param server: forwarded unchanged to ``engine.execute``
    :param dataset: forwarded unchanged to ``engine.execute``
    :param query: query text; ignored when ``file`` is given
    :param file: path of a file holding the query, or ``None``
    :param measure: path of a file that receives
        ``execution_time,nb_calls,data_transfer``, or ``None``
    :param format: ``'w3c/json'`` or ``'w3c/xml'`` select the matching
        serialiser; any other value (including ``'json'``) yields
        ``json.dumps`` output
    :param output: path of a file that receives the serialised result,
        or ``None``
    :param display: when truthy, the serialised result is also logged

    Exits the process with status 1 when neither ``query`` nor ``file``
    provides a query.
    """
    # A query file takes precedence over an inline query string.  The file
    # is read inside a context manager so the handle is always closed.
    if file is not None:
        with open(file, 'r') as query_file:
            query = query_file.read()
    if query is None:
        logger.error('You have to input a SPARQL query')
        sys.exit(1)

    # Evaluates the query
    result, stats = engine.execute(server, dataset, query)

    # Writes the execution metrics as a single CSV line
    if measure is not None:
        with open(measure, 'w') as out_file:
            out_file.write(
                f'{stats.execution_time()},{stats.nb_calls()},{stats.data_transfer()}'
            )

    # Serialises the query result
    if format == 'w3c/json':
        data = sparql_json(result.get())
    elif format == 'w3c/xml':
        data = sparql_xml(result.get())
    else:
        # 'json' and every unrecognised format share the plain JSON dump.
        data = json.dumps(result.get())

    if display:
        logger.info(data)
    if output is not None:
        with open(output, 'w') as out_file:
            out_file.write(data)
    logger.info(f'Evaluation metrics: \
        \ntime: {stats.execution_time()} sec \
        \ntransfer: {stats.data_transfer()} bytes \
        \ncalls: {stats.nb_calls()} \
        \nresult: {stats.nb_result()}')
def extract_row(row, columns):
    """Flatten ``row`` into one string of tokens, following foreign keys.

    :param row: mapping-like record indexed by column name
    :param columns: iterable of ``(column_name, column_type)`` pairs
    :return: the concatenated ``tokenize3`` output for every usable column

    Columns missing from ``row`` or listed in ``ignorable_columns`` are
    skipped.  A column listed in ``foreign_keys`` is not tokenised itself;
    each of its key values is looked up in the referenced tables and the
    matching rows are flattened recursively.
    """
    foreign_keys = {
        'know_for_titles': [('title_basics', 'tconst'),
                            ('title_ratings', 'tconst')]
    }
    pieces = []
    for column_name, column_type in columns:
        if column_name not in row or column_name in ignorable_columns:
            continue

        value = row[column_name]

        if column_name in foreign_keys:
            # A scalar key is handled as a one-element sequence so both
            # shapes share the same lookup loop.
            keys = value if isinstance(value, (list, tuple)) else [value]
            for key in keys:
                for table, column_id in foreign_keys[column_name]:
                    matches = execute(
                        "SELECT * FROM {} WHERE {} = '{}'".format(
                            table, column_id, key))
                    table_columns = execute(get_columns(table)).fetchall()
                    for match in matches:
                        pieces.append(extract_row(match, table_columns))
        elif value is None:
            pieces.append(tokenize3(column_name, "NULL"))
        elif column_type == 'ARRAY':
            # Array elements are space-separated and followed by one
            # trailing space.
            pieces.append(
                " ".join(tokenize3(column_name, str(val)) for val in value)
                + " ")
        else:
            pieces.append(tokenize3(column_name, str(value)))
    return "".join(pieces)
def insert_a(db_index, table, execute, val):
    """Link publication ``db_index[0]`` to the author named ``val``.

    The author id is taken from the module-level ``author_dict`` cache when
    present; otherwise it is selected from the ``author`` table, inserting
    the author first if the select fails.  A row is then added to the
    ``<table>_author`` link table.

    :param db_index: sequence whose first element is the publication id
    :param table: publication table name; ``_author`` is appended to it
    :param execute: callable that runs one SQL statement
    :param val: author name; converted with ``str`` and stripped of ``'``
    """
    # Single quotes are removed because the name is spliced directly into
    # the SQL text below.
    # NOTE(review): switch to bound parameters if ``execute`` supports them.
    author = str(val).replace("'", "")

    def select_author_id():
        # Unpacking requires the result to hold exactly one row; anything
        # else raises and is handled by the caller.
        (found,) = execute(
            "SELECT author_id FROM author WHERE author.author_name = '{}'".format(author))
        return found

    if author in author_dict:
        author_id = [author_dict[author]]
    else:
        # ``except Exception`` replaces the bare ``except`` so that
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        try:
            author_id = select_author_id()
        except Exception:
            try:
                execute("INSERT INTO author (author_name, author_homepage) VALUES ('{}',NULL)".format(author))
                author_id = select_author_id()
            except Exception:
                # Presumably the insert lost a race with another writer;
                # one more select picks up the existing row.
                author_id = select_author_id()

    execute("INSERT INTO {}_author (pub_id,author_id) values ({},{})".format(table, db_index[0], author_id[0]))
Example #5
0
def main():
    """Start the engine and run either controller mode or the solver.

    Reads its configuration from the module-level ``args``.

    :return: 0 on success; 1 when ``--controller`` is used without
        ``--no_stats`` or when ``execute`` reports ``-1``
    """
    if args.controller and not args.no_stats:
        print(
            "ERROR: controller mode does not work"
            " without --no_stats argument..."
        )
        return 1

    pid = execute(args.file_path)
    if pid == -1:
        return 1

    # The started process is killed in ``finally`` so it cannot be left
    # running when exploring or plotting raises.
    try:
        if args.controller:
            print(
                "Controller Mode\n"
                " - Use WASD keys to move around the maze\n"
                " - Press F to inspect\n"
                " - Press E to exit"
            )
            Controller(args.debug).explore_maze()
        else:
            print(
                "Classic maze solver\n"
                "Waiting for engine..."
            )
            sleep(3)
            maze = Maze()
            if args.no_stats:
                print(f"Total cells visited: {len(maze.visited)}")
            else:
                old_data = get_pickle_obj(maze.colors_xy)
                print("Show statistics!")
                plt_colors_dist(maze.colors_count)
                plt_xy_dist(maze.colors_xy, old_data)
                plt_map(maze.visited)
    finally:
        kill(pid)

    return 0
Example #6
0
    def POST(self, options):
        """Run the engine with the options selected by the user.

        :param options: JSON text of an object with the keys
            ``is_default_dataset`` (bool), ``dataset`` (str),
            ``providers`` (list), ``algorithms`` (list) and ``target``
            (str).  When ``is_default_dataset`` is true, ``dataset`` holds
            the name of the default dataset.
        :return: the value returned by ``engine.execute``
        :raises cherrypy.HTTPError: status 500 carrying the text of any
            exception raised while reading the keys or running the engine
        """
        options_dic = json.loads(options)
        argument_keys = (
            'is_default_dataset',
            'dataset',
            'providers',
            'algorithms',
            'target',
        )

        try:
            # Key lookups stay inside the ``try`` so a missing key is
            # reported the same way as an engine failure.
            arguments = [options_dic[key] for key in argument_keys]
            return engine.execute(*arguments)
        except Exception as exception:
            raise cherrypy.HTTPError(500, message=str(exception))
from engine import execute

# Load every known author once at import time and index the ids by name.
authors = execute('SELECT author_id, author_name FROM author')

author_dict = {
    record.author_name: record.author_id
    for record in authors
}


def insert(execute, tn, fields, table, current_dict,
           current_pub_id, db_index):
    """Append one ``INSERT`` statement for a publication to the file ``file``.

    :param execute: currently unused; the statement is written to disk
        instead of being executed
    :param tn: index into ``fields`` selecting this table's column names
    :param fields: per-table sequences of column names; the first two
        columns receive ``db_index[0]`` and ``current_pub_id``
    :param table: name of the target table
    :param current_dict: mapping of column name to string value; columns
        that are absent are written as the empty string
    :param current_pub_id: value for the second column
    :param db_index: sequence whose first element is the row id
    """
    # Quotes are stripped from the values because the statement is built
    # from the tuple's repr, not from bound parameters.
    values = [db_index[0], current_pub_id]
    values.extend(
        current_dict[field].replace('"', '').replace("'", "")
        if field in current_dict else ''
        for field in fields[tn][2:]
    )

    column_list = str(tuple(fields[tn])).replace("'", "")
    s = "INSERT INTO {} {}  VALUES {}".format(table, column_list, tuple(values))
    # Percent signs are doubled, presumably so the text survives a driver
    # that treats '%' as a placeholder marker.
    s = s.replace("%", "%%")

    # execute(s)
    # The context manager closes the handle straight away; the bare
    # open(...).write(...) left that to the garbage collector.
    with open("file", "a") as statement_file:
        statement_file.write(s + ";\n")


def insert_a(db_index, table, execute, val):
    author = str(val).replace("'", "")

    if author in author_dict:
        author_id = [author_dict[author]]
Example #8
0
    class InsertA(threading.Thread):
        """Thread that runs a single ``insert_a`` call with stored arguments."""

        def __init__(self, db_index, table, execute, val):
            super().__init__()
            # Keep the arguments until the thread is started.
            self.db_index, self.table = db_index, table
            self.execute, self.val = execute, val

        def run(self):
            """Call ``insert_a`` with the arguments given at construction."""
            insert_a(self.db_index, self.table, self.execute, self.val)

    threads = []
    for tn, table in enumerate(tables):
        result = execute(
            "SELECT field.* FROM field, pub WHERE pub.k=field.k AND pub.p= '{}' ORDER BY field.k"
            .format(table))
        print("Query done")
        length = result.rowcount
        print("Read result for", table, "length")
        current_pub_id = ''
        db_index = 0

        current_dict = {}
        values = []
        r = 1
        i = 0
        store = []
        oldR = None
        found_last = False
        while r is not None:
def create_corpus(tokenize, file_name):
    """Write a tokenised text corpus of author/publication rows.

    Each SQL statement below is read in pages of ``chunk_size`` rows.  Every
    row becomes one output line: each column value is passed to
    ``tokenize(column_name, text)`` and the tokens are joined with spaces.
    ``None`` is tokenised as ``"NULL"`` and list values contribute one token
    per element.

    :param tokenize: callable ``(column_name, value_text) -> str``
    :param file_name: name appended to the module-level ``corpus`` prefix to
        form the output path; the file is truncated first
    """
    file_name = corpus + file_name
    chunk_size = 6000000
    # Buffered lines are flushed to disk twice per page.
    flush_every = max(1, chunk_size // 2)

    # Truncate (or create) the output file; everything below appends to it.
    with open(file_name, 'w') as file:
        file.write("")

    # NOTE(review): the third and fourth statements both read
    # inproceedings / inproceedings_author; the third was possibly meant to
    # read incollection / incollection_author - confirm against the schema.
    statements = [
        """
        SELECT au.author_name, au.author_homepage,
               ar.title, ar.year, ar.month, ar.volume, ar.journal, ar.number
         FROM author au JOIN article_author aa ON aa.author_id = au.author_id JOIN article ar ON 
        ar.pub_id = aa.pub_id
        """,
        """
        SELECT au.author_name, au.author_homepage,
               b.title,b.year,b.isbn,b.publisher
         FROM author au JOIN book_author ba ON ba.author_id = au.author_id JOIN book b ON 
        ba.pub_id = b.pub_id
        """,
        """
        SELECT au.author_name, au.author_homepage,
              i.title,i.year,i.isbn,i.booktitle
         FROM author au JOIN inproceedings_author ia ON ia.author_id = au.author_id JOIN inproceedings i ON 
        ia.pub_id = i.pub_id
        """,
        """
        SELECT au.author_name, au.author_homepage,
               i.title,i.year,i.isbn,i.booktitle,i.editor
         FROM author au JOIN inproceedings_author ia ON ia.author_id = au.author_id JOIN inproceedings i ON 
        i.pub_id = ia.pub_id
        """,
    ]

    pending = []
    for s, statement in enumerate(statements):
        print("Executing statement ", s)

        # Paging state is per statement.  Previously ``length`` stayed 0 and
        # ``chunk`` kept growing after the first statement, so the remaining
        # statements were never read, and the cached column names belonged
        # to the first statement only.
        columns = []
        chunk = 0
        length = 1

        # select entries in chunks to speed things up
        while length != 0:
            # Ordering uses the ``au`` alias because it is the only one
            # shared by every statement (``aa`` exists only in the first).
            result = execute(
                (statement +
                 """ ORDER BY au.author_id LIMIT {} OFFSET {}""").format(
                     chunk_size, chunk))
            if not columns:
                columns = [col[0] for col in result.cursor.description]
            print("Done reading")

            length = result.rowcount
            print("Rowcount ", length)
            for i, row in enumerate(result):
                if i % flush_every == 0:
                    print(i, 'of', length)
                    with open(file_name, 'a') as file:
                        file.write("".join(pending))
                    pending = []

                parts = []
                for column, item in zip(columns, row):
                    if isinstance(item, list):
                        parts.append(
                            " ".join(tokenize(column, str(e)) for e in item))
                    elif item is None:
                        parts.append(tokenize(column, "NULL"))
                    else:
                        parts.append(tokenize(column, str(item)))

                # Every part is followed by a single space, as before.
                line = "".join(part + " " for part in parts)
                pending.append(delete_double_spaces(line + "\n"))

            chunk = chunk + chunk_size
            print('Inserted entries', chunk)

        # Flush whatever is still buffered for this statement.
        if pending:
            with open(file_name, 'a') as file:
                file.write("".join(pending))
            pending = []
Example #10
0
import sys
import engine

if __name__ == '__main__':
    # Hand the raw command line to the engine and use its return value as
    # the process exit status.
    exit_status = engine.execute(sys.argv)
    sys.exit(exit_status)