예제 #1
0
def _run_ddl(db, qstr, ignore_code, skip_msg):
    """Execute one DDL statement against `db`, tolerating a known MySQL error.

    Prints the statement, executes it, and always closes the cursor
    (the original code closed it in both the try and except branches).
    If execution fails and the exception message starts with the MySQL
    error code `ignore_code`, prints `skip_msg` and continues; any other
    error is re-raised instead of being silently swallowed.
    """
    print(qstr)
    cursor = db.cursor()
    try:
        cursor.execute(qstr)
    except Exception as e:
        if not str(e).startswith(ignore_code):
            raise
        print(skip_msg)
    finally:
        cursor.close()


def main():
    """Rebuild secondary indexes for every relation in the target database.

    For each relation: drop all non-unique indexes (columns covered by a
    unique key are remembered and skipped below), then add a plain index
    on every 'num' attribute, and both a prefix index (TEXT_INDEX_SIZE
    chars) and a FULLTEXT index on every 'text' attribute.  Connection
    settings come from config.ini; the database name from the CLI.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('db')
    args = parser.parse_args()

    config = ConfigParser.RawConfigParser(allow_no_value=True)
    config.read('config.ini')

    db = Database(
        config.get('database', 'user'),
        config.get('database', 'pw'),
        config.get('database', 'host'),
        args.db,
        config.get('database', 'cache_dir'),
        timeout=config.get('database', 'timeout'),
    )

    for relname, rel in db.get_relations().items():
        # Collect current index metadata for this relation; close the
        # cursor even if the query fails.
        cursor = db.cursor()
        try:
            cursor.execute('SHOW INDEXES FROM {}'.format(relname))
            index_rows = cursor.fetchall()
        finally:
            cursor.close()

        # Drop every non-unique index; remember columns backed by a
        # unique key so we do not re-index them below.
        unique_keys = []
        for row in index_rows:
            non_unique, index_name, column_name = row[1], row[2], row[4]
            if non_unique == 1:
                # MySQL error 1553: index is required by a foreign key.
                _run_ddl(
                    db,
                    'ALTER TABLE {} DROP INDEX {}'.format(relname, index_name),
                    '1553',
                    'Did not drop {}.{} because of foreign key.'.format(relname, index_name),
                )
            else:
                unique_keys.append(column_name)

        # Add the needed indexes for each attribute.
        # MySQL error 1061: duplicate key name (index already exists).
        for attr_name, attr in rel.attrs.items():
            if attr_name in unique_keys:
                continue

            dup_msg = 'Did not add {}.{} because it was a duplicate.'.format(relname, attr_name)
            if attr.type == 'num':
                _run_ddl(
                    db,
                    'ALTER TABLE {} ADD INDEX {}({})'.format(relname, attr_name, attr_name),
                    '1061',
                    dup_msg,
                )
            elif attr.type == 'text':
                # Text columns get a prefix index limited to
                # TEXT_INDEX_SIZE characters, plus a FULLTEXT index.
                _run_ddl(
                    db,
                    'ALTER TABLE {} ADD INDEX {}({}({}))'.format(
                        relname, attr_name, attr_name, TEXT_INDEX_SIZE),
                    '1061',
                    dup_msg,
                )
                _run_ddl(
                    db,
                    'ALTER TABLE {} ADD FULLTEXT {}_ft({})'.format(
                        relname, attr_name, attr_name),
                    '1061',
                    dup_msg,
                )
예제 #2
0
def main():
    """Prune each task's candidate queries (CQs) down to a time budget.

    For every task in ../data/<db>.json (minus excluded query ids):
    materialize the target query (TQ) into a temp table `tq`, then time
    each CQ and count how many of its tuples intersect the TQ.  CQs are
    dropped from the front of a list sorted by (intersection count,
    negated runtime) — i.e. fewest intersections and slowest first —
    until the total query time fits within MAX_Q_TIME.  Survivors are
    renumbered 000, 001, ... and written to ../data/<db>.cleaned.json.
    Optionally emails a completion notice.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument('db')
    argparser.add_argument('--email')
    args = argparser.parse_args()

    config = ConfigParser.RawConfigParser(allow_no_value=True)
    config.read('config.ini')

    db = Database(
        config.get('database', 'user'),
        config.get('database', 'pw'),
        config.get('database', 'host'),
        args.db,
        config.get('database', 'cache_dir'),
        timeout=config.get('database', 'timeout'),
        buffer_pool_size=config.get('database', 'buffer_pool_size'),
    )

    parser = SQLParser(args.db, config.get('parser', 'cache_dir'))

    # `with` guarantees the handle is closed (the original leaked it).
    with open('../data/{}.json'.format(args.db), 'r') as f:
        data = json.load(f)
    data = OrderedDict(sorted(data.items(), key=lambda x: x[0]))

    excludes = find_excludes(args.db)

    for qid, task in data.items():
        print('\nExamining Task {}...'.format(qid))
        if int(qid) in excludes:
            print('Skipping excluded query.')
            continue

        # Each task must have exactly one answer query.  `assert` would
        # be stripped under `python -O`, so validate explicitly.
        if len(task['ans']) != 1:
            raise ValueError(
                'Task {} has {} answers; expected exactly 1.'.format(qid, len(task['ans'])))

        tqid = task['ans'][0]

        tq, _ = parser.parse_one(qid, tqid, task['cqs'][tqid])
        tq_types = create_temp_table_for_tq(db, tq)
        tq_count = get_tq_count(db)

        cq_infos = []  # list of (cqid, negative of query time, intersects w/tq)
        bar = tqdm(total=len(task['cqs']), desc='Checking CQs')
        for cqid, cq_str in task['cqs'].items():
            cq, _ = parser.parse_one(qid, cqid, cq_str)

            start = time.time()
            try:
                db.execute_sql(cq.query_str)  # result unused; only timed
            except Exception:
                # Timeouts are expected; their elapsed time still counts.
                # Bare `raise` preserves the traceback (`raise e` did not).
                if not str(sys.exc_info()[1]).startswith('Timeout'):
                    raise
            q_time = time.time() - start

            x_count = cq_tq_intersects(db, cqid, tqid, cq, tq, tq_count, tq_types)

            # q_time is negated so ascending sort puts slower CQs first
            # within the same intersection count.
            cq_infos.append((cqid, -1 * q_time, x_count))
            bar.update(1)
        bar.close()

        # Head of the list = least useful / most expensive CQs, popped first.
        cq_infos.sort(key=operator.itemgetter(2, 1))

        total_q_time = sum(-1 * i[1] for i in cq_infos)
        while total_q_time > MAX_Q_TIME:
            cur_cq_info = cq_infos.pop(0)
            print('Removing CQ {}, query_time: {}, intersects: {}'.format(
                cur_cq_info[0], -cur_cq_info[1], cur_cq_info[2]))
            total_q_time += cur_cq_info[1]  # stored negative, so this subtracts

        # Drop the temp TQ table; close the cursor even on failure.
        cursor = db.cursor()
        try:
            cursor.execute('DROP TABLE tq')
        finally:
            cursor.close()

        # Renumber surviving CQs 000, 001, ... and remap the answer id.
        new_cqs = {}
        for i, info in enumerate(cq_infos):
            new_cqid = format(i, '03')
            new_cqs[new_cqid] = task['cqs'][info[0]]
            if info[0] == tqid:
                task['ans'] = [new_cqid]
        task['cqs'] = OrderedDict(sorted(new_cqs.items(), key=lambda x: x[0]))

    data = OrderedDict(sorted(data.items(), key=lambda x: x[0]))

    with open('../data/{}.cleaned.json'.format(args.db), 'w') as f:
        json.dump(data, f, indent=1)

    if args.email is not None:
        mailer = Mailer()
        mailer.send(args.email, 'Done cleaning CQs for {}'.format(args.db), 'Done')