Beispiel #1
0
def main():
    done = 0
    seen = defaultdict(int)
    for queries in queryutils.get_queries(limit=800*BYTES_IN_MB):
        for query in queries:
            template = queryutils.extract_template(query) 
            if template is None: continue
            s = template.str_tree()
            seen[s] += 1 
            done += 1
            if done % 100 == 0:
                sys.stderr.write(str(done) + " done\n")
                sys.stderr.flush()

    print "Number of templates: ", len(seen.keys())
    templates = sorted(seen.items(), key=lambda x: x[1], reverse=True)
    for (template, count) in templates:
        print "Template: \n", template
        print "Count: ", count
        print
def main():
    """Fill the ``templates`` table for queries that have no template row yet.

    One row per distinct query text is read from ``queries`` (the smallest
    ``id`` among rows sharing that text is used as its id).  For each such
    row the ``templates`` table is checked for an existing ``query_id``
    entry; when there is none, ``queryutils.extract_template`` is called and
    the result's ``dumps()`` output is inserted.  Queries for which no
    template is returned (``None``) are skipped.

    A progress line is written to stderr after every 10 rows examined.
    The connection is closed on exit, including when an error interrupts
    the run.

    NOTE(review): ``connect_db()`` is assumed to return a connection that
    offers ``execute()`` directly (as ``sqlite3`` connections do) -- confirm.
    """
    db = connect_db()
    try:
        select_cursor = db.execute("SELECT min(id), text FROM queries WHERE id>? GROUP BY text", [0])
        rows = select_cursor.fetchall()
        for done, (query_id, query) in enumerate(rows, 1):
            if done % 10 == 0:
                sys.stderr.write(str(done) + " done\n")
                sys.stderr.flush()

            check_cursor = db.execute("SELECT * FROM templates \
                                    WHERE query_id=?", [str(query_id)])
            if check_cursor.fetchall():
                # A template row already exists for this query.
                continue

            template = queryutils.extract_template(query)
            if template is None:
                # No template could be extracted; nothing to store.
                continue

            insert_cursor = db.cursor()
            insert_cursor.execute("INSERT INTO templates (query_id, template) \
                            VALUES (?,?)", [query_id, template.dumps()])
            # Each insert is committed immediately, so rows written before
            # an interruption are already persisted.
            db.commit()
    finally:
        # Release the connection even if extraction or a statement fails.
        db.close()