예제 #1
0
def load_parsed(db):
    """Parse every stored query and insert the resulting parsetrees.

    Connects ``db``, reads the ``id`` and ``text`` columns of the ``queries``
    table, runs ``parse_query`` on each text and hands every result that is
    not ``None`` to ``insert_parsetree`` after tagging it with the id of the
    row it came from.

    :param db: Database handle providing ``connect``, ``execute`` and
        ``close``; rows returned by its cursor must support lookup by
        column name
    :rtype: None
    """
    db.connect()
    try:
        cursor = db.execute("SELECT id, text FROM queries")
        for row in cursor.fetchall():
            parsetree = parse_query(row["text"])
            if parsetree is not None:
                parsetree.query_id = row["id"]
                insert_parsetree(db, parsetree)
    finally:
        # Release the connection even when parsing or inserting raises.
        db.close()
예제 #2
0
def load_table(database):
    """Parse every stored query and insert its serialized parsetree.

    Opens a connection with ``connect(database)``, reads ``id`` and ``text``
    from the ``queries`` table, parses each text with ``parse_query`` and,
    for every result that is not ``None``, passes ``p.dumps()`` together
    with the query id to ``insert_parsetree``.

    :param database: Value forwarded unchanged to ``connect``
    :rtype: None
    """
    db = connect(database)
    try:
        cursor = db.execute("SELECT id, text FROM queries")
        for (qid, query) in cursor.fetchall():
            p = parse_query(query)
            if p is not None:
                # Call form with one string prints the same text on
                # Python 2 and Python 3.
                print("Inserting parsed query.")
                insert_parsetree(db, p.dumps(), qid)
    finally:
        # Close the connection even if parsing or inserting fails.
        db.close()
예제 #3
0
    def get_parsetrees(self):
        """Lazily produce one parsetree per parseable query of this source.

        Each item from ``self.get_queries()`` is handed to ``parse_query``.
        Falsy results are skipped; every other result gets the ``query_id``
        of the query it was built from and is then yielded.

        :param self: The current object
        :type self: File
        :rtype: generator
        """
        for current in self.get_queries():
            tree = parse_query(current)
            if not tree:
                # Nothing usable came back for this query; move on.
                continue
            tree.query_id = current.query_id
            yield tree
예제 #4
0
    def load_parsed(self):
        """Parse the queries and load them into the parsetree table.

        Each query is read from the query table, which is assumed to be
        populated, and then the result is loaded into the parsetree table.
        Queries for which ``parse_query`` returns ``None`` are skipped.
        The connection is closed when the method finishes, including when
        parsing or inserting raises.

        :param self: The current object
        :type self: queryutils.databases.Database
        :rtype: None
        """
        self.connect()
        try:
            cursor = self.execute("SELECT id, text FROM queries")
            for row in cursor.fetchall():
                parsetree = parse_query(row["text"])
                if parsetree is not None:
                    logger.debug("Loading parsetree.")
                    # Remember which query row this tree was built from.
                    parsetree.query_id = row["id"]
                    self.insert_parsetree(parsetree)
        finally:
            self.close()
from collections import defaultdict
from queryanalysis.db import connect_db
from queryutils.parse import extract_stages_with_cmd, parse_query

# Count the flattened templates of all "eval" stages found in queries whose
# source differs from `source`, and report how many stages failed to parse.
db = connect_db()
source = "storm"
templates = defaultdict(int)
failed = 0
try:
    cursor = db.execute("SELECT text, source FROM queries WHERE source!=?", [source])
    # The per-row source is unused; a separate name keeps `source` intact.
    for (text, _row_source) in cursor.fetchall():
        for s in extract_stages_with_cmd("eval", str(text)):
            try:
                p = parse_query(s)
            except Exception:
                failed += 1
                print("Failed:  %s" % (s,))
                # Skip this stage: without this, `p` would be unbound or
                # still hold the previous stage's tree and be counted again.
                continue
            if p:
                templates[p.template().flatten()] += 1
finally:
    # Close the connection even if the query or the processing raises.
    db.close()

print("Total failed: %s" % (failed,))
for (template, count) in templates.items():
    print("%s %s" % (count, template))

def space_around_nonletters(old):
    """Insert a space at every boundary between letters and non-letters.

    A single space is added between two adjacent characters whenever
    exactly one of them is in ``string.ascii_letters``. All original
    characters are kept, so existing whitespace counts as a non-letter
    and is preserved.

    :param old: The text to process
    :type old: str
    :return: The text with a space added at each letter/non-letter boundary
    :rtype: str
    """
    letters = string.ascii_letters
    pieces = []
    previous_is_letter = None  # None until the first character is seen
    for char in old:
        is_letter = char in letters
        if previous_is_letter is not None and is_letter != previous_is_letter:
            pieces.append(' ')
        pieces.append(char)
        previous_is_letter = is_letter
    # Join once at the end instead of rebuilding the string per character.
    return ''.join(pieces)

# Print the (function, fingerprint) tuples that lsi_tuples_from_parsetree
# produces for up to 500 distinct query texts.
db = connect_db()
try:
    cursor = db.execute("SELECT distinct(text), source FROM queries LIMIT 500")
    for (text, source) in cursor.fetchall():
        try:
            p = parse_query(text)
        except Exception:
            # Best-effort: a query that cannot be parsed is skipped, but
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            p = None
        if p:
            for (function, fingerprint) in lsi_tuples_from_parsetree(p):
                print(text)
                print("source: %s" % (source,))
                print("function: %s" % (function,))
                print("fingerprint: %s" % (fingerprint,))
                print("")
finally:
    # Close the connection even if the query or the processing raises.
    db.close()