def output_distance_graph(size, graphfile, distancesfile=DISTFILE, normalize=False):
    """Parse unique queries, compute parse-tree distances, and plot them.

    Batches from ``splqueryutils.get_queries`` are consumed until more than
    ``size`` parse trees have been collected or the batches run out. Each
    distinct ``query.text`` is handed to ``parse_queries`` only once. The
    running parse-tree count is written to stderr after every batch.

    Args:
        size: Collection stops once the number of parse trees exceeds this
            value. Each batch is appended whole before the check, so the
            final count can overshoot ``size``.
        graphfile: Not used by this function.
            NOTE(review): presumably the output path for the plot -- confirm
            whether it should be forwarded to ``plot_query_distance_graph``.
        distancesfile: Passed through to ``get_parsetree_distances``.
        normalize: Passed through to ``get_parsetree_distances`` as the
            ``normalize`` keyword.
    """
    seen_texts = set()
    parsetrees = []
    for queries in splqueryutils.get_queries(limit=800 * BYTES_IN_MB):
        to_parse = []
        for query in queries:
            if query.text not in seen_texts:
                seen_texts.add(query.text)
                to_parse.append(query)
        # Grow the list in place: rebuilding it with ``+`` copied every
        # previously collected tree again on each batch.
        parsetrees.extend(parse_queries(to_parse))
        sys.stderr.write(str(len(parsetrees)) + '\n')
        if len(parsetrees) > size:
            break
    # A single parenthesised argument prints identically under the
    # Python 2 print statement and the print function.
    print("Computing distances.")
    distances = get_parsetree_distances(parsetrees, distancesfile, normalize=normalize)
    plot_query_distance_graph(distances)
def main(cmd):
    """Report the invocations of ``cmd`` that ``splparse`` fails on.

    Every query with a not-yet-seen ``query.text`` is split with
    ``splqueryutils.break_into_stages``; the stages selected by
    ``splqueryutils.filter_stages_by(cmd, stages)`` are each passed to
    ``splparse``. For every invocation that raises, the error text and the
    invocation itself are printed. A final line reports the failure count.

    Args:
        cmd: Forwarded to ``splqueryutils.filter_stages_by`` to select the
            stages of interest.
    """
    seen_texts = set()
    failures = 0
    for queries in splqueryutils.get_queries(limit=800 * BYTES_IN_MB):
        for query in queries:
            if query.text in seen_texts:
                continue
            stages = splqueryutils.break_into_stages(query)
            for invocation in splqueryutils.filter_stages_by(cmd, stages):
                try:
                    splparse(invocation)
                except Exception as e:
                    # Deliberately broad: any parse failure is reported and
                    # counted rather than aborting the scan.
                    # ``message`` is deprecated, empty for exceptions raised
                    # with zero or several arguments, and missing on
                    # Python 3, so fall back to ``str(e)``.
                    print(getattr(e, "message", None) or str(e))
                    print(invocation)
                    failures += 1
            seen_texts.add(query.text)
    print("Failed command invocations: " + str(failures))