def insert(file, table):
    """Insert one row per line of a tab-separated UniProt mapping file.

    Each line is split into UniprotAC / EntrezGene / RefSeq / GI / GoId
    columns. When the GI or RefSeq field holds several ';'-separated
    accessions, one row is inserted per accession (spaces stripped).

    Fixes over the previous version:
      * removed the dead ``if tmp_array[7] == "": pass`` statement;
      * the old code inserted a duplicate row whenever GI was multi-valued
        but RefSeq was not (the single-insert ``else`` fired after the GI
        loop had already inserted). A flag now ensures the single insert
        only happens when neither field was expanded.

    :param file: iterable of tab-separated text lines (e.g. an open file).
    :param table: target table name passed through to the query generator.
    :return: number of rows inserted.
    """
    count = 0
    cursor = Config.connect()
    for line in file:
        fields = line.split("\t")
        row = {
            'UniprotAC': fields[0],
            'EntrezGene': fields[2],
            'RefSeq': fields[3],
            'GI': fields[4],
            'GoId': fields[7],
        }
        gi_parts = fields[4].split(";")
        refseq_parts = fields[3].split(";")
        inserted = False
        # Expand multi-valued GI field: one insert per accession.
        if len(gi_parts) > 1:
            for gi in gi_parts:
                row['GI'] = gi.replace(" ", "")
                cursor.execute(QueryGenerator.gen_insert(table, row))
                count += 1
            inserted = True
        # Expand multi-valued RefSeq field: one insert per accession.
        # NOTE(review): as before, when both fields are multi-valued the GI
        # column keeps the last value from the loop above -- confirm intended.
        if len(refseq_parts) > 1:
            for refseq in refseq_parts:
                row['RefSeq'] = refseq.replace(" ", "")
                cursor.execute(QueryGenerator.gen_insert(table, row))
                count += 1
            inserted = True
        # Plain single insert only when neither field needed expansion.
        if not inserted:
            cursor.execute(QueryGenerator.gen_insert(table, row))
            count += 1
    return count
def update_results():
    """AJAX endpoint: return NarfResults rows for the selected batch as an
    HTML table, optionally filtered by the selected cells and models.

    Fix: ``request.args.get('bSelected', type=str)`` returns ``None`` when
    the parameter is absent, so the original ``len(bSelected)`` check raised
    TypeError; ``not bSelected`` covers both the missing and empty cases.
    """
    # get user choices if they exist (batch required)
    bSelected = request.args.get('bSelected', type=str)
    if not bSelected:
        # TODO: Figure out a better way to catch this error, probably
        # through html solution that requires user to choose batch before
        # using other selects
        return jsonify(resultstable="MUST SELECT A BATCH")
    cSelected = request.args.getlist('cSelected[]')
    mSelected = request.args.getlist('mSelected[]')

    # query narf results by batch, store in dataframe
    results = qg.QueryGenerator(dbc, tablename='NarfResults',
                                batchnum=bSelected).send_query()

    # TODO: Figure out why models parsed from analysis selection aren't
    # matching up with cells filtered by batch selection.

    # filter dataframe by selected cells, if any
    if cSelected:
        results = results[results.cellid.isin(cSelected)]
    # same for selected models, if any
    if mSelected:
        results = results[results.modelname.isin(mSelected)]
    return jsonify(resultstable=results.to_html(
        classes='table-hover table-condensed'))
def main_view():
    """Render the landing page with analysis and batch selector lists."""
    tablelist = 'NarfResults'
    plottypelist = 'Scatter'
    measurelist = 'r_test'
    resultstable = ''
    # TODO: Figure out how to get initial lists to load faster - added distinct
    # and field selections to shrink query, but initial page load is still very slow
    # if selection limit is lifted (so far tested up to 200k, takes ~20 seconds to load)
    # possibility: organize as analysis, batch, model objects and load a global-scope
    # dataframe with the relevant data on app launch? then just have view functions index
    # into dataframe as needed instead of generating new queries each time

    # Analysis names come back as a one-column dataframe; take the first
    # column as a series and convert to a plain list for the template.
    name_frame = qg.QueryGenerator(dbc, column='name',
                                   tablename='NarfAnalysis').send_query()
    analysislist = name_frame.iloc[:, 0].tolist()

    # Distinct batch numbers for the batch selector.
    batch_frame = qg.QueryGenerator(dbc, distinct=True, column='batch',
                                    tablename='NarfBatches').send_query()
    batchlist = batch_frame.iloc[:, 0].tolist()

    # Cell and model lists are intentionally left out for now to improve
    # page load time; they can populate after an analysis is selected.
    # (Previously queried distinct cellid from NarfBatches and distinct
    # modelname from NarfResults here.)

    return render_template('main.html',
                           analysislist=analysislist,
                           tablelist=tablelist,
                           batchlist=batchlist,
                           plottypelist=plottypelist,
                           measurelist=measurelist,
                           )
def print_query_generator(queries_file): if opts.verbose: gen = QueryGenerator.parse(queries_file) for op_name, values in gen.ops.iteritems(): print op_name, values print '\'', gen.base, '\'' print gen.idents print ''
def view_database():
    """Render the database page with batch and model selector lists."""
    tablelist = 'NarfResults'

    batch_col = qg.QueryGenerator(dbc, column='batch',
                                  tablename='NarfBatches').send_query().iloc[:, 0]
    # get unique list since there are duplicates
    batchlist = list(set(batch_col.tolist()))

    model_col = qg.QueryGenerator(dbc, column='modelname',
                                  tablename='NarfResults').send_query().iloc[:, 0]
    modellist = list(set(model_col.tolist()))

    return render_template('database.html', tablelist=tablelist,
                           batchlist=batchlist, modellist=modellist)
def update_cells():
    """AJAX endpoint: return the cell ids belonging to the selected batch."""
    bSelected = request.args.get('bSelected', 'no selection', type=str)
    cells = qg.QueryGenerator(dbc, column='cellid', tablename='NarfBatches',
                              batchnum=bSelected).send_query()
    # One-column dataframe -> plain list for the jQuery consumer.
    return jsonify(celllist=cells.iloc[:, 0].tolist())
def update_batch():
    """AJAX endpoint: look up the batch number for the selected analysis."""
    aSelected = request.args.get('aSelected', 'no selection', type=str)
    match = qg.QueryGenerator(dbc, tablename='NarfAnalysis',
                              analysis=aSelected).send_query()
    # The batch column holds the 3-character batch number followed by a
    # description, so keep only the first 3 characters.
    batchnum = match['batch'].iloc[0][:3]
    # return batchnum for selected analysis in jQuery-friendly format
    return jsonify(batchnum=batchnum)
def req_query(tablename, batchnum, modelname):
    """Render an HTML table of query results for the given URL variables."""
    # Parse variables from URL and hand them to the QueryGenerator object.
    generator = qg.QueryGenerator(tablename, batchnum, modelname)
    # Populate a dataframe by calling send_query() on the generator.
    data = generator.send_query()
    # TODO: Figure out best practice for when and where to close the database
    # connection. For now, close it whenever it is no longer needed for the
    # current operation.
    generator.close_connection()
    # Descriptive table title built from the request variables.
    tabletitle = ("%s, filter by: batch=%s, model=%s"
                  % (tablename, batchnum, modelname))
    # Generate the page via the table.html template, passing the dataframe's
    # HTML export and the title.
    return render_template('table.html', table=data.to_html(classes='Table'),
                           title=tabletitle)
def req_query():
    """Render an HTML table of query results from submitted form fields."""
    # add if statements to check if request data exists before pulling
    tablename = request.form['tablename']
    batchnum = request.form['batchnum']
    modelname = request.form['modelname']
    # Populate a dataframe via the query generator.
    data = qg.QueryGenerator(dbc, tablename=tablename, batchnum=batchnum,
                             modelname=modelname).send_query()
    tabletitle = ("%s, filter by: batch=%s, model=%s"
                  % (tablename, batchnum, modelname))
    # Generate the page via the table.html template, passing the dataframe's
    # HTML export and the title.
    return render_template('table.html', table=data.to_html(classes='table'),
                           title=tabletitle)
def update_models():
    """AJAX endpoint: expand the selected analysis' model tree into a list."""
    aSelected = request.args.get('aSelected', 'no selection', type=str)
    # currently disabled until modelfinder methods are fixed - combo array
    # recursion crashing website
    analysis = qg.QueryGenerator(dbc, column='modeltree',
                                 tablename='NarfAnalysis',
                                 analysis=aSelected).send_query()
    # Pull the modeltree text out of the single-cell result and let
    # ModelFinder convert it to a list of model names.
    modeltree = analysis.iloc[0, 0]
    finder = mf.ModelFinder(modeltree)
    return jsonify(modellist=finder.modellist)
def loadData(balanceDatasets):
    """Load the raw strings from the CSV files and expand them into the
    legit / malicious SQL-injection datasets.

    :param balanceDatasets: flag forwarded to the CSV loader.
    :return: (legitSet, maliciousSet) tuple.
    """
    # Read from files
    legitStrings, maliciousStrings = qg.load_csvFiles(balanceDatasets)
    # Create SQL Injection queries
    return qg.generate_dataset(legitStrings, maliciousStrings)
def check_database(queries_file, exec_sql, exec_aql, stop_on_failure=False, verbose=False):
    """Run every AQL query from the generator file on both engines and
    compare their row sets.

    Returns (nb_checked, nb_error) when stop_on_failure triggers, otherwise
    (nb_checked, nb_error, queries_log, sql_sum_time, aql_sum_time, sum_rows)
    where queries_log is an XML-like report of the failing queries.
    """
    queries_log = ''
    if queries_file is None:
        raise Exception('you need to provide a generator queries file')
    sum_rows = 0
    nb_checked, nb_error = 0, 0
    sql_sum_time, aql_sum_time = 0, 0
    for aql_query in QueryGenerator.iterate(queries_file):
        # Translate the AQL statement to SQL so both engines run the same
        # logical query.
        ss = AQLParser.Statements()
        ss.parse(aql_query)
        sql_query = ss.to_sql(separator='\n')
        # aql_query = AQLParser.kjeq_parse(aql_query)  # FIXME
        if verbose:
            print ''
            print '==='
            print ''
            print aql_query
            print ''
            print sql_query
        ##################################################
        # skip full outer because not supported by mysql #
        if exec_sql.sgbd == 'MySQL' and sql_query.lower().find(
                'full outer') != -1:
            if verbose:
                print "full outer not supported by MySQL, skip query"
            continue
        ##################################################
        nb_checked += 1
        # NOTE(review): rc from the SQL execution is immediately overwritten
        # by the AQL execution's rc, so SQL-side failures go undetected here
        # -- confirm this is intentional.
        rc, sql_time, sql_rows = exec_sql.execute_and_fetch(
            sql_query.replace(';', ''))
        rc, aql_time, aql_rows = exec_aql.execute(aql_query)
        if rc == 0:
            # Engine succeeded: compare row sets. ORDERed queries are
            # compared order-sensitively (third util.row_in argument True).
            if ('ORDER' in aql_query) and (not util.row_in(
                    sql_rows, aql_rows, True)):
                rc = 1
            elif (not util.row_in(sql_rows, aql_rows)) or (not util.row_in(
                    aql_rows, sql_rows)):
                rc = 1
        if rc != 0:
            nb_error += 1
            # NOTE(review): the elif branches below hang off `if verbose` and
            # test rc inside a block where rc != 0 always holds, so 'd' is
            # unreachable and 'e' prints whenever not verbose -- looks like a
            # bug ('d' was presumably meant to mark data mismatches).
            if verbose:
                print_query(sys.stderr, 'ERROR: query failed', aql_query,
                            sql_query, aql_rows, sql_rows)
            elif rc == 0:
                sys.stdout.write('d')
            elif rc != 0:
                sys.stdout.write('e')
            # Append the failing query and both row sets to the XML report.
            # NOTE(review): rc != 0 here, so the type attribute is always
            # 'aq_engine' -- confirm whether 'data' was meant to appear.
            queries_log += '<Query id="' + str(
                nb_checked) + '" status="error" type="' + (
                    'data' if rc == 0 else 'aq_engine') + '">\n'
            queries_log += '<AQL>\n'
            queries_log += aql_query + '\n'
            queries_log += '</AQL>\n'
            queries_log += '<SQL>\n'
            queries_log += sql_query + '\n'
            queries_log += '</SQL>\n'
            queries_log += '<Results nb="' + str(len(aql_rows)) + '">\n'
            for row in aql_rows:
                queries_log += '<row>' + ','.join(str(v)
                                                  for v in row) + '</row>\n'
            queries_log += '\n'
            queries_log += '</Results>\n'
            queries_log += '<Expected nb="' + str(len(sql_rows)) + '">\n'
            for row in sql_rows:
                # NOTE(review): expected rows use str(row) while actual rows
                # are comma-joined above -- the two formats differ.
                queries_log += '<row>' + str(row) + '</row>\n'
            queries_log += '\n'
            queries_log += '</Expected>\n'
            queries_log += '</Query>\n'
            if stop_on_failure:
                # Early exit returns a shorter tuple than the normal path.
                return (nb_checked, nb_error)
        else:
            # Success path: accumulate row counts and per-engine timings.
            sum_rows += len(sql_rows)
            sql_sum_time += sql_time
            aql_sum_time += aql_time
            if verbose:
                print_query(sys.stdout, 'query successfully checked',
                            aql_query, sql_query, aql_rows, sql_rows)
            else:
                sys.stdout.write('.')
            # queries_log += '<query status="successful">\n'
            # queries_log += aql_query
            # queries_log += '</query>\n'
    return (nb_checked, nb_error, queries_log, sql_sum_time, aql_sum_time,
            sum_rows)
__author__ = 'Ivan Dortulov'

# Explicit import instead of the former wildcard `from QueryGenerator
# import *`: only the QueryGenerator class is used below, and the wildcard
# hid where the name came from while polluting the module namespace.
from QueryGenerator import QueryGenerator

# Connect to the local work database, build the table relationship graph,
# and print a generated query for the 'blue' table.
gen = QueryGenerator('localhost', 'WorkDatabase', 'postgres', 'postgres')
graph = gen.build_table_graph()
print(gen.generate_query(graph, 'blue'))
def check_database(queries_file, exec_sql, exec_aql, stop_on_failure=False, verbose=False):
    """Run every AQL query from the generator file on both engines and
    compare their row sets.

    Returns (nb_checked, nb_error) when stop_on_failure triggers, otherwise
    (nb_checked, nb_error, queries_log, sql_sum_time, aql_sum_time, sum_rows)
    where queries_log is an XML-like report of the failing queries.

    NOTE(review): this function appears to be a duplicate of another
    check_database definition in the project -- consider deduplicating.
    """
    queries_log = ''
    if queries_file is None:
        raise Exception('you need to provide a generator queries file')
    sum_rows = 0
    nb_checked, nb_error = 0, 0
    sql_sum_time, aql_sum_time = 0, 0
    for aql_query in QueryGenerator.iterate(queries_file):
        # Translate the AQL statement to SQL so both engines run the same
        # logical query.
        ss = AQLParser.Statements()
        ss.parse(aql_query)
        sql_query = ss.to_sql(separator = '\n')
        # aql_query = AQLParser.kjeq_parse(aql_query)  # FIXME
        if verbose:
            print ''
            print '==='
            print ''
            print aql_query
            print ''
            print sql_query
        ##################################################
        # skip full outer because not supported by mysql #
        if exec_sql.sgbd == 'MySQL' and sql_query.lower().find('full outer') != -1:
            if verbose:
                print "full outer not supported by MySQL, skip query"
            continue
        ##################################################
        nb_checked += 1
        # NOTE(review): rc from the SQL execution is immediately overwritten
        # by the AQL execution's rc, so SQL-side failures go undetected here
        # -- confirm this is intentional.
        rc, sql_time, sql_rows = exec_sql.execute_and_fetch(sql_query.replace(';', ''))
        rc, aql_time, aql_rows = exec_aql.execute(aql_query)
        if rc == 0:
            # Engine succeeded: compare row sets. ORDERed queries are
            # compared order-sensitively (third util.row_in argument True).
            if ('ORDER' in aql_query) and (not util.row_in(sql_rows, aql_rows, True)):
                rc = 1
            elif (not util.row_in(sql_rows, aql_rows)) or (not util.row_in(aql_rows, sql_rows)):
                rc = 1
        if rc != 0:
            nb_error += 1
            # NOTE(review): the elif branches below hang off `if verbose` and
            # test rc inside a block where rc != 0 always holds, so 'd' is
            # unreachable and 'e' prints whenever not verbose -- looks like a
            # bug ('d' was presumably meant to mark data mismatches).
            if verbose:
                print_query(sys.stderr, 'ERROR: query failed', aql_query, sql_query, aql_rows, sql_rows)
            elif rc == 0:
                sys.stdout.write('d')
            elif rc != 0:
                sys.stdout.write('e')
            # Append the failing query and both row sets to the XML report.
            # NOTE(review): rc != 0 here, so the type attribute is always
            # 'aq_engine' -- confirm whether 'data' was meant to appear.
            queries_log += '<Query id="' + str(nb_checked) + '" status="error" type="' + ('data' if rc == 0 else 'aq_engine') + '">\n'
            queries_log += '<AQL>\n'
            queries_log += aql_query + '\n'
            queries_log += '</AQL>\n'
            queries_log += '<SQL>\n'
            queries_log += sql_query + '\n'
            queries_log += '</SQL>\n'
            queries_log += '<Results nb="' + str(len(aql_rows)) + '">\n'
            for row in aql_rows:
                queries_log += '<row>' + ','.join(str(v) for v in row) + '</row>\n'
            queries_log += '\n'
            queries_log += '</Results>\n'
            queries_log += '<Expected nb="' + str(len(sql_rows)) + '">\n'
            for row in sql_rows:
                # NOTE(review): expected rows use str(row) while actual rows
                # are comma-joined above -- the two formats differ.
                queries_log += '<row>' + str(row) + '</row>\n'
            queries_log += '\n'
            queries_log += '</Expected>\n'
            queries_log += '</Query>\n'
            if stop_on_failure:
                # Early exit returns a shorter tuple than the normal path.
                return (nb_checked, nb_error)
        else:
            # Success path: accumulate row counts and per-engine timings.
            sum_rows += len(sql_rows)
            sql_sum_time += sql_time
            aql_sum_time += aql_time
            if verbose:
                print_query(sys.stdout, 'query successfully checked', aql_query, sql_query, aql_rows, sql_rows)
            else:
                sys.stdout.write('.')
            # queries_log += '<query status="successful">\n'
            # queries_log += aql_query
            # queries_log += '</query>\n'
    return (nb_checked, nb_error, queries_log, sql_sum_time, aql_sum_time, sum_rows)
- Used to parse modelstring from NarfAnalysis - into a list of model names that can be passed back - to model selector """ import ast import re #for testing w/ actual model string import QueryGenerator as qg import DB_Connection as dbcon db = dbcon.DB_Connection() dbc = db.connection analysis = qg.QueryGenerator(dbc, tablename='NarfAnalysis', analysis='Jake Test').send_query() modstring = analysis['modeltree'][0] """ # for testing with simpler nested list that (hopefully) won't crash the universe modstring = "{'a','b', {{'c','d'},{ 'e','f' }}, 'g'}" # list of combos should end up as: # ['abceg','abcfg','abdeg','abdfg'] """ class ModelFinder(): def __init__(self, modelstring=''): self.modelstring = modelstring # as soon as modelstring is passed, go ahead and # parse into array then to list so that attribs can be retrieved