import json
import re
import sqlite3
from collections import Counter

# DB_CONN, FOLDS, MAX_RECOS, keywords, and the helpers used below
# (process, extract_types, process_tokens, process_obj_name, ASTBuilder,
# compute_euclidean_dist) are defined elsewhere in this module.


def run_query_for_prj(fold_no, query_text):
    conn = sqlite3.connect(DB_CONN)
    c = conn.cursor()
    query_info = json.loads(query_text)
    folder, filename = query_info["folder"], query_info["file"]
    print("Folder-name:" + folder, "File:" + filename)

    call_list = [call["tgt"] for call in query_info["calls"]]
    other_calls = ','.join(query_info["other_calls"])
    calls = ','.join(call_list)
    arg_types = ','.join(extract_types(query_info['context']))
    arg_values = ','.join(process_tokens(query_info['context']))
    obj_name = ','.join(process_obj_name(query_info['obj']))

    # Insert the example into every training fold except the held-out one.
    for fold in range(1, FOLDS + 1):
        if fold != fold_no:
            try:
                c.execute(
                    "INSERT INTO TRAINSET_{fold} (obj_type, obj_name, calls, "
                    "arg_types, arg_values, other_calls) "
                    "VALUES (?, ?, ?, ?, ?, ?)".format(fold=fold),
                    (query_info['type'], obj_name, calls,
                     arg_types, arg_values, other_calls))
            except sqlite3.OperationalError as msg:
                print(msg)
    conn.commit()  # persist the inserts before the connection goes away
    conn.close()
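
# run_query_for_prj consumes one JSON-serialized query at a time. Below is a
# minimal driver sketch, assuming the queries live one-per-line in a JSONL
# file; the helper name populate_folds and the file name "queries.jsonl" are
# illustrative assumptions, not part of this module.
def populate_folds(query_file, held_out_fold):
    """Feed each serialized query into every fold except the held-out one."""
    with open(query_file) as f:
        for line in f:
            if line.strip():
                run_query_for_prj(held_out_fold, line)

# Hypothetical usage: populate_folds("queries.jsonl", held_out_fold=1)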
def get_recos(query, fold_no, context_features, fname):
    recommendations = []
    df_graph = None
    source = [l for l in query.split('\n') if l != '']

    # Extract the query object from the last line of the snippet.
    last_line = process(source[-1])
    query_line = re.split(r'=|\(|\)|:|,|\s*', last_line[-1][:-1])
    query_obj = re.findall(r'([self|\w]+.*)', query_line[-1])[-1]
    query_obj = query_obj.replace('"', '\'')

    # Build a data-flow graph from the largest compilable prefix of the query:
    # walk the lines bottom-up, balancing brackets and closing any dangling
    # try-blocks or block headers until ASTBuilder succeeds.
    source = source[:-1] + [source[-1] + "query_method"]
    l = len(source)
    i = l
    try_stack = []
    parenthesis_stack = []
    is_last_loop = True
    count = 0
    while not df_graph:
        # Balance brackets: any opener without a matching closer on this line
        # gets closed at the end of the snippet.
        for c in source[i - 1][::-1]:
            if c in [')', '}', ']']:
                parenthesis_stack.append(c)
            elif c == '(':
                if not parenthesis_stack or parenthesis_stack[-1] != ')':
                    source[l - 1] = source[l - 1] + ')'
                else:
                    parenthesis_stack.pop()
            elif c == '{':
                if not parenthesis_stack or parenthesis_stack[-1] != '}':
                    source[l - 1] = source[l - 1] + '}'
                else:
                    parenthesis_stack.pop()
            elif c == '[':
                if not parenthesis_stack or parenthesis_stack[-1] != ']':
                    source[l - 1] = source[l - 1] + ']'
                else:
                    parenthesis_stack.pop()

        split_str = source[i - 1].split()
        if split_str and is_last_loop:
            if split_str[-1][-1] == ':':
                is_last_loop = False

        # Close a dangling try-block with "except: pass".
        if 'try:' in source[i - 1].strip() and i != l:
            pos = source[i - 1].find('try')
            indent_prefix = source[i - 1][:pos]
            if indent_prefix not in try_stack:
                source = source[:l]
                source.append(indent_prefix + 'except:')
                source.append(indent_prefix + '\t' + 'pass')
                try_stack.append(indent_prefix)
                l = l + 2  # two lines were appended
            else:
                try_stack.remove(indent_prefix)
        if ('except ' in source[i - 1] or 'except:' in source[i - 1]) \
                and i != l:
            pos = source[i - 1].find('except')
            indent_prefix = source[i - 1][:pos]
            if indent_prefix not in try_stack:
                try_stack.append(source[i - 1][:pos])

        if is_last_loop and len(split_str) > 1:
            # Complete a trailing conditional expression on the last line.
            if 'if' in split_str[1:] and i == l:
                source[i - 1] += " else ''"
            # Close an open block header (if/for/while/...) with "pass".
            for word in keywords:
                if word == split_str[0]:
                    pos = source[i - 1].find(word)
                    indent_prefix = source[i - 1][:pos]
                    if source[l - 1][-1] != ':':
                        source[l - 1] = source[l - 1] + ':'
                    source.append(indent_prefix + '\t' + 'pass')
                    if word == 'except':
                        try_stack.append(indent_prefix)
                    l = l + 1
                    is_last_loop = False
                    break

        df_graph = ASTBuilder('\n'.join(source[:l])).build_AST()
        i = i - 1
        count += 1
        if i == 0:
            break
        if count > 500:
            print(fname, "INFINITE LOOP")
            break
    print(df_graph)

    # Get the nearest neighbours by comparing feature-count vectors.
    if df_graph:
        query_obj_types = []
        query_obj_context = []
        calls = []
        other_calls = []
        sql_query = []
        assign_nodes, call_nodes = df_graph.find_definitions_and_calls(query_obj)
        print(call_nodes)
        if assign_nodes:
            for node in assign_nodes:
                print(node)
                query_obj_types.extend(node.src)
                if node.context:
                    print(node.context, context_features)
                    for feature in context_features:
                        if feature == 'arg_type':
                            sql_query.append('arg_types')
                            query_obj_context.extend(extract_types(node.context))
                        elif feature == 'arg_value':
                            sql_query.append('arg_values')
                            query_obj_context.extend(process_tokens(node.context))
                        elif feature == 'object_name':
                            sql_query.append('obj_name')
                            query_obj_context.extend(process_obj_name(node.tgt))
            # Split the object's own method calls from all other calls.
            for call_type in call_nodes:
                if call_type == 'object':
                    calls.extend(call_nodes[call_type])
                else:
                    other_calls.extend(call_nodes[call_type])

            sql_query.append('other_calls')
            sql_query.append('calls')
            query_count = Counter(calls + query_obj_context)
            conn = sqlite3.connect("pyty.db")
            c = conn.cursor()
            objects = []
            for obj_type in query_obj_types:
                sql_select = '''SELECT {attr} FROM TRAINSET_{fold}
                                WHERE obj_type=?'''.format(
                    attr=','.join(sql_query), fold=fold_no)
                results = c.execute(sql_select, (obj_type,))
                for obj in results:
                    # Fold every comma-separated feature column into one
                    # Counter; the last selected column is always "calls".
                    obj_count = Counter()
                    for col in obj:
                        if col:
                            obj_count += Counter(col.split(','))
                    obj_calls = obj[-1].split(',') if obj[-1] else ''
                    score = compute_euclidean_dist(query_count, obj_count)
                    objects.append((obj_calls, score))

            # Recommend the calls of the closest neighbours that the query
            # does not already make, most frequent first.
            objects = sorted(objects, key=lambda tup: tup[1])
            call_set = Counter()
            min_score = ''
            for obj_calls, score in objects:
                if min_score == '':
                    min_score = score
                if score == min_score:
                    call_set.update(Counter(obj_calls) - query_count)
                elif len(call_set) < MAX_RECOS:
                    min_score = score
                else:
                    break
            recommendations.extend(
                call for call, _ in call_set.most_common(MAX_RECOS))
    return recommendations
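
# compute_euclidean_dist is defined elsewhere in this module; from its call
# site above it compares two Counter feature vectors and returns a distance
# (lower means a closer neighbour, hence the ascending sort). A plausible
# sketch under that assumption -- note the section comment above mentions
# Manhattan distance while the helper's name says Euclidean:
import math

def compute_euclidean_dist_sketch(query_count, obj_count):
    # Euclidean distance over the union of features seen in either vector;
    # Counter returns 0 for missing keys, so absent features just contribute
    # the other side's squared count.
    features = set(query_count) | set(obj_count)
    return math.sqrt(sum((query_count[f] - obj_count[f]) ** 2
                         for f in features))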