def get_rec_point(file):
    """Scan *file* and, at every detected API-call site, append one feature
    row per candidate API (dataflow score, token-similarity score, line
    score, co-occurrence score) to the global ``csvdatas``/``csvlabels``
    buffers, rewriting ``datakfile``/``labelkfile`` after every row.

    NOTE(review): ``get_rec_point`` is bound three times in this file; this
    first definition is shadowed by the later ones and is effectively dead.
    NOTE(review): the source arrived whitespace-mangled (whole function
    collapsed onto a few physical lines); the indentation below is a
    best-effort reconstruction — confirm against version control.
    """
    print('DEAL-WITH:' + file)
    #with open('types/types.txt','a+') as ff:
    #ff.write('FILE:'+file)
    with open(file) as f:
        lines = f.readlines()
    #print(lines)
    precode = ''        # all source consumed so far = the incomplete context
    trynum = 0          # count of currently-open try: blocks
    trycache = []       # get_bank() results (indent info) for open try: blocks
    kflag = 0           # NOTE(review): never used below
    lno = 0             # 1-based number of the current line
    #s=''
    comment_flag = 0    # 1 while inside a triple-quoted block
    calls = []          # "caller.callee" pairs already handled (dedup)
    for line in lines:
        #print(line)
        lno += 1
        if line.strip().startswith('#'):
            continue
        # A line that is exactly a triple-quote toggles block-string state.
        if re.match('[bru]*\'\'\'$', line.strip()) or re.match(
                '[bru]*\"\"\"$', line.strip()):
            if comment_flag == 0:
                comment_flag = 1
            else:
                comment_flag = 0
            continue
        # A one-line triple-quoted string (opens and closes here): skip it.
        elif (re.match('[bru]*\'\'\'', line.strip())
              or re.match('[bru]*\"\"\"', line.strip())) and (
                  re.match('.*[bru]*\'\'\'$', line.strip())
                  or re.match('.*[bru]*\"\"\"$', line.strip())):
            continue
        # A line that opens or closes a triple-quoted block: toggle state.
        elif re.match('[bru]*\'\'\'', line.strip()) or re.match(
                '[bru]*\"\"\"', line.strip()) or re.match(
                    '.*[bru]*\'\'\'$', line.strip()) or re.match(
                        '.*[bru]*\"\"\"$', line.strip()):
            if comment_flag == 0:
                comment_flag = 1
            else:
                comment_flag = 0
            continue
        if comment_flag == 1:
            continue
        # Track open try: blocks so check_try() can close them later.
        if 'try:' in line:
            trynum += 1
            trycache.append(get_bank(line))
        elif trynum > 0 and ('except' in line or 'finally:' in line):
            (bank, lenth) = get_bank(line)
            for i in range(len(trycache) - 1, -1, -1):
                if trycache[i][1] == lenth:
                    trynum -= 1
                    del trycache[i]
        # A recommendation point is a `<expr>.<name>(...)` call on this line.
        recobj = re.findall('[a-zA-Z0-9_\.\[\]]+\.[a-zA-Z0-9\_]+\(.*\)', line)
        #print(recobj)
        if len(recobj) == 0:
            precode += line
            continue
        #print(file)
        #print(recobj)
        rec = recobj[0]
        caller = get_caller(rec)
        if caller.startswith('['):
            caller = caller[1:]
        callee, rcallee = get_callee(rec)
        # Skip private / ALL_CAPS / bare-underscore callees.
        if callee.startswith('_') or re.match('[A-Z0-9_]+$',
                                              callee) or callee.strip() == '_':
            precode += line
            continue
        cp = caller + '.' + callee
        if cp in calls:
            precode += line
            continue
        else:
            calls.append(cp)
        i = 0
        # Mask the real call so the context does not reveal the answer.
        latest_line = line.replace(rcallee, 'unknown_api()')
        #print('NOTE!',latest_line)
        tpp = precode.strip()
        if tpp.endswith(','):
            # Context ends mid-argument-list: drop the trailing comma.
            newcontext = tpp[:-1]
            finalc = check(newcontext)
            #print(finalc)
            current_context = finalc + '\n' + latest_line
            prelast = precode.strip().split('\n')[-1]
            for i in range(0, len(prelast)):
                if prelast[i] != ' ':
                    break
            finalc += '\n' + line[:i - 4] + 'reveal_type(' + caller + ')'
        elif tpp.endswith('(') or tpp.endswith('{') or tpp.endswith('['):
            # Context ends inside an open bracket.
            newcontext = tpp
            finalc = check(newcontext)
            current_context = finalc + '\n' + latest_line
            #print(finalc)
            prelast = precode.strip().split('\n')[-1]
            for i in range(0, len(prelast)):
                if prelast[i] != ' ':
                    break
            finalc += '\n' + line[:i] + 'reveal_type(' + caller + ')'
        else:
            # Context ends at a statement boundary: indent like current line.
            for i in range(0, len(line)):
                if line[i] != ' ':
                    break
            #print(i)
            #print(line)
            newcontext = tpp
            finalc = check(newcontext)
            finalc += '\n' + line[:i] + 'reveal_type(' + caller + ')'
            current_context = precode + latest_line
        if len(trycache) > 0:
            finalc = check_try(finalc, trycache)
        #print(finalc)
        #print('[Process[1] : Preprocessing # Getting reommendation point, simple type inference, possible API candidates and current incomplete code context.]')
        #print(file+'#'+str(lno)+'#'+caller+'#'+callee)
        #if '.' in caller:
        #ft='Any'
        #else:
        ft = get_type(finalc, file)
        ft = ft.strip()
        print(line.strip())
        print(file + '#' + str(lno) + '#' + caller + ':' + ft + '#' + callee)
        print(Nonenum, Anynum, OKnum)
        aps = []
        # Type inference failed: fall back to caller-name heuristics.
        if ft == 'None' or ft == 'Any':
            if caller == 'self':
                for d in all_defs:
                    dname = d.strip().split(' ')[1]
                    aps.append(dname)
            elif caller == 'str' or caller == 's' or caller == 'string':
                ft = 'str'
            elif caller == 'sys.stderr' or caller == 'sys.stdout' or caller == 'sys.stdin':
                ft = 'module'
            elif caller == 'log':
                ft = 'logging.Logger'
                caller = ft
            elif re.match('for .* in .*\..*\(.*\).*\:', line.strip()):
                aps = dir(dict)
                aps.append('iteritems')
            else:
                #tp=caller.split('.')
                #fc=tp[0]
                if '.' in caller:
                    xindex = caller.find('.')
                    fc = caller[:xindex]
                    xattr = caller[xindex + 1:]
                else:
                    xattr = caller
                    fc = caller
                print('check module:', fc)
                print('check attr:', xattr)
                if fc in stdlib:
                    ft = 'module'
                    print('stdlib!', fc)
                    #print('module!',caller)
                    try:
                        module1 = importlib.import_module(caller)
                        aps = dir(module1)
                    except Exception:
                        try:
                            module2 = importlib.import_module(fc)
                            attr = getattr(module2, xattr)
                            aps = dir(attr)
                        except Exception:
                            aps = []
                else:
                    # Mine candidates from the project's known-API list.
                    for curapi in cur_apis:
                        if '.' + caller + '.' in curapi:
                            idx = curapi.find('.' + caller + '.')
                            canapi = curapi[idx + 1:]
                            if not '.' in canapi:
                                aps.append(canapi)
                                print('get api form json!')
                                print(canapi)
        if len(aps) == 0:
            apis = get_candidates(ft, caller, file)
            for k, v in apis.items():
                aps.extend(v)
        if len(aps) == 0:
            precode += line
            continue
        '''
        global apirecpoints
        atag=0
        if file in apirecrets:
            if callee in apirecrets[file]:
                apirecpoints+=1
                atag=1
        '''
        global pranks, ptimes, pinranks
        if re.match('[A-Z]+[A-Za-z]+', callee) or callee.startswith('_'):
            print('CONSTRUCTOR,IGNORE')
            precode += line
            continue
        if callee in aps:
            print('API IV')
        else:
            # Out-of-vocabulary callee: record worst rank, remember the API.
            print('API OOV')
            pranks.append(100)
            global all_apis_add, all_apis
            all_apis_add.append(callee)
            tmpx = all_apis['all_apis']
            tmpx.extend(all_apis_add)
            tmpx = list(set(tmpx))
            all_apis['all_apis'] = tmpx
            ptimes.append(0.0)
            precode += line
            continue
        #ss=''
        #for ap in aps:
        #ss=ss+ap+','
        #ss=ss[:-1]+'\n'
        #s=caller+':'+ft+'#'+callee+'\n'
        #print('[Process[2] : Constructing dataflow hints.]')
        current_dataflow = get_dataflow.get_current_dataflow2(
            current_context, caller)
        #print(maxflow)
        if len(current_dataflow) == 0:
            precode += line
            continue
        maxflow = max(current_dataflow, key=len)
        #print(maxflow)
        dataflow_scores = get_dataflow.get_dataflow_scores(
            aps, maxflow, current_dataflow, ft, callee)
        tosim_scores = get_dataflow.get_tosim_scores(aps, maxflow,
                                                     current_dataflow, ft,
                                                     callee)
        try:
            # NOTE(review): callee is used as a regex pattern; names with
            # regex metacharacters could raise — hence the guard.
            naming_line = re.sub(callee, '', line)
        except Exception as err:
            print(err)
            print(line)
            sys.exit()
            # NOTE(review): unreachable after sys.exit().
            precode += line
            continue
        naming_context = precode
        line_scores = get_line_scores(aps, naming_line, naming_context, file)
        flag = 0
        label = 0
        global csvdatas, csvlabels
        if ft == 'None' or ft == 'Any' or ft == 'nothing':
            # Unknown type: the co-occurrence feature is unavailable → 0.0.
            flag = 0
            for api in aps:
                if api.startswith('__') or re.match('[A-Z0-9_]+$',
                                                    api) or api.strip() == '_':
                    continue
                if api == callee:
                    label = 1
                else:
                    label = 0
                try:
                    s = str(dataflow_scores[api]) + ',' + str(
                        tosim_scores[api]) + ',' + str(
                            line_scores[api]) + ',0.0\n'
                    csvdatas += s
                    csvlabels += str(label) + '\n'
                    with open(datakfile, 'w+') as f:
                        f.write(csvdatas)
                    with open(labelkfile, 'w+') as f:
                        f.write(csvlabels)
                except Exception as err:
                    print(err)
                    sys.exit(0)
                    # NOTE(review): dead after sys.exit(); flag/break look like
                    # the original skip-on-error path — confirm intent.
                    flag = 1
                    break
            if flag == 1:
                precode += line
                continue
        else:
            flag = 0
            conum_scores = get_conum_scores(aps, naming_context, file)
            for api in aps:
                if api.startswith('__') or re.match('[A-Z0-9_]+$',
                                                    api) or api.strip() == '_':
                    continue
                if api == callee:
                    label = 1
                else:
                    label = 0
                try:
                    s = str(dataflow_scores[api]) + ',' + str(
                        tosim_scores[api]) + ',' + str(
                            line_scores[api]) + ',' + str(
                                conum_scores[api]) + '\n'
                    #s=str(dataflow_scores[api])+','+str(line_scores[api])+','+str(conum_scores[api])+'\n'
                    csvdatas += s
                    csvlabels += str(label) + '\n'
                    with open(datakfile, 'w+') as f:
                        f.write(csvdatas)
                    with open(labelkfile, 'w+') as f:
                        f.write(csvlabels)
                except Exception as err:
                    print(err)
                    sys.exit(0)
                    # NOTE(review): dead after sys.exit() — see note above.
                    flag = 1
                    break
            if flag == 1:
                print('This is not a recommendation point.')
                precode += line
                continue
        # NOTE(review): on the success path `line` is never appended to
        # precode in this version — confirm whether that is intentional.
def get_rec_point(file):
    """Scan *file* for API-call sites and append one feature row per
    candidate API (dataflow, token-similarity, line, co-occurrence scores)
    to ``datakfile``/``labelkfile`` (opened in append mode).

    NOTE(review): second of three bindings of ``get_rec_point`` in this
    file; it shadows the first and is shadowed by the third.
    NOTE(review): indentation reconstructed from whitespace-mangled source;
    preprocessing mirrors the first definition — confirm against VCS.
    """
    with open(file) as f:
        lines = f.readlines()
    #print(lines)
    precode = ''        # all source consumed so far = the incomplete context
    trynum = 0          # count of currently-open try: blocks
    trycache = []       # get_bank() results for open try: blocks
    kflag = 0           # NOTE(review): never used below
    lno = 0             # 1-based number of the current line
    #s=''
    comment_flag = 0    # 1 while inside a triple-quoted block
    calls = []          # "caller.callee" pairs already handled (dedup)
    for line in lines:
        #print(line)
        lno += 1
        if line.strip().startswith('#'):
            continue
        # Triple-quote handling: toggle on open/close, skip one-liners.
        if re.match('[bru]*\'\'\'$', line.strip()) or re.match(
                '[bru]*\"\"\"$', line.strip()):
            if comment_flag == 0:
                comment_flag = 1
            else:
                comment_flag = 0
            continue
        elif (re.match('[bru]*\'\'\'', line.strip())
              or re.match('[bru]*\"\"\"', line.strip())) and (
                  re.match('.*[bru]*\'\'\'$', line.strip())
                  or re.match('.*[bru]*\"\"\"$', line.strip())):
            continue
        elif re.match('[bru]*\'\'\'', line.strip()) or re.match(
                '[bru]*\"\"\"', line.strip()) or re.match(
                    '.*[bru]*\'\'\'$', line.strip()) or re.match(
                        '.*[bru]*\"\"\"$', line.strip()):
            if comment_flag == 0:
                comment_flag = 1
            else:
                comment_flag = 0
            continue
        if comment_flag == 1:
            continue
        # Track open try: blocks so check_try() can close them later.
        if 'try:' in line:
            trynum += 1
            trycache.append(get_bank(line))
        elif trynum > 0 and ('except' in line or 'finally:' in line):
            (bank, lenth) = get_bank(line)
            for i in range(len(trycache) - 1, -1, -1):
                if trycache[i][1] == lenth:
                    trynum -= 1
                    del trycache[i]
        # A recommendation point is a `<expr>.<name>(...)` call on this line.
        recobj = re.findall('[a-zA-Z0-9_\.\[\]]+\.[a-zA-Z0-9\_]+\(.*\)', line)
        #print(recobj)
        if len(recobj) == 0:
            precode += line
            continue
        #print(file)
        #print(recobj)
        rec = recobj[0]
        caller = get_caller(rec)
        #print(caller)
        callee, rcallee = get_callee(rec)
        #print(callee)
        # Skip dunder / ALL_CAPS / bare-underscore callees.
        if callee.startswith('__') or re.match(
                '[A-Z0-9_]+$', callee) or callee.strip() == '_':
            precode += line
            continue
        cp = caller + '.' + callee
        if cp in calls:
            precode += line
            continue
        else:
            calls.append(cp)
        i = 0
        # Mask the real call so the context does not reveal the answer.
        latest_line = line.replace(rcallee, 'unknown_api()')
        #print('NOTE!',latest_line)
        tpp = precode.strip()
        if tpp.endswith(','):
            # Context ends mid-argument-list: drop the trailing comma.
            newcontext = tpp[:-1]
            finalc = check(newcontext)
            #print(finalc)
            current_context = finalc + '\n' + latest_line
            prelast = precode.strip().split('\n')[-1]
            for i in range(0, len(prelast)):
                if prelast[i] != ' ':
                    break
            finalc += '\n' + line[:i - 4] + 'reveal_type(' + caller + ')'
        elif tpp.endswith('(') or tpp.endswith('{') or tpp.endswith('['):
            # Context ends inside an open bracket.
            newcontext = tpp
            finalc = check(newcontext)
            current_context = finalc + '\n' + latest_line
            #print(finalc)
            prelast = precode.strip().split('\n')[-1]
            for i in range(0, len(prelast)):
                if prelast[i] != ' ':
                    break
            finalc += '\n' + line[:i] + 'reveal_type(' + caller + ')'
        else:
            # Context ends at a statement boundary: indent like current line.
            for i in range(0, len(line)):
                if line[i] != ' ':
                    break
            #print(i)
            #print(line)
            newcontext = tpp
            finalc = check(newcontext)
            finalc += '\n' + line[:i] + 'reveal_type(' + caller + ')'
            current_context = precode + latest_line
        if len(trycache) > 0:
            finalc = check_try(finalc, trycache)
        #print(finalc)
        #print('[Process[1] : Preprocessing # Getting reommendation point, simple type inference, possible API candidates and current incomplete code context.]')
        #print(file+'#'+str(lno)+'#'+caller+'#'+callee)
        #if '.' in caller:
        #ft='Any'
        #else:
        ft = get_type(finalc, file)
        ft = ft.strip()
        print(file + '#' + str(lno) + '#' + caller + ':' + ft + '#' + callee)
        print(Nonenum, Anynum, OKnum)
        #print(file+'#'+str(lno)+'#'+caller+':'+ft+'#'+callee)
        apis = get_candidates(ft, caller, file)
        aps = []
        for k, v in apis.items():
            aps.extend(v)
        if len(aps) == 0:
            precode += line
            continue
        #ss=''
        #for ap in aps:
        #ss=ss+ap+','
        #ss=ss[:-1]+'\n'
        #s=caller+':'+ft+'#'+callee+'\n'
        print(line.strip(), "#API:", callee, "#TYPE:", ft)
        if callee in aps:
            print('IOV')
        else:
            # NOTE(review): reconstructed indentation — it is ambiguous in the
            # mangled source whether the two aps statements below belong to
            # this else branch (OOV only) or run unconditionally; placed here
            # so the in-vocabulary case keeps the full candidate set. Confirm.
            print('OOV')
            aps = []
            aps.append(callee)
        #print(aps)
        #print('[Process[2] : Constructing dataflow hints.]')
        current_dataflow = get_dataflow.get_current_dataflow2(
            current_context, caller)
        #print(maxflow)
        if len(current_dataflow) == 0:
            precode += line
            continue
        maxflow = max(current_dataflow, key=len)
        #print(maxflow)
        #print(maxflow,current_dataflow)
        start = time.time()
        dataflow_scores = get_dataflow.get_dataflow_scores(
            aps, maxflow, current_dataflow, ft, callee)
        tosim_scores = get_dataflow.get_tosim_scores(aps, maxflow,
                                                     current_dataflow, ft,
                                                     callee)
        try:
            # Strip the ground-truth call text before naming-based scoring.
            naming_line = re.sub(rcallee, '', line)
        except Exception:
            precode += line
            continue
        #print(dataflow_scores)
        #print(tosim_scores)
        naming_context = precode
        line_scores = get_line_scores(aps, naming_line, naming_context, file)
        conum_scores = get_conum_scores(aps, naming_context, file)
        flag = 0
        datas = ''      # CSV feature rows for this recommendation point
        labels = ''     # matching '1'/'0' label per row ('1' = ground truth)
        for api in aps:
            if api.startswith('__') or re.match('[A-Z0-9_]+$',
                                                api) or api.strip() == '_':
                continue
            if api == callee:
                label = '1'
            else:
                label = '0'
            #apis.append(api)
            datas += str(dataflow_scores[api]) + ',' + str(
                tosim_scores[api]) + ',' + str(line_scores[api]) + ',' + str(
                    conum_scores[api]) + '\n'
            labels += label + '\n'
        with open(datakfile, 'a+') as f:
            f.write(datas)
        with open(labelkfile, 'a+') as f:
            f.write(labels)
        precode += line
def get_rec_point(file):
    """Scan *file* for API-call sites, build a per-candidate feature CSV
    (``testdata/test.csv``), score candidates with the pre-trained joblib
    classifier ``traincsv/<CURRENT_PROJ>1.pkl`` plus hand-written caller
    heuristics, and record the ground-truth API's rank in the global
    ``pranks``/``pinranks``/``ptimes`` lists.

    NOTE(review): third and final binding of ``get_rec_point`` — this is the
    definition callers actually get.  NOTE(review): indentation reconstructed
    from whitespace-mangled source; confirm against version control.
    """
    print('DEAL-WITH:' + file)
    #with open('types/types.txt','a+') as ff:
    #ff.write('FILE:'+file)
    with open(file) as f:
        lines = f.readlines()
    #print(lines)
    precode = ''        # all source consumed so far = the incomplete context
    trynum = 0          # count of currently-open try: blocks
    trycache = []       # get_bank() results for open try: blocks
    kflag = 0           # NOTE(review): never used below
    lno = 0             # 1-based number of the current line
    #s=''
    comment_flag = 0    # 1 while inside a triple-quoted block
    calls = []          # "caller.callee" pairs already handled (dedup)
    for line in lines:
        #print(line)
        lno += 1
        if line.strip().startswith('#'):
            continue
        # Triple-quote handling: toggle on open/close, skip one-liners.
        if re.match('[bru]*\'\'\'$', line.strip()) or re.match(
                '[bru]*\"\"\"$', line.strip()):
            if comment_flag == 0:
                comment_flag = 1
            else:
                comment_flag = 0
            continue
        elif (re.match('[bru]*\'\'\'', line.strip())
              or re.match('[bru]*\"\"\"', line.strip())) and (
                  re.match('.*[bru]*\'\'\'$', line.strip())
                  or re.match('.*[bru]*\"\"\"$', line.strip())):
            continue
        elif re.match('[bru]*\'\'\'', line.strip()) or re.match(
                '[bru]*\"\"\"', line.strip()) or re.match(
                    '.*[bru]*\'\'\'$', line.strip()) or re.match(
                        '.*[bru]*\"\"\"$', line.strip()):
            if comment_flag == 0:
                comment_flag = 1
            else:
                comment_flag = 0
            continue
        if comment_flag == 1:
            continue
        # Track open try: blocks so check_try() can close them later.
        if 'try:' in line:
            trynum += 1
            trycache.append(get_bank(line))
        elif trynum > 0 and ('except' in line or 'finally:' in line):
            (bank, lenth) = get_bank(line)
            for i in range(len(trycache) - 1, -1, -1):
                if trycache[i][1] == lenth:
                    trynum -= 1
                    del trycache[i]
        # A recommendation point is a `<expr>.<name>(...)` call on this line.
        recobj = re.findall('[a-zA-Z0-9_\.\[\]]+\.[a-zA-Z0-9\_]+\(.*\)', line)
        #print(recobj)
        if len(recobj) == 0:
            precode += line
            continue
        #print(file)
        #print(recobj)
        rec = recobj[0]
        caller = get_caller(rec)
        if caller.startswith('['):
            caller = caller[1:]
        callee, rcallee = get_callee(rec)
        # Skip private / ALL_CAPS / bare-underscore callees.
        if callee.startswith('_') or re.match('[A-Z0-9_]+$',
                                              callee) or callee.strip() == '_':
            precode += line
            continue
        cp = caller + '.' + callee
        if cp in calls:
            precode += line
            continue
        else:
            calls.append(cp)
        i = 0
        # Mask the real call so the context does not reveal the answer.
        latest_line = line.replace(rcallee, 'unknown_api()')
        #print('NOTE!',latest_line)
        tpp = precode.strip()
        if tpp.endswith(','):
            # Context ends mid-argument-list: drop the trailing comma.
            newcontext = tpp[:-1]
            finalc = check(newcontext)
            #print(finalc)
            current_context = finalc + '\n' + latest_line
            prelast = precode.strip().split('\n')[-1]
            for i in range(0, len(prelast)):
                if prelast[i] != ' ':
                    break
            finalc += '\n' + line[:i - 4] + 'reveal_type(' + caller + ')'
        elif tpp.endswith('(') or tpp.endswith('{') or tpp.endswith('['):
            # Context ends inside an open bracket.
            newcontext = tpp
            finalc = check(newcontext)
            current_context = finalc + '\n' + latest_line
            #print(finalc)
            prelast = precode.strip().split('\n')[-1]
            for i in range(0, len(prelast)):
                if prelast[i] != ' ':
                    break
            finalc += '\n' + line[:i] + 'reveal_type(' + caller + ')'
        else:
            # Context ends at a statement boundary: indent like current line.
            for i in range(0, len(line)):
                if line[i] != ' ':
                    break
            #print(i)
            #print(line)
            newcontext = tpp
            finalc = check(newcontext)
            finalc += '\n' + line[:i] + 'reveal_type(' + caller + ')'
            current_context = precode + latest_line
        if len(trycache) > 0:
            finalc = check_try(finalc, trycache)
        #print(finalc)
        #print('[Process[1] : Preprocessing # Getting reommendation point, simple type inference, possible API candidates and current incomplete code context.]')
        #print(file+'#'+str(lno)+'#'+caller+'#'+callee)
        #if '.' in caller:
        #ft='Any'
        #else:
        ft = get_type(finalc, file)
        ft = ft.strip()
        print(line.strip())
        print(file + '#' + str(lno) + '#' + caller + ':' + ft + '#' + callee)
        print(Nonenum, Anynum, OKnum)
        aps = []
        # Type inference failed: fall back to caller-name heuristics.
        if ft == 'None' or ft == 'Any':
            if caller == 'self':
                for d in all_defs:
                    dname = d.strip().split(' ')[1]
                    aps.append(dname)
            elif caller == 'str' or caller == 's' or caller == 'string':
                ft = 'str'
            elif caller == 'sys.stderr' or caller == 'sys.stdout' or caller == 'sys.stdin':
                ft = 'module'
            elif caller == 'log':
                ft = 'logging.Logger'
                caller = ft
            elif re.match('for .* in .*\..*\(.*\).*\:', line.strip()):
                aps = dir(dict)
                aps.append('iteritems')
            else:
                #tp=caller.split('.')
                #fc=tp[0]
                if '.' in caller:
                    xindex = caller.find('.')
                    fc = caller[:xindex]
                    xattr = caller[xindex + 1:]
                else:
                    xattr = caller
                    fc = caller
                print('check module:', fc)
                print('check attr:', xattr)
                if fc in stdlib:
                    ft = 'module'
                    print('stdlib!', fc)
                    #print('module!',caller)
                    try:
                        module1 = importlib.import_module(caller)
                        aps = dir(module1)
                    except Exception:
                        try:
                            module2 = importlib.import_module(fc)
                            attr = getattr(module2, xattr)
                            aps = dir(attr)
                        except Exception:
                            aps = []
                else:
                    # Mine candidates from the project's known-API list.
                    for curapi in cur_apis:
                        if '.' + caller + '.' in curapi:
                            idx = curapi.find('.' + caller + '.')
                            canapi = curapi[idx + 1:]
                            if not '.' in canapi:
                                aps.append(canapi)
                                print('get api form json!')
                                print(canapi)
        if len(aps) == 0:
            apis = get_candidates(ft, caller, file)
            for k, v in apis.items():
                aps.extend(v)
        if len(aps) == 0:
            precode += line
            continue
        global pranks, ptimes, pinranks
        if re.match('[A-Z]+[A-Za-z]+', callee) or callee.startswith('_'):
            print('CONSTRUCTOR,IGNORE')
            precode += line
            continue
        if callee in aps:
            print('API IV')
        else:
            # Out-of-vocabulary callee: record worst rank, remember the API.
            print('API OOV')
            pranks.append(100)
            global all_apis_add, all_apis
            all_apis_add.append(callee)
            tmpx = all_apis['all_apis']
            tmpx.extend(all_apis_add)
            tmpx = list(set(tmpx))
            all_apis['all_apis'] = tmpx
            ptimes.append(0.0)
            precode += line
            continue
        #ss=''
        #for ap in aps:
        #ss=ss+ap+','
        #ss=ss[:-1]+'\n'
        #s=caller+':'+ft+'#'+callee+'\n'
        #print('[Process[2] : Constructing dataflow hints.]')
        current_dataflow = get_dataflow.get_current_dataflow2(
            current_context, caller)
        #print(maxflow)
        if len(current_dataflow) == 0:
            precode += line
            continue
        maxflow = max(current_dataflow, key=len)
        #print(maxflow)
        dataflow_scores = get_dataflow.get_dataflow_scores(
            aps, maxflow, current_dataflow, ft, callee)
        tosim_scores = get_dataflow.get_tosim_scores(aps, maxflow,
                                                     current_dataflow, ft,
                                                     callee)
        try:
            # NOTE(review): callee is used as a regex pattern; names with
            # regex metacharacters could raise — hence the guard.
            naming_line = re.sub(callee, '', line)
        except Exception as err:
            print(err)
            print(line)
            sys.exit()
            # NOTE(review): unreachable after sys.exit().
            precode += line
            continue
        naming_context = precode
        line_scores = get_line_scores(aps, naming_line, naming_context, file)
        label = 0
        apis = []       # candidate order, parallel to test.csv rows
        with open('testdata/test.csv', 'w+') as f:
            f.write('f1,f2,f3,f4\n')
        start = time.time()
        if ft == 'None' or ft == 'Any' or ft == 'nothing':
            # Unknown type: the co-occurrence feature is unavailable → 0.0.
            for api in aps:
                if api.startswith('__') or re.match('[A-Z0-9_]+$',
                                                    api) or api.strip() == '_':
                    continue
                if api == callee:
                    label = 1
                else:
                    label = 0
                apis.append(api)
                try:
                    s = str(dataflow_scores[api]) + ',' + str(
                        tosim_scores[api]) + ',' + str(
                            line_scores[api]) + ',0.0\n'
                    with open('testdata/test.csv', 'a+') as f:
                        f.write(s)
                except Exception as err:
                    print(err)
                    sys.exit(0)
        else:
            flag = 0    # NOTE(review): never read after this point
            conum_scores = get_conum_scores(aps, naming_context, file)
            for api in aps:
                if api.startswith('__') or re.match('[A-Z0-9_]+$',
                                                    api) or api.strip() == '_':
                    continue
                if api == callee:
                    label = 1
                else:
                    label = 0
                apis.append(api)
                try:
                    s = str(dataflow_scores[api]) + ',' + str(
                        tosim_scores[api]) + ',' + str(
                            line_scores[api]) + ',' + str(
                                conum_scores[api]) + '\n'
                    with open('testdata/test.csv', 'a+') as f:
                        f.write(s)
                except Exception as err:
                    print(err)
                    sys.exit(0)
        test_data = pd.read_csv('testdata/test.csv')
        #print(apis)
        #print(len(apis))
        #print(test_data)
        # Score every candidate with the pre-trained per-project classifier.
        clf = joblib.load('traincsv/' + CURRENT_PROJ + '1.pkl')
        result = clf.predict_proba(test_data)
        candidates = {}
        for i in range(0, len(apis)):
            candidates[apis[i]] = result[i][1]
        # Hand-written priors: boost common APIs for well-known callers/types.
        if re.match('List\[.*\]', ft):
            candidates['append'] = 100.0
            candidates['extend'] = 99.0
        elif caller == 'json':
            candidates['loads'] = 100.0
            candidates['dumps'] = 99.0
            candidates['load'] = 98.0
            candidates['dump'] = 97.0
        elif ft == 'str' and caller.strip() != '':
            candidates['split'] = 100.0
        elif ft == 'str' and caller.strip() == '':
            candidates['join'] = 100.0
            candidates['format'] = 99.0
        elif caller == 're' and '=' in line:
            candidates['compile'] = 100.0
        elif caller == 're' and 'if' in line:
            candidates['match'] = 100.0
        elif caller == 'os.environ':
            candidates['get'] = 100.0
        elif caller == 'warnings':
            candidates['warn'] = 100.0
        elif caller == 'time' and '=' in line:
            candidates['time'] = 100.0
            candidates['sleep'] = 99.0
        elif caller == 'time':
            candidates['time'] = 99.0
            candidates['sleep'] = 100.0
        elif caller == 'copy':
            candidates['deepcopy'] = 100.0
            candidates['copy'] = 99.0
        elif re.match('.*log.*=.*logging\..*', line):
            candidates['getLogger'] = 100.0
        cans = sorted(candidates.items(), key=lambda x: x[1], reverse=True)
        #print(cans)
        end = time.time()
        ts = end - start
        #times.append(ts)
        rank = 21       # sentinel: "not found in top 20"
        for k in range(0, len(cans)):
            if cans[k][0] == callee:
                rank = k + 1
        #print('Ranked '+str(rank))
        if rank > 20:
            pranks.append(rank)
            #if atag==1:
            #aranks.append(rank)
            # Record: PRIAN cannot recommend, jump to next recommendation.
        else:
            # PRIAN successfully recommends.
            pranks.append(rank)
            #if atag==1:
            #aranks.append(rank)
            ptimes.append(ts)
            #alltimes+=ts+'\n'
            pinranks.append(rank)
        precode += line
    # NOTE(review): placement reconstructed — these report calls appear to
    # run once per file, after the scan loop; confirm intended nesting.
    get_results(pinranks)
    get_results(pranks)