def testTime(): sqlite_file = "/home/dhvani/python-sources.sqlite3" conn = sqlite3.connect(sqlite_file) c = conn.cursor() print "Success Connection to database..." c.execute( "SELECT source FROM source_file INNER JOIN eligible_source ON source_file.hash = eligible_source.hash" ) print "Executed SELECT..." print "Fetching all rows..." all_rows = c.fetchmany(size=10)[0] print all_rows[0] #for row in all_rows: #row = all_rows[0] #print row[0], str(row[1]).encode('hex') print "Got Em..." print "Running PyPy test..." tokenize.tokenize(StringIO.StringIO(all_rows[0]).readline, handle_token) toTest = checkPyPySyntax(all_rows[0]) print "DONE" if toTest == None: #output = '' #print output print "SUCCESS" else: print "Try again..."
def test_syntax_error(self):
    """Check that checkPyPySyntax reports the expected SyntaxError fields
    for the ERROR_TEST sample.

    NOTE(review): an identical method named test_syntax_error is defined
    again later in this class; the later definition shadows this one, so
    this copy never executes — confirm and delete one of the two.
    """
    toTest = checkPyPySyntax(ERROR_TEST)
    # Expect exactly one CompileError describing the unmatched ')' sample.
    self.assertTrue(isinstance(toTest[0], CompileError))
    self.assertEqual(toTest[0].filename, 'toCheck.py'.encode())
    self.assertEqual(toTest[0].line, 1)
    self.assertEqual(toTest[0].column, None)
    self.assertEqual(toTest[0].functionname, None)
    self.assertEqual(toTest[0].text, 'unmatched \')\':if(true)):'.encode())
    self.assertEqual(toTest[0].errorname, 'SyntaxError'.encode())
def test_syntax_error(self):
    """checkPyPySyntax(ERROR_TEST) yields one CompileError whose fields
    match the known unmatched-')' failure."""
    errors = checkPyPySyntax(ERROR_TEST)
    first = errors[0]
    self.assertTrue(isinstance(first, CompileError))
    self.assertEqual(first.filename, 'toCheck.py'.encode())
    self.assertEqual(first.line, 1)
    self.assertEqual(first.column, None)
    self.assertEqual(first.functionname, None)
    self.assertEqual(first.text, 'unmatched \')\':if(true)):'.encode())
    self.assertEqual(first.errorname, 'SyntaxError'.encode())
def testTime():
    """Manual smoke test: pull one eligible source file from the SQLite
    corpus, verify it parses, then tokenize/vocabularize it and run an
    insertion mutation on it.

    NOTE(review): this redefines testTime() from earlier in the file; only
    this later definition is live. The sqlite connection is never closed.
    """
    sqlite_file = "/home/dhvani/python-sources.sqlite3"
    conn = sqlite3.connect(sqlite_file)
    c = conn.cursor()
    print "Success Connection to database..."
    c.execute("SELECT source FROM source_file INNER JOIN eligible_source ON source_file.hash = eligible_source.hash")
    print "Executed SELECT..."
    print "Fetching all rows..."
    # fetchmany(size=10)[0] keeps only the FIRST fetched row (a 1-tuple).
    all_rows = c.fetchmany(size=10)[0]
    print all_rows[0]
    #for row in all_rows:
    #row = all_rows[0]
    #print row[0], str(row[1]).encode('hex')
    print "Got Em..."
    print "Running PyPy test..."
    toTest = checkPyPySyntax(all_rows[0])
    print "DONE"
    if toTest == None:  # source parses cleanly -> safe to mutate
        #output = ''
        #print output
        # handle_token fills token lists as a side effect of tokenizing.
        tokenStream = tokenize.tokenize(StringIO.StringIO(all_rows[0]).readline, handle_token)
        #print all_tokens
        # NOTE(review): `all_tokens` is not defined locally; presumably the
        # module-level global populated by handle_token — confirm.
        vocabularize_tokens(all_tokens)
        raw_tokens = tokenize.generate_tokens(StringIO.StringIO(all_rows[0]).readline)
        raw_tokens_pass = []
        # TO DO: REMOVE COMMENTS FROM SOURCE FOR MUTATIONS
        # for token in raw_tokens:
        #     print token
        #     if token[0] != 53:
        #         raw_tokens_pass.append(list(token))
        # source_code = tokenize.untokenize(raw_tokens_pass)
        insertMut(all_rows[0])
        print "SUCCESS"
    else:
        print "Try again..."
def insertTokMutS(raw_tokens, all_tokens, raw_text):
    """Token-INSERTION mutation, batched variant.

    For roughly every 10 tokens of `all_tokens`, pick a random anchor token
    in that window and insert a random vocabulary token next to it in
    `raw_text`. A mutation is kept only when checkPyPySyntax reports an
    error (i.e. the insertion actually broke the syntax); otherwise the
    window is retried, giving up after 10 consecutive misses.

    Parameters:
      raw_tokens  -- raw 5-tuples from tokenize.generate_tokens
      all_tokens  -- project Token objects (with .line / .value / .type)
      raw_text    -- original source text to mutate
    Returns:
      (new_text, NO_TOKEN, INSERTION, out_tokens_loc, chosens, insToks)
      where chosens are the anchor tokens, insToks the inserted tokens, and
      NO_TOKEN / INSERTION are module-level flag constants.
    """
    new_text = raw_text
    # Vocabulary of candidate tokens to insert.
    with open('vocabulary_mutate.json') as data_file:
        data = json.load(data_file)
    out_tokens_loc = []
    raw_tokens_pass = []
    actual_token_len = []
    orig = []
    for token in all_tokens:
        token_use = token
        #orig.append(token_use)
        actual_token_len.append(token_use)
    for token in raw_tokens:
        token_use = token
        orig.append(token_use)
        raw_tokens_pass.append(token_use)
    num_lines = len(actual_token_len)
    num_encode = len(orig)
    # NOTE(review): both branches compute the same value; the sibling
    # insertTokMut uses int(num_lines / 10) + 1 — confirm which is intended.
    if (num_lines % 10 == 0):
        numTokensNeeded = int((num_lines / 10))
    else:
        numTokensNeeded = int((num_lines / 10))
    insToks = []
    chosens = []
    haha = -1       # end index of the previous 10-token window
    radOut = 0      # index of the current window
    curr = 0        # consecutive failed attempts for the current window
    while radOut < numTokensNeeded:
        chosenInd = randint(0, 84)
        chosenToken = data["indexes_m"][chosenInd]
        # handle_token fills the module-global `new_token` list as a side
        # effect of tokenizing the chosen vocabulary string.
        global new_token
        new_token = []
        try:
            toksG = tokenize.tokenize(
                StringIO.StringIO(chosenToken).readline, handle_token)
        except tokenize.TokenError:
            pass
        insEdTok = new_token[0]
        insTok = insEdTok
        insToks.append(insTok)
        # Choose the anchor index inside the current window.
        if radOut == (numTokensNeeded - 1):
            param_start = haha
            param_end = num_lines - 1
        else:
            param_start = radOut * 10
            param_end = param_start + 9
        haha = param_end
        chosenLineInd = randint(param_start, param_end)  #num_lines-1
        chosen = actual_token_len[chosenLineInd]
        #chosen = Token(tokenize.tok_name[raw_tokens_pass[chosenLineInd][0]], raw_tokens_pass[chosenLineInd][1], raw_tokens_pass[chosenLineInd][2][0], raw_tokens_pass[chosenLineInd][2][1], raw_tokens_pass[chosenLineInd][3][0], raw_tokens_pass[chosenLineInd][3][1], raw_tokens_pass[chosenLineInd][4])
        chosens.append(chosen)
        source_code = raw_text
        # Locate the anchor token's source line, then the anchor inside it.
        toAddBeforeInd = source_code.index(
            actual_token_len[chosenLineInd].line)
        temp = source_code[toAddBeforeInd:toAddBeforeInd +
                           len(actual_token_len[chosenLineInd].line)]
        shotInd = temp.index(raw_tokens_pass[chosenLineInd][1])
        change = temp.strip()
        check = temp.index(change)  # leading-whitespace width of the line
        if shotInd + 1 == len(temp):
            shotInd = shotInd - 1
        actual_target_ind = toAddBeforeInd + shotInd
        # Split the source immediately after the anchor token.
        before = source_code[:actual_target_ind +
                             len(raw_tokens_pass[chosenLineInd][1])]
        after = source_code[actual_target_ind +
                            len(raw_tokens_pass[chosenLineInd][1]):]
        # NOTE(review): ids 53/54 look like Python 2 tokenize.COMMENT /
        # tokenize.NL and 4 == NEWLINE — confirm against the runtime.
        if raw_tokens_pass[chosenLineInd][0] == 53:
            chosenToken = '\n' + chosenToken
        if shotInd == 0:
            if raw_tokens_pass[chosenLineInd][0] == 4:
                new_text = before + chosenToken.encode() + after
            else:
                new_text = before + ' ' + chosenToken.encode() + ' ' + after
        else:
            if raw_tokens_pass[chosenLineInd][0] == 54:
                new_text = before + chosenToken.encode() + after
            elif chosenInd == data["indexes_m"].index('\n'):
                # Re-indent after an inserted newline to the line's indent.
                if after[0] == ' ':
                    space = ' ' * (check - 1)
                else:
                    space = ' ' * (check)
                new_text = before + chosenToken.encode() + space + after
            else:
                new_text = before + ' ' + chosenToken.encode() + ' ' + after
        toTest = checkPyPySyntax(new_text)
        print radOut
        if toTest == None:
            # Still valid syntax: insertion failed to break the code.
            # Retry this window, but give up after 10 consecutive misses.
            curr = curr + 1
            if curr > 10:
                radOut = radOut + 1
            else:
                radOut = radOut
            insToks.remove(insTok)
            chosens.remove(chosen)
        else:
            # Syntax broken: mutation accepted, advance to the next window.
            curr = 0
            radOut = radOut + 1
    '''
    print "Overthink"
    print len(orig)
    print numTokensNeeded
    print len(insToks)
    print insToks[0].value
    print len(chosens)
    print chosens[0].value
    print "relax"
    '''
    return new_text, NO_TOKEN, INSERTION, out_tokens_loc, chosens, insToks
def insertTokMut(raw_tokens, raw_text):
    """Token-INSERTION mutation over raw tokenize 5-tuples.

    Filters out structural tokens as anchors (ids 5/6/4/0 — NOTE(review):
    assumed to be INDENT/DEDENT/NEWLINE/ENDMARKER from the Python 2 token
    module, confirm), then for every ~10 remaining tokens inserts a random
    vocabulary token into `raw_text` next to a randomly chosen anchor.

    Returns one of two shapes:
      (2, raw_tokens_pass, raw_text, actual_token_len, chosens, insToks)
        when the final text is still syntactically valid.
        NOTE(review): `actual_token_len` is never defined in this function,
        so this path raises NameError — confirm intended value.
      (new_text, NO_TOKEN, INSERTION, out_tokens_loc, chosens, insToks)
        when the mutation produced a syntax error (the success case).
    """
    with open('vocabulary_mutate.json') as data_file:
        data = json.load(data_file)
    out_tokens_loc = []
    raw_tokens_pass = []
    orig = []
    for token in raw_tokens:
        token_use = token
        orig.append(token_use)
        # Keep only tokens eligible to serve as insertion anchors.
        if token[0] != 5:
            if token[0] != 6:
                if token[0] != 4:
                    if token[0] != 0:
                        raw_tokens_pass.append(token_use)
    num_lines = len(raw_tokens_pass)
    num_encode = len(orig)
    numTokensNeeded = int((num_lines / 10)) + 1
    insToks = []
    chosens = []
    for rad in range(numTokensNeeded):
        chosenInd = randint(0, 84)
        chosenToken = data["indexes_m"][chosenInd]
        # handle_token fills the module-global `new_token` via side effect.
        global new_token
        new_token = []
        try:
            toksG = tokenize.tokenize(
                StringIO.StringIO(chosenToken).readline, handle_token)
        except tokenize.TokenError:
            pass
        insEdTok = new_token[0]
        insTok = insEdTok
        insToks.append(insTok)
        if rad == (numTokensNeeded - 1):
            # NOTE(review): when numTokensNeeded == 1 this runs on the first
            # iteration with `param_end` still unbound -> NameError. Confirm.
            param_start = param_end
            param_end = num_lines - 1
        else:
            param_start = rad * 10
            param_end = param_start + 9
        chosenLineInd = randint(param_start, param_end)  #num_lines-1
        chosen = Token(tokenize.tok_name[raw_tokens_pass[chosenLineInd][0]],
                       raw_tokens_pass[chosenLineInd][1],
                       raw_tokens_pass[chosenLineInd][2][0],
                       raw_tokens_pass[chosenLineInd][2][1],
                       raw_tokens_pass[chosenLineInd][3][0],
                       raw_tokens_pass[chosenLineInd][3][1],
                       raw_tokens_pass[chosenLineInd][4])
        chosens.append(chosen)
        '''
        print "Overthink"
        print len(orig)
        print numTokensNeeded
        print len(insToks)
        print insToks[0].value
        print len(chosens)
        print chosens[0].value
        print "relax"
        '''
        # Map the filtered anchor index back to its index in `orig`.
        chosenTrueLineInd = -1
        indI = 0
        for x in orig:
            if raw_tokens_pass[chosenLineInd] == x:
                chosenTrueLineInd = indI
                break
            indI = indI + 1
        # Build a '0'/'1' location vector (length 2*len(orig)+1) marking the
        # slot just after the anchor token with '1'.
        toIter = num_encode + (num_encode + 1)
        for _ in range(toIter):
            out_tokens_loc.extend('0')
        lenD = len(out_tokens_loc)
        for indI in range(toIter):
            indLook = ((chosenTrueLineInd) * 2) + 1
            if indI == indLook + 1:
                out_tokens_loc[indI] = ('1')
        source_code = raw_text
        # Locate the anchor's source line, then the anchor inside it.
        toAddBeforeInd = source_code.index(raw_tokens_pass[chosenLineInd][4])
        temp = source_code[toAddBeforeInd:toAddBeforeInd +
                           len(raw_tokens_pass[chosenLineInd][4])]
        shotInd = temp.index(raw_tokens_pass[chosenLineInd][1])
        change = temp.strip()
        check = temp.index(change)  # indentation width of the line
        if shotInd + 1 == len(temp):
            shotInd = shotInd - 1
        actual_target_ind = toAddBeforeInd + shotInd
        before = source_code[:actual_target_ind +
                             len(raw_tokens_pass[chosenLineInd][1])]
        after = source_code[actual_target_ind +
                            len(raw_tokens_pass[chosenLineInd][1]):]
        if shotInd == 0:
            if raw_tokens_pass[chosenLineInd][0] == 4:
                new_text = before + chosenToken.encode() + after
            else:
                new_text = before + ' ' + chosenToken.encode() + ' ' + after
        else:
            if raw_tokens_pass[chosenLineInd][0] == 54:
                new_text = before + chosenToken.encode() + after
            elif chosenInd == data["indexes_m"].index('\n'):
                # Re-indent after an inserted newline to the line's indent.
                if after[0] == ' ':
                    space = ' ' * (check - 1)
                else:
                    space = ' ' * (check)
                new_text = before + chosenToken.encode() + space + after
            else:
                new_text = before + ' ' + chosenToken.encode() + ' ' + after
        toTest = checkPyPySyntax(new_text)
        if toTest == None:
            # Mutation did not break the syntax; discard it.
            print rad
            # NOTE(review): decrementing the range() loop variable has no
            # effect on iteration, so this attempt is NOT retried — confirm.
            rad = rad - 1
            print rad
            insToks.remove(insTok)
            chosens.remove(chosen)
            print "test_t"
    toTest = checkPyPySyntax(new_text)
    if toTest == None:
        lenR = 2
        lenK = 2
        return lenR, raw_tokens_pass, raw_text, actual_token_len, chosens, insToks
    else:
        return new_text, NO_TOKEN, INSERTION, out_tokens_loc, chosens, insToks
def subTokMutS(raw_tokens, all_tokens, raw_text):
    """Token-SUBSTITUTION mutation, batched variant.

    Partitions the eligible token indices of `all_tokens` into windows of 10,
    then in each window replaces one randomly chosen token in `raw_text` with
    a random vocabulary token. A substitution is kept only when the result
    fails checkPyPySyntax; otherwise the window is retried (up to 10
    consecutive misses before moving on).

    Returns (new_text, YES_TOKEN, SUBSTITUTION, chosens, fixToks, insToks)
    where fixToks are the replaced tokens and insToks their replacements.
    NOTE(review): `out_tokens_loc` is built but never used here.
    """
    new_text = raw_text
    with open('vocabulary_mutate.json') as data_file:
        data = json.load(data_file)
    out_tokens_loc = []
    raw_tokens_pass = []
    actual_token_len = []
    orig = []
    for token in all_tokens:
        token_use = token
        #orig.append(token_use)
        actual_token_len.append(token_use)
    for token in raw_tokens:
        token_use = token
        orig.append(token_use)
        raw_tokens_pass.append(token_use)
    num_lines = len(actual_token_len)
    num_encode = len(orig)
    # NOTE(review): both branches identical — see insertTokMutS.
    if (num_lines % 10 == 0):
        numTokensNeeded = int((num_lines / 10))
    else:
        numTokensNeeded = int((num_lines / 10))
    insToks = []
    fixToks = []
    chosens = []
    # Collect indices of tokens that are substitutable (skip purely
    # structural token types).
    inds = []
    for i in actual_token_len:
        if i.type != 'COMMENT':
            if i.type != 'INDENT':
                if i.type != 'DEDENT':
                    if i.type != 'NEWLINE':
                        if i.type != 'NL':
                            if i.type != 'ENDMARKER':
                                inds.append(actual_token_len.index(i))
    # Bucket the eligible indices into windows of 10.
    allInds = []
    for nah in range(numTokensNeeded+1):
        temp = []
        for nahHoi in range(len(inds)):
            if nah != 0:
                flag = nah * 10
                pastFlag = (nah-1)*10
                if pastFlag < inds[nahHoi] <= flag:
                    temp.append(inds[nahHoi])
        if len(temp) != 0:
            allInds.append(temp)
    curr = 0        # consecutive failed attempts in the current window
    new_text = ''
    haha = -1
    radOut = 0      # current window index
    while radOut < len(allInds):
        if radOut == (numTokensNeeded-1):
            param_start = haha
            param_end = num_lines-1
        else:
            param_start = radOut * 10
            param_end = param_start + 9
        haha = param_end
        toChooseArr = allInds[radOut]
        chosenLineIndTemp = randint(0, len(toChooseArr)-1)  #num_lines-1
        chosenLineInd = toChooseArr[chosenLineIndTemp]
        chosens.append(chosenLineInd)
        source_code = raw_text
        send = actual_token_len[chosenLineInd]  # token being replaced
        fixToks.append(send)
        chosenInd = randint(0,84)
        chosenToken = data["indexes_m"][chosenInd]
        # handle_token fills the module-global `new_token` via side effect.
        global new_token
        new_token = []
        try:
            toksG = tokenize.tokenize(StringIO.StringIO(chosenToken).readline, handle_token)
        except tokenize.TokenError:
            pass
        insEdTok = new_token[0]
        insTok = insEdTok
        insToks.append(insTok)
        # Locate the target token's source line, then the token inside it.
        indexToRemove = source_code.index(actual_token_len[chosenLineInd].line)
        temp = source_code[indexToRemove:indexToRemove+len(actual_token_len[chosenLineInd].line)+1]
        change = temp.strip()
        check = change.find(raw_tokens_pass[chosenLineInd][1])
        shotInd = temp.index(raw_tokens_pass[chosenLineInd][1])
        # NOTE(review): `change`/`check` are immediately recomputed below;
        # `check` becomes the line's leading-whitespace width.
        change = temp.strip()
        check = temp.index(change)
        actual_target_ind = indexToRemove + shotInd
        # If the token IS the whole stripped line, cut from line start.
        if check == 0 and len(raw_tokens_pass[chosenLineInd][1]) == len(change):
            before = source_code[:indexToRemove]
        else:
            before = source_code[:actual_target_ind]
        after = source_code[actual_target_ind+len(raw_tokens_pass[chosenLineInd][1]):]
        if check == 0:
            # NOTE(review): all three branches below build the same string.
            if len(after) > 0:
                if after[0] == ' ':
                    new_text = before + chosenToken.encode() + after
                else:
                    new_text = before + chosenToken.encode() + after
            else:
                new_text = before + chosenToken.encode() + after
        else:
            if chosenInd == data["indexes_m"].index('\n'):
                # Re-indent after a substituted newline.
                if after[0] == ' ':
                    space = ' ' * (check-1)
                else:
                    space = ' ' * (check)
                new_text = before + chosenToken.encode() + space + after
            else:
                new_text = before + chosenToken.encode() + after
        toTest = checkPyPySyntax(new_text)
        if toTest == None:
            # Still valid syntax: substitution failed, retry this window
            # (give up after 10 consecutive misses).
            curr = curr + 1
            if curr > 10:
                radOut = radOut + 1
            else:
                radOut = radOut
            fixToks.remove(send)
            chosens.remove(chosenLineInd)
            insToks.remove(insTok)
        else:
            # Syntax broken: substitution accepted.
            curr = 0
            radOut = radOut + 1
    return new_text, YES_TOKEN, SUBSTITUTION, chosens, fixToks, insToks
def subTokMut(raw_tokens, raw_text):
    """Token-SUBSTITUTION mutation: replace ONE randomly chosen eligible
    token in `raw_text` with a random vocabulary token.

    Returns one of two shapes:
      (2, raw_tokens_pass, raw_text, 2, send, insTokS)
        when the mutated text is still syntactically valid (failure case —
        callers appear to retry on an int first element).
      (new_text, YES_TOKEN, SUBSTITUTION, out_tokens_loc, send, insTokS)
        when the substitution produced a syntax error; `out_tokens_loc` is
        a '0'/'1' vector marking the mutated token slot, `send` the replaced
        token, `insTokS` the replacement.
    """
    with open('vocabulary_mutate.json') as data_file:
        data = json.load(data_file)
    chosenInd = randint(0,84)
    chosenToken = data["indexes_m"][chosenInd]
    # handle_token fills the module-global `new_token` via side effect.
    global new_token
    new_token = []
    try:
        toksG = tokenize.tokenize(StringIO.StringIO(chosenToken).readline, handle_token)
    except tokenize.TokenError:
        pass
    insEdTok = new_token[0]
    insTokS = insEdTok
    raw_tokens_pass = []
    out_tokens_loc = []
    orig = []
    for token in raw_tokens:
        token_use = token
        orig.append(token_use)
        # Skip structural tokens as substitution targets (ids assumed from
        # the Python 2 token module: 5/6 INDENT/DEDENT, 4 NEWLINE,
        # 54 NL, 53 COMMENT, 0 ENDMARKER — NOTE(review): confirm).
        if token[0] != 5:
            if token[0] != 6:
                if token[0] != 4:
                    if token[0] != 54:
                        if token[0] != 0:
                            if token[0] != 53:
                                raw_tokens_pass.append(token_use)
    num_lines = len(raw_tokens_pass)
    num_encode = len(orig)
    chosenLineInd = randint(0,num_lines-1)  # num_lines-1
    # Map the filtered target index back to its index in `orig`.
    chosenTrueLineInd = -1
    indI = 0
    for x in orig:
        if raw_tokens_pass[chosenLineInd] == x:
            chosenTrueLineInd = indI
            break
        indI = indI + 1
    # Build the '0'/'1' location vector (length 2*len(orig)+1).
    toIter = num_encode + (num_encode+1)
    for _ in range(toIter):
        out_tokens_loc.extend('0')
    lenD = len(out_tokens_loc)
    for indI in range(toIter):
        indLook = ((chosenTrueLineInd) * 2) + 1
        if indI == indLook:
            out_tokens_loc[indI] = ('1')
    source_code = raw_text
    # Project Token record describing the token being replaced.
    send = Token(tokenize.tok_name[raw_tokens_pass[chosenLineInd][0]],
                 raw_tokens_pass[chosenLineInd][1],
                 raw_tokens_pass[chosenLineInd][2][0],
                 raw_tokens_pass[chosenLineInd][2][1],
                 raw_tokens_pass[chosenLineInd][3][0],
                 raw_tokens_pass[chosenLineInd][3][1],
                 raw_tokens_pass[chosenLineInd][4])
    # Locate the target token's source line, then the token inside it.
    indexToRemove = source_code.index(raw_tokens_pass[chosenLineInd][4])
    temp = source_code[indexToRemove:indexToRemove+len(raw_tokens_pass[chosenLineInd][4])+1]
    change = temp.strip()
    check = change.find(raw_tokens_pass[chosenLineInd][1])
    shotInd = temp.index(raw_tokens_pass[chosenLineInd][1])
    # NOTE(review): recomputed — `check` becomes the indentation width.
    change = temp.strip()
    check = temp.index(change)
    actual_target_ind = indexToRemove + shotInd
    # If the token IS the whole stripped line, cut from the line start.
    if check == 0 and len(raw_tokens_pass[chosenLineInd][1]) == len(change):
        before = source_code[:indexToRemove]
    else:
        before = source_code[:actual_target_ind]
    after = source_code[actual_target_ind+len(raw_tokens_pass[chosenLineInd][1]):]
    if check == 0:
        # NOTE(review): all three branches below build the same string.
        if len(after) > 0:
            if after[0] == ' ':
                new_text = before + chosenToken.encode() + after
            else:
                new_text = before + chosenToken.encode() + after
        else:
            new_text = before + chosenToken.encode() + after
    else:
        if chosenInd == data["indexes_m"].index('\n'):
            # Re-indent after a substituted newline.
            if after[0] == ' ':
                space = ' ' * (check-1)
            else:
                space = ' ' * (check)
            new_text = before + chosenToken.encode() + space + after
        else:
            new_text = before + chosenToken.encode() + after
    toTest = checkPyPySyntax(new_text)
    if toTest == None:
        # Substitution failed to break the syntax.
        lenR = 2
        lenK = 2
        return lenR, raw_tokens_pass, raw_text, lenK, send, insTokS
    else:
        return new_text, YES_TOKEN, SUBSTITUTION, out_tokens_loc, send, insTokS
def deleteTokMutS(raw_tokens, all_tokens, raw_text):
    """Token-DELETION mutation, batched variant.

    Partitions the eligible token indices of `all_tokens` into windows of
    10, then deletes one randomly chosen token per window from `raw_text`.
    A deletion is kept only when the result fails checkPyPySyntax;
    otherwise the window is retried (up to 10 consecutive misses).

    Returns (new_text, YES_TOKEN, DELETION, chosens, fixToks) where chosens
    are the deleted indices and fixToks the deleted Token objects.
    """
    #out_tokens_loc = []
    raw_tokens_pass = []
    orig = []
    actual_token_len = []
    for token in all_tokens:
        token_use = token
        #orig.append(token_use)
        actual_token_len.append(token_use)
    for token in raw_tokens:
        token_use = token
        orig.append(token_use)
        raw_tokens_pass.append(token_use)
    num_lines = len(actual_token_len)
    num_encode = len(orig)
    # NOTE(review): both branches identical — see insertTokMutS.
    if (num_lines % 10 == 0):
        numTokensNeeded = int((num_lines / 10))
    else:
        numTokensNeeded = int((num_lines / 10))
    fixToks = []
    chosens = []
    haha = -1
    radOut = 0
    # Collect indices of deletable tokens (skip structural token types).
    inds = []
    for i in actual_token_len:
        if i.type != 'COMMENT':
            if i.type != 'INDENT':
                if i.type != 'DEDENT':
                    if i.type != 'NEWLINE':
                        if i.type != 'NL':
                            if i.type != 'ENDMARKER':
                                inds.append(actual_token_len.index(i))
    # Bucket the eligible indices into windows of 10.
    allInds = []
    for nah in range(numTokensNeeded + 1):
        temp = []
        for nahHoi in range(len(inds)):
            if nah != 0:
                flag = nah * 10
                pastFlag = (nah - 1) * 10
                if pastFlag < inds[nahHoi] <= flag:
                    temp.append(inds[nahHoi])
        if len(temp) != 0:
            allInds.append(temp)
    curr = 0       # consecutive failed attempts in the current window
    new_text = ''
    while radOut < len(allInds):
        if radOut == (numTokensNeeded - 1):
            param_start = haha
            param_end = num_lines - 1
        else:
            param_start = radOut * 10
            param_end = param_start + 9
        haha = param_end
        toChooseArr = allInds[radOut]
        chosenLineIndTemp = randint(0, len(toChooseArr) - 1)  #num_lines-1
        chosenLineInd = toChooseArr[chosenLineIndTemp]
        chosens.append(chosenLineInd)
        source_code = raw_text
        send = actual_token_len[chosenLineInd]  # token being deleted
        #send = Token(tokenize.tok_name[raw_tokens_pass[chosenLineInd][0]], raw_tokens_pass[chosenLineInd][1], raw_tokens_pass[chosenLineInd][2][0], raw_tokens_pass[chosenLineInd][2][1], raw_tokens_pass[chosenLineInd][3][0], raw_tokens_pass[chosenLineInd][3][1], raw_tokens_pass[chosenLineInd][4])
        fixToks.append(send)
        # Locate the target token's source line, then the token inside it.
        indexToRemove = source_code.index(actual_token_len[chosenLineInd].line)
        temp = source_code[indexToRemove:indexToRemove +
                           len(actual_token_len[chosenLineInd].line) + 1]
        change = temp.strip()
        check = change.find(raw_tokens_pass[chosenLineInd][1])
        shotInd = temp.index(raw_tokens_pass[chosenLineInd][1])
        actual_target_ind = indexToRemove + shotInd
        # If the token starts the stripped line and fills it, cut from the
        # line start; otherwise cut at the token itself.
        if check == 0 and len(
                raw_tokens_pass[chosenLineInd][1]) == len(change):
            before = source_code[:indexToRemove]
        else:
            before = source_code[:actual_target_ind]
        after = source_code[actual_target_ind +
                            len(raw_tokens_pass[chosenLineInd][1]):]
        if check == 0:
            # Also drop the following character (separator) after the token.
            new_text = before + after[1:]
        else:
            new_text = before + after
        toTest = checkPyPySyntax(new_text)
        if toTest == None:
            # Still valid syntax: deletion failed, retry this window
            # (give up after 10 consecutive misses).
            curr = curr + 1
            if curr > 10:
                radOut = radOut + 1
            else:
                radOut = radOut
            fixToks.remove(send)
            chosens.remove(chosenLineInd)
        else:
            # Syntax broken: deletion accepted.
            curr = 0
            radOut = radOut + 1
    return new_text, YES_TOKEN, DELETION, chosens, fixToks
def deleteTokMut(raw_tokens, raw_text):
    """Token-DELETION mutation: delete ONE randomly chosen eligible token
    from `raw_text`.

    Returns one of two shapes:
      (2, raw_tokens_pass, raw_text, 2, send)
        when the mutated text is still syntactically valid (failure case).
      (new_text, YES_TOKEN, DELETION, out_tokens_loc, send)
        when the deletion produced a syntax error; `out_tokens_loc` is a
        '0'/'1' vector marking the deleted token slot and `send` the
        deleted Token record.
    """
    out_tokens_loc = []
    raw_tokens_pass = []
    orig = []
    for token in raw_tokens:
        token_use = token
        orig.append(token_use)
        # Skip structural tokens as deletion targets (ids assumed from the
        # Python 2 token module: 5/6 INDENT/DEDENT, 4 NEWLINE, 54 NL,
        # 53 COMMENT, 0 ENDMARKER — NOTE(review): confirm).
        if token[0] != 5:
            if token[0] != 6:
                if token[0] != 4:
                    if token[0] != 54:
                        if token[0] != 53:
                            if token[0] != 0:
                                raw_tokens_pass.append(token_use)
    num_lines = len(raw_tokens_pass)
    num_encode = len(orig)
    chosenLineInd = randint(0, num_lines - 1)  #num_lines-1
    # Map the filtered target index back to its index in `orig`.
    chosenTrueLineInd = -1
    indI = 0
    for x in orig:
        if raw_tokens_pass[chosenLineInd] == x:
            chosenTrueLineInd = indI
            break
        indI = indI + 1
    # Build the '0'/'1' location vector (length 2*len(orig)+1).
    toIter = num_encode + (num_encode + 1)
    for _ in range(toIter):
        out_tokens_loc.extend('0')
    lenD = len(out_tokens_loc)
    for indI in range(toIter):
        indLook = ((chosenTrueLineInd) * 2) + 1
        if indI == indLook:
            out_tokens_loc[indI] = ('1')
    source_code = raw_text
    # Project Token record describing the token being deleted.
    send = Token(tokenize.tok_name[raw_tokens_pass[chosenLineInd][0]],
                 raw_tokens_pass[chosenLineInd][1],
                 raw_tokens_pass[chosenLineInd][2][0],
                 raw_tokens_pass[chosenLineInd][2][1],
                 raw_tokens_pass[chosenLineInd][3][0],
                 raw_tokens_pass[chosenLineInd][3][1],
                 raw_tokens_pass[chosenLineInd][4])
    # Locate the target token's source line, then the token inside it.
    indexToRemove = source_code.index(raw_tokens_pass[chosenLineInd][4])
    temp = source_code[indexToRemove:indexToRemove +
                       len(raw_tokens_pass[chosenLineInd][4]) + 1]
    change = temp.strip()
    check = change.find(raw_tokens_pass[chosenLineInd][1])
    shotInd = temp.index(raw_tokens_pass[chosenLineInd][1])
    actual_target_ind = indexToRemove + shotInd
    # If the token starts the stripped line and fills it, cut from the line
    # start; otherwise cut at the token itself.
    if check == 0 and len(raw_tokens_pass[chosenLineInd][1]) == len(change):
        before = source_code[:indexToRemove]
    else:
        before = source_code[:actual_target_ind]
    after = source_code[actual_target_ind +
                        len(raw_tokens_pass[chosenLineInd][1]):]
    if check == 0:
        # Also drop the following character (separator) after the token.
        new_text = before + after[1:]
    else:
        new_text = before + after
    toTest = checkPyPySyntax(new_text)
    if toTest == None:
        # Deletion failed to break the syntax.
        lenR = 2
        lenK = 2
        return lenR, raw_tokens_pass, raw_text, lenK, send
    else:
        return new_text, YES_TOKEN, DELETION, out_tokens_loc, send
def insertMut(raw_text):
    """Character-level insertion mutation: insert one random vocabulary
    token at a random column of a random line of `raw_text`, then report
    the resulting syntax error. Recurses on the ORIGINAL text until a
    mutation that breaks the syntax is found (unbounded recursion).
    """
    with open('vocabulary_mutate.json') as data_file:
        data = json.load(data_file)
    pprint(data)
    print "HI"
    # NOTE(review): range 0-86 here vs 0-84 in the TokMut functions —
    # confirm the vocabulary size.
    chosenInd = randint(0, 86)
    chosenToken = data["indexes_m"][chosenInd]
    print chosenToken
    raw_text = str(raw_text)
    num_lines = raw_text.count('\n') + 1
    chosenLineInd = randint(1, num_lines)  # 1-based line number
    print chosenLineInd
    # Extract the chosen line (find_nth locates the n-th '\n').
    if chosenLineInd == 1:
        first_strip = raw_text[:find_nth(raw_text, "\n", chosenLineInd)]
    else:
        first_strip = raw_text[find_nth(raw_text, "\n", chosenLineInd - 1) +
                               1:find_nth(raw_text, "\n", chosenLineInd)]
    print len(first_strip)
    print first_strip
    # Splice the token into the line at a random column.
    chosenColInd = randint(1, len(first_strip) + 2)
    first_col_strip = first_strip[:chosenColInd]
    last_col_strip = first_strip[chosenColInd:]
    new_line = first_col_strip + chosenToken + last_col_strip
    print new_line
    # Stitch the mutated line back between the untouched head and tail.
    if chosenLineInd == 1:
        print "F"
        last_text = raw_text[find_nth(raw_text, "\n", chosenLineInd) + 1:]
        print "L"
        print last_text
        final_code_text = new_line + "\n" + last_text
    elif chosenLineInd == num_lines:
        first_text = raw_text[:find_nth(raw_text, "\n", chosenLineInd - 1)]
        print "F"
        print first_text
        final_code_text = first_text + "\n" + new_line
    else:
        first_text = raw_text[:find_nth(raw_text, "\n", chosenLineInd - 1)]
        print "F"
        print first_text
        last_text = raw_text[find_nth(raw_text, "\n", chosenLineInd) + 1:]
        print "L"
        print last_text
        # NOTE(review): only this branch calls new_line.encode(); the other
        # two branches use it unencoded — confirm which is intended.
        final_code_text = first_text + new_line.encode() + "\n" + last_text
    print '------------------------------------'
    print final_code_text
    print num_lines
    toTest = checkPyPySyntax(final_code_text)
    if toTest == None:
        # Still valid syntax: retry from scratch on the original text.
        print "Try again..."
        insertMut(raw_text)
    else:
        print toTest[0]
        print toTest[0].filename
        print toTest[0].line
        print toTest[0].column
        print toTest[0].functionname
        print toTest[0].text
        print toTest[0].errorname
        print "-----------FINISHED-------------------"
def perform(curr): sqlite_file = "/home/dhvani/python-sources.sqlite3" conn = sqlite3.connect(sqlite_file) c = conn.cursor() #print "Success Connection to database..." c.execute("SELECT source FROM source_file INNER JOIN eligible_source ON source_file.hash = eligible_source.hash") #print "Executed SELECT..." #print "Fetching all rows..." all_rows = c.fetchmany(size=2600) conn.close() # Close the connection to SQL #for curr in range(2): #print all_rows[curr][0] if True: #curr = 13 #print all_rows[curr][0] #print "Got Em..." #print "Running PyPy test..." #print curr toTest = checkPyPySyntax(all_rows[curr][0]) #print "DONE" #print "CURRENT: " # print curr if toTest == None: #print "here" global all_tokens all_tokens = [] global indexed_tokens indexed_tokens = [] tokenStream = tokenize.tokenize(StringIO.StringIO(all_rows[curr][0]).readline, handle_token) #print "RAW" #print len(all_tokens) allGood = [] global all_tokens allGood = all_tokens[:] print len(allGood) #print "come on" #print len(all_tokens) #print len(allGood) one_hot_good = vocabularize_tokens(all_tokens, False) #one_hot_good_out = [] #for x in range(len(all_tokens)+(WINDOW_SIZE-1)+(WINDOW_SIZE-1)): #toAdd = [] #toAdd = [0] * NUM_BITS_OUTPUT #toAdd[0] = 0 #toAdd[1] = 1 # FIRST BIT (01) - INDICATE NO ERROR (1 because rest are 0 and so add up to 1) #toAdd = [1] #one_hot_good_out.append(1) #print "DHVANI" #print len(one_hot_good) #print len(allGood) #print len(all_tokens) raw_tokens = tokenize.generate_tokens(StringIO.StringIO(all_rows[curr][0]).readline) source_code = str(all_rows[curr][0]) #MUTATIONS PER TOKEN # INSERT #global all_tokens #all_tokens = [] global indexed_tokens indexed_tokens = [] #print "RAW" #print len(all_tokens) #passToks = all_tokens[:] #print len(passToks) #print "come global all_tokens print "dhadha" #print len(all_tokens) #print len(allGood) passBall = allGood[:] new_i_text, NO_TOKEN, INSERTION, out_tokens_loc, chosenTrueLineInds, insToks = insertTokMutS(raw_tokens, passBall, 
source_code) while isinstance(new_i_text, int): new_i_text, NO_TOKEN, INSERTION, out_tokens_loc, chosenTrueLineInds, insToks = insertTokMutS(NO_TOKEN, out_tokens_loc, INSERTION) if isinstance(new_i_text, str): break print "here" new_tokens_ins = allGood[:] #print "BOL BOL BOL" #print len(new_tokens_ins) temp = insToks[:] for insTok in temp: if insTok.type == "NL": insToks[insToks.index(insTok)].type = "NEWLINE" temp2 = chosenTrueLineInds[:] for chosenTrueLineInd in temp2: vocab_entry = open_closed_tokens(chosenTrueLineInd) chosenTrueLineInds[chosenTrueLineInds.index(chosenTrueLineInd)].value = vocab_entry #print vocab_entry #print "OK ------------------------------" #print len(new_tokens_ins) #print len(chosenTrueLineInds) #print len(all_tokens) bruhInd = 0 for wow in range(len(chosenTrueLineInds)): bruhInd = -1 iterInd = 0 chosenTrueLineInd = chosenTrueLineInds[wow] insTok = insToks[wow] #print len(all_tokens) for a in allGood: if a == chosenTrueLineInd: bruhInd = iterInd iterInd = iterInd + 1 #print bruhInd + 1 #print bruhInd #print "gotchu" if bruhInd != -1: #print bruhInd #print "gotchu" new_tokens_ins.insert(bruhInd+1, insTok) #print "START DEBUG" #print insTok.value #print len(new_tokens_ins) #print new_tokens_ins[bruhInd+1].value one_hot_bad_ins = vocabularize_tokens(new_tokens_ins, True) #print one_hot_bad_ins[bruhInd+1+WINDOW_SIZE-1] #print "DONE DEBUG" #print len(new_tokens_ins) #print len(one_hot_bad_ins) #if(bruhInd+1 < len(new_tokens_ins)): #print "NEXT STEP...C" #passInsErrorInd = (bruhInd+1)+(WINDOW_SIZE-1) #one_hot_bad_ins_out = [] #trueErrorInd = (bruhInd+1)+(WINDOW_SIZE-1) # INSERT OUT_PUT #iterNum = len(new_tokens_ins)+(WINDOW_SIZE-1)+(WINDOW_SIZE-1) #print "divide" #print trueErrorInd #print iterNum ''' for x in range(iterNum): #if x <= trueErrorInd <= (x+trueErrorInd): #if x <= trueErrorInd <= x+(WINDOW_SIZE-1): if True: # DIFF - ACTUAL ERROR #print x #toAdd = [] #toAdd = [0] * NUM_BITS_OUTPUT #toAdd[0] = 1 # FIRST BIT (10) - INDICATE ERROR 
#toAdd[1] = 0 #if NO_TOKEN != None: # toAdd[2] = 0 # toAdd[3] = 1 #if INSERTION != None: # toAdd[4] = 0 # toAdd[5] = 0 # toAdd[6] = 1 #toAdd[7] = 1 toAdd = [0,3,6,7] one_hot_bad_ins_out.append(toAdd) else: toAdd = [] toAdd = [0] * NUM_BITS_OUTPUT toAdd[0] = 1 toAdd[1] = 0 # FIRST BIT (01) - INDICATE NO ERROR (1 because rest are 0 and so add up to 1) one_hot_bad_ins_out.append(toAdd) ''' #print "Morning" #print len(new_tokens_ins) #print len(one_hot_bad_ins_out) #print one_hot_bad_ins_out[trueErrorInd] # DELETE raw_tokens = tokenize.generate_tokens(StringIO.StringIO(all_rows[curr][0]).readline) #global all_tokens #all_tokens = [] global indexed_tokens indexed_tokens = [] #print type(raw_tokens) #print type(source_code) passBall = allGood[:] new_d_text, YES_TOKEN, DELETION, chosens, fixToks = deleteTokMutS(raw_tokens,passBall, source_code) while isinstance(new_d_text, int): new_d_text, YES_TOKEN, DELETION, chosens, fixToks = deleteTokMutS(YES_TOKEN, DELETION) if isinstance(new_d_text, str): break print "NEXT STEP..." 
new_tokens_del = allGood[:] tempFix = fixToks[:] for send in tempFix: vocab_entry = open_closed_tokens(send) fixToks[fixToks.index(send)].value = vocab_entry #print "len" #print len(allGood) #print len(fixToks) #print len(new_tokens_del) removeInds = [] for wow in range(len(chosens)): bruhInd = -1 iterInd = 0 send = fixToks[wow] #print send.value for a in allGood: if a == send: bruhInd = iterInd iterInd = iterInd + 1 #print bruhInd #print "CHECK" #print len(new_tokens_del) removeInds.append(bruhInd) #del new_tokens_del[bruhInd] #print len(new_tokens_del) #print "DEL ROR" for r in reversed(removeInds): del new_tokens_del[r] #print len(new_tokens_del) one_hot_bad_del = vocabularize_tokens(new_tokens_del, True) #one_hot_bad_del_out = [] #trueErrorInd = (bruhInd)+(WINDOW_SIZE-1) # DELETE OUT_PUT #iterNum = len(new_tokens_del)+(WINDOW_SIZE-1)+(WINDOW_SIZE-1) #print "divide" #print len(send) #print trueErrorInd #print iterNum #print "delete" #print send.type #print send.value #oneH_ind_deleted = set_from_json_nonarr(send, True) #print oneH_ind_deleted #print "rad" ''' for x in range(iterNum): #if x <= trueErrorInd <= (x+trueErrorInd): if True: # DIFF - ACTUAL ERROR #print x #toAdd = [] #toAdd = [0] * NUM_BITS_OUTPUT #toAdd[0] = 1 # FIRST BIT (10) - INDICATE ERROR #toAdd[1] = 0 #if YES_TOKEN != None: # toAdd[2] = 1 # toAdd[3] = 0 #if DELETION != None: # toAdd[4] = 0 # toAdd[5] = 1 # toAdd[6] = 0 #toAdd[7] = 1 #toAdd[17] = 1 toAdd = [0,2,5,7,17] one_hot_bad_del_out.append(toAdd) else: toAdd = [] toAdd = [0] * NUM_BITS_OUTPUT toAdd[0] = 0 toAdd[1] = 1 # FIRST BIT (01) - INDICATE NO ERROR (1 because rest are 0 and so add up to 1) one_hot_bad_del_out.append(toAdd) ''' #print "Morning" #print len(allGood) #print len(one_hot_bad_del_out) #print one_hot_bad_del_out[trueErrorInd] # SUB raw_tokens = tokenize.generate_tokens(StringIO.StringIO(all_rows[curr][0]).readline) #global all_tokens #all_tokens = [] global indexed_tokens indexed_tokens = [] #print type(raw_tokens) passBall = 
allGood[:] new_s_text, YES_TOKEN, SUBSTITUTION, chosensS, fixToksS, insToksS = subTokMutS(raw_tokens, passBall, source_code) while isinstance(new_s_text, int): new_s_text, YES_TOKEN, SUBSTITUTION, chosensS, fixToksS, insToksS = subTokMutS(YES_TOKEN, SUBSTITUTION) if isinstance(new_s_text, str): break #print "NEXT STEP..." # SUB DELETE new_tokens_sub = allGood[:] temp = insToksS[:] for insTok in temp: if insTok.type == "NL": insToks[insToksS.index(insTok)].type = "NEWLINE" tempFix = fixToksS[:] for send in tempFix: vocab_entry = open_closed_tokens(send) fixToksS[fixToksS.index(send)].value = vocab_entry removeInds = [] for wow in range(len(chosensS)): bruhInd = -1 iterInd = 0 send = fixToksS[wow] #print send.value for a in allGood: if a == send: bruhInd = iterInd iterInd = iterInd + 1 #print bruhInd #print "CHECK" #print len(new_tokens_del) removeInds.append(bruhInd) #del new_tokens_del[bruhInd] #print len(new_tokens_del) #print "DEL ROR" # SUB INSERT #print len(removeInds) #print len(insToksS) comeIter = len(insToksS)-1 for r in reversed(removeInds): del new_tokens_sub[r] #print insToksS[comeIter].value new_tokens_sub.insert(r, insToksS[comeIter]) comeIter -= 1 #for x in new_tokens_sub: #print x.value #print len(new_tokens_del) #print insTokS.type #print insTokS.value #print "LUNCH" one_hot_bad_sub = vocabularize_tokens(new_tokens_sub, True) #one_hot_bad_sub_out = [] #trueErrorInd = (bruhInd)+(WINDOW_SIZE-1) # SUB OUT_PUT #iterNum = len(new_tokens_sub)+(WINDOW_SIZE-1)+(WINDOW_SIZE-1) #print "divide" #print len(send) #print trueErrorInd #print iterNum #print "sub" #print sendS.type #print sendS.value #oneH_sub_switch = set_from_json_nonarr(sendS, True) #print oneH_sub_switch #print "rad" ''' for x in range(iterNum): #if x <= trueErrorInd <= (x+trueErrorInd): #if x <= trueErrorInd <= x+(WINDOW_SIZE-1): if True: # DIFF - ACTUAL ERROR #print x #toAdd = [] #toAdd = [0] * NUM_BITS_OUTPUT #toAdd[0] = 1 # FIRST BIT (10) - INDICATE ERROR #toAdd[1] = 0 #toAdd[2] = 1 
#toAdd[3] = 0 #toAdd[4] = 1 #toAdd[5] = 0 #toAdd[6] = 0 #toAdd[7] = 1 #toAdd[17] = 1 toAdd = [0,2,4,7,17] one_hot_bad_sub_out.append(toAdd) else: toAdd = [] toAdd = [0] * NUM_BITS_OUTPUT toAdd[0] = 0 toAdd[1] = 1 # FIRST BIT (01) - INDICATE NO ERROR (1 because rest are 0 and so add up to 1) one_hot_bad_sub_out.append(toAdd) ''' #print "Morning" #print len(allGood) #print len(all_tokens) #print len(one_hot_bad_sub_out) #print one_hot_bad_sub_out[trueErrorInd] # MUTATIONS PER CHARACTER # insertMut(source_code) #deleteMut(source_code]) #print "LEN" #print one_hot_good[0] #print one_hot_bad[0] #print "----------INPUT-------------" #print len(one_hot_good) #print len(one_hot_bad_ins) #print len(one_hot_bad_del) #print len(one_hot_bad_sub) #print "----------OUTPUT-------------" #print len(one_hot_good_out) #print len(one_hot_bad_ins_out) #print len(one_hot_bad_del_out) #print len(one_hot_bad_sub_out) #print curr #one_hot_all = np.concatenate((one_hot_good, one_hot_bad), axis=0) #print "SUCCESS" #sizes = [len(one_hot_good), len(one_hot_bad_ins),len(one_hot_bad_del),len(one_hot_bad_sub)] #minSize = int(min(float(siz) for siz in sizes)) # min of a generator #return minSize #toPassGood = [] #print len(one_hot_good) #for good in one_hot_good: # ind = good.index(1.0) # toPassGood.append(ind) #print len(toPassGood) #print one_hot_bad_sub_out #print type(radha) toPass = [one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub] #toPass = sparse.csr_matrix(toPassMatrix) #print toPass #print type(radha) #cPickle.dump(one_hot_bad_ins, fileStore) #cPickle.dump(one_hot_bad_del, fileStore) #cPickle.dump(one_hot_bad_sub, fileStore) #cPickle.dump(one_hot_good_out, fileStore) #cPickle.dump(one_hot_bad_ins_out, fileStore) #cPickle.dump(one_hot_bad_del_out, fileStore) #cPickle.dump(one_hot_bad_sub_out, fileStore) #cPickle.dump(passInsErrorInd, fileStore) return toPass #return one_hot_good, one_hot_bad_ins, one_hot_bad_del, one_hot_bad_sub, one_hot_good_out, one_hot_bad_ins_out, 
one_hot_bad_del_out, one_hot_bad_sub_out,passInsErrorInd else: print "Try again..." print curr #print all_rows[curr][0] return -1
def deleteMut(raw_text): first_strip = '' while len(first_strip) == 0: raw_text = str(raw_text) num_lines = raw_text.count('\n') + 1 chosenLineInd = randint(1, num_lines) print chosenLineInd if chosenLineInd == 1: first_strip = raw_text[:find_nth(raw_text, "\n", chosenLineInd)] else: first_strip = raw_text[find_nth(raw_text, "\n", chosenLineInd - 1) + 1:find_nth(raw_text, "\n", chosenLineInd)] print len(first_strip) print first_strip chosenColInd = randint(1, len(first_strip) + 2) first_col_strip = first_strip[:chosenColInd - 1] last_col_strip = first_strip[chosenColInd:] new_line = first_col_strip + last_col_strip if chosenLineInd == 1: print "F" last_text = raw_text[find_nth(raw_text, "\n", chosenLineInd) + 1:] print "L" print last_text final_code_text = new_line + "\n" + last_text elif chosenLineInd == num_lines: first_text = raw_text[:find_nth(raw_text, "\n", chosenLineInd - 1)] print "F" print first_text final_code_text = first_text + "\n" + new_line else: first_text = raw_text[:find_nth(raw_text, "\n", chosenLineInd - 1)] print "F" print first_text last_text = raw_text[find_nth(raw_text, "\n", chosenLineInd) + 1:] print "L" print last_text final_code_text = first_text + new_line.encode() + "\n" + last_text print '------------------------------------' print final_code_text print num_lines toTest = checkPyPySyntax(final_code_text) if toTest == None: print "Try again..." deleteMut(raw_text) else: print toTest[0] print toTest[0].filename print toTest[0].line print toTest[0].column print toTest[0].functionname print toTest[0].text print toTest[0].errorname print "-----------FINISHED-------------------"
def test_syntax_ok(self):
    """Syntactically valid source yields no compile errors."""
    result = checkPyPySyntax('a=1+2')
    self.assertIsNone(result)