def get_called_func_arg_lists(self):
    """
    Collect the known functions (and their known keyword arguments) that
    are called under self.current_node.

    Returns two values:
    - called_funcs: [func_name1, func_name2, ...], one entry per matching
      call node, in the order the call nodes are returned
    - called_args: [func_name] = [arg1, arg2, ...]

    Only the function names and keyword arguments that occur in the mined
    API docs (self.fs and self.fas) are listed.
    """
    assert self.current_node

    # NOTE: call_nodes is stored on the instance on purpose: individual
    # baseline methods may read it directly to get function-call relations.
    self.call_nodes = findCallNodes(self.current_node)

    called_funcs = []
    called_args = {}
    for call_node in self.call_nodes:
        func_name, keywords = extractCallComponents(call_node)
        if func_name not in self.fs:
            continue
        called_funcs.append(func_name)
        # A later call to the same function replaces the earlier entry.
        called_args[func_name] = [
            kw for kw in keywords if (func_name, kw) in self.fas]
    return called_funcs, called_args
def rank_funcs(self, query, funcs, parent):
    """
    Rank functions by their similarity to a natural-language query.

    The score of each function is the maximum of the following:
    - similarity(func, query)
    - similarity(func + arg, query) for any arg in the current call

    If self.bow == True, then will try to use parent.call_nodes to get
    functions and arguments and funcs will be ignored. If parent.call_nodes
    is not available, then funcs will be used.

    A function that has no known vector keeps an all-zero vector, i.e. it
    is scored 0, in every branch.

    Returns a list of (func, score) tuples sorted by descending score.

    TODO: make use of the value of arguments in searching, too.
          e.g., "red" as in color="red".
    """
    query = query.lower()
    self.model.init_sims()
    q_vec = self.get_bow_representation(query)

    if self.bow \
            and parent is not None \
            and hasattr(parent, 'call_nodes'):
        # The function list is rebuilt from the parent's call nodes; the
        # funcs argument is ignored in this branch.
        funcs = []
        scores = []
        for call in parent.call_nodes:
            func, keywords = extractCallComponents(call)
            # Row 0 holds the function vector, row i + 1 the vector of the
            # i-th keyword argument; unknown entries stay zero.
            tmp_vecmat = np.zeros(
                (1 + len(keywords), self.model.vector_size))
            score = 0
            if func in self.func_lookup:
                func_idx = self.func_lookup[func]
                tmp_vecmat[0] += self.f_vecmat[func_idx]
                if func in self.func_arg_lookup:
                    for i, arg in enumerate(keywords):
                        if arg in self.func_arg_lookup[func]:
                            arg_idx = self.func_arg_lookup[func][arg]
                            tmp_vecmat[i + 1] += self.a_vecmat[func][arg_idx]
                tmp_scores = np.dot(q_vec, tmp_vecmat.T)
                score = tmp_scores.max()
            funcs.append(func)
            scores.append(score)
    elif self.bow:
        func_vecmat = np.zeros((len(funcs), self.model.vector_size))
        for i, func in enumerate(funcs):
            # Guard the lookup so an unknown function is scored 0, like in
            # the other branches, instead of raising KeyError.
            if func in self.func_lookup:
                func_idx = self.func_lookup[func]
                func_vecmat[i] += self.f_vecmat[func_idx]
        scores = np.dot(q_vec, func_vecmat.T)
    else:
        func_vecmat = np.zeros((len(funcs), self.model.vector_size))
        for i, func in enumerate(funcs):
            if func in self.model.vocab:
                func_idx = self.model.vocab[func].index
                func_vecmat[i] += self.model.syn0norm[func_idx]
        scores = np.dot(q_vec, func_vecmat.T)

    sorted_funcs = sorted(zip(funcs, scores),
                          key=lambda x: x[1], reverse=True)
    return sorted_funcs
def rank_funcs(self, query, funcs, parent):
    """
    Score candidate functions against a query and return them best-first.

    The score of each function is the maximum of the following:
    - similarity(func, query)
    - similarity(func + arg, query) for any arg in the current call

    If self.bow == True, then will try to use parent.call_nodes to get
    functions and arguments and funcs will be ignored. If parent.call_nodes
    is not available, then funcs will be used.

    Functions for which no vector is available are scored 0 rather than
    raising, whichever branch is taken.

    Returns a list of (func, score) tuples, highest score first.

    TODO: make use of the value of arguments in searching, too.
          e.g., "red" as in color="red".
    """
    query = query.lower()
    self.model.init_sims()
    q_vec = self.get_bow_representation(query)

    use_call_nodes = (self.bow
                      and parent is not None
                      and hasattr(parent, 'call_nodes'))

    if use_call_nodes:
        # funcs is rebuilt from the call nodes; the argument is ignored.
        funcs = []
        scores = []
        for call in parent.call_nodes:
            func, keywords = extractCallComponents(call)
            # One row for the function itself plus one row per keyword.
            tmp_vecmat = np.zeros(
                (1 + len(keywords), self.model.vector_size))
            score = 0
            if func in self.func_lookup:
                tmp_vecmat[0] += self.f_vecmat[self.func_lookup[func]]
                if func in self.func_arg_lookup:
                    arg_lookup = self.func_arg_lookup[func]
                    for i, arg in enumerate(keywords):
                        if arg in arg_lookup:
                            arg_idx = arg_lookup[arg]
                            tmp_vecmat[i + 1] += self.a_vecmat[func][arg_idx]
                # Best match among the function row and its keyword rows.
                score = np.dot(q_vec, tmp_vecmat.T).max()
            funcs.append(func)
            scores.append(score)
    elif self.bow:
        func_vecmat = np.zeros((len(funcs), self.model.vector_size))
        for i, func in enumerate(funcs):
            # Previously an unknown func raised KeyError here; skip it so
            # its row stays zero, matching the other two branches.
            if func not in self.func_lookup:
                continue
            func_vecmat[i] += self.f_vecmat[self.func_lookup[func]]
        scores = np.dot(q_vec, func_vecmat.T)
    else:
        func_vecmat = np.zeros((len(funcs), self.model.vector_size))
        for i, func in enumerate(funcs):
            if func in self.model.vocab:
                func_idx = self.model.vocab[func].index
                func_vecmat[i] += self.model.syn0norm[func_idx]
        scores = np.dot(q_vec, func_vecmat.T)

    return sorted(zip(funcs, scores), key=lambda x: x[1], reverse=True)
def rank_funcs(self, query, funcs, parent):
    """
    Returns an ordered list of tuples. The first element of each tuple
    should be func; the second is the best score found for it.

    Every call node under parent.current_node whose function is in funcs
    is scored with self.model.scoreFullTree; a function called several
    times keeps its highest score. The result is sorted by descending
    score.
    """
    assert parent.current_node

    best_scores = {}  # [func] = highest score seen so far
    for call_node in findCallNodes(parent.current_node):
        func, keywords = extractCallComponents(call_node)
        if func not in funcs:
            continue
        score = self.model.scoreFullTree(query, MyAST(node=call_node))
        # Keep the stored score unless this call beats it.
        if func in best_scores and not (best_scores[func] < score):
            continue
        best_scores[func] = score

    ranked = list(best_scores.items())
    ranked.sort(key=lambda pair: pair[1], reverse=True)
    return ranked
def rank_funcs(self, query, funcs, parent):
    """
    Returns an ordered list of tuples. The first element of each tuple
    should be func.

    Each tuple is (func, score), where score is the largest value returned
    by self.model.scoreFullTree over the calls to func found under
    parent.current_node. Only functions listed in funcs are considered.
    Tuples are ordered from highest to lowest score.
    """
    root = parent.current_node
    assert root

    def score_of(item):
        # Sort key: the score half of a (func, score) pair.
        return item[1]

    func_scores = {}  # [func] = score
    for call in findCallNodes(root):
        func, keywords = extractCallComponents(call)
        if func in funcs:
            tree = MyAST(node=call)
            score = self.model.scoreFullTree(query, tree)
            first_time = func not in func_scores
            if first_time or func_scores[func] < score:
                func_scores[func] = score

    return sorted(func_scores.items(), key=score_of, reverse=True)
def __init__(self):
    """
    Run a two-round smoke test of Word2vecBaseline and print the rankings.

    Round 0 builds the baseline from the vector bin file; round 1 reuses
    the already loaded model and additionally passes fu_fau. In each round
    a small sample script is parsed, its call nodes are stored on
    self.call_nodes (self is handed to rank_funcs as the parent), and the
    ranked functions and ranked arguments are printed.
    """
    vector_bin_file = 'output/vectors-flat-mpl-0205.bin'
    # Alternative vector files:
    #   'output/vectors-so-text-python-5gram.bin'
    #   'output/vectors-so-text-python-stem-3gram.bin'
    max_ngram = 3

    for round_idx in range(2):
        print('\nRound %d\n' % round_idx)
        if round_idx == 0:
            print('Loading a big vector file. Will take a while....')
            wb = Word2vecBaseline(vector_bin_file, maxngram=max_ngram)
        else:
            # Second round: reuse the model loaded in round 0.
            from annotate_code_with_api import get_fu_fau
            fu_fau = get_fu_fau()
            wb = Word2vecBaseline(wb.model, maxngram=max_ngram,
                                  fu_fau=fu_fau)

        test_code = """plt.bar(x, y, color="red")
plt.title('hello world')
plt.xlim(1,6)"""
        self.call_nodes = findCallNodes(ast.parse(test_code))
        funcs = [extractCallComponents(call)[0]
                 for call in self.call_nodes]

        func_results = wb.rank_funcs('set colors of the faces', funcs, self)
        for entry in func_results:
            print('%s %s' % (entry[0], entry[1]))

        print('-------------')

        arg_results = wb.rank_args(
            'add shadow to legend', 'legend',
            ['shadow', 'bbox_to_anchor', 'fontsize'])
        for entry in arg_results:
            print('%s %s' % (entry[0], entry[1]))
def __init__(self):
    """
    Exercise Word2vecBaseline twice and print what it ranks.

    The first pass loads the vectors from disk; the second pass wraps the
    same model again, this time with the fu_fau data from
    annotate_code_with_api. Both passes rank the functions called in a
    fixed sample script (using self, with call_nodes set, as the parent)
    and then rank a fixed list of keyword arguments of 'legend'.
    """
    TEST_VECTOR_BIN_FILE = 'output/vectors-flat-mpl-0205.bin'
    # Other vector files that can be substituted here:
    #   'output/vectors-so-text-python-5gram.bin'
    #   'output/vectors-so-text-python-stem-3gram.bin'
    MAXNGRAM = 3

    def show(results):
        # Print one "<name> <score>" line per ranked item.
        for item in results:
            print('%s %s' % (item[0], item[1]))

    for i in range(2):
        print('\nRound %d\n' % i)

        if i != 0:
            # Imported lazily: only the second round needs it.
            from annotate_code_with_api import get_fu_fau
            fu_fau = get_fu_fau()
            wb = Word2vecBaseline(wb.model, maxngram=MAXNGRAM, fu_fau=fu_fau)
        else:
            print('Loading a big vector file. Will take a while....')
            wb = Word2vecBaseline(TEST_VECTOR_BIN_FILE, maxngram=MAXNGRAM)

        TEST_CODE = """plt.bar(x, y, color="red")
plt.title('hello world')
plt.xlim(1,6)"""
        node = ast.parse(TEST_CODE)
        self.call_nodes = findCallNodes(node)

        funcs = []
        for call in self.call_nodes:
            funcs.append(extractCallComponents(call)[0])

        show(wb.rank_funcs('set colors of the faces', funcs, self))
        print('-------------')
        show(wb.rank_args(
            'add shadow to legend',
            'legend',
            ['shadow', 'bbox_to_anchor', 'fontsize']))
# Index the code examples by the plot commands they call, skipping examples
# without an SVG and exact duplicates (after stripping whitespace).
seen_code_set = set()
count_dupe = 0
for i, raw_code in enumerate(all_codes):
    if not svgs[i]:
        continue

    code = raw_code.strip()
    if code in seen_code_set:
        # Dedupe
        count_dupe += 1
        continue
    seen_code_set.add(code)

    node = ast.parse(code)
    calls = findCallNodes(node)
    for call in calls:
        func_name, keywords = extractCallComponents(call)
        if func_name not in plot_commands_set:
            continue
        examples[func_name].add(i)

print('There are %d duplicates' % count_dupe)
print('"Scoring" code examples.')


def _example_sort_key(x):
    # Sorting function: effective length of the code example at index x.
    return get_effective_code_len(all_codes[x])


# Replace each set of example indices by a list ordered by that key.
examples = dict(examples)
for func, idxs in examples.items():
    examples[func] = sorted(idxs, key=_example_sort_key)

bh.save('plotcommands_examples', examples)
# Load the supplementary-example notebook, run every code cell that has
# outputs, and collect its source together with the SVG it renders.
with open('supp_examples_0229.ipynb') as reader:
    notebook = reader.read()

func_code_idx = {}  # [func_name] = code_idx
codes = []
svgs = []

cells = json.loads(notebook)['cells']
for cell in cells:
    if cell['cell_type'] != 'code':
        continue
    if 'outputs' not in cell or not cell['outputs']:
        continue

    code = ''.join(cell['source'])
    node = ast.parse(code)
    calls = findCallNodes(node)
    for call in calls:
        func_name, keywords = extractCallComponents(call)
        # Only the first example seen for a supplementary function is kept.
        if func_name not in SUPP_FUNCS or func_name in func_code_idx:
            continue
        func_code_idx[func_name] = len(codes)
    codes.append(code)

    # Get SVG
    # NOTE(review): the cell source is executed in the current namespace;
    # only run this on notebooks you trust.
    imgdata = StringIO.StringIO()
    codeObj = compile(code, '<string>', 'exec')
    exec(codeObj)
    plt.savefig(imgdata, format='svg', bbox_inches='tight')
    plt.close()
    imgdata.seek(0)
    svg = imgdata.getvalue()
    svgs.append(svg)

# Supplementary SVGs are appended after the previously collected ones.
svgs = old_svgs + svgs