Esempio n. 1
0
  def get_called_func_arg_lists(self):
    """
    Collect the known functions and keyword arguments called in the current
    node.

    Returns two values:
     - called_funcs: [func_name1, func_name2, ...], one entry per call node,
       in the order findCallNodes yields them (repeated calls are repeated).
     - called_args: [func_name] = [arg1, arg2, ...], the keyword arguments
       seen across ALL calls to that function, each listed once in order of
       first appearance.

    Only the function names and keywords arguments that occur in the mined API
    docs are listed, i.e. names found in self.fs and (func_name, keyword)
    pairs found in self.fas.

    Side effect: the call nodes are stored on self.call_nodes.
    """
    assert self.current_node
    # NOTE: setting call_nodes as a member variable is important here, as
    # individual baseline methods may use the call_nodes to directly get
    # function-call relations.
    self.call_nodes = findCallNodes(self.current_node)
    called_funcs = []
    called_args = {}
    for call in self.call_nodes:
      func_name, keywords = extractCallComponents(call)
      if func_name not in self.fs:
        continue
      called_funcs.append(func_name)
      # Reuse the list created by an earlier call to the same function instead
      # of resetting it; otherwise a later call with fewer keywords would
      # discard the keywords collected from earlier calls.
      known_args = called_args.setdefault(func_name, [])
      for k in keywords:
        if (func_name, k) in self.fas and k not in known_args:
          known_args.append(k)
    return called_funcs, called_args
Esempio n. 2
0
    def rank_funcs(self, query, funcs, parent):
        """
        Rank functions by the similarity of their vectors to a query.

        The query is lower-cased and converted to a vector with
        self.get_bow_representation. Scores are dot products with that
        vector, computed in one of three ways:

         - self.bow is true and parent has a call_nodes attribute: funcs is
           ignored. Every call node in parent.call_nodes is scored as the
           maximum over the function's vector and the vector of each keyword
           argument found in self.func_arg_lookup. Rows of unrecognized
           keywords stay zero, so such a call never scores below 0. A call
           whose function is not in self.func_lookup scores 0.
         - self.bow is true otherwise: each entry of funcs is scored with its
           row of self.f_vecmat.
         - self.bow is false: each entry of funcs is scored with its row of
           self.model.syn0norm.

        In every mode a function without a known vector scores 0 rather than
        raising.

        TODO: make use of the value of arguments in searching, too. e.g.,
              "red" as in color="red".

        Returns:
          A list of (func, score) tuples sorted by descending score. Ties
          keep their input order.
        """
        query = query.lower()
        self.model.init_sims()
        q_vec = self.get_bow_representation(query)
        if (self.bow
                and parent is not None
                and hasattr(parent, 'call_nodes')):
            funcs = []
            scores = []
            for call in parent.call_nodes:
                func, keywords = extractCallComponents(call)
                # Row 0 holds the function vector, row i + 1 the i-th keyword.
                tmp_vecmat = np.zeros(
                    (1 + len(keywords), self.model.vector_size))
                score = 0
                if func in self.func_lookup:
                    func_idx = self.func_lookup[func]
                    tmp_vecmat[0] += self.f_vecmat[func_idx]
                    if func in self.func_arg_lookup:
                        for i, arg in enumerate(keywords):
                            if arg in self.func_arg_lookup[func]:
                                arg_idx = self.func_arg_lookup[func][arg]
                                tmp_vecmat[i + 1] += \
                                    self.a_vecmat[func][arg_idx]
                    tmp_scores = np.dot(q_vec, tmp_vecmat.T)
                    score = tmp_scores.max()
                funcs.append(func)
                scores.append(score)
        elif self.bow:
            func_vecmat = np.zeros((len(funcs), self.model.vector_size))
            for i, func in enumerate(funcs):
                # Unknown functions keep a zero row (score 0), matching the
                # other two branches, instead of raising KeyError.
                if func in self.func_lookup:
                    func_idx = self.func_lookup[func]
                    func_vecmat[i] += self.f_vecmat[func_idx]
            scores = np.dot(q_vec, func_vecmat.T)
        else:
            func_vecmat = np.zeros((len(funcs), self.model.vector_size))
            for i, func in enumerate(funcs):
                if func in self.model.vocab:
                    func_idx = self.model.vocab[func].index
                    func_vecmat[i] += self.model.syn0norm[func_idx]
            scores = np.dot(q_vec, func_vecmat.T)

        sorted_funcs = sorted(zip(funcs, scores),
                              key=lambda x: x[1],
                              reverse=True)

        return sorted_funcs
Esempio n. 3
0
  def rank_funcs(self, query, funcs, parent):
    """
    Rank functions by the similarity of their vectors to a query.

    The query is lower-cased and converted to a vector with
    self.get_bow_representation. Scores are dot products with that vector,
    computed in one of three ways:

     - self.bow is true and parent has a call_nodes attribute: funcs is
       ignored. Every call node in parent.call_nodes is scored as the maximum
       over the function's vector and the vector of each keyword argument
       found in self.func_arg_lookup. Rows of unrecognized keywords stay
       zero, so such a call never scores below 0. A call whose function is
       not in self.func_lookup scores 0.
     - self.bow is true otherwise: each entry of funcs is scored with its row
       of self.f_vecmat.
     - self.bow is false: each entry of funcs is scored with its row of
       self.model.syn0norm.

    In every mode a function without a known vector scores 0 rather than
    raising.

    TODO: make use of the value of arguments in searching, too. e.g., "red" as
          in color="red".

    Returns:
      A list of (func, score) tuples sorted by descending score. Ties keep
      their input order.
    """
    query = query.lower()
    self.model.init_sims()
    q_vec = self.get_bow_representation(query)
    if (self.bow
        and parent is not None
        and hasattr(parent, 'call_nodes')):
      funcs = []
      scores = []
      for call in parent.call_nodes:
        func, keywords = extractCallComponents(call)
        # Row 0 holds the function vector, row i+1 the i-th keyword.
        tmp_vecmat = np.zeros((1 + len(keywords), self.model.vector_size))
        score = 0
        if func in self.func_lookup:
          func_idx = self.func_lookup[func]
          tmp_vecmat[0] += self.f_vecmat[func_idx]
          if func in self.func_arg_lookup:
            for i, arg in enumerate(keywords):
              if arg in self.func_arg_lookup[func]:
                arg_idx = self.func_arg_lookup[func][arg]
                tmp_vecmat[i+1] += self.a_vecmat[func][arg_idx]
          tmp_scores = np.dot(q_vec, tmp_vecmat.T)
          score = tmp_scores.max()
        funcs.append(func)
        scores.append(score)
    elif self.bow:
      func_vecmat = np.zeros((len(funcs), self.model.vector_size))
      for i, func in enumerate(funcs):
        # Unknown functions keep a zero row (score 0), matching the other two
        # branches, instead of raising KeyError.
        if func in self.func_lookup:
          func_idx = self.func_lookup[func]
          func_vecmat[i] += self.f_vecmat[func_idx]
      scores = np.dot(q_vec, func_vecmat.T)
    else:
      func_vecmat = np.zeros((len(funcs), self.model.vector_size))
      for i, func in enumerate(funcs):
        if func in self.model.vocab:
          func_idx = self.model.vocab[func].index
          func_vecmat[i] += self.model.syn0norm[func_idx]
      scores = np.dot(q_vec, func_vecmat.T)

    sorted_funcs = sorted(zip(funcs, scores), key=lambda x: x[1], reverse=True)

    return sorted_funcs
Esempio n. 4
0
 def rank_funcs(self, query, funcs, parent):
   """
   Rank the functions called under parent.current_node by tree score.

   Every call node whose function name is in funcs is wrapped in MyAST and
   scored with self.model.scoreFullTree(query, ...). A function called more
   than once keeps its highest score.

   Returns a list of (func, score) tuples ordered by descending score. The
   first element of each tuple is the function name. Functions in funcs that
   are never called do not appear.
   """
   assert parent.current_node
   best_by_func = {}  # [func] = highest score seen so far
   for call_node in findCallNodes(parent.current_node):
     name, _ = extractCallComponents(call_node)
     if name not in funcs:
       continue
     tree_score = self.model.scoreFullTree(query, MyAST(node=call_node))
     # Keep the stored score unless this call strictly beats it.
     if name in best_by_func and not best_by_func[name] < tree_score:
       continue
     best_by_func[name] = tree_score
   ranking = list(best_by_func.items())
   ranking.sort(key=lambda pair: pair[1], reverse=True)
   return ranking
Esempio n. 5
0
 def rank_funcs(self, query, funcs, parent):
     """
     Rank the functions called under parent.current_node by tree score.

     Each call node whose function name is in funcs is wrapped in MyAST and
     scored via self.model.scoreFullTree(query, ...). When a function is
     called several times, only its highest score is kept.

     Returns a list of (func, score) tuples in descending score order. The
     first element of each tuple is the function name. Functions in funcs
     with no matching call are omitted.
     """
     assert parent.current_node
     top_scores = {}  # [func] = best score so far
     for node in findCallNodes(parent.current_node):
         callee, _ = extractCallComponents(node)
         if callee not in funcs:
             continue
         wrapped = MyAST(node=node)
         current = self.model.scoreFullTree(query, wrapped)
         # Only a first sighting or a strictly higher score replaces the entry.
         improves = callee not in top_scores or top_scores[callee] < current
         if improves:
             top_scores[callee] = current
     return sorted(top_scores.items(), key=lambda item: item[1], reverse=True)
Esempio n. 6
0
    def __init__(self):
      TEST_VECTOR_BIN_FILE = 'output/vectors-flat-mpl-0205.bin'
      # TEST_VECTOR_BIN_FILE = 'output/vectors-so-text-python-5gram.bin'
      # TEST_VECTOR_BIN_FILE = 'output/vectors-so-text-python-stem-3gram.bin'
      MAXNGRAM = 3

      for i in range(2):
        print '\nRound %d\n'%i

        if i == 0:
          print 'Loading a big vector file. Will take a while....'
          wb = Word2vecBaseline(TEST_VECTOR_BIN_FILE,
                                maxngram=MAXNGRAM)
        else:
          from annotate_code_with_api import get_fu_fau
          fu_fau = get_fu_fau()
          wb = Word2vecBaseline(wb.model,
                                maxngram=MAXNGRAM,
                                fu_fau=fu_fau)

        TEST_CODE = """plt.bar(x, y, color="red")
plt.title('hello world')
plt.xlim(1,6)"""
        node = ast.parse(TEST_CODE)
        self.call_nodes = findCallNodes(node)
        funcs = [extractCallComponents(x)[0] for x in self.call_nodes]
        results = wb.rank_funcs('set colors of the faces', funcs, self)
        for x in results:
          print x[0], x[1]

        print '-------------'

        results = wb.rank_args(
          'add shadow to legend',
          'legend',
          ['shadow', 'bbox_to_anchor', 'fontsize'])
        for x in results:
          print x[0], x[1]
Esempio n. 7
0
        def __init__(self):
            TEST_VECTOR_BIN_FILE = 'output/vectors-flat-mpl-0205.bin'
            # TEST_VECTOR_BIN_FILE = 'output/vectors-so-text-python-5gram.bin'
            # TEST_VECTOR_BIN_FILE = 'output/vectors-so-text-python-stem-3gram.bin'
            MAXNGRAM = 3

            for i in range(2):
                print '\nRound %d\n' % i

                if i == 0:
                    print 'Loading a big vector file. Will take a while....'
                    wb = Word2vecBaseline(TEST_VECTOR_BIN_FILE,
                                          maxngram=MAXNGRAM)
                else:
                    from annotate_code_with_api import get_fu_fau
                    fu_fau = get_fu_fau()
                    wb = Word2vecBaseline(wb.model,
                                          maxngram=MAXNGRAM,
                                          fu_fau=fu_fau)

                TEST_CODE = """plt.bar(x, y, color="red")
plt.title('hello world')
plt.xlim(1,6)"""
                node = ast.parse(TEST_CODE)
                self.call_nodes = findCallNodes(node)
                funcs = [extractCallComponents(x)[0] for x in self.call_nodes]
                results = wb.rank_funcs('set colors of the faces', funcs, self)
                for x in results:
                    print x[0], x[1]

                print '-------------'

                results = wb.rank_args(
                    'add shadow to legend', 'legend',
                    ['shadow', 'bbox_to_anchor', 'fontsize'])
                for x in results:
                    print x[0], x[1]
Esempio n. 8
0
    # Index code examples by the plot commands they call, then order each
    # command's examples and save the result.
    # all_codes, svgs, examples, plot_commands_set and bh are defined outside
    # this excerpt.
    seen_code_set = set()
    count_dupe = 0
    for i in xrange(len(all_codes)):
        # Skip examples with no (or an empty) SVG entry.
        if not svgs[i]: continue
        # Duplicates are detected on the stripped text, so examples differing
        # only in leading/trailing whitespace count as the same code.
        code = all_codes[i].strip()

        if code in seen_code_set:
            # Dedupe: only the first occurrence of a code example is indexed.
            count_dupe += 1
            continue
        else:
            seen_code_set.add(code)

        # NOTE: ast.parse raises SyntaxError for code that does not parse;
        # nothing in this excerpt catches it.
        node = ast.parse(code)
        calls = findCallNodes(node)
        for call in calls:
            func_name, keywords = extractCallComponents(call)
            if func_name in plot_commands_set:
                # assumes examples maps func name -> set of example indices
                # (e.g. a defaultdict(set)) -- TODO confirm against its
                # definition.
                examples[func_name].add(i)

    # The count only covers duplicates among examples that have an SVG.
    print 'There are %d duplicates' % count_dupe

    print '"Scoring" code examples.'
    # Sorting function: get_effective_code_len of the example's code
    # (presumably its number of characters), smallest first.
    # Each set of indices is replaced by a sorted list.
    examples = dict(examples)
    for func, idxs in examples.items():
        examples[func] = sorted(
            idxs, key=lambda x: get_effective_code_len(all_codes[x]))

    bh.save('plotcommands_examples', examples)
Esempio n. 9
0
  # Collect one code example and its rendered SVG for each supplementary
  # function, taken from the code cells of a notebook.
  # SUPP_FUNCS, old_svgs and plt are defined outside this excerpt.
  with open('supp_examples_0229.ipynb') as reader:
    notebook = reader.read()

  func_code_idx = {}  # [func_name] = code_idx
  codes = []
  svgs = []
  cells = json.loads(notebook)['cells']
  for cell in cells:
    # Only code cells that have at least one output are considered.
    if cell['cell_type'] == 'code':
      if 'outputs' in cell and cell['outputs']:
        # presumably cell['source'] is a list of line strings -- TODO confirm
        code = ''.join(cell['source'])
        node = ast.parse(code)
        calls = findCallNodes(node)
        for call in calls:
          func_name, keywords = extractCallComponents(call)
          # Only the first cell seen for each supplementary function is kept.
          # NOTE(review): a cell that calls several not-yet-indexed functions
          # is appended to codes, and executed, once per such function.
          if func_name in SUPP_FUNCS and func_name not in func_code_idx:
            func_code_idx[func_name] = len(codes)
            codes.append(code)

            # Get SVG
            imgdata = StringIO.StringIO()
            codeObj = compile(code, '<string>', 'exec')
            # NOTE(review): this executes the notebook cell in the current
            # scope, so the cell can read and rebind the names used here.
            # Only run this on trusted notebooks.
            exec codeObj
            plt.savefig(imgdata, format='svg', bbox_inches='tight')
            plt.close()
            imgdata.seek(0)
            # NOTE(review): reads the buffer attribute directly; presumably
            # relies on seek() having merged pending writes into it.
            # getvalue() is the documented accessor -- confirm before changing.
            svg = imgdata.buf
            svgs.append(svg)

  # Newly rendered SVGs are appended after the existing ones.
  svgs = old_svgs + svgs