def get_calls(tree, blob):
    """Describe every ``call`` node that ``traverse_type`` collects from the tree.

    For each collected call node the identifiers beneath it are gathered
    depth-first without descending into ``argument_list`` nodes; the last
    identifier gathered is reported as the callee. Call nodes that yield no
    identifier are skipped.

    Args:
        tree: Parsed tree; only ``tree.root_node`` is read.
        blob: Source text forwarded to ``match_from_span``.

    Returns:
        A list of dicts with the keys ``identifier``, ``argument_list``
        (``''`` when the call node has no direct ``argument_list`` child),
        ``start_point`` and ``end_point`` (both taken from the identifier
        node).
    """
    call_nodes = []
    traverse_type(tree.root_node, call_nodes, 'call')

    def _collect_identifiers(node, found):
        # Record the node itself first, then recurse unless it is a leaf or
        # an argument list (identifiers inside arguments are not callees).
        if node.type == 'identifier':
            found.append(node)
        if not node.children or node.type == 'argument_list':
            return
        for sub_node in node.children:
            _collect_identifiers(sub_node, found)

    results = []
    for call_node in call_nodes:
        found = []
        _collect_identifiers(call_node, found)
        if not found:
            continue

        callee = found[-1]
        argument_nodes = [
            sub_node for sub_node in call_node.children
            if sub_node.type == 'argument_list'
        ]
        # Only the last direct argument list of the call is reported.
        arguments = (match_from_span(argument_nodes[-1], blob)
                     if argument_nodes else '')
        results.append({
            'identifier': match_from_span(callee, blob),
            'argument_list': arguments,
            'start_point': callee.start_point,
            'end_point': callee.end_point,
        })
    return results
def get_methods(module_or_class_node, blob: str, module_name: str,
                node_type: str) -> List[Dict[str, Any]]:
    """Build one definition record per ``method`` child of a module/class node.

    Consecutive ``comment`` children directly in front of a method form its
    docstring: each comment's text is stripped of surrounding whitespace and
    of leading/trailing ``#`` characters, and the pieces are joined with
    newlines. Any non-comment child ends the current run of comments.

    Args:
        module_or_class_node: Node whose ``children[1]`` supplies the module
            or class name and whose children are scanned for methods.
        blob: Source text forwarded to ``match_from_span`` and
            ``tokenize_code``.
        module_name: First component of every reported identifier.
        node_type: Accepted for interface compatibility but not read; every
            record is tagged with the type ``'class'``.

    Returns:
        A list of definition dicts in child order. Methods whose identifier
        is listed in ``RubyParser.BLACKLISTED_FUNCTION_NAMES`` are left out.
    """
    definitions = []
    comment_buffer = []
    module_or_class_name = match_from_span(module_or_class_node.children[1], blob)

    for child in module_or_class_node.children:
        if child.type == 'comment':
            comment_buffer.append(child)
            continue

        # Every non-comment child consumes the pending comments. This also
        # covers blacklisted methods, so their comments can no longer leak
        # into the docstring of the method that follows them.
        pending_comments, comment_buffer = comment_buffer, []
        if child.type != 'method':
            continue

        metadata = RubyParser.get_function_metadata(child, blob)
        if metadata['identifier'] in RubyParser.BLACKLISTED_FUNCTION_NAMES:
            continue

        docstring = '\n'.join(
            match_from_span(comment, blob).strip().strip('#')
            for comment in pending_comments
        )
        definitions.append({
            'type': 'class',
            'identifier': '{}.{}.{}'.format(
                module_name, module_or_class_name, metadata['identifier']),
            'parameters': metadata['parameters'],
            'function': match_from_span(child, blob),
            'function_tokens': tokenize_code(child, blob),
            'docstring': docstring,
            'docstring_summary': get_docstring_summary(docstring),
            'start_point': child.start_point,
            'end_point': child.end_point
        })
    return definitions
def get_definition(tree, blob: str) -> List[Dict[str, Any]]:
    """Collect method definitions from the top-level ``class_declaration`` nodes.

    Within each ``class_body`` child of a class, every ``method_declaration``
    is reported unless ``JavaParser.is_method_body_empty`` is true for it or
    its identifier is in ``JavaParser.BLACKLISTED_FUNCTION_NAMES``. A
    ``comment`` node immediately preceding the method inside the class body
    is used as its docstring after ``strip_c_style_comment_delimiters``.

    Args:
        tree: Parsed tree; only direct children of ``tree.root_node`` are
            considered as classes.
        blob: Source text forwarded to the span/tokenizing helpers.

    Returns:
        A list of definition dicts whose ``identifier`` has the form
        ``<class identifier>.<method identifier>``.
    """
    definitions = []
    for class_node in tree.root_node.children:
        if class_node.type != 'class_declaration':
            continue

        identifier_nodes = [
            part for part in class_node.children if part.type == 'identifier'
        ]
        class_identifier = match_from_span(identifier_nodes[0], blob).strip()

        for body in class_node.children:
            if body.type != 'class_body':
                continue

            for idx, member in enumerate(body.children):
                if member.type != 'method_declaration':
                    continue
                if JavaParser.is_method_body_empty(member):
                    continue

                # Only the sibling directly in front of the method counts.
                docstring = ''
                if idx >= 1 and body.children[idx - 1].type == 'comment':
                    raw_comment = match_from_span(body.children[idx - 1], blob)
                    docstring = strip_c_style_comment_delimiters(raw_comment)
                docstring_summary = get_docstring_summary(docstring)

                metadata = JavaParser.get_function_metadata(member, blob)
                if metadata['identifier'] in JavaParser.BLACKLISTED_FUNCTION_NAMES:
                    continue

                definitions.append({
                    'type': member.type,
                    'identifier': '{}.{}'.format(class_identifier,
                                                 metadata['identifier']),
                    'parameters': metadata['parameters'],
                    'function': match_from_span(member, blob),
                    'function_tokens': tokenize_code(member, blob),
                    'docstring': docstring,
                    'docstring_summary': docstring_summary,
                    'start_point': member.start_point,
                    'end_point': member.end_point
                })
    return definitions
def get_definition(tree, blob: str) -> List[Dict[str, Any]]:
    """Collect top-level ``method_declaration`` / ``function_declaration`` nodes.

    The run of ``comment`` nodes directly in front of a declaration forms its
    docstring (comment texts joined with newlines, delimiters kept). Only the
    docstring summary is passed through ``strip_c_style_comment_delimiters``.
    Any other kind of node discards the comments gathered so far.

    Args:
        tree: Parsed tree; only direct children of ``tree.root_node`` are
            inspected.
        blob: Source text forwarded to the span/tokenizing helpers.

    Returns:
        A list of definition dicts in source order.
    """
    definitions = []
    pending_comments = []
    for node in tree.root_node.children:
        if node.type == 'comment':
            pending_comments.append(node)
            continue

        if node.type in ('method_declaration', 'function_declaration'):
            docstring = '\n'.join(
                [match_from_span(comment, blob) for comment in pending_comments])
            summary = get_docstring_summary(docstring)
            docstring_summary = strip_c_style_comment_delimiters(summary)
            metadata = GoParser.get_function_metadata(node, blob)
            definitions.append({
                'type': node.type,
                'identifier': metadata['identifier'],
                'parameters': metadata['parameters'],
                'function': match_from_span(node, blob),
                'function_tokens': tokenize_code(node, blob),
                'docstring': docstring,
                'docstring_summary': docstring_summary,
                'start_point': node.start_point,
                'end_point': node.end_point
            })

        # Declarations and unrelated nodes alike end the current comment run.
        pending_comments = []
    return definitions
def get_function_metadata(function_node, blob: str) -> Dict[str, str]:
    """Read a method's identifier and parameters from fixed child positions.

    Args:
        function_node: Node whose second child (``children[1]``) is taken as
            the identifier and whose third child (``children[2]``) is taken
            as the parameter list when its type is ``method_parameters``.
        blob: Source text forwarded to ``match_from_span``.

    Returns:
        A dict with the keys ``identifier`` and ``parameters``;
        ``parameters`` is ``''`` when the third child is not a
        ``method_parameters`` node.
    """
    children = function_node.children
    identifier = match_from_span(children[1], blob)

    parameters = ''
    candidate = children[2]
    if candidate.type == 'method_parameters':
        parameters = match_from_span(candidate, blob)

    return {
        'identifier': identifier,
        'parameters': parameters,
    }
def _get_import(import_statement, blob):
    """List the dotted names mentioned by an import statement node.

    A direct ``dotted_name`` child contributes itself; an ``aliased_import``
    child contributes each of its own ``dotted_name`` children.

    Args:
        import_statement: Node whose children are scanned.
        blob: Source text forwarded to ``match_from_span``.

    Returns:
        The whitespace-stripped names in the order they are encountered.
    """
    names = []
    for node in import_statement.children:
        if node.type == 'dotted_name':
            name_nodes = [node]
        elif node.type == 'aliased_import':
            name_nodes = [
                inner for inner in node.children if inner.type == 'dotted_name'
            ]
        else:
            continue
        names.extend(match_from_span(found, blob).strip() for found in name_nodes)
    return names
def get_function_metadata(function_node, blob: str) -> Dict[str, str]:
    """Read identifier and parameters of a function or method declaration.

    * ``function_declaration``: identifier from ``children[1]``, parameters
      from ``children[2]``.
    * ``method_declaration``: identifier from ``children[2]``; parameters are
      the texts of ``children[1]`` and ``children[3]`` joined by one space.

    Args:
        function_node: Declaration node; its ``type`` selects the layout.
        blob: Source text forwarded to ``match_from_span``.

    Returns:
        A dict with the keys ``identifier`` and ``parameters``; both stay
        ``''`` for any other node type.
    """
    identifier = ''
    parameters = ''
    kind = function_node.type

    if kind == 'function_declaration':
        identifier = match_from_span(function_node.children[1], blob)
        parameters = match_from_span(function_node.children[2], blob)
    elif kind == 'method_declaration':
        identifier = match_from_span(function_node.children[2], blob)
        leading_part = match_from_span(function_node.children[1], blob)
        trailing_part = match_from_span(function_node.children[3], blob)
        parameters = ' '.join([leading_part, trailing_part])

    return {'identifier': identifier, 'parameters': parameters}
def get_function_metadata(function_node, blob: str) -> Dict[str, str]:
    """Read identifier and formal parameters from a declaration node.

    The node type searched for is built from the part of
    ``function_node.type`` before its first underscore plus
    ``'_declaration'``. ``traverse_type`` is asked to collect nodes of that
    type below ``function_node``, and the first collected node is inspected.

    Args:
        function_node: Node to search.
        blob: Source text forwarded to ``match_from_span``.

    Returns:
        A dict with ``identifier`` (text of the last ``identifier`` child,
        with ``(`` stripped from both ends) and ``parameters`` (texts of the
        ``formal_parameter`` children joined by single spaces).
    """
    type_prefix = function_node.type.split('_')[0]
    matches = []
    traverse_type(function_node, matches, type_prefix + '_declaration')

    identifier = ''
    parameter_texts = []
    for part in matches[0].children:
        if part.type == 'identifier':
            identifier = match_from_span(part, blob).strip('(')
        elif part.type == 'formal_parameter':
            parameter_texts.append(match_from_span(part, blob))

    return {
        'identifier': identifier,
        'parameters': ' '.join(parameter_texts),
    }
def get_function_metadata(function_node, blob: str) -> Dict[str, str]:
    """Read identifier, parameters and return statement from a function node.

    ``identifier`` and ``parameters`` children are only honoured between a
    ``def`` child and the next ``:`` child. A direct ``return_statement``
    child is recorded wherever it appears.

    Args:
        function_node: Node whose direct children are scanned in order.
        blob: Source text forwarded to ``match_from_span``.

    Returns:
        A dict with the keys ``identifier``, ``parameters`` and
        ``return_statement``; keys with no matching child stay ``''``.
    """
    metadata = {'identifier': '', 'parameters': '', 'return_statement': ''}
    in_header = False
    for part in function_node.children:
        kind = part.type
        if kind == 'def':
            in_header = True
        elif kind == ':':
            in_header = False
        elif kind == 'return_statement':
            metadata['return_statement'] = match_from_span(part, blob)
        elif in_header and kind == 'identifier':
            metadata['identifier'] = match_from_span(part, blob)
        elif in_header and kind == 'parameters':
            metadata['parameters'] = match_from_span(part, blob)
    return metadata
def _get_import_from(import_from_statement, blob):
    """Map each imported dotted name to the library it is imported from.

    Children are scanned in order. A ``dotted_name`` seen before the
    ``import`` keyword (or after a ``from`` keyword) is the library; a
    ``dotted_name`` seen after ``import`` is an imported name and is recorded
    only if a non-empty library has been seen.

    Args:
        import_from_statement: Node whose children are scanned.
        blob: Source text forwarded to ``match_from_span``.

    Returns:
        A dict from imported name (whitespace and surrounding commas
        stripped) to library name.
    """
    imported = {}
    source_library = ''
    after_import = False
    for node in import_from_statement.children:
        kind = node.type
        if kind == 'from':
            after_import = False
        elif kind == 'import':
            after_import = True
        elif kind == 'dotted_name':
            if not after_import:
                source_library = match_from_span(node, blob).strip()
            elif source_library:
                imported_name = match_from_span(node, blob).strip().strip(',')
                imported[imported_name] = source_library
    return imported
def __process_functions(
        functions: Iterable,
        blob: str,
        func_identifier_scope: Optional[str] = None
) -> Iterator[Dict[str, Any]]:
    """Yield a metadata dict for every non-empty, non-dunder function node.

    Args:
        functions: Function nodes to process.
        blob: Source text forwarded to the span/tokenizing helpers.
        func_identifier_scope: When not ``None``, prefixed to the function
            identifier as ``<scope>.<identifier>``.

    Yields:
        The dict returned by ``PythonParser.get_function_metadata`` extended
        with ``docstring``, ``docstring_summary``, ``function``,
        ``function_tokens``, ``start_point`` and ``end_point``.
    """
    for function_node in functions:
        if PythonParser.is_function_empty(function_node):
            continue

        function_metadata = PythonParser.get_function_metadata(
            function_node, blob)

        # Blacklist built-in (dunder) functions. The check must use the bare
        # name: once the scope prefix is applied, ``Scope.__init__`` no longer
        # starts with ``__`` and would never be filtered.
        bare_identifier = function_metadata['identifier']
        if bare_identifier.startswith('__') and bare_identifier.endswith('__'):
            continue

        if func_identifier_scope is not None:
            function_metadata['identifier'] = '{}.{}'.format(
                func_identifier_scope, bare_identifier)

        docstring_node = PythonParser.__get_docstring_node(function_node)
        function_metadata['docstring'] = PythonParser.get_docstring(
            docstring_node, blob)
        function_metadata['docstring_summary'] = get_docstring_summary(
            function_metadata['docstring'])
        function_metadata['function'] = match_from_span(function_node, blob)
        # The docstring node is handed to tokenize_code separately --
        # presumably so it is left out of the tokens; confirm against
        # tokenize_code.
        function_metadata['function_tokens'] = tokenize_code(
            function_node, blob, {docstring_node})
        function_metadata['start_point'] = function_node.start_point
        function_metadata['end_point'] = function_node.end_point
        yield function_metadata
def get_docstring(tree, node, blob: str) -> str:
    """Build a docstring from the comment nodes directly preceding a function.

    The node whose preceding siblings are inspected depends on the parent of
    ``node``: for a ``variable_declarator`` parent it is the parent's parent
    (the variable declaration), for a ``pair`` parent (function assigned as a
    value in an object literal) it is the pair, otherwise ``node`` itself.

    Preceding ``comment`` siblings are collected walking backwards. The walk
    stops at the first non-comment sibling, at the start of the sibling list,
    or when the next sibling back ends more than one line above the comment
    just collected (an empty line separates them).

    Args:
        tree: Parsed tree forwarded to ``node_parent`` / ``previous_sibling``.
        node: Function node to find the docstring for.
        blob: Source text forwarded to ``match_from_span``.

    Returns:
        The collected comments in source order, each passed through
        ``strip_c_style_comment_delimiters`` and joined by single spaces, or
        ``''`` when the sibling directly before the base node is not a
        comment.
    """
    docstring = ''
    parent_node = node_parent(tree, node)

    if parent_node.type == 'variable_declarator':
        base_node = node_parent(tree, parent_node)  # Get the variable declaration
    elif parent_node.type == 'pair':
        # This is a common pattern where a function is assigned as a value
        # to a dictionary.
        base_node = parent_node
    else:
        base_node = node

    prev_sibling = previous_sibling(tree, base_node)
    if prev_sibling is not None and prev_sibling.type == 'comment':
        all_prev_comment_nodes = [prev_sibling]
        prev_sibling = previous_sibling(tree, prev_sibling)
        while prev_sibling is not None and prev_sibling.type == 'comment':
            all_prev_comment_nodes.append(prev_sibling)
            last_comment_start_line = prev_sibling.start_point[0]
            prev_sibling = previous_sibling(tree, prev_sibling)
            # The comment run may reach the first sibling, in which case
            # there is nothing further back to compare line numbers with.
            if (prev_sibling is not None
                    and prev_sibling.end_point[0] + 1 < last_comment_start_line):
                break  # if there is an empty line, stop expanding.

        # Nodes were gathered nearest-first; reverse to restore source order.
        docstring = ' '.join(
            strip_c_style_comment_delimiters(match_from_span(comment, blob))
            for comment in all_prev_comment_nodes[::-1])
    return docstring
def get_class_metadata(class_node, blob: str) -> Dict[str, str]:
    """Read a class's identifier and argument list from its header.

    Scanning starts paying attention after a ``class`` child and stops at the
    first ``:`` child.

    Args:
        class_node: Node whose direct children are scanned in order.
        blob: Source text forwarded to ``match_from_span``.

    Returns:
        A dict with ``identifier`` (``(`` and ``:`` characters stripped from
        both ends) and ``argument_list``; keys with no matching child stay
        ``''``.
    """
    metadata = {
        'identifier': '',
        'argument_list': '',
    }
    in_header = False
    for part in class_node.children:
        kind = part.type
        if kind == 'class':
            in_header = True
        elif kind == ':':
            # End of the class header; the body is not inspected.
            break
        elif in_header and kind == 'identifier':
            metadata['identifier'] = match_from_span(part, blob).strip('(:')
        elif in_header and kind == 'argument_list':
            metadata['argument_list'] = match_from_span(part, blob)
    return metadata
def get_function_metadata(function_node, blob: str) -> Dict[str, str]:
    """Read identifier and parameters from a function node's direct children.

    Args:
        function_node: Node whose first ``identifier`` child and first
            ``formal_parameters`` child are used.
        blob: Source text forwarded to ``match_from_span``.

    Returns:
        A dict with the keys ``identifier`` and ``parameters``; a key stays
        ``''`` when no child of the corresponding type exists.
    """
    metadata = {
        'identifier': '',
        'parameters': '',
    }
    wanted = (
        ('identifier', 'identifier'),
        ('parameters', 'formal_parameters'),
    )
    for key, child_type in wanted:
        for child in function_node.children:
            if child.type == child_type:
                # Only the first child of each type is relevant.
                metadata[key] = match_from_span(child, blob)
                break
    return metadata
def get_declarations(declaration_node, blob: str,
                     node_type: str) -> List[Dict[str, Any]]:
    """Collect the methods declared directly inside a declaration node.

    The text of the most recent ``name`` child is used as the declaration
    name. For each ``method_declaration`` child, ``traverse_type`` is asked
    for ``function_definition`` nodes beneath it and the first one supplies
    the metadata and the reported start/end points.

    Args:
        declaration_node: Node whose direct children are scanned in order.
        blob: Source text forwarded to the span/tokenizing helpers.
        node_type: Value stored under ``type`` in every record.

    Returns:
        A list of declaration dicts whose ``identifier`` has the form
        ``<declaration name>.<method identifier>``. Methods without a
        ``function_definition`` or with an identifier in
        ``PhpParser.BLACKLISTED_FUNCTION_NAMES`` are left out.
    """
    declarations = []
    # Bound up front so that a method appearing before any ``name`` child
    # yields an empty prefix instead of raising UnboundLocalError.
    declaration_name = ''
    for idx, child in enumerate(declaration_node.children):
        if child.type == 'name':
            declaration_name = match_from_span(child, blob)
            continue
        if child.type != 'method_declaration':
            continue

        docstring = PhpParser.get_docstring(declaration_node, blob, idx)
        docstring_summary = get_docstring_summary(docstring)

        function_nodes = []
        traverse_type(child, function_nodes, 'function_definition')
        if not function_nodes:
            continue

        function_node = function_nodes[0]
        metadata = PhpParser.get_function_metadata(function_node, blob)
        if metadata['identifier'] in PhpParser.BLACKLISTED_FUNCTION_NAMES:
            continue

        declarations.append({
            'type': node_type,
            'identifier': '{}.{}'.format(declaration_name,
                                         metadata['identifier']),
            'parameters': metadata['parameters'],
            # Text and tokens cover the whole method declaration, while the
            # points below come from the inner function definition.
            'function': match_from_span(child, blob),
            'function_tokens': tokenize_code(child, blob),
            'docstring': docstring,
            'docstring_summary': docstring_summary,
            'start_point': function_node.start_point,
            'end_point': function_node.end_point
        })
    return declarations
def get_definition(tree, blob: str) -> List[Dict[str, Any]]:
    """Collect method definitions from the top-level ``module`` nodes.

    Nothing is reported when any direct child of the root node has the type
    ``ERROR``. For every top-level module that has children, the methods of
    its nested modules (those that have children) are gathered first,
    followed by the methods of its classes, via ``RubyParser.get_methods``.

    Args:
        tree: Parsed tree; only direct children of ``tree.root_node`` are
            considered as modules.
        blob: Source text forwarded to ``match_from_span`` and
            ``RubyParser.get_methods``.

    Returns:
        The concatenated lists returned by ``RubyParser.get_methods``.
    """
    definitions = []
    if any(child.type == 'ERROR' for child in tree.root_node.children):
        return definitions

    for module in tree.root_node.children:
        if module.type != 'module' or not module.children:
            continue

        module_name = match_from_span(module.children[1], blob)

        # Two passes keep nested-module methods ahead of class methods.
        for member in module.children:
            if member.type == 'module' and member.children:
                definitions.extend(
                    RubyParser.get_methods(member, blob, module_name,
                                           member.type))
        for member in module.children:
            if member.type == 'class':
                definitions.extend(
                    RubyParser.get_methods(member, blob, module_name,
                                           member.type))
    return definitions
def get_definition(tree, blob: str) -> List[Dict[str, Any]]:
    """Collect definitions for the ``function`` nodes gathered from the tree.

    ``traverse_type`` is asked for ``function`` nodes starting at the root.
    Nodes without children are ignored, as are functions whose identifier is
    in ``JavascriptParser.BLACKLISTED_FUNCTION_NAMES``.

    Args:
        tree: Parsed tree to search.
        blob: Source text forwarded to the span/tokenizing helpers.

    Returns:
        A list of definition dicts; ``type`` holds the type of the function
        node's parent rather than of the function node itself.
    """
    function_nodes = []
    traverse_type(tree.root_node, function_nodes, 'function')

    # First pass: pair every usable function with its parent type and
    # docstring.
    candidates = [
        (node_parent(tree, function_node).type,
         function_node,
         JavascriptParser.get_docstring(tree, function_node, blob))
        for function_node in function_nodes
        if function_node.children
    ]

    # Second pass: resolve metadata and drop blacklisted names.
    definitions = []
    for parent_type, function_node, docstring in candidates:
        metadata = JavascriptParser.get_function_metadata(function_node, blob)
        docstring_summary = get_docstring_summary(docstring)
        if metadata['identifier'] in JavascriptParser.BLACKLISTED_FUNCTION_NAMES:
            continue
        definitions.append({
            'type': parent_type,
            'identifier': metadata['identifier'],
            'parameters': metadata['parameters'],
            'function': match_from_span(function_node, blob),
            'function_tokens': tokenize_code(function_node, blob),
            'docstring': docstring,
            'docstring_summary': docstring_summary,
            'start_point': function_node.start_point,
            'end_point': function_node.end_point
        })
    return definitions
def get_docstring(docstring_node, blob: str) -> str:
    """Return the text of a docstring node without its surrounding quotes.

    Args:
        docstring_node: Node holding the docstring, or ``None``.
        blob: Source text forwarded to ``match_from_span``.

    Returns:
        ``''`` when ``docstring_node`` is ``None``; otherwise the node's text
        with surrounding whitespace removed, then surrounding ``"``
        characters, then surrounding ``'`` characters.
    """
    if docstring_node is None:
        return ''
    raw_text = match_from_span(docstring_node, blob)
    without_whitespace = raw_text.strip()
    without_double_quotes = without_whitespace.strip('"')
    return without_double_quotes.strip("'")
def get_docstring(trait_node, blob: str, idx: int) -> str:
    """Return the comment directly preceding child ``idx`` as a docstring.

    Args:
        trait_node: Node whose children are indexed.
        blob: Source text forwarded to ``match_from_span``.
        idx: Index of the child whose preceding sibling is inspected.

    Returns:
        The preceding sibling's text passed through
        ``strip_c_style_comment_delimiters`` when that sibling is a
        ``comment`` node; ``''`` when it is not or when ``idx`` is below 1.
    """
    if idx < 1:
        return ''
    preceding = trait_node.children[idx - 1]
    if preceding.type != 'comment':
        return ''
    return strip_c_style_comment_delimiters(match_from_span(preceding, blob))