def scan_buf(buf, mtime=None, lang="Go"):
    """Scan the given GoBuffer and return an ElementTree (conforming to
    the CIX schema) giving a summary of its code elements.

    This is still the stub Go CILE: the tree it returns holds a single,
    empty "blob" scope named after the file, i.e. CIX content that says
    "there are no code elements in this Go content".

    @param buf {GoBuffer} is the Go buffer to scan
    @param mtime {int} is a modified time for the file (in seconds since
        the "epoch"). If it is not specified the _current_ time is used.
        Note that the default is not to stat() the file and use that
        because the given content might not reflect the saved file state.
    @param lang {str} is the language name written to the "file" and
        "blob" elements.
    @returns the root "codeintel" element.
    """
    # Dev Notes:
    # - To debug/test the scanner, run (in the extension source dir):
    #       codeintel scan -p -l Go <example-Go-file>
    #   "codeintel" is a script available in the Komodo SDK.
    log.info("scan '%s'", buf.path)

    mtime = int(time.time()) if mtime is None else mtime

    # The 'path' attribute must use normalized dir separators.
    on_windows = sys.platform.startswith("win")
    norm_path = buf.path.replace('\\', '/') if on_windows else buf.path

    root = ET.Element("codeintel", version="2.0",
                      xmlns="urn:activestate:cix:2.0")
    file_elt = ET.SubElement(root, "file", lang=lang, mtime=str(mtime))
    blob = ET.SubElement(file_elt, "scope", ilk="blob", lang=lang,
                         name=os.path.basename(norm_path))

    # TODO:
    # - A 'package' -> 'blob'. Problem is a single go package can be from
    #   multiple files... so really would want `lib.get_blobs(name)`
    #   instead of `lib.get_blob(name)` in the codeintel API. How does
    #   Ruby deal with this? Perl?
    # - How do the multi-platform stdlib syscall_linux.go et al fit
    #   together?

    # Dev Note:
    # This is where the Go content gets processed and CIX elements are
    # added to 'blob' as per the CIX schema (cix-2.0.rng). Use the
    # "buf.accessor" API (see class Accessor in codeintel2.accessor) to
    # analyze. For example:
    # - A token stream of the content is available via:
    #       buf.accessor.gen_tokens()
    #   Use the "codeintel html -b <example-Go-file>" command as a
    #   debugging tool.
    # - "buf.accessor.text" is the whole content of the file. If you have
    #   a separate tokenizer/scanner tool for Go content, you may want to
    #   use it.

    return root
def tree(self):
    """The CIX tree for this buffer. Will lazily scan if necessary.

    Builds a new "codeintel" root holding one "file" element (path with
    '/' separators, language, scan time and, when set, the scan error)
    and appends this buffer's blobs to it ordered by language name.
    The buffer lock is held for the duration of the build.
    """
    self.acquire_lock()
    try:
        # SIDE-EFFECT: reading this attribute scans if necessary, so it
        # is read before the cached scan time/error below.
        blobs_by_lang = self.blob_from_lang

        norm_path = self.path
        if os.sep != '/':
            norm_path = norm_path.replace(os.sep, '/')

        root = ET.Element("codeintel", version="2.0")
        file_elem = ET.SubElement(root, "file",
                                  path=norm_path,
                                  lang=self.lang,
                                  mtime=str(self._scan_time_cache))

        scan_error = self._scan_error_cache
        if scan_error:
            file_elem.set("error", scan_error)

        if blobs_by_lang:
            # Language names are unique keys, so ordering the keys gives
            # the same sequence as ordering the (lang, blob) pairs.
            for lang_name in sorted(blobs_by_lang):
                file_elem.append(blobs_by_lang[lang_name])

        return root
    finally:
        self.release_lock()
def do_scan(self, subcmd, opts, *path_patterns):
    """Scan and print the CIX for the given path(s).

    ${cmd_usage}
    ${cmd_option_list}
    """
    # NOTE(review): the docstring above contains ${...} placeholders and
    # so presumably doubles as the command's help template; it is left
    # untouched and the implementation notes live in comments instead.
    #
    # Overview:
    # - Every path matched by `path_patterns` is scanned and its blobs
    #   are collected under one "file" element per path in a single
    #   "codeintel" tree.
    # - With opts.recursive, files that cannot be read, are not CIX
    #   languages or hit an unknown CILE language are skipped; without
    #   it those conditions raise.
    # - opts.time_it / opts.time_details force a rescan and write timing
    #   information to stderr.
    # - The resulting tree (if any file was scanned) is optionally
    #   stripped of function-local variables, pretty-printed, and dumped
    #   to stdout unless opts.quiet is set.
    mgr = Manager()
    mgr.upgrade()
    mgr.initialize()
    try:
        if opts.time_it:
            start = time.time()
        quiet = opts.quiet
        if opts.time_it or opts.time_details:
            # Timing a cached result would be meaningless.
            opts.force = True

        scan_count = 0
        lang_warnings = set()
        tree = None
        for path in _paths_from_path_patterns(path_patterns,
                                              recursive=opts.recursive,
                                              includes=opts.includes):
            if opts.time_it:
                sys.stderr.write(path + "\n")
            if opts.time_details:
                start1 = time.time()

            try:
                lang = opts.lang or guess_lang_from_path(path)
            except CodeIntelError:
                log.info("skip `%s': couldn't determine language", path)
                continue
            try:
                buf = mgr.buf_from_path(path, lang=lang)
            except OSError as ex:
                # Couldn't access the file.
                if not opts.recursive:
                    raise
                # Ignore files we don't really care about.
                log.warning("%r - %r", ex, path)
                continue
            if not isinstance(buf, CitadelBuffer):
                if opts.recursive:
                    # Ignore files that scanning isn't provided for.
                    continue
                raise CodeIntelError("`%s' (%s) is not a language that "
                                     "uses CIX" % (path, buf.lang))

            scan_count += 1
            if scan_count % 10 == 0:
                log.info("%d scanning %r", scan_count, path)

            try:
                if opts.force:
                    buf.scan()
                if tree is None:
                    tree = ET.Element("codeintel", version="2.0")
                file_elem = ET.SubElement(tree, "file",
                                          lang=buf.lang,
                                          mtime=str(int(time.time())),
                                          path=os.path.basename(path))
                # Read the mapping once and use a dedicated loop name so
                # the outer `lang` is not rebound; blobs are appended in
                # language-name order.
                blob_from_lang = buf.blob_from_lang
                for blob_lang in sorted(blob_from_lang):
                    file_elem.append(blob_from_lang[blob_lang])
            except KeyError as ex:
                # Unknown cile language.
                if not opts.recursive:
                    raise
                message = str(ex)
                if message not in lang_warnings:
                    # Warn only once per distinct message.
                    lang_warnings.add(message)
                    log.warning("Skipping unhandled language %s", message)

            if opts.time_details:
                delta = time.time() - start1
                sys.stderr.write("%.3f %s\n" % (delta, path))
                sys.stderr.flush()

        if tree is not None:
            if opts.stripfuncvars:
                # For stdlibs, we don't care about variables inside of
                # functions and they take up a lot of space.
                #
                # Element.getiterator() no longer exists in the standard
                # library ElementTree (removed in Python 3.9); prefer
                # iter() and fall back for element implementations that
                # only provide the old name. The result is materialised
                # because the loop rewrites children while walking.
                iter_scopes = getattr(tree, "iter", None) or tree.getiterator
                for function in list(iter_scopes('scope')):
                    if function.get('ilk') == 'function':
                        function[:] = [
                            child for child in function
                            if child.tag != 'variable'
                        ]
            if opts.pretty_print:
                tree = pretty_tree_from_tree(tree)
            if not quiet:
                sys.stdout.write(
                    '<?xml version="1.0" encoding="UTF-8"?>\n')
                ET.dump(tree)
            if opts.time_it:
                end = time.time()
                sys.stderr.write("scan took %.3fs\n" % (end - start))
    finally:
        mgr.finalize()
def scan_buf(buf, mtime=None, lang="TorqueScript"):
    """Scan the given TorqueScriptBuffer and return an ElementTree
    (conforming to the CIX schema) giving a summary of its code elements.

    The buffer text is handed to the `torque_lex`/`ytab` parser. On a
    successful parse the resulting AST is passed to `extract_blob_data`
    to populate the blob; otherwise the "file" element gets an
    error="Error parsing file" attribute and the blob stays empty.

    @param buf {TorqueScriptBuffer} is the TorqueScript buffer to scan
    @param mtime {int} is a modified time for the file (in seconds since
        the "epoch"). If it is not specified the _current_ time is used.
        Note that the default is not to stat() the file and use that
        because the given content might not reflect the saved file state.
    @param lang {str} is the language name written to the "file" and
        "blob" elements.
    @returns the root "codeintel" element.
    """
    # Dev Notes:
    # - To debug/test the scanner, run (in the extension source dir):
    #       codeintel scan -p -l TorqueScript <example-TorqueScript-file>
    #   "codeintel" is a script available in the Komodo SDK.
    # - "codeintel html -b <example-TorqueScript-file>" is a useful
    #   debugging tool for the token stream.
    if mtime is None:
        mtime = int(time.time())

    # The 'path' attribute must use normalized dir separators.
    if sys.platform.startswith("win"):
        path = buf.path.replace('\\', '/')
    else:
        path = buf.path

    tree = ET.Element("codeintel", version="2.0",
                      xmlns="urn:activestate:cix:2.0")
    file_elt = ET.SubElement(tree, "file", lang=lang, mtime=str(mtime))
    blob = ET.SubElement(file_elt, "scope", ilk="blob", lang=lang,
                         name=os.path.basename(path))

    # Anything printed to stdout while parsing/extracting is sent to
    # stderr instead (presumably to keep parser chatter out of the
    # caller's stdout -- confirm). The try/finally guarantees stdout is
    # restored even if the lexer setup or extract_blob_data raises.
    old_stdout = sys.stdout
    sys.stdout = sys.stderr
    try:
        ytab.yy_clear_stacks()
        torque_lex.set_scan_buffer(buf.accessor.text, is_filename=False)
        try:
            # yyparse() returning a falsy value is treated as success.
            successful_parse = not ytab.yyparse()
        except Exception:
            # A parser crash is reported as an ordinary parse failure;
            # the traceback goes to stderr once for diagnosis.
            successful_parse = False
            import traceback
            traceback.print_exc(file=sys.stderr)

        if successful_parse:
            # The parser leaves the AST in slot 1 of its value stack.
            ts_ast = ytab.yyvs[1]
            extract_blob_data(ts_ast, blob, file_elt)
        else:
            file_elt.set("error", "Error parsing file")
    finally:
        sys.stdout = old_stdout

    return tree