    def _add_res(self, res):
        cix_path = res.path
        try:
            tree = tree_from_cix_path(cix_path)
        except ET.XMLParserError as ex:
            log.warn("could not load `%s' into catalog (skipping): %s",
                     cix_path, ex)
            return
        LEN_PREFIX = self.db.LEN_PREFIX
        res_id = self._new_res_id()
        res_data = {}  # {lang -> blobname -> ilk -> toplevelnames}
        name = tree.get("name") or splitext(basename(cix_path))[0]
        for blob in tree.findall("file/scope"):
            lang, blobname = blob.get("lang"), blob.get("name")
            if not lang:
                raise DatabaseError("add `%s': no 'lang' attr on %r"
                                    % (res, blob))

            # Create 'res_data'.
            tfifb = res_data.setdefault(lang, {})
            toplevelnames_from_ilk = tfifb.setdefault(blobname, {})
            if lang in self.db.import_everything_langs:
                for toplevelname, elem in blob.names.items():
                    ilk = elem.get("ilk") or elem.tag
                    if ilk not in toplevelnames_from_ilk:
                        toplevelnames_from_ilk[ilk] = set([toplevelname])
                    else:
                        toplevelnames_from_ilk[ilk].add(toplevelname)

            # Update 'toplevel*_index'.
            # toplevelname_index:
            #   {lang -> ilk -> toplevelname -> res_id -> blobnames}
            # toplevelprefix_index:
            #   {lang -> ilk -> prefix -> res_id -> toplevelnames}
            bfrftfi = self.toplevelname_index.setdefault(lang, {})
            tfrfpfi = self.toplevelprefix_index.setdefault(lang, {})
            for ilk, toplevelnames in toplevelnames_from_ilk.items():
                bfrft = bfrftfi.setdefault(ilk, {})
                tfrfp = tfrfpfi.setdefault(ilk, {})
                for toplevelname in toplevelnames:
                    bfr = bfrft.setdefault(toplevelname, {})
                    if res_id not in bfr:
                        bfr[res_id] = set([blobname])
                    else:
                        bfr[res_id].add(blobname)
                    prefix = toplevelname[:LEN_PREFIX]
                    tfr = tfrfp.setdefault(prefix, {})
                    if res_id not in tfr:
                        tfr[res_id] = set([toplevelname])
                    else:
                        tfr[res_id].add(toplevelname)

            # Update 'blob_index'.
            dbfile_and_res_id_from_blobname \
                = self.blob_index.setdefault(lang, {})
            assert blobname not in dbfile_and_res_id_from_blobname, \
                ("codeintel: %s %r blob in `%s' collides "
                 "with existing %s %r blob (from res_id %r) in catalog: "
                 "(XXX haven't decided how to deal with that yet)"
                 % (lang, blobname, cix_path, lang, blobname,
                    dbfile_and_res_id_from_blobname[blobname][1]))
            dbfile = self.db.bhash_from_blob_info(cix_path, lang, blobname)
            dbfile_and_res_id_from_blobname[blobname] = (dbfile, res_id)

            # Write out '.blob' file.
            dbdir = join(self.base_dir, safe_lang_from_lang(lang))
            if not exists(dbdir):
                log.debug("fs-write: mkdir '%s'", dbdir)
                os.makedirs(dbdir)
            log.debug("fs-write: catalog %s blob '%s'", lang, dbfile)
            ET.ElementTree(blob).write(join(dbdir, dbfile + ".blob"))

        # Update 'res_index'.
        last_updated = os.stat(cix_path).st_mtime
        self.res_index[res.area_path] \
            = (res_id, last_updated, name, res_data)
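    # For orientation, a sketch (hypothetical values) of the index shapes
    # _add_res() above builds for a single catalog resource:
    #
    #   self.blob_index:
    #       {"Perl": {"LWP": ("<bhash>", 42)}}  # blobname -> (dbfile, res_id)
    #   self.toplevelname_index:
    #       {"Perl": {"class": {"UserAgent": {42: set(["LWP"])}}}}
    #   self.toplevelprefix_index:
    #       {"Perl": {"class": {"Us": {42: set(["UserAgent"])}}}}
    #
    # Here 42 is the generated res_id, "<bhash>" stands in for the value
    # returned by bhash_from_blob_info(), and "Us" assumes LEN_PREFIX == 2
    # (the real prefix length comes from self.db.LEN_PREFIX).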
    def update_buf_data(self, buf, scan_tree, scan_time, scan_error,
                        skip_scan_time_check=False):
        """Update this MultiLangZone with the buffer data.

        @param buf {CitadelBuffer} the buffer whose data is being added
            to the database.
        @param scan_tree {ciElementTree} the CIX scan data. Might be None
            if there was an early scanning failure.
        @param scan_time {timestamp} the time of the scan, typically the
            mtime of the file.
        @param scan_error {str} an error string if scanning failed, or
            None if it was successful.
        @param skip_scan_time_check {boolean} (default False) indicates
            whether the buffer data should be updated even if `scan_time`
            is <= that in the database.
        """
        self._acquire_lock()
        try:
            # TODO: Canonicalize path (or assert that it is canonicalized)
            dir, base = split(buf.path)

            # Get the current data, if any.
            res_index = self.load_index(dir, "res_index", {})
            res_index_has_changed = False
            blob_index = self.load_index(dir, "blob_index", {})
            blob_index_has_changed = False
            is_hits_from_lpath_lang = self.lang in self.db.import_everything_langs
            if is_hits_from_lpath_lang:
                # TODO: Not sure {} for a default is correct here.
                toplevelname_index = self.load_index(
                    dir, "toplevelname_index", {})
                toplevelname_index_has_changed = False
            try:
                (old_scan_time, old_scan_error,
                 old_res_data) = res_index[base]
            except KeyError:    # adding a new entry
                (old_scan_time, old_scan_error, old_res_data) = None, None, {}
            else:               # updating an existing entry
                if not skip_scan_time_check and scan_time is not None \
                        and scan_time <= old_scan_time:
                    log.debug("skipping db update for '%s': %s < %s and "
                              "no 'skip_scan_time_check' option",
                              base, scan_time, old_scan_time)
                    return

            log.debug("update from %s buf '%s'", buf.lang, buf.path)

            # Parse the tree and get the list of blobnames.
            # res_data: {lang -> blobname -> ilk -> toplevelnames}
            new_res_data = {}
            new_blob_from_lang_and_blobname = {}
            if scan_tree:
                for blob in scan_tree[0]:
                    lang = blob.get("lang")
                    blobname = blob.get("name")
                    new_blob_from_lang_and_blobname[(lang, blobname)] = blob
                    tfifb = new_res_data.setdefault(lang, {})
                    toplevelnames_from_ilk = tfifb.setdefault(blobname, {})
                    for toplevelname, elem in blob.names.items():
                        ilk = elem.get("ilk") or elem.tag
                        if ilk not in toplevelnames_from_ilk:
                            toplevelnames_from_ilk[ilk] = set([toplevelname])
                        else:
                            toplevelnames_from_ilk[ilk].add(toplevelname)
                        # For PHP namespaces, we also want to add all
                        # namespace child items, as this will make it easy
                        # for tree_php to look up a Fully Qualified
                        # Namespace (FQN).
                        if ilk == "namespace" and lang == "PHP":
                            for childname, childelem in elem.names.items():
                                child_ilk = childelem.get(
                                    "ilk") or childelem.tag
                                child_fqn = "%s\\%s" % (toplevelname,
                                                        childname)
                                if child_ilk not in toplevelnames_from_ilk:
                                    toplevelnames_from_ilk[child_ilk] = set(
                                        [child_fqn])
                                else:
                                    toplevelnames_from_ilk[child_ilk].add(
                                        child_fqn)

            # Determine necessary changes to res_index.
            if scan_error:
                if (scan_time != old_scan_time
                        or scan_error != old_scan_error):
                    res_index[base] = (scan_time, scan_error, old_res_data)
                    res_index_has_changed = True
            else:
                # Only consider new blobs if there wasn't a scan error.
                # I.e., we want to preserve the last good scan info.
                if (scan_time != old_scan_time
                        or scan_error != old_scan_error
                        or new_res_data != old_res_data):
                    res_index[base] = (scan_time, scan_error, new_res_data)
                    res_index_has_changed = True

                if is_hits_from_lpath_lang:
                    if new_res_data != old_res_data:
                        toplevelname_index.update(base, old_res_data,
                                                  new_res_data)
                        toplevelname_index_has_changed = True

                # Determine necessary changes to dbfile_from_blobname index
                # and the dbfiles and then make them.
                dbfile_changes = []
                for (lang, blobname), blob in \
                        list(new_blob_from_lang_and_blobname.items()):
                    try:
                        old_res_data[lang][blobname]
                    except KeyError:
                        dbfile_changes.append(("add", lang, blobname, blob))
                    else:
                        dbfile_changes.append(("update", lang, blobname,
                                               blob))

                for lang, old_tfifb in list(old_res_data.items()):
                    for blobname in old_tfifb:
                        try:
                            new_res_data[lang][blobname]
                        except KeyError:
                            dbfile_changes.append(
                                ("remove", lang, blobname, None))

                dhash = self.dhash_from_dir(dir)
                for action, lang, blobname, blob in dbfile_changes:
                    if action == "add":
                        dbfile = self.db.bhash_from_blob_info(
                            buf.path, lang, blobname)
                        blob_index.setdefault(lang, {})[blobname] = dbfile
                        blob_index_has_changed = True
                        dbdir = join(self.base_dir, dhash)
                        if not exists(dbdir):
                            self._mk_dbdir(dbdir, dir)
                        # XXX What to do on write failure?
                        log.debug("fs-write: %s|%s blob '%s/%s'",
                                  self.lang, lang, dhash, dbfile)
                        if blob.get("src") is None:
                            # For defns_from_pos() support.
                            blob.set("src", buf.path)
                        ET.ElementTree(blob).write(
                            join(dbdir, dbfile + ".blob"))
                    elif action == "remove":
                        dbfile = blob_index[lang][blobname]
                        del blob_index[lang][blobname]
                        blob_index_has_changed = True
                        # XXX What to do on removal failure?
                        log.debug("fs-write: remove %s|%s blob '%s/%s'",
                                  self.lang, lang, dhash, dbfile)
                        try:
                            os.remove(join(self.base_dir, dhash,
                                           dbfile + ".blob"))
                        except EnvironmentError as ex:
                            self.db.corruption(
                                "MultiLangZone.update_buf_data",
                                "could not remove dbfile for '%s' blob: %s"
                                % (blobname, ex),
                                "ignore")
                    elif action == "update":
                        # Try to only change the dbfile on disk if it is
                        # different.
                        s = BytesIO()
                        if blob.get("src") is None:
                            # For defns_from_pos() support.
                            blob.set("src", buf.path)
                        ET.ElementTree(blob).write(s)
                        new_dbfile_content = s.getvalue()
                        dbfile = blob_index[lang][blobname]
                        dbpath = join(self.base_dir, dhash,
                                      dbfile + ".blob")
                        # PERF: Might be nice to cache the new dbfile
                        # content for the next time this resource is
                        # updated. For files under edit this will be
                        # common. I.e. just for the "editset".
                        try:
                            fin = open(dbpath, 'rb')
                        except (OSError, IOError) as ex:
                            # Technically if the dbfile doesn't exist, this
                            # is a sign of database corruption. No matter
                            # though (for this blob anyway), we are about to
                            # replace it.
                            old_dbfile_content = None
                        else:
                            try:
                                old_dbfile_content = fin.read()
                            finally:
                                fin.close()
                        if new_dbfile_content != old_dbfile_content:
                            if not exists(dirname(dbpath)):
                                self._mk_dbdir(dirname(dbpath), dir)
                            # XXX What to do if fail to write out file?
                            log.debug("fs-write: %s|%s blob '%s/%s'",
                                      self.lang, lang, dhash, dbfile)
                            fout = open(dbpath, 'wb')
                            try:
                                fout.write(new_dbfile_content)
                            finally:
                                fout.close()

            if res_index_has_changed:
                self.changed_index(dir, "res_index")
            if blob_index_has_changed:
                self.changed_index(dir, "blob_index")
            if is_hits_from_lpath_lang and toplevelname_index_has_changed:
                self.changed_index(dir, "toplevelname_index")
        finally:
            self._release_lock()
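    # A worked example (hypothetical buffer) of the dbfile_changes protocol
    # above: if the previous scan of a buffer produced a PHP blob "foo" and
    # a JavaScript blob "foo", and the new scan_tree contains only the PHP
    # one (now modified), the two loops build:
    #
    #   dbfile_changes = [("update", "PHP", "foo", <blob elem>),
    #                     ("remove", "JavaScript", "foo", None)]
    #
    # "update" rewrites the .blob file only if its serialized content
    # differs; "remove" deletes the .blob file and drops the blob_index
    # entry.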
    def _add_res(self, res, lang, name, ver):
        log.debug("%s stdlibs: add %s", lang, res)
        cix_path = res.path
        try:
            tree = tree_from_cix_path(cix_path)
        except ET.XMLParserError as ex:
            log.warn("could not load %s stdlib from `%s' (%s): skipping",
                     name, cix_path, ex)
            return

        dbdir = join(self.base_dir, name)
        if exists(dbdir):
            log.warn("`db/stdlibs/%s' already exists and should not: "
                     "removing it", name)
            try:
                rmdir(dbdir)
            except OSError as ex:
                log.error("could not remove `%s' to create %s stdlib in "
                          "database (%s): skipping", dbdir, name, ex)
        if not exists(dbdir):
            os.makedirs(dbdir)

        # Create 'blob_index' and 'toplevel*_index' and write out
        # '.blob' files.
        LEN_PREFIX = self.db.LEN_PREFIX
        is_hits_from_lpath_lang = lang in self.db.import_everything_langs
        blob_index = {}            # {blobname -> dbfile}
        toplevelname_index = {}    # {ilk -> toplevelname -> blobnames}
        toplevelprefix_index = {}  # {ilk -> prefix -> toplevelnames}
        for blob in tree.findall("file/scope"):
            assert lang == blob.get("lang"), \
                "Adding %s resource %s to %s blob" % (
                    lang, res, blob.get("lang"))
            blobname = blob.get("name")
            dbfile = self.db.bhash_from_blob_info(cix_path, lang, blobname)
            blob_index[blobname] = dbfile
            ET.ElementTree(blob).write(join(dbdir, dbfile + ".blob"))
            for toplevelname, elem in blob.names.items():
                if "__local__" in elem.get("attributes", "").split():
                    # This name is internal to the stdlib.
                    continue
                ilk = elem.get("ilk") or elem.tag
                bft = toplevelname_index.setdefault(ilk, {})
                if toplevelname not in bft:
                    bft[toplevelname] = set([blobname])
                else:
                    bft[toplevelname].add(blobname)
                prefix = toplevelname[:LEN_PREFIX]
                tfp = toplevelprefix_index.setdefault(ilk, {})
                if prefix not in tfp:
                    tfp[prefix] = set([toplevelname])
                else:
                    tfp[prefix].add(toplevelname)

        self.db.save_pickle(join(dbdir, "blob_index"), blob_index)
        self.db.save_pickle(join(dbdir, "toplevelname_index"),
                            toplevelname_index)
        self.db.save_pickle(join(dbdir, "toplevelprefix_index"),
                            toplevelprefix_index)

        mtime = os.stat(cix_path).st_mtime
        self.res_index[res.area_path] = mtime
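    # Resulting on-disk layout for one stdlib (hypothetical name and
    # hashes), as written by _add_res() above:
    #
    #   db/stdlibs/<name>/
    #       blob_index            # pickled {blobname -> dbfile}
    #       toplevelname_index    # pickled {ilk -> toplevelname -> blobnames}
    #       toplevelprefix_index  # pickled {ilk -> prefix -> toplevelnames}
    #       <bhash>.blob          # one per "file/scope" blob in the CIX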