def __getitem__(self, cpv): """set a cpv to values This shouldn't be overriden in derived classes since it handles the __eclasses__ conversion. that said, if the class handles it, they can override it.""" if self.updates > self.sync_rate: self.commit() self.updates = 0 d = self._getitem(cpv) if self.serialize_eclasses and "_eclasses_" in d: d["_eclasses_"] = reconstruct_eclasses( cpv, d["_eclasses_"], self.validation_chf, paths=self.store_eclass_paths) elif "_eclasses_" not in d: d["_eclasses_"] = {} # Never return INHERITED, since portdbapi.aux_get() will # generate it automatically from _eclasses_, and we want # to omit it in comparisons between cache entries like # those that egencache uses to avoid redundant writes. d.pop("INHERITED", None) mtime = d.get('_mtime_') if mtime is None: raise cache_errors.CacheCorruption(cpv, '_mtime_ field is missing') try: mtime = long(mtime) except ValueError: raise cache_errors.CacheCorruption( cpv, '_mtime_ conversion to long failed: %s' % (mtime, )) d['_mtime_'] = mtime return d
def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True):
    """Returns a dict when handed a string generated by serialize_eclasses."""
    eclasses = eclass_string.strip().split("\t")
    if eclasses == [""]:
        # occasionally this occurs in the fs backends. they suck.
        return {}

    converter = _chf_deserializers.get(chf_type, lambda x: x)

    if paths:
        if len(eclasses) % 3 != 0:
            raise cache_errors.CacheCorruption(
                cpv, "_eclasses_ was of invalid len %i" % len(eclasses))
    elif len(eclasses) % 2 != 0:
        raise cache_errors.CacheCorruption(
            cpv, "_eclasses_ was of invalid len %i" % len(eclasses))
    d = {}
    try:
        i = iter(eclasses)
        if paths:
            # The old format contains paths that will be discarded.
            for name, path, val in zip(i, i, i):
                d[name] = (path, converter(val))
        else:
            for name, val in zip(i, i):
                d[name] = converter(val)
    except IndexError:
        raise cache_errors.CacheCorruption(
            cpv, "_eclasses_ was of invalid len %i" % len(eclasses))
    except ValueError:
        raise cache_errors.CacheCorruption(
            cpv, "_eclasses_ not valid for chf_type {}".format(chf_type))
    del eclasses
    return d
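# A minimal, hypothetical round-trip sketch of the tab-separated
# _eclasses_ string that reconstruct_eclasses() parses above. The eclass
# names, paths, and mtimes are invented for illustration, and int()
# stands in for whatever _chf_deserializers maps 'mtime' to.
def _demo_reconstruct_eclasses_format():
    serialized = ("eutils\t/usr/portage/eclass\t1612345678"
        "\tflag-o-matic\t/usr/portage/eclass\t1612345679")
    fields = iter(serialized.split("\t"))
    # paths=True layout: (name, path, chf) triples
    return dict((name, (path, int(val)))
        for name, path, val in zip(fields, fields, fields))
    # -> {'eutils': ('/usr/portage/eclass', 1612345678),
    #     'flag-o-matic': ('/usr/portage/eclass', 1612345679)}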
def reconstruct_eclasses(cpv, eclass_string): """returns a dict when handed a string generated by serialize_eclasses""" eclasses = eclass_string.rstrip().lstrip().split("\t") if eclasses == [""]: # occasionally this occurs in the fs backends. they suck. return {} if len(eclasses) % 2 != 0 and len(eclasses) % 3 != 0: raise cache_errors.CacheCorruption( cpv, "_eclasses_ was of invalid len %i" % len(eclasses)) d = {} try: if eclasses[1].isdigit(): for x in range(0, len(eclasses), 2): d[eclasses[x]] = ("", long(eclasses[x + 1])) else: # The old format contains paths that will be discarded. for x in range(0, len(eclasses), 3): d[eclasses[x]] = (eclasses[x + 1], long(eclasses[x + 2])) except IndexError: raise cache_errors.CacheCorruption( cpv, "_eclasses_ was of invalid len %i" % len(eclasses)) except ValueError: raise cache_errors.CacheCorruption( cpv, "_eclasses_ mtime conversion to long failed") del eclasses return d
def _setitem(self, cpv, values):
    try:
        # insert.
        try:
            pkgid = self._insert_cpv(cpv)
        except self._BaseError as e:
            raise cache_errors.CacheCorruption(cpv, e)

        # __getitem__ fills out missing values,
        # so we store only what's handed to us and is a known key
        db_values = []
        for key in self._known_keys:
            if key in values and values[key]:
                db_values.append({"key": key, "value": values[key]})

        if len(db_values) > 0:
            try:
                self.con.executemany(
                    "INSERT INTO %s (pkgid, key, value) VALUES(\"%s\", %%(key)s, %%(value)s)" %
                    (self.SCHEMA_VALUES_NAME, str(pkgid)), db_values)
            except self._BaseError as e:
                raise cache_errors.CacheCorruption(cpv, e)
        if self.autocommits:
            self.commit()
    except SystemExit:
        raise
    except Exception:
        if not self.autocommits:
            try:
                self.db.rollback()
            except self._BaseError:
                pass
        raise
def _setitem(self, cpv, values): s = cpv.rfind("/") fp=os.path.join(self.location,cpv[:s],".update.%i.%s" % (os.getpid(), cpv[s+1:])) try: myf = codecs.open(_unicode_encode(fp, encoding=_encodings['fs'], errors='strict'), mode='w', encoding=_encodings['repo.content'], errors='backslashreplace') except (OSError, IOError) as e: if errno.ENOENT == e.errno: try: self._ensure_dirs(cpv) myf = codecs.open(_unicode_encode(fp, encoding=_encodings['fs'], errors='strict'), mode='w', encoding=_encodings['repo.content'], errors='backslashreplace') except (OSError, IOError) as e: raise cache_errors.CacheCorruption(cpv, e) else: raise cache_errors.CacheCorruption(cpv, e) for x in self.auxdbkey_order: myf.write(values.get(x,"")+"\n") myf.close() self._ensure_access(fp, mtime=values["_mtime_"]) #update written. now we move it. new_fp = os.path.join(self.location,cpv) try: os.rename(fp, new_fp) except (OSError, IOError) as e: os.remove(fp) raise cache_errors.CacheCorruption(cpv, e)
def _setitem(self, cpv, values):
    with tempfile.NamedTemporaryFile(delete=False, dir=self.location,
        prefix=cpv.replace('/', '_')) as temp:
        temp.close()
        try:
            with io.open(temp.name, mode='w',
                encoding=_encodings['repo.content'],
                errors='backslashreplace') as myf:
                for k in self._write_keys:
                    v = values.get(k)
                    if not v:
                        continue
                    # NOTE: This format string requires unicode_literals, so that
                    # k and v are coerced to unicode, in order to prevent TypeError
                    # when writing raw bytes to TextIOWrapper with Python 2.
                    myf.write("%s=%s\n" % (k, v))

            self._ensure_access(temp.name)

            # Update written, we can move it.
            new_fp = os.path.join(self.location, cpv)
            try:
                os.rename(temp.name, new_fp)
            except OSError as e:
                if e.errno == errno.ENOENT:
                    self._ensure_dirs(cpv)
                    os.rename(temp.name, new_fp)
                else:
                    raise cache_errors.CacheCorruption(cpv, e)
        except EnvironmentError as e:
            os.remove(temp.name)
            raise cache_errors.CacheCorruption(cpv, e)
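# The temp-file-plus-rename dance above is the standard POSIX idiom for
# atomic replacement: readers see either the old cache entry or the
# complete new one, never a partial write. A minimal standalone sketch
# of the same idiom (not portage code; names are illustrative):
import os
import tempfile

def _atomic_write(path, text):
    fd, tmp = tempfile.mkstemp(dir=os.path.dirname(path) or ".")
    try:
        with os.fdopen(fd, "w") as f:
            f.write(text)
        # rename() is atomic when tmp and path are on the same filesystem,
        # which is why the temp file is created in the destination directory.
        os.rename(tmp, path)
    except OSError:
        os.remove(tmp)
        raise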
def _getitem(self, cpv):
    # Don't use os.path.join, for better performance.
    fp = self.location + _os.sep + cpv
    try:
        with io.open(
            _unicode_encode(fp, encoding=_encodings["fs"], errors="strict"),
            mode="r",
            encoding=_encodings["repo.content"],
            errors="replace",
        ) as myf:
            lines = myf.read().split("\n")
            if not lines[-1]:
                lines.pop()
            d = self._parse_data(lines, cpv)
            if "_mtime_" not in d:
                # Backward compatibility with old cache
                # that uses mtime mangling.
                d["_mtime_"] = _os.fstat(myf.fileno())[stat.ST_MTIME]
            return d
    except (IOError, OSError) as e:
        if e.errno != errno.ENOENT:
            raise cache_errors.CacheCorruption(cpv, e)
        raise KeyError(cpv, e)
def _getitem(self, cpv): cursor = self._db_cursor cursor.execute("select * from %s where %s=%s" % \ (self._db_table["packages"]["table_name"], self._db_table["packages"]["package_key"], self._db_escape_string(cpv))) result = cursor.fetchall() if len(result) == 1: pass elif len(result) == 0: raise KeyError(cpv) else: raise cache_errors.CacheCorruption(cpv, "key is not unique") result = result[0] d = {} allowed_keys_set = self._allowed_keys_set for column_index, column_info in enumerate(cursor.description): k = column_info[0] if k in allowed_keys_set: v = result[column_index] if v is None: # This happens after a new empty column has been added. v = "" d[k] = v return d
def iteritems(self):
    try:
        self.con.execute(
            "SELECT cpv, key, value FROM %s NATURAL JOIN %s "
            "WHERE label=%s" % (self.SCHEMA_PACKAGE_NAME,
            self.SCHEMA_VALUES_NAME, self.label))
    except self._BaseError as e:
        # No single cpv is involved here, so key the error on the label.
        raise cache_errors.CacheCorruption(self.label, e)

    oldcpv = None
    l = []
    for x, y, v in self.con.fetchall():
        if oldcpv != x:
            if oldcpv is not None:
                d = dict(l)
                if "_eclasses_" in d:
                    d["_eclasses_"] = reconstruct_eclasses(oldcpv, d["_eclasses_"])
                else:
                    d["_eclasses_"] = {}
                yield oldcpv, d
            del l[:]  # list.clear() is unavailable in Python 2
            oldcpv = x
        l.append((y, v))
    if oldcpv is not None:
        d = dict(l)
        if "_eclasses_" in d:
            d["_eclasses_"] = reconstruct_eclasses(oldcpv, d["_eclasses_"])
        else:
            d["_eclasses_"] = {}
        yield oldcpv, d
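# An equivalent grouping sketch using itertools.groupby. Like the loop
# above, it assumes the rows arrive clustered by cpv; the _eclasses_
# deserialization is elided. This is an illustration, not the module's
# actual code.
from itertools import groupby

def _iter_entries(rows):
    # rows: iterable of (cpv, key, value) tuples
    for cpv, group in groupby(rows, key=lambda row: row[0]):
        d = dict((key, value) for _, key, value in group)
        d.setdefault("_eclasses_", {})  # real code deserializes this string
        yield cpv, d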
def _getitem(self, cpv):
    # Don't use os.path.join, for better performance.
    fp = self.location + _os.sep + cpv
    try:
        myf = codecs.open(_unicode_encode(fp,
            encoding=_encodings['fs'], errors='strict'),
            mode='r', encoding=_encodings['repo.content'],
            errors='replace')
        try:
            lines = myf.read().split("\n")
            if not lines[-1]:
                lines.pop()
            d = self._parse_data(lines, cpv)
            if '_mtime_' not in d:
                # Backward compatibility with old cache
                # that uses mtime mangling.
                d['_mtime_'] = _os.fstat(myf.fileno())[stat.ST_MTIME]
            return d
        finally:
            myf.close()
    except (IOError, OSError) as e:
        if e.errno != errno.ENOENT:
            raise cache_errors.CacheCorruption(cpv, e)
        raise KeyError(cpv, e)
def _setitem(self, cpv, values): s = cpv.rfind("/") fp = os.path.join(self.location, cpv[:s], ".update.%i.%s" % (os.getpid(), cpv[s + 1:])) try: myf = io.open(_unicode_encode(fp, encoding=_encodings['fs'], errors='strict'), mode='w', encoding=_encodings['repo.content'], errors='backslashreplace') except (IOError, OSError) as e: if errno.ENOENT == e.errno: try: self._ensure_dirs(cpv) myf = io.open(_unicode_encode(fp, encoding=_encodings['fs'], errors='strict'), mode='w', encoding=_encodings['repo.content'], errors='backslashreplace') except (OSError, IOError) as e: raise cache_errors.CacheCorruption(cpv, e) else: raise cache_errors.CacheCorruption(cpv, e) try: for k in self._write_keys: v = values.get(k) if not v: continue # NOTE: This format string requires unicode_literals, so that # k and v are coerced to unicode, in order to prevent TypeError # when writing raw bytes to TextIOWrapper with Python 2. myf.write("%s=%s\n" % (k, v)) finally: myf.close() self._ensure_access(fp) #update written. now we move it. new_fp = os.path.join(self.location, cpv) try: os.rename(fp, new_fp) except (OSError, IOError) as e: os.remove(fp) raise cache_errors.CacheCorruption(cpv, e)
def _delitem(self, cpv):
    try:
        os.remove(os.path.join(self.location, cpv))
    except OSError as e:
        if errno.ENOENT == e.errno:
            raise KeyError(cpv)
        else:
            raise cache_errors.CacheCorruption(cpv, e)
def _setitem(self, cpv, values):
    s = cpv.rfind("/")
    fp = os.path.join(self.location, cpv[:s],
        ".update.%i.%s" % (os.getpid(), cpv[s + 1:]))
    try:
        myf = codecs.open(_unicode_encode(fp,
            encoding=_encodings['fs'], errors='strict'),
            mode='w', encoding=_encodings['repo.content'],
            errors='backslashreplace')
    except (IOError, OSError) as e:
        if errno.ENOENT == e.errno:
            try:
                self._ensure_dirs(cpv)
                myf = codecs.open(_unicode_encode(fp,
                    encoding=_encodings['fs'], errors='strict'),
                    mode='w', encoding=_encodings['repo.content'],
                    errors='backslashreplace')
            except (OSError, IOError) as e:
                raise cache_errors.CacheCorruption(cpv, e)
        else:
            raise cache_errors.CacheCorruption(cpv, e)

    try:
        for k in self._write_keys:
            v = values.get(k)
            if not v:
                continue
            myf.write("%s=%s\n" % (k, v))
    finally:
        myf.close()

    self._ensure_access(fp)

    # update written. now we move it.
    new_fp = os.path.join(self.location, cpv)
    try:
        os.rename(fp, new_fp)
    except (OSError, IOError) as e:
        os.remove(fp)
        raise cache_errors.CacheCorruption(cpv, e)
def _setitem(self, cpv, values):
    try:
        fd, fp = tempfile.mkstemp(dir=self.location)
    except EnvironmentError as e:
        raise cache_errors.CacheCorruption(cpv, e)

    with io.open(fd, mode='w',
        encoding=_encodings['repo.content'],
        errors='backslashreplace') as myf:
        for k in self._write_keys:
            v = values.get(k)
            if not v:
                continue
            # NOTE: This format string requires unicode_literals, so that
            # k and v are coerced to unicode, in order to prevent TypeError
            # when writing raw bytes to TextIOWrapper with Python 2.
            myf.write("%s=%s\n" % (k, v))

    self._ensure_access(fp)

    # update written. now we move it.
    new_fp = os.path.join(self.location, cpv)
    try:
        os.rename(fp, new_fp)
    except EnvironmentError as e:
        success = False
        try:
            if errno.ENOENT == e.errno:
                try:
                    self._ensure_dirs(cpv)
                    os.rename(fp, new_fp)
                    success = True
                except EnvironmentError as e:
                    raise cache_errors.CacheCorruption(cpv, e)
            else:
                raise cache_errors.CacheCorruption(cpv, e)
        finally:
            if not success:
                os.remove(fp)
def _getitem(self, cpv):
    d = {}
    try:
        myf = codecs.open(_unicode_encode(
            os.path.join(self.location, cpv),
            encoding=_encodings['fs'], errors='strict'),
            mode='r', encoding=_encodings['repo.content'],
            errors='replace')
        for k, v in zip(self.auxdbkey_order, myf):
            d[k] = v.rstrip("\n")
    except (OSError, IOError) as e:
        if errno.ENOENT == e.errno:
            raise KeyError(cpv)
        raise cache_errors.CacheCorruption(cpv, e)

    try:
        d["_mtime_"] = os.fstat(myf.fileno())[stat.ST_MTIME]
    except OSError as e:
        myf.close()
        raise cache_errors.CacheCorruption(cpv, e)
    myf.close()
    return d
def __getitem__(self, cpv): """set a cpv to values This shouldn't be overriden in derived classes since it handles the __eclasses__ conversion. that said, if the class handles it, they can override it.""" if self.updates > self.sync_rate: self.commit() self.updates = 0 d = self._getitem(cpv) if self.serialize_eclasses and "_eclasses_" in d: d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"]) elif "_eclasses_" not in d: d["_eclasses_"] = {} mtime = d.get('_mtime_') if mtime is None: raise cache_errors.CacheCorruption(cpv, '_mtime_ field is missing') try: mtime = long(mtime) except ValueError: raise cache_errors.CacheCorruption( cpv, '_mtime_ conversion to long failed: %s' % (mtime, )) d['_mtime_'] = mtime return d
def _setitem(self, cpv, values): try: fd, fp = tempfile.mkstemp(dir=self.location) except EnvironmentError as e: raise cache_errors.CacheCorruption(cpv, e) with io.open(fd, mode="w", encoding=_encodings["repo.content"], errors="backslashreplace") as myf: for k in self._write_keys: v = values.get(k) if not v: continue myf.write("%s=%s\n" % (k, v)) self._ensure_access(fp) # update written. now we move it. new_fp = os.path.join(self.location, cpv) try: os.rename(fp, new_fp) except EnvironmentError as e: success = False try: if errno.ENOENT == e.errno: try: self._ensure_dirs(cpv) os.rename(fp, new_fp) success = True except EnvironmentError as e: raise cache_errors.CacheCorruption(cpv, e) else: raise cache_errors.CacheCorruption(cpv, e) finally: if not success: os.remove(fp)
def __contains__(self, cpv):
    cursor = self._db_cursor
    cursor.execute(" ".join([
        "SELECT %s FROM %s" % (
            self._db_table["packages"]["package_id"],
            self._db_table["packages"]["table_name"]),
        "WHERE %s=%s" % (
            self._db_table["packages"]["package_key"],
            self._db_escape_string(cpv))
    ]))
    result = cursor.fetchall()
    if len(result) == 0:
        return False
    if len(result) == 1:
        return True
    raise cache_errors.CacheCorruption(cpv, "key is not unique")
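# For comparison, the stdlib sqlite3 module can express the same
# membership test with parameter binding instead of manual escaping.
# The schema below is invented for illustration and is not portage's
# actual cache schema.
import sqlite3

def _demo_contains():
    con = sqlite3.connect(":memory:")
    con.execute("CREATE TABLE packages (pkgid INTEGER PRIMARY KEY, cpv TEXT UNIQUE)")
    con.execute("INSERT INTO packages (cpv) VALUES (?)", ("dev-lang/python-3.11",))
    cur = con.execute("SELECT 1 FROM packages WHERE cpv=?", ("dev-lang/python-3.11",))
    return cur.fetchone() is not None  # True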
def _getitem(self, cpv):
    try:
        self.con.execute(
            "SELECT key, value FROM %s NATURAL JOIN %s "
            "WHERE label=%s AND cpv=%s" % (self.SCHEMA_PACKAGE_NAME,
            self.SCHEMA_VALUES_NAME, self.label, self._sfilter(cpv)))
    except self._BaseError as e:
        raise cache_errors.CacheCorruption(cpv, e)
    rows = self.con.fetchall()

    if len(rows) == 0:
        raise KeyError(cpv)

    vals = dict([(k, "") for k in self._known_keys])
    vals.update(dict(rows))
    return vals
def _delitem(self, cpv): """delete a cpv cache entry derived RDBM classes for this *must* either support cascaded deletes, or override this method""" try: try: self.con.execute("DELETE FROM %s WHERE label=%s AND cpv=%s" % \ (self.SCHEMA_PACKAGE_NAME, self.label, self._sfilter(cpv))) if self.autocommits: self.commit() except self._BaseError as e: raise cache_errors.CacheCorruption(self, cpv, e) if self.con.rowcount <= 0: raise KeyError(cpv) except SystemExit: raise except Exception: if not self.autocommits: self.db.rollback() # yes, this can roll back a lot more then just the delete. deal. raise
def _getitem(self, cpv): cursor = self._db_cursor cursor.execute("select * from %s where %s=%s" % \ (self._db_table["packages"]["table_name"], self._db_table["packages"]["package_key"], self._db_escape_string(cpv))) result = cursor.fetchall() if len(result) == 1: pass elif len(result) == 0: raise KeyError(cpv) else: raise cache_errors.CacheCorruption(cpv, "key is not unique") d = {} internal_columns = self._db_table["packages"]["internal_columns"] column_index = -1 for k in self._db_table["packages"]["columns"]: column_index += 1 if k not in internal_columns: d[k] = result[0][column_index] return d
def _parse_data(self, data, cpv):
    _hashed_re_match = self._hashed_re.match
    d = {}
    for line in data:
        hashed_match = _hashed_re_match(line)
        if hashed_match is None:
            d.clear()
            try:
                for i, key in enumerate(self.auxdbkey_order):
                    d[key] = data[i]
            except IndexError:
                pass
            break
        else:
            d[hashed_match.group(1)] = hashed_match.group(2)

    if "_eclasses_" not in d:
        if "INHERITED" in d:
            if self.ec is None:
                self.ec = portage.eclass_cache.cache(self.location[:-15])
            getter = attrgetter(self.validation_chf)
            try:
                ec_data = self.ec.get_eclass_data(d["INHERITED"].split())
                d["_eclasses_"] = dict((k, (v.eclass_dir, getter(v)))
                    for k, v in ec_data.items())
            except KeyError as e:
                # INHERITED contains a non-existent eclass.
                raise cache_errors.CacheCorruption(cpv, e)
        else:
            d["_eclasses_"] = {}
    elif isinstance(d["_eclasses_"], basestring):
        # We skip this if flat_hash.database._parse_data() was called above
        # because it calls reconstruct_eclasses() internally.
        d["_eclasses_"] = reconstruct_eclasses(None, d["_eclasses_"])
    return d
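# Sketch of the two line shapes _parse_data() handles. The actual
# _hashed_re pattern lives elsewhere in the module; the regex below is
# an assumption shown only for illustration. It matches the KEY=value
# form, while unhashed caches carry bare values ordered by
# auxdbkey_order.
import re

_demo_hashed_re = re.compile(r'^(\w+)=([^\n]*)$')

def _demo_classify(line):
    m = _demo_hashed_re.match(line)
    return (m.group(1), m.group(2)) if m else None

# _demo_classify("EAPI=7") -> ('EAPI', '7'); _demo_classify("7") -> None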
def _setitem(self, cpv, values): if "_eclasses_" in values: values = ProtectedDict(values) values["INHERITED"] = ' '.join(sorted(values["_eclasses_"])) new_content = [] for k in self.auxdbkey_order: new_content.append(values.get(k, '')) new_content.append('\n') for i in range(magic_line_count - len(self.auxdbkey_order)): new_content.append('\n') new_content = ''.join(new_content) new_content = _unicode_encode(new_content, _encodings['repo.content'], errors='backslashreplace') new_fp = os.path.join(self.location, cpv) try: f = open( _unicode_encode(new_fp, encoding=_encodings['fs'], errors='strict'), 'rb') except EnvironmentError: pass else: try: try: existing_st = os.fstat(f.fileno()) existing_content = f.read() finally: f.close() except EnvironmentError: pass else: existing_mtime = existing_st[stat.ST_MTIME] if values['_mtime_'] == existing_mtime and \ existing_content == new_content: return if self.raise_stat_collision and \ values['_mtime_'] == existing_mtime and \ len(new_content) == existing_st.st_size: raise cache_errors.StatCollision(cpv, new_fp, existing_mtime, existing_st.st_size) s = cpv.rfind("/") fp = os.path.join(self.location, cpv[:s], ".update.%i.%s" % (os.getpid(), cpv[s + 1:])) try: myf = open( _unicode_encode(fp, encoding=_encodings['fs'], errors='strict'), 'wb') except EnvironmentError as e: if errno.ENOENT == e.errno: try: self._ensure_dirs(cpv) myf = open( _unicode_encode(fp, encoding=_encodings['fs'], errors='strict'), 'wb') except EnvironmentError as e: raise cache_errors.CacheCorruption(cpv, e) else: raise cache_errors.CacheCorruption(cpv, e) try: myf.write(new_content) finally: myf.close() self._ensure_access(fp, mtime=values["_mtime_"]) try: os.rename(fp, new_fp) except EnvironmentError as e: try: os.unlink(fp) except EnvironmentError: pass raise cache_errors.CacheCorruption(cpv, e)
def _parse_data(self, data, cpv):
    try:
        return dict(x.split("=", 1) for x in data)
    except ValueError as e:
        # If a line is missing an "=", the split length is 1 instead of 2.
        raise cache_errors.CacheCorruption(cpv, e)
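# Illustration of the one-KEY=value-per-line format _parse_data()
# expects, with invented sample data; split("=", 1) keeps any "=" that
# appears inside the value itself.
def _demo_parse_data():
    lines = ["EAPI=7", "SLOT=0", "DEPEND=dev-libs/foo[bar=]"]
    return dict(x.split("=", 1) for x in lines)
    # -> {'EAPI': '7', 'SLOT': '0', 'DEPEND': 'dev-libs/foo[bar=]'}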
def __getitem__(self, cpv): """set a cpv to values This shouldn't be overriden in derived classes since it handles the __eclasses__ conversion. that said, if the class handles it, they can override it.""" if self.updates > self.sync_rate: self.commit() self.updates = 0 d = self._getitem(cpv) try: chf_types = self.chf_types except AttributeError: chf_types = (self.validation_chf, ) if self.serialize_eclasses and "_eclasses_" in d: for chf_type in chf_types: if '_%s_' % chf_type not in d: # Skip the reconstruct_eclasses call, since it's # a waste of time if it contains a different chf_type # than the current one. In the past, it was possible # for reconstruct_eclasses called with chf_type='md5' # to "successfully" return invalid data here, because # it was unable to distinguish between md5 data and # mtime data. continue try: d["_eclasses_"] = reconstruct_eclasses( cpv, d["_eclasses_"], chf_type, paths=self.store_eclass_paths) except cache_errors.CacheCorruption: if chf_type is chf_types[-1]: raise else: break else: raise cache_errors.CacheCorruption( cpv, 'entry does not contain a recognized chf_type') elif "_eclasses_" not in d: d["_eclasses_"] = {} # Never return INHERITED, since portdbapi.aux_get() will # generate it automatically from _eclasses_, and we want # to omit it in comparisons between cache entries like # those that egencache uses to avoid redundant writes. d.pop("INHERITED", None) mtime_required = not any( d.get('_%s_' % x) for x in chf_types if x != 'mtime') mtime = d.get('_mtime_') if not mtime: if mtime_required: raise cache_errors.CacheCorruption(cpv, '_mtime_ field is missing') d.pop('_mtime_', None) else: try: mtime = int(mtime) except ValueError: raise cache_errors.CacheCorruption( cpv, '_mtime_ conversion to int failed: %s' % (mtime, )) d['_mtime_'] = mtime return d