def delete(self, username, reponame):
    """Mark the resource identified by `key` as deleted.

    Checks whether the key exists and whether the most recent change
    already is a delete; otherwise appends a `CSet.DELETE` entry
    without any blob data.

    Raises:
        HTTPError(403): requester is not the repository owner.
        HTTPError(400): missing key, unknown key, or a timestamp that
            is not after the latest changeset.
        HTTPError(404): repository does not exist.
    """
    key = self.get_query_argument("key")

    # Only the repository owner may delete resources.
    if username != self.current_user.name:
        raise HTTPError(403)

    if not key:
        raise HTTPError(400)

    datestr = self.get_query_argument("datetime", None)
    # FIX: conditional expression instead of the fragile `and/or`
    # idiom, which would silently fall through to `now()` if `date()`
    # ever returned a falsy value.
    ts = date(datestr, QSDATEFMT) if datestr else now()

    try:
        repo = (Repo
            .select(Repo.id)
            .join(User)
            .where((User.name == username) & (Repo.name == reponame))
            .naive()
            .get())
    except Repo.DoesNotExist:
        raise HTTPError(404)

    sha = shasum(key.encode("utf-8"))

    try:
        last = (CSet
            .select(CSet.time, CSet.type)
            .where((CSet.repo == repo) & (CSet.hkey == sha))
            .order_by(CSet.time.desc())
            .limit(1)
            .naive()
            .get())
    except CSet.DoesNotExist:
        # No changeset was found for the given key -
        # the resource does not exist.
        raise HTTPError(400)

    if not ts > last.time:
        # Appended timestamps must be monotonically increasing!
        raise HTTPError(400)

    if last.type == CSet.DELETE:
        # The resource was deleted already, return instantly.
        return self.finish()

    # Insert the new "delete" change.
    CSet.create(repo=repo, hkey=sha, time=ts, type=CSet.DELETE, len=0)
def delete(self, username, reponame):
    """Append a `CSet.DELETE` changeset for the resource named by `key`.

    Verifies the key exists and that its latest change is not already a
    delete; the delete entry carries no blob data.

    Raises:
        HTTPError(403): requester does not own the repository.
        HTTPError(400): missing or unknown key, or non-monotonic timestamp.
        HTTPError(404): repository not found.
    """
    key = self.get_query_argument("key")

    # Only the repository owner may delete resources.
    if username != self.current_user.name:
        raise HTTPError(403)

    if not key:
        raise HTTPError(400)

    datestr = self.get_query_argument("datetime", None)
    # FIX: replace the `and/or` ternary emulation, which misbehaves
    # when the middle operand is falsy, with a real conditional.
    ts = date(datestr, QSDATEFMT) if datestr else now()

    try:
        repo = (Repo
            .select(Repo.id)
            .join(User)
            .where((User.name == username) & (Repo.name == reponame))
            .naive()
            .get())
    except Repo.DoesNotExist:
        raise HTTPError(404)

    sha = shasum(key.encode("utf-8"))

    try:
        last = (CSet
            .select(CSet.time, CSet.type)
            .where((CSet.repo == repo) & (CSet.hkey == sha))
            .order_by(CSet.time.desc())
            .limit(1)
            .naive()
            .get())
    except CSet.DoesNotExist:
        # No changeset was found for the given key -
        # the resource does not exist.
        raise HTTPError(400)

    if not ts > last.time:
        # Appended timestamps must be monotonically increasing!
        raise HTTPError(400)

    if last.type == CSet.DELETE:
        # The resource was deleted already, return instantly.
        return self.finish()

    # Insert the new "delete" change.
    CSet.create(repo=repo, hkey=sha, time=ts, type=CSet.DELETE, len=0)
def __save_revision(repo, sha, chain, stmts, ts):
    """Persist `stmts` as a new revision at time `ts`.

    Stores either a full snapshot or a directed delta against the state
    reconstructed from `chain`, whichever is smaller (with a cap on the
    accumulated delta-chain size via `SNAPF`).

    Returns:
        None if `stmts` equals the previous state (nothing stored),
        0 after a snapshot or delta has been written.

    Raises:
        ValueError: `ts` is not after the last changeset in `chain`.
    """
    # NOTE: this only checks that `ts` is after the last cset of the
    # supplied chain, not after *all* csets for the key - which allows
    # pushing to any timestamp if the chain is right.
    # FIX: `chain and len(chain) > 0` was redundant; truthiness covers it.
    if chain and not ts > chain[-1].time:
        # Appended timestamps must be monotonically increasing!
        raise ValueError("timestamp must be after the last changeset")

    if len(chain) == 0 or chain[0].type == CSet.DELETE:
        # Provide dummy value for `patch` which is never stored.
        # If we get here, we always store a snapshot later on!
        patch = ""
    else:
        # Reconstruct the previous state of the resource
        prev = __get_revision(repo, sha, chain)

        if stmts == prev:
            # No changes, nothing to be done. Bail out.
            return None

        # Directed delta: "D " lines removed, "A " lines added.
        # FIX: list comprehensions instead of `map(...) + map(...)`,
        # which breaks on Python 3 where map returns an iterator.
        patch = compress(join(
            ["D " + s for s in prev - stmts] +
            ["A " + s for s in stmts - prev], "\n"))

    snapc = compress(join(stmts, "\n"))

    # Calculate the accumulated size of the delta chain including
    # the (potential) patch from the previous to the pushed state.
    accumulated_len = reduce(lambda s, e: s + e.len, chain[1:], 0) + len(patch)

    # FIX: conditional expression instead of the `and/or` idiom.
    base_len = chain[0].len if chain else 0  # base length

    if (len(chain) == 0 or chain[0].type == CSet.DELETE or
        len(snapc) <= len(patch) or SNAPF * base_len <= accumulated_len):
        # Store the current state as a new snapshot
        Blob.create(repo=repo, hkey=sha, time=ts, data=snapc)
        CSet.create(repo=repo, hkey=sha, time=ts, type=CSet.SNAPSHOT,
                    len=len(snapc))
    else:
        # Store a directed delta between the previous and current state
        Blob.create(repo=repo, hkey=sha, time=ts, data=patch)
        CSet.create(repo=repo, hkey=sha, time=ts, type=CSet.DELTA,
                    len=len(patch))

    return 0
def __save_revision_delete(repo, sha, ts):
    """Insert a `CSet.DELETE` changeset at `ts`, repairing neighbors.

    If a delta follows `ts`, it is rebuilt (its reconstruction would
    otherwise be broken by the interposed delete); a following delete
    is removed as redundant; a following snapshot needs no action.

    Raises:
        LookupError: no changesets exist at or before `ts`.
    """
    chain = __get_chain_at_ts(repo, sha, ts)

    # BUGFIX: the original tested `chain[-1]`, which raises IndexError
    # on an empty chain and made the intended LookupError unreachable.
    if not chain:
        raise LookupError

    if chain[-1].type == CSet.DELETE:
        # Resource is already deleted. This is legitimate, so no
        # exception needed.
        # TODO should this be announced to client somehow?
        return

    # There are csets before `ts` and the last one is no delete.
    stmts_next = set()
    cset_next = __get_cset_next_after_ts(repo, sha, ts)
    if cset_next is not None:
        if cset_next.type == CSet.DELTA:
            # If next changeset is Delta, keep next revision statements
            chain_next = __get_chain_at_ts(repo, sha, cset_next.time)
            stmts_next = __get_revision(repo, sha, chain_next)
        elif cset_next.type == CSet.DELETE:
            # If next changeset is Delete, remove it
            __remove_cset(repo, sha, cset_next.time)
        # Nothing to be done if next changeset is a Snapshot

    if __get_cset_at_ts(repo, sha, ts):
        # If there is a cset at the exact time, delete it
        # (reconstruction of next Cset already happened)
        __remove_revision(repo, sha, ts)

    # Insert the new "delete" change
    CSet.create(repo=repo, hkey=sha, time=ts, type=CSet.DELETE, len=0)

    if cset_next is not None and cset_next.type == CSet.DELTA:
        # If next changeset is Delta, reconstruct it on top of the
        # freshly inserted delete.
        __remove_cset(repo, sha, cset_next.time)
        chain = __get_chain_at_ts(repo, sha, cset_next.time)
        __save_revision(repo, sha, chain, stmts_next, cset_next.time)
def put(self, username, reponame):
    """Create a new revision of the resource specified by `key`.

    Parses the request body (default `application/n-triples`), and
    stores it as either a snapshot or a directed delta against the
    previous state, whichever is smaller (bounded by `SNAPF`).

    Raises:
        HTTPError(403): requester does not own the repository.
        HTTPError(400): missing key or non-monotonic timestamp.
        HTTPError(404): repository not found.
        HTTPError(500): SHA collision with a different key.
    """
    fmt = self.request.headers.get("Content-Type", "application/n-triples")
    key = self.get_query_argument("key", None)

    if username != self.current_user.name:
        raise HTTPError(403)

    if not key:
        raise HTTPError(400)

    datestr = self.get_query_argument("datetime", None)
    # FIX: conditional expression instead of the fragile `and/or` idiom.
    ts = date(datestr, QSDATEFMT) if datestr else now()

    try:
        repo = (Repo
            .select(Repo.id)
            .join(User)
            .where((User.name == username) & (Repo.name == reponame))
            .naive()
            .get())
    except Repo.DoesNotExist:
        raise HTTPError(404)

    sha = shasum(key.encode("utf-8"))

    # Fetch the delta chain: the latest non-delta cset (snapshot or
    # delete) plus every changeset after it, in chronological order.
    chain = list(CSet
        .select(CSet.time, CSet.type, CSet.len)
        .where(
            (CSet.repo == repo) &
            (CSet.hkey == sha) &
            (CSet.time >= SQL(
                "COALESCE((SELECT time FROM cset "
                "WHERE repo_id = %s "
                "AND hkey_id = %s "
                "AND type != %s "
                "ORDER BY time DESC "
                "LIMIT 1), 0)",
                repo.id, sha, CSet.DELTA)))
        .order_by(CSet.time)
        .naive())

    if len(chain) > 0 and not ts > chain[-1].time:
        # Appended timestamps must be monotonically increasing!
        raise HTTPError(400)

    if len(chain) == 0:
        # Mapping for `key` likely does not exist:
        # Store the SHA-to-KEY mapping in HMap,
        # looking out for possible collisions.
        try:
            HMap.create(sha=sha, val=key)
        except IntegrityError:
            val = HMap.select(HMap.val).where(HMap.sha == sha).scalar()
            if val != key:
                raise HTTPError(500)

    # Parse and normalize into a set of N-Quad lines
    stmts = parse(self.request.body, fmt)
    snapc = compress(join(stmts, "\n"))

    if len(chain) == 0 or chain[0].type == CSet.DELETE:
        # Provide dummy value for `patch` which is never stored.
        # If we get here, we always store a snapshot later on!
        patch = ""
    else:
        # Reconstruct the previous state of the resource
        prev = set()

        # FIX: materialize the chain times as a list - `map()` yields a
        # one-shot iterator on Python 3, which is unsafe to hand to the
        # peewee IN (`<<`) operator.
        blobs = (Blob
            .select(Blob.data)
            .where(
                (Blob.repo == repo) &
                (Blob.hkey == sha) &
                (Blob.time << [e.time for e in chain]))
            .order_by(Blob.time)
            .naive())

        for i, blob in enumerate(blobs.iterator()):
            data = decompress(blob.data)
            if i == 0:
                # Base snapshot for the delta chain
                prev.update(data.splitlines())
            else:
                # Apply directed delta: "A " adds, anything else removes.
                for line in data.splitlines():
                    mode, stmt = line[0], line[2:]
                    if mode == "A":
                        prev.add(stmt)
                    else:
                        prev.discard(stmt)

        if stmts == prev:
            # No changes, nothing to be done. Bail out.
            return self.finish()

        # FIX: list comprehensions instead of `map(...) + map(...)`,
        # which breaks on Python 3 where map returns an iterator.
        patch = compress(join(
            ["D " + s for s in prev - stmts] +
            ["A " + s for s in stmts - prev], "\n"))

    # Calculate the accumulated size of the delta chain including
    # the (potential) patch from the previous to the pushed state.
    acclen = reduce(lambda s, e: s + e.len, chain[1:], 0) + len(patch)

    # FIX: conditional expression instead of the `and/or` idiom.
    blen = chain[0].len if chain else 0  # base length

    if (len(chain) == 0 or chain[0].type == CSet.DELETE or
        len(snapc) <= len(patch) or SNAPF * blen <= acclen):
        # Store the current state as a new snapshot
        Blob.create(repo=repo, hkey=sha, time=ts, data=snapc)
        CSet.create(repo=repo, hkey=sha, time=ts, type=CSet.SNAPSHOT,
                    len=len(snapc))
    else:
        # Store a directed delta between the previous and current state
        Blob.create(repo=repo, hkey=sha, time=ts, data=patch)
        CSet.create(repo=repo, hkey=sha, time=ts, type=CSet.DELTA,
                    len=len(patch))
def put(self, username, reponame):
    """Store a new revision of the resource named by `key`.

    The request body (default `application/n-triples`) is parsed into a
    statement set and persisted as a snapshot or as a directed delta
    against the reconstructed previous state, whichever is smaller
    (delta-chain growth is capped via `SNAPF`).

    Raises:
        HTTPError(403): requester is not the repository owner.
        HTTPError(400): missing key or non-monotonic timestamp.
        HTTPError(404): repository does not exist.
        HTTPError(500): SHA collision with a different key.
    """
    fmt = self.request.headers.get("Content-Type", "application/n-triples")
    key = self.get_query_argument("key", None)

    if username != self.current_user.name:
        raise HTTPError(403)

    if not key:
        raise HTTPError(400)

    datestr = self.get_query_argument("datetime", None)
    # FIX: real conditional instead of the `and/or` ternary emulation,
    # which misfires when the middle operand is falsy.
    ts = date(datestr, QSDATEFMT) if datestr else now()

    try:
        repo = (Repo
            .select(Repo.id)
            .join(User)
            .where((User.name == username) & (Repo.name == reponame))
            .naive()
            .get())
    except Repo.DoesNotExist:
        raise HTTPError(404)

    sha = shasum(key.encode("utf-8"))

    # Delta chain: latest non-delta changeset plus everything after it.
    chain = list(CSet
        .select(CSet.time, CSet.type, CSet.len)
        .where(
            (CSet.repo == repo) &
            (CSet.hkey == sha) &
            (CSet.time >= SQL(
                "COALESCE((SELECT time FROM cset "
                "WHERE repo_id = %s "
                "AND hkey_id = %s "
                "AND type != %s "
                "ORDER BY time DESC "
                "LIMIT 1), 0)",
                repo.id, sha, CSet.DELTA)))
        .order_by(CSet.time)
        .naive())

    if len(chain) > 0 and not ts > chain[-1].time:
        # Appended timestamps must be monotonically increasing!
        raise HTTPError(400)

    if len(chain) == 0:
        # Mapping for `key` likely does not exist:
        # Store the SHA-to-KEY mapping in HMap,
        # looking out for possible collisions.
        try:
            HMap.create(sha=sha, val=key)
        except IntegrityError:
            val = HMap.select(HMap.val).where(HMap.sha == sha).scalar()
            if val != key:
                raise HTTPError(500)

    # Parse and normalize into a set of N-Quad lines
    stmts = parse(self.request.body, fmt)
    snapc = compress(join(stmts, "\n"))

    if len(chain) == 0 or chain[0].type == CSet.DELETE:
        # Provide dummy value for `patch` which is never stored.
        # If we get here, we always store a snapshot later on!
        patch = ""
    else:
        # Reconstruct the previous state of the resource
        prev = set()

        # FIX: hand the IN (`<<`) operator a concrete list; Python 3's
        # `map()` returns a one-shot iterator.
        blobs = (Blob
            .select(Blob.data)
            .where(
                (Blob.repo == repo) &
                (Blob.hkey == sha) &
                (Blob.time << [e.time for e in chain]))
            .order_by(Blob.time)
            .naive())

        for i, blob in enumerate(blobs.iterator()):
            data = decompress(blob.data)
            if i == 0:
                # Base snapshot for the delta chain
                prev.update(data.splitlines())
            else:
                # Apply directed delta: "A " adds, anything else removes.
                for line in data.splitlines():
                    mode, stmt = line[0], line[2:]
                    if mode == "A":
                        prev.add(stmt)
                    else:
                        prev.discard(stmt)

        if stmts == prev:
            # No changes, nothing to be done. Bail out.
            return self.finish()

        # FIX: list comprehensions replace `map(...) + map(...)`, which
        # is a TypeError on Python 3 (map objects don't concatenate).
        patch = compress(join(
            ["D " + s for s in prev - stmts] +
            ["A " + s for s in stmts - prev], "\n"))

    # Calculate the accumulated size of the delta chain including
    # the (potential) patch from the previous to the pushed state.
    acclen = reduce(lambda s, e: s + e.len, chain[1:], 0) + len(patch)

    # FIX: conditional expression instead of the `and/or` idiom.
    blen = chain[0].len if chain else 0  # base length

    if (len(chain) == 0 or chain[0].type == CSet.DELETE or
        len(snapc) <= len(patch) or SNAPF * blen <= acclen):
        # Store the current state as a new snapshot
        Blob.create(repo=repo, hkey=sha, time=ts, data=snapc)
        CSet.create(repo=repo, hkey=sha, time=ts, type=CSet.SNAPSHOT,
                    len=len(snapc))
    else:
        # Store a directed delta between the previous and current state
        Blob.create(repo=repo, hkey=sha, time=ts, data=patch)
        CSet.create(repo=repo, hkey=sha, time=ts, type=CSet.DELTA,
                    len=len(patch))