def list_blob(url):
    """Add `url` to the dataset's blobs when its extension is accepted.

    The extension reported by ``os.path.splitext`` is looked up in
    ``acceptable``; a URL with any other extension is ignored. Neither
    ``acceptable`` nor ``dset`` is a parameter: both are resolved from the
    surrounding scope. Always returns None.
    """
    extension = os.path.splitext(url)[1]
    if extension not in acceptable:
        return
    # TODO: change to check if url contains valid image
    entry = Blob(url)
    dset.blobs.append(entry)
def __remove_csets(repo, sha):
    """Delete all blobs and changesets stored for `sha` in `repo`.

    The blob rows are deleted first, then the changeset rows.
    """
    # Same filter for both models; Blob must come before CSet.
    for model in (Blob, CSet):
        model.delete().where(model.repo == repo, model.hkey == sha).execute()
def __remove_csets_repo(repo):
    """Delete all blobs and changesets that belong to `repo`.

    The blob rows are deleted first, then the changeset rows.
    """
    # Same filter for both models; Blob must come before CSet.
    for model in (Blob, CSet):
        model.delete().where(model.repo == repo).execute()
def __save_revision(repo, sha, chain, stmts, ts):
    """Store `stmts` as the revision of `sha` in `repo` at time `ts`.

    `chain` is the list of changesets the new revision is appended to; its
    first entry is treated as the base and the remaining entries as the
    deltas accumulated on top of it. Either a full snapshot or a delta
    against the previous state is written, as one Blob plus one CSet.

    The timestamp is only compared with the last changeset of `chain`, not
    with every changeset stored for the key, so any timestamp can be pushed
    as long as it fits the chain.

    Returns None when `stmts` equals the previous state (nothing is
    stored) and 0 after a revision has been written. Raises ValueError when
    `ts` is not greater than the time of the last changeset in `chain`.
    """
    if chain and not ts > chain[-1].time:
        # Appended timestamps must be monotonically increasing!
        raise ValueError

    # With no base to diff against (empty chain, or a chain that starts
    # with a delete) a snapshot is always stored.
    fresh_base = len(chain) == 0 or chain[0].type == CSet.DELETE

    if fresh_base:
        # Dummy value: the patch is never stored on this path.
        patch = ""
    else:
        # Reconstruct the previous state of the resource
        prev = __get_revision(repo, sha, chain)
        if stmts == prev:
            # No changes, nothing to be done. Bail out.
            return None
        removed = ["D " + s for s in prev - stmts]
        added = ["A " + s for s in stmts - prev]
        patch = compress(join(removed + added, "\n"))

    snapc = compress(join(stmts, "\n"))

    # Accumulated size of the delta chain, including the (potential) patch
    # from the previous to the pushed state.
    accumulated_len = sum(e.len for e in chain[1:]) + len(patch)
    base_len = 0
    if len(chain) > 0:
        base_len = chain[0].len or 0

    if (fresh_base or len(snapc) <= len(patch)
            or SNAPF * base_len <= accumulated_len):
        # Store the current state as a new snapshot
        kind, payload = CSet.SNAPSHOT, snapc
    else:
        # Store a directed delta between the previous and current state
        kind, payload = CSet.DELTA, patch

    Blob.create(repo=repo, hkey=sha, time=ts, data=payload)
    CSet.create(repo=repo, hkey=sha, time=ts, type=kind, len=len(payload))
    return 0
def __remove_cset(repo, sha, ts):
    """Delete the changeset stored at `ts` for `sha` in `repo`, then its blob.

    Always returns None. When the changeset does not exist the blob is not
    looked up at all; a missing blob is ignored as well.
    """
    try:
        CSet.get(
            CSet.repo == repo, CSet.hkey == sha, CSet.time == ts
        ).delete_instance()
    except CSet.DoesNotExist:
        return

    try:
        Blob.get(
            Blob.repo == repo, Blob.hkey == sha, Blob.time == ts
        ).delete_instance()
    except Blob.DoesNotExist:
        pass
# Upload handler: validates a BlobForm; on success it appears to write the
# upload to a named temporary file under config.BLOB_DIR (prefix 'image',
# suffix taken from the uploaded filename's extension, delete=False), add a
# Blob to db.session and commit. On validation failure it returns
# jsonify(results=0, errors=form.file.errors).
# NOTE(review): several expressions in this definition are truncated (the
# right-hand side of `upload =`, the arguments of `os.path.basename(`,
# `Blob(` and the first `jsonify(`), so it does not parse as written --
# restore the missing tokens from the original file before changing the code.
def file_upload(): form = BlobForm() if form.validate_on_submit(): print "Uploaded file: " + upload = _, ext = os.path.splitext(upload.filename) with tempfile.NamedTemporaryFile(dir=config.BLOB_DIR, prefix='image', suffix=ext, delete=False) as tmp: filename = os.path.basename( blob = Blob( db.session.add(blob) db.session.commit() return jsonify( else: return jsonify(results=0, errors=form.file.errors)
# PUT handler: creates a new revision of the resource named by the `key`
# query argument, at the time given by the optional `datetime` query argument
# (now() when absent). The request body is parsed according to the
# Content-Type header (default "application/n-triples").
# Error responses visible here: 403 when `username` fails a comparison whose
# right-hand side is missing below, 400 when `key` is absent or the timestamp
# is not greater than the last changeset of the chain, 404 when the repo
# lookup raises Repo.DoesNotExist, 500 when the stored HMap value for the
# key's SHA differs from `key`.
# The new state is stored as a snapshot when there is no usable base (empty
# chain or a chain starting with a delete), when the compressed snapshot is
# not larger than the compressed patch, or when SNAPF * base length <=
# accumulated delta length; otherwise it is stored as a delta. If the parsed
# statements equal the reconstructed previous state, nothing is stored.
# NOTE(review): several expressions are truncated (e.g. `if username != raise`,
# `repo = ( User)`, `list(, CSet.type`, `val = == sha`, `decompress(`), so the
# definition does not parse as written -- restore the missing tokens from the
# original file before changing the code.
def put(self, username, reponame): # Create a new revision of the resource specified by `key`. fmt = self.request.headers.get("Content-Type", "application/n-triples") key = self.get_query_argument("key", None) if username != raise HTTPError(403) if not key: raise HTTPError(400) datestr = self.get_query_argument("datetime", None) ts = datestr and date(datestr, QSDATEFMT) or now() try: repo = ( User).where(( == username) & ( == reponame)).naive().get()) except Repo.DoesNotExist: raise HTTPError(404) sha = shasum(key.encode("utf-8")) chain = list(, CSet.type, CSet.len).where( (CSet.repo == repo) & (CSet.hkey == sha) & (CSet.time >= SQL( "COALESCE((SELECT time FROM cset " "WHERE repo_id = %s " "AND hkey_id = %s " "AND type != %s " "ORDER BY time DESC " "LIMIT 1), 0)",, sha, CSet.DELTA))).order_by( CSet.time).naive()) if len(chain) > 0 and not ts > chain[-1].time: # Appended timestamps must be monotonically increasing! raise HTTPError(400) if len(chain) == 0: # Mapping for `key` likely does not exist: # Store the SHA-to-KEY mapping in HMap, # looking out for possible collisions. try: HMap.create(sha=sha, val=key) except IntegrityError: val = == sha).scalar() if val != key: raise HTTPError(500) # Parse and normalize into a set of N-Quad lines stmts = parse(self.request.body, fmt) snapc = compress(join(stmts, "\n")) if len(chain) == 0 or chain[0].type == CSet.DELETE: # Provide dummy value for `patch` which is never stored. # If we get here, we always store a snapshot later on! 
patch = "" else: # Reconstruct the previous state of the resource prev = set() blobs = ( (Blob.repo == repo) & (Blob.hkey == sha) & (Blob.time << map(lambda e: e.time, chain))).order_by( Blob.time).naive()) for i, blob in enumerate(blobs.iterator()): data = decompress( if i == 0: # Base snapshot for the delta chain prev.update(data.splitlines()) else: for line in data.splitlines(): mode, stmt = line[0], line[2:] if mode == "A": prev.add(stmt) else: prev.discard(stmt) if stmts == prev: # No changes, nothing to be done. Bail out. return self.finish() patch = compress( join( map(lambda s: "D " + s, prev - stmts) + map(lambda s: "A " + s, stmts - prev), "\n")) # Calculate the accumulated size of the delta chain including # the (potential) patch from the previous to the pushed state. acclen = reduce(lambda s, e: s + e.len, chain[1:], 0) + len(patch) blen = len(chain) > 0 and chain[0].len or 0 # base length if (len(chain) == 0 or chain[0].type == CSet.DELETE or len(snapc) <= len(patch) or SNAPF * blen <= acclen): # Store the current state as a new snapshot Blob.create(repo=repo, hkey=sha, time=ts, data=snapc) CSet.create(repo=repo, hkey=sha, time=ts, type=CSet.SNAPSHOT, len=len(snapc)) else: # Store a directed delta between the previous and current state Blob.create(repo=repo, hkey=sha, time=ts, data=patch) CSet.create(repo=repo, hkey=sha, time=ts, type=CSet.DELTA, len=len(patch))
# GET handler with three modes selected by the query arguments `key`,
# `timemap` and `index`:
#   * `key` without `timemap`: rebuilds the resource for `key` as of `ts` from
#     its delta chain (a snapshot followed by deltas, or a single delete) and
#     writes it as "application/n-quads", with "Link" and "Memento-Datetime"
#     headers set;
#   * `key` with `timemap`: writes the list of change times for `key`, as
#     JSON when the Accept header contains "application/json" or "*/*",
#     otherwise as "application/link-format";
#   * `index`: writes one key per line ("text/plain") for the requested
#     `page` of keys whose latest changeset at `ts` is not a delete.
# `ts` comes from the `datetime` query argument, else from the
# "Accept-Datetime" header, else now().
# Error responses visible here: 400 for the combinations index+timemap,
# index+key, timemap without key, or when no mode is selected; 404 when the
# repo lookup raises Repo.DoesNotExist, when no changeset exists for the key,
# or when the chain starts with a delete.
# NOTE(review): several expressions are truncated (e.g. `repo = ( User)`,
# `list(, CSet.type)`, `"://" + +`, `decompress(`, `first = except`,
# `cs = (, CSet.time)`), so the definition does not parse as written --
# restore the missing tokens from the original file before changing the code.
def get(self, username, reponame): timemap = self.get_query_argument("timemap", "false") == "true" index = self.get_query_argument("index", "false") == "true" key = self.get_query_argument("key", None) if (index and timemap) or (index and key) or (timemap and not key): raise HTTPError(400) if self.get_query_argument("datetime", None): datestr = self.get_query_argument("datetime") ts = date(datestr, QSDATEFMT) elif "Accept-Datetime" in self.request.headers: datestr = self.request.headers.get("Accept-Datetime") ts = date(datestr, RFC1123DATEFMT) else: ts = now() try: repo = ( User).where(( == username) & ( == reponame)).naive().get()) except Repo.DoesNotExist: raise HTTPError(404) if key and not timemap: # Recreate the resource for the given key in its latest state - # if no `datetime` was provided - or in the state it was in at # the time indicated by the passed `datetime` argument. self.set_header("Content-Type", "application/n-quads") self.set_header("Vary", "accept-datetime") sha = shasum(key.encode("utf-8")) # Fetch all relevant changes from the last "non-delta" onwards, # ordered by time. The returned delta-chain consists of either: # a snapshot followed by 0 or more deltas, or # a single delete. chain = list(, CSet.type).where((CSet.repo == repo) & ( CSet.hkey == sha) & (CSet.time <= ts) & (CSet.time >= SQL( "COALESCE((SELECT time FROM cset " "WHERE repo_id = %s " "AND hkey_id = %s " "AND time <= %s " "AND type != %s " "ORDER BY time DESC " "LIMIT 1), 0)",, sha, ts, CSet.DELTA))). order_by(CSet.time).naive()) if len(chain) == 0: # A resource does not exist for the given key. 
raise HTTPError(404) timegate_url = (self.request.protocol + "://" + + self.request.path) timemap_url = (self.request.protocol + "://" + + self.request.uri + "&timemap=true") self.set_header( "Link", '<%s>; rel="original"' ', <%s>; rel="timegate"' ', <%s>; rel="timemap"' % (key, timegate_url, timemap_url)) self.set_header("Memento-Datetime", chain[-1].time.strftime(RFC1123DATEFMT)) if chain[0].type == CSet.DELETE: # The last change was a delete. Return a 404 response with # appropriate "Link" and "Memento-Datetime" headers. raise HTTPError(404) # Load the data required in order to restore the resource state. blobs = ( (Blob.repo == repo) & (Blob.hkey == sha) & (Blob.time << map(lambda e: e.time, chain))).order_by( Blob.time).naive()) if len(chain) == 1: # Special case, where we can simply return # the blob data of the snapshot. snap = blobs.first().data return self.finish(decompress(snap)) stmts = set() for i, blob in enumerate(blobs.iterator()): data = decompress( if i == 0: # Base snapshot for the delta chain stmts.update(data.splitlines()) else: for line in data.splitlines(): mode, stmt = line[0], line[2:] if mode == "A": stmts.add(stmt) else: stmts.discard(stmt) self.write(join(stmts, "\n")) elif key and timemap: # Generate a timemap containing historic change information # for the requested key. The timemap is in the default link-format # or as JSON ( sha = shasum(key.encode("utf-8")) csets = ( CSet.time).where((CSet.repo == repo) & (CSet.hkey == sha)).order_by( CSet.time.desc()).naive()) # TODO: Paginate? csit = csets.iterator() try: first = except StopIteration: # Resource for given key does not exist. 
raise HTTPError(404) req = self.request base = req.protocol + "://" + + req.path accept = self.request.headers.get("Accept", "") if "application/json" in accept or "*/*" in accept: self.set_header("Content-Type", "application/json") self.write('{"original_uri": ' + json_encode(key)) self.write(', "mementos": {"list":[') m = ('{{"datetime": "{0}", "uri": "' + base + '?key=' + url_escape(key) + '&datetime={1}"}}') self.write( m.format(first.time.isoformat(), first.time.strftime(QSDATEFMT))) for cs in csit: self.write(', ' + m.format(cs.time.isoformat(), cs.time.strftime(QSDATEFMT))) self.write(']}') self.write('}') else: m = (',\n' '<' + base + '?key=' + url_escape(key) + '&datetime={0}>' '; rel="memento"' '; datetime="{1}"' '; type="application/n-quads"') self.set_header("Content-Type", "application/link-format") self.write('<' + key + '>; rel="original"') self.write( m.format(first.time.strftime(QSDATEFMT), first.time.strftime(RFC1123DATEFMT))) for cs in csit: self.write( m.format(cs.time.strftime(QSDATEFMT), cs.time.strftime(RFC1123DATEFMT))) elif index: # Generate an index of all URIs contained in the dataset at the # provided point in time or in its current state. self.set_header("Vary", "accept-datetime") self.set_header("Content-Type", "text/plain") page = int(self.get_query_argument("page", "1")) # Subquery for selecting max. time per hkey group mx = ( CSet.hkey, fn.Max(CSet.time).alias("maxtime")).where( (CSet.repo == repo) & (CSet.time <= ts)).group_by( CSet.hkey).order_by(CSet.hkey).paginate( page, INDEX_PAGE_SIZE).alias("mx")) # Query for all the relevant csets (those with max. time values) cs = (, CSet.time).join( mx, on=((CSet.hkey == mx.c.hkey_id) & (CSet.time == mx.c.maxtime) )).where((CSet.repo == repo) & (CSet.type != CSet.DELETE)).alias("cs")) # Join with the hmap table to retrieve the plain key values hm = ( cs, on=(HMap.sha == cs.c.hkey_id)).naive()) for h in hm.iterator(): self.write(h.val + "\n") else: raise HTTPError(400)
# PUT handler: creates a new revision of the resource named by the `key`
# query argument, at the time given by the optional `datetime` query argument
# (now() when absent). The request body is parsed according to the
# Content-Type header (default "application/n-triples").
# Error responses visible here: 403 when `username` fails a comparison whose
# right-hand side is missing below, 400 when `key` is absent or the timestamp
# is not greater than the last changeset of the chain, 404 when the repo
# lookup raises Repo.DoesNotExist, 500 when the stored HMap value for the
# key's SHA differs from `key`.
# The new state is stored as a snapshot when there is no usable base (empty
# chain or a chain starting with a delete), when the compressed snapshot is
# not larger than the compressed patch, or when SNAPF * base length <=
# accumulated delta length; otherwise it is stored as a delta. If the parsed
# statements equal the reconstructed previous state, nothing is stored.
# NOTE(review): several expressions are truncated (e.g. `if username != raise`,
# `.select( .join(User)`, `.where(( == username)`, `val = == sha`,
# `decompress(`), so the definition does not parse as written -- restore the
# missing tokens from the original file before changing the code.
def put(self, username, reponame): # Create a new revision of the resource specified by `key`. fmt = self.request.headers.get("Content-Type", "application/n-triples") key = self.get_query_argument("key", None) if username != raise HTTPError(403) if not key: raise HTTPError(400) datestr = self.get_query_argument("datetime", None) ts = datestr and date(datestr, QSDATEFMT) or now() try: repo = (Repo .select( .join(User) .where(( == username) & ( == reponame)) .naive() .get()) except Repo.DoesNotExist: raise HTTPError(404) sha = shasum(key.encode("utf-8")) chain = list(CSet .select(CSet.time, CSet.type, CSet.len) .where( (CSet.repo == repo) & (CSet.hkey == sha) & (CSet.time >= SQL( "COALESCE((SELECT time FROM cset " "WHERE repo_id = %s " "AND hkey_id = %s " "AND type != %s " "ORDER BY time DESC " "LIMIT 1), 0)",, sha, CSet.DELTA ))) .order_by(CSet.time) .naive()) if len(chain) > 0 and not ts > chain[-1].time: # Appended timestamps must be monotonically increasing! raise HTTPError(400) if len(chain) == 0: # Mapping for `key` likely does not exist: # Store the SHA-to-KEY mapping in HMap, # looking out for possible collisions. try: HMap.create(sha=sha, val=key) except IntegrityError: val = == sha).scalar() if val != key: raise HTTPError(500) # Parse and normalize into a set of N-Quad lines stmts = parse(self.request.body, fmt) snapc = compress(join(stmts, "\n")) if len(chain) == 0 or chain[0].type == CSet.DELETE: # Provide dummy value for `patch` which is never stored. # If we get here, we always store a snapshot later on! 
patch = "" else: # Reconstruct the previous state of the resource prev = set() blobs = (Blob .select( .where( (Blob.repo == repo) & (Blob.hkey == sha) & (Blob.time << map(lambda e: e.time, chain))) .order_by(Blob.time) .naive()) for i, blob in enumerate(blobs.iterator()): data = decompress( if i == 0: # Base snapshot for the delta chain prev.update(data.splitlines()) else: for line in data.splitlines(): mode, stmt = line[0], line[2:] if mode == "A": prev.add(stmt) else: prev.discard(stmt) if stmts == prev: # No changes, nothing to be done. Bail out. return self.finish() patch = compress(join( map(lambda s: "D " + s, prev - stmts) + map(lambda s: "A " + s, stmts - prev), "\n")) # Calculate the accumulated size of the delta chain including # the (potential) patch from the previous to the pushed state. acclen = reduce(lambda s, e: s + e.len, chain[1:], 0) + len(patch) blen = len(chain) > 0 and chain[0].len or 0 # base length if (len(chain) == 0 or chain[0].type == CSet.DELETE or len(snapc) <= len(patch) or SNAPF * blen <= acclen): # Store the current state as a new snapshot Blob.create(repo=repo, hkey=sha, time=ts, data=snapc) CSet.create(repo=repo, hkey=sha, time=ts, type=CSet.SNAPSHOT, len=len(snapc)) else: # Store a directed delta between the previous and current state Blob.create(repo=repo, hkey=sha, time=ts, data=patch) CSet.create(repo=repo, hkey=sha, time=ts, type=CSet.DELTA, len=len(patch))
def unarchive_blob(item, dset, tmpd, archive):
    """Extract `item` from `archive` into `tmpd` and add it to `dset.blobs`.

    The blob is created from the path ``str(tmpd)`` joined with
    ``item.filename``. Always returns None.
    """
    archive.extract(item, tmpd)
    # TODO: change to check if path contains valid image
    extracted_path = os.path.join(str(tmpd), item.filename)
    entry = Blob(extracted_path)
    dset.blobs.append(entry)