def from_url(url: HttpUrl):
    """Generate Full ISCC from URL.

    The response is the mapping produced by ``iscc_from_url`` (called with
    ``guess=True``), with ``norm_title`` renamed to ``title``, the same value
    repeated as ``title_trimmed``, and a ``bits`` list holding one entry per
    component of the generated ISCC code.
    """
    payload = iscc_from_url(url, guess=True)

    # The upstream result exposes the title as "norm_title"; publish it under
    # both title keys used by this API.
    title = payload.pop("norm_title")
    payload["title"] = title
    payload["title_trimmed"] = title

    payload["bits"] = [code_to_bits(part) for part in iscc_split(payload["iscc"])]
    return payload
def instance_id(file: UploadFile = File(...)):
    """Generate Instance-ID from raw binary data.

    The response holds the Instance-ID ``code``, its ``bits`` and ``ident``
    conversions, and the ``tophash`` returned alongside the code.
    """
    code, tophash = iscc.instance_id(file.file)
    return dict(
        code=code,
        bits=code_to_bits(code),
        ident=code_to_int(code),
        tophash=tophash,
    )
def content_id_text(text: Text):
    """Generate ContentID-Text from 'text'.

    The response holds the fixed ``gmt`` value ``"text"``, the Content-ID
    ``code``, and its ``bits`` and ``ident`` conversions.
    """
    code = iscc.content_id_text(text.text)
    return dict(
        gmt="text",
        bits=code_to_bits(code),
        code=code,
        ident=code_to_int(code),
    )
def data_and_instance_id(file: UploadFile = File(...)):
    """Generate Data-ID and Instance-ID from raw binary data.

    The response has two sections, ``data_id`` and ``instance_id``. Each holds
    the ``code`` with its ``bits`` and ``ident`` conversions; ``instance_id``
    additionally carries the ``tophash``.
    """
    stream = file.file

    data_code = iscc.data_id(stream)
    # Rewind so the second pass over the upload starts at the beginning again.
    stream.seek(0)
    instance_code, tophash = iscc.instance_id(stream)

    data_section = dict(
        code=data_code,
        bits=code_to_bits(data_code),
        ident=code_to_int(data_code),
    )
    instance_section = dict(
        code=instance_code,
        bits=code_to_bits(instance_code),
        ident=code_to_int(instance_code),
        tophash=tophash,
    )
    return dict(data_id=data_section, instance_id=instance_section)
def meta_id(meta: Metadata):
    """Generate MetaID from 'title' and optional 'extra' metadata.

    The response holds the Meta-ID ``code`` with its ``bits`` and ``ident``
    conversions, the submitted ``title`` and its ``title_trimmed`` form. When
    extra metadata was supplied, ``extra`` and ``extra_trimmed`` are included
    as well.
    """
    # A missing/empty extra is passed on as an empty string.
    extra = meta.extra or ""
    code, title_trimmed, extra_trimmed = iscc.meta_id(meta.title, extra)

    response = dict(
        code=code,
        bits=code_to_bits(code),
        ident=code_to_int(code),
        title=meta.title,
        title_trimmed=title_trimmed,
    )
    if not extra:
        return response

    response.update(extra=extra, extra_trimmed=extra_trimmed)
    return response
def lookup(iscc: str):
    """Lookup an ISCC Code.

    Responds with HTTP 503 when no lookup client is available and HTTP 400 when
    the code fails verification. Otherwise every component of the code is
    queried on the ISCC stream, and the filtered entries that share at least
    two components with the requested code are returned, each extended with a
    ``bits`` list of at least four items.
    """
    client = get_client()
    if client is None:
        raise HTTPException(
            HTTP_503_SERVICE_UNAVAILABLE, "ISCC lookup service not available"
        )

    try:
        iscc_verify(iscc)
    except ValueError as err:
        raise HTTPException(HTTP_400_BAD_REQUEST, str(err))

    components = iscc_split(iscc)

    # Collect stream items for each component, keeping only the first
    # occurrence of every transaction id (items without a txid are dropped).
    collected = []
    seen_txids = set()
    for component in components:
        items = client.liststreamkeyitems(ISCC_STREAM, component, True, 100, 0, True)
        for item in items:
            txid = item.get("txid")
            if txid is not None and txid not in seen_txids:
                collected.append(item)
                seen_txids.add(txid)

    wanted = set(components)
    empty_bits = "0" * 64
    matches = []
    for entry in stream_filter.search(collected):
        keys = entry["keys"]
        # Stay conservative until a similarity based index exists: an entry
        # only counts as a match if it shares two or more components.
        if len(wanted.intersection(keys)) < 2:
            continue
        bits = [code_to_bits(code) for code in add_placeholders(keys)]
        # Pad up to four bit strings with all-zero placeholders.
        bits.extend([empty_bits] * (4 - len(bits)))
        entry["bits"] = bits
        matches.append(entry)
    return matches
def from_file(
    file: UploadFile = File(...), title: str = Form(""), extra: str = Form("")
):
    """Generate Full ISCC Code from Media File with optional explicit metadata.

    The media type is detected from the upload; types outside
    ``SUPPORTED_MIME_TYPES`` are rejected with HTTP 415. When no title is
    submitted, one is obtained via ``get_title`` from the parsed document.
    Text uploads from which no text could be extracted are rejected with
    HTTP 422.

    The response holds the ``iscc`` code, the ``tophash``, the generic media
    type ``gmt`` and a ``bits`` list with one entry per code component.
    ``title``/``title_trimmed`` and ``extra``/``extra_trimmed`` are added when
    the respective normalized value is non-empty. The Meta-ID component is
    only part of the code when the normalized title is non-empty.
    """
    try:
        media_type = detector.from_buffer(file.file)
        if media_type not in SUPPORTED_MIME_TYPES:
            raise HTTPException(
                HTTP_415_UNSUPPORTED_MEDIA_TYPE,
                "Unsupported media type '{}'. Please request support at "
                "https://github.com/iscc/iscc-service/issues.".format(media_type),
            )

        file.file.seek(0)
        if media_type == "application/x-mobipocket-ebook":
            # Mobipocket files are extracted to a temporary directory and
            # parsed from there; remove the directory even if parsing fails.
            tempdir, filepath = mobi.extract(file.file)
            try:
                tika_result = parser.from_file(filepath)
            finally:
                shutil.rmtree(tempdir)
        else:
            tika_result = parser.from_buffer(file.file)

        if not title:
            title = get_title(tika_result, guess=True)

        mid, norm_title, norm_extra = iscc.meta_id(title, extra)
        gmt = mime_to_gmt(media_type)

        # NOTE(review): there is no branch for other gmt values, in which case
        # `cid` would be unbound below. Presumably SUPPORTED_MIME_TYPES only
        # maps to these four types - confirm.
        if gmt == GMT.IMAGE:
            file.file.seek(0)
            cid = iscc.content_id_image(file.file)
        elif gmt == GMT.TEXT:
            text = tika_result["content"]
            if not text:
                raise HTTPException(
                    HTTP_422_UNPROCESSABLE_ENTITY, "Could not extract text"
                )
            cid = iscc.content_id_text(text)
        elif gmt == GMT.AUDIO:
            file.file.seek(0)
            features = audio_id.get_chroma_vector(file.file)
            cid = audio_id.content_id_audio(features)
        elif gmt == GMT.VIDEO:
            file.file.seek(0)
            # Frame extraction works on a path, so the upload is spooled to a
            # uniquely named file that keeps the original extension.
            _, ext = splitext(file.filename)
            fn = "{}{}".format(uuid.uuid4(), ext)
            tmp_path = join(APP_DIR, fn)
            try:
                with open(tmp_path, "wb") as buffer:
                    shutil.copyfileobj(file.file, buffer)
                features = video_id.get_frame_vectors(tmp_path)
                cid = video_id.content_id_video(features)
            finally:
                # Clean up on failure too; the file may not exist if creating
                # it was what failed.
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)

        file.file.seek(0)
        did = iscc.data_id(file.file)
        file.file.seek(0)
        iid, tophash = iscc.instance_id(file.file)

        # Without a usable title the Meta-ID is left out of the code.
        if not norm_title:
            iscc_code = "-".join((cid, did, iid))
        else:
            iscc_code = "-".join((mid, cid, did, iid))

        components = iscc_split(iscc_code)
        result = dict(
            iscc=iscc_code,
            tophash=tophash,
            gmt=gmt,
            bits=[code_to_bits(c) for c in components],
        )
        if norm_title:
            result["title"] = title
            result["title_trimmed"] = norm_title
        if norm_extra:
            result["extra"] = extra
            result["extra_trimmed"] = norm_extra
        return result
    finally:
        # Release the upload handle on every exit path, including errors.
        file.file.close()
def data_id(file: UploadFile = File(...)):
    """Generate Data-ID from raw binary data.

    The response holds the Data-ID ``code`` together with its ``bits`` and
    ``ident`` conversions.
    """
    code = iscc.data_id(file.file)
    return dict(
        code=code,
        bits=code_to_bits(code),
        ident=code_to_int(code),
    )