def filter_hits(hits_len, hits, prior_check=False, site_check=False,
                show_site=0):
    """Reduce a hit set to one segment per content hash, optionally per site.

    NOTE(review): the file defines ``filter_hits`` a second time directly
    after this definition; that later definition rebinds the name, so this
    copy is shadowed once the module has been loaded.

    Arguments:
      hits_len    -- returned untouched when ``seg_hashes`` is empty.
      hits        -- hit set; unpacked with ``ainodex.hit_contents``.
      prior_check -- when true, only segments whose document satisfies
                     ``ainopy.docinfo(did)[1] == 1.0`` are considered.
      site_check  -- when true, the site step described below is applied.
      show_site   -- only used with ``site_check``.  A truthy value keeps
                     the segments whose ``sites`` entry equals it; a falsy
                     value keeps a single segment per ``sites`` entry.

    Returns a ``(count, hits)`` pair.  If ``seg_hashes`` is empty the inputs
    are handed back unchanged and no filtering is done at all.

    ``seg_hashes`` and ``sites`` are looked up outside this function.
    """
    if not seg_hashes:
        return hits_len, hits

    def site_of(sid):
        # Site value of the document that owns segment `sid`.
        return sites[ainopy.did2key(ainopy.sid2did(sid))]

    # One segment per hash; a later segment replaces an earlier one that
    # carries the same hash.
    by_hash = {}
    for sid in ainodex.hit_contents(hits):
        if prior_check and not ainopy.docinfo(ainopy.sid2did(sid))[1] == 1.0:
            continue
        by_hash[seg_hashes[sid]] = sid

    if site_check:
        if show_site:
            # Work out which entries belong to other sites, then drop them.
            rejected = [key for key, sid in by_hash.items()
                        if site_of(sid) != show_site]
            for key in rejected:
                del by_hash[key]
        else:
            # Re-key by site: when several segments share a site, the one
            # visited last in dict order is the one that remains.
            by_hash = dict((site_of(sid), sid)
                           for sid in by_hash.itervalues())

    return len(by_hash), ainodex.list_to_hits(by_hash.values())
def filter_hits(hits_len, hits, prior_check=False, site_check=False,
                show_site=0):
    """Deduplicate a hit set by segment hash and, optionally, by site.

    Args:
        hits_len: Hit count supplied by the caller.  It is only used for the
            early return taken when ``seg_hashes`` is empty.
        hits: Hit set, expanded to segment ids via ``ainodex.hit_contents``.
        prior_check: If true, drop every segment whose document does not
            satisfy ``ainopy.docinfo(did)[1] == 1.0``.
        site_check: If true, additionally filter by the ``sites`` table.
        show_site: Only consulted when ``site_check`` is true.  A truthy
            value keeps the segments whose ``sites`` entry equals it; a
            falsy value keeps one segment per distinct ``sites`` entry.

    Returns:
        A ``(count, hits)`` tuple, where ``hits`` is rebuilt with
        ``ainodex.list_to_hits``.  When ``seg_hashes`` is empty the function
        returns ``(hits_len, hits)`` unchanged and applies no filter.

    ``seg_hashes`` and ``sites`` are resolved outside this function.
    """
    if not seg_hashes:
        return hits_len, hits

    # Keep one segment per hash.  Segments are inserted in the order
    # hit_contents yields them, so the last one with a given hash wins.
    uniq = {}
    for sid in ainodex.hit_contents(hits):
        if prior_check and ainopy.docinfo(ainopy.sid2did(sid))[1] != 1.0:
            continue
        uniq[seg_hashes[sid]] = sid

    if site_check:
        if show_site:
            # Iterate over an explicit snapshot: entries are deleted from
            # `uniq` inside the loop, which must not depend on items()
            # happening to return an independent list.
            for key, sid in list(uniq.items()):
                if sites[ainopy.did2key(ainopy.sid2did(sid))] != show_site:
                    del uniq[key]
        else:
            # Collapse to one segment per site; for a site with several
            # segments the one visited last in dict order is kept.
            per_site = {}
            for sid in uniq.values():
                per_site[sites[ainopy.did2key(ainopy.sid2did(sid))]] = sid
            uniq = per_site

    # Always hand list_to_hits a real list, never a dict view.
    return len(uniq), ainodex.list_to_hits(list(uniq.values()))
def read_sites():
    """Read the key -> site hash table from ``NAME + ".sitehash"``.

    Every line of the file must consist of exactly two whitespace-separated
    integers, a key and a hash.  The key is decremented by one before it is
    compared or stored.  Only keys between ``ainopy.did2key(0)`` and the key
    of the last document (inclusive) are kept.

    Reading stops at the first key above that range, so entries after it are
    never seen -- presumably the file is sorted by key; verify against the
    code that writes it.

    Returns:
        dict mapping the adjusted key to its hash.

    Raises:
        ValueError: if a line does not hold exactly two integers.
    """
    last_key = ainopy.did2key(ainopy.info()["Number of documents"] - 1)
    first_key = ainopy.did2key(0)
    site_hashes = {}
    site_file = open(NAME + ".sitehash")
    # try/finally closes the handle even when a malformed line raises.
    try:
        for line in site_file:
            key, site_hash = map(int, line.split())
            key -= 1
            if key > last_key:
                break
            if key >= first_key:
                site_hashes[key] = site_hash
    finally:
        site_file.close()
    return site_hashes