Beispiel #1
0
def filter_hits(hits_len, hits, prior_check=False, site_check=False,
                show_site=0):
    """Deduplicate *hits* by segment hash and, optionally, by site.

    Relies on the module-level ``seg_hashes`` and ``sites`` lookups and on
    the ``ainopy`` / ``ainodex`` modules.

    Args:
        hits_len: Hit count, returned unchanged when ``seg_hashes`` is empty.
        hits: Hit container understood by ``ainodex.hit_contents``.
        prior_check: When true, keep only ids whose
            ``ainopy.docinfo(...)[1]`` equals ``1.0``.
            NOTE(review): presumably field 1 is the document prior -- confirm.
        site_check: When true, additionally filter by the ``sites`` value of
            each remaining id.
        show_site: With ``site_check``: if truthy, keep only entries whose
            site value equals it; otherwise keep one entry per site value.

    Returns:
        A ``(count, hits)`` pair: the number of surviving ids and the result
        of ``ainodex.list_to_hits`` over them.  If ``seg_hashes`` is empty,
        ``(hits_len, hits)`` is returned untouched.
    """
    if not seg_hashes:
        # Nothing to deduplicate against: hand the input back untouched.
        return hits_len, hits

    sids = ainodex.hit_contents(hits)
    if prior_check:
        sids = (sid for sid in sids
                if ainopy.docinfo(ainopy.sid2did(sid))[1] == 1.0)

    # Keyed by hash, so ids sharing a hash collapse to the last one seen.
    uniq = dict((seg_hashes[sid], sid) for sid in sids)

    if site_check:
        def site_of(sid):
            return sites[ainopy.did2key(ainopy.sid2did(sid))]

        if show_site:
            # Iterate over a snapshot: deleting from a dict while iterating
            # its live items view raises RuntimeError on Python 3.
            for key, sid in list(uniq.items()):
                if site_of(sid) != show_site:
                    del uniq[key]
        else:
            # Re-key by site so at most one id per site survives.
            by_site = {}
            for sid in uniq.values():
                by_site[site_of(sid)] = sid
            uniq = by_site

    # Pass a real list; on Python 3 ``values()`` is only a view.
    return len(uniq), ainodex.list_to_hits(list(uniq.values()))
Beispiel #2
0
def filter_hits(hits_len,
                hits,
                prior_check=False,
                site_check=False,
                show_site=0):
    """Deduplicate *hits* by segment hash and, optionally, by site.

    Relies on the module-level ``seg_hashes`` and ``sites`` lookups and on
    the ``ainopy`` / ``ainodex`` modules.

    Args:
        hits_len: Hit count, returned unchanged when ``seg_hashes`` is empty.
        hits: Hit container understood by ``ainodex.hit_contents``.
        prior_check: When true, keep only ids whose
            ``ainopy.docinfo(...)[1]`` equals ``1.0``.
            NOTE(review): presumably field 1 is the document prior -- confirm.
        site_check: When true, additionally filter by the ``sites`` value of
            each remaining id.
        show_site: With ``site_check``: if truthy, keep only entries whose
            site value equals it; otherwise keep one entry per site value.

    Returns:
        A ``(count, hits)`` pair: the number of surviving ids and the result
        of ``ainodex.list_to_hits`` over them.  If ``seg_hashes`` is empty,
        ``(hits_len, hits)`` is returned untouched.
    """
    if not seg_hashes:
        # Nothing to deduplicate against: hand the input back untouched.
        return hits_len, hits

    sids = ainodex.hit_contents(hits)
    if prior_check:
        sids = (sid for sid in sids
                if ainopy.docinfo(ainopy.sid2did(sid))[1] == 1.0)

    # Keyed by hash, so ids sharing a hash collapse to the last one seen.
    uniq = dict((seg_hashes[sid], sid) for sid in sids)

    if site_check:
        def site_of(sid):
            return sites[ainopy.did2key(ainopy.sid2did(sid))]

        if show_site:
            # Iterate over a snapshot: deleting from a dict while iterating
            # its live items view raises RuntimeError on Python 3.
            for key, sid in list(uniq.items()):
                if site_of(sid) != show_site:
                    del uniq[key]
        else:
            # Re-key by site so at most one id per site survives.
            by_site = {}
            for sid in uniq.values():
                by_site[site_of(sid)] = sid
            uniq = by_site

    # Pass a real list; on Python 3 ``values()`` is only a view.
    return len(uniq), ainodex.list_to_hits(list(uniq.values()))
Beispiel #3
0
def read_sites():
    """Load the key -> site-hash mapping from ``NAME + ".sitehash"``.

    Each line of the file must hold exactly two whitespace-separated
    integers: a key and a site hash.  Only keys that fall inside the range
    ``ainopy.did2key(0)`` .. ``ainopy.did2key(number_of_documents - 1)``
    are kept.

    Returns:
        dict mapping the adjusted key to its site hash.

    Raises:
        ValueError: if a line does not contain exactly two integers.
    """
    last_key = ainopy.did2key(ainopy.info()["Number of documents"] - 1)
    first_key = ainopy.did2key(0)
    sites = {}
    # The context manager closes the file even if a line fails to parse.
    with open(NAME + ".sitehash") as sitehash_file:
        for line in sitehash_file:
            key, site_hash = map(int, line.split())
            # NOTE(review): presumably file keys are 1-based -- confirm.
            key -= 1
            if key > last_key:
                # Stops at the first key past the range, which relies on the
                # file being sorted by ascending key -- TODO confirm.
                break
            if key >= first_key:
                sites[key] = site_hash
    return sites
Beispiel #4
0
def read_sites():
    """Load the key -> site-hash mapping from ``NAME + ".sitehash"``.

    Each line of the file must hold exactly two whitespace-separated
    integers: a key and a site hash.  Only keys that fall inside the range
    ``ainopy.did2key(0)`` .. ``ainopy.did2key(number_of_documents - 1)``
    are kept.

    Returns:
        dict mapping the adjusted key to its site hash.

    Raises:
        ValueError: if a line does not contain exactly two integers.
    """
    last_key = ainopy.did2key(ainopy.info()["Number of documents"] - 1)
    first_key = ainopy.did2key(0)
    sites = {}
    # The context manager closes the file even if a line fails to parse.
    with open(NAME + ".sitehash") as sitehash_file:
        for line in sitehash_file:
            key, site_hash = map(int, line.split())
            # NOTE(review): presumably file keys are 1-based -- confirm.
            key -= 1
            if key > last_key:
                # Stops at the first key past the range, which relies on the
                # file being sorted by ascending key -- TODO confirm.
                break
            if key >= first_key:
                sites[key] = site_hash
    return sites