Esempio n. 1
0
def shouldFlipFirst(key):
    """Decide whether the entry stored under *key* should be flipped first.

    The channel name is the part of *key* before the first '/'.  The rules
    are applied in order:

    1. channel value in ``channels`` equal to -1        -> False
    2. channel value between 0 and 2 (inclusive)        -> True
    3. no index text stored for *key*                   -> False
    4. index text shorter than 20 characters that does
       not match 'hasFile' / 'hasLink'                  -> False
    5. otherwise True unless the text matches an item of ``blocklist``.
    """
    channel = key.split('/')[0]
    score = channels.get(channel)
    if score == -1:
        return False
    # An unknown channel may come back as None; ordering None against an int
    # raises TypeError, so only range-check a real value.
    if score is not None and 0 <= score <= 2:
        return True
    text = index.get(key)
    if not text:
        # Nothing indexed for this key: len(None) would raise.  An empty text
        # is treated like any other short text without a file or link.
        # NOTE(review): assumes matchKey never matches an empty text.
        return False
    if len(text) < 20 and not matchKey(text, ['hasFile', 'hasLink']):
        return False
    return not matchKey(text, blocklist.items())
Esempio n. 2
0
def searchRaw(targets, searchCore=False):
    """Return the keys whose index entry is hit by every target.

    With no targets, the first 1000 keys of ``coreIndex`` are taken and those
    without a truthy ``index`` entry are dropped (so fewer than 1000 keys may
    come back).  Otherwise the search space is either the core keys that still
    have an index entry (``searchCore=True``) or all of ``index.items()``, and
    it is narrowed once per target with ``searchHit``.
    """
    if not targets:  # optimization of /s and /sc
        return list(filter(index.get, list(coreIndex)[:1000]))

    if searchCore:
        # after index cleanup, a core key might not be in index anymore
        candidates = []
        for core_key in list(coreIndex):
            text = index.get(core_key)
            if text:
                candidates.append((core_key, text))
    else:
        candidates = index.items()

    # Each target filters the survivors of the previous one.
    for target in targets:
        survivors = []
        for pair in candidates:
            if searchHit(target, pair):
                survivors.append(pair)
        candidates = survivors

    return [pair[0] for pair in candidates]
Esempio n. 3
0
def shouldRemove(key):
    """Decide whether the entry stored under *key* should be removed.

    The channel name is the part of *key* before the first '/'.  The rules
    are applied in order:

    1. key ends with '/0'                                -> keep
    2. no ``maintext`` stored for the key                -> remove
    3. channel value in ``channels`` is -2 or 100        -> remove
    4. channel value is 0 or more and below 2            -> keep
    5. ``noCNnoEN`` accepts the index text               -> remove
    6. index text matches 'hasFile' / 'hasLink'          -> keep
    7. otherwise remove only when the key's timestamp (0 if absent) is below
       ``dbase.getRetain(channel)``.
    """
    if key.endswith('/0'):
        return False
    if not maintext.get(key):
        return True
    channel = key.split('/')[0]
    score = channels.get(channel)
    if score in (-2, 100):
        return True
    # An unknown channel may come back as None; ordering None against an int
    # raises TypeError, so only range-check a real value.
    if score is not None and 0 <= score < 2:
        return False
    text = index.get(key)
    if noCNnoEN(text):
        return True
    if matchKey(text, ['hasFile', 'hasLink']):
        return False
    # Reuse the channel parsed above instead of splitting the key again.
    return timestamp.get(key, 0) < dbase.getRetain(channel)
Esempio n. 4
0
def isCNIndex(key):
    """Return True if any character of the index text for *key* passes ``isCN``.

    A missing or empty index text yields False.
    """
    indexed = index.get(key)
    return bool(indexed) and any(isCN(char) for char in indexed)
Esempio n. 5
0
def searchRaw(targets, searchCore=False):
    """Yield the keys whose index entry is hit by every target.

    This is a generator: nothing is evaluated until it is iterated.

    targets:    iterable of search targets; each one narrows the search space
                through ``searchHit(target, (key, text))``.
    searchCore: when true, search only the keys listed in ``coreIndex``;
                otherwise search all of ``index.items()``.
    """
    if searchCore:
        # coreIndex may still list keys whose entry has since been removed
        # from index; pairing them with a None text would hand searchHit an
        # item with nothing to search, so such keys are skipped.
        space = []
        for key in list(coreIndex):
            text = index.get(key)
            if text:
                space.append((key, text))
    else:
        space = index.items()
    for target in targets:
        space = [item for item in space if searchHit(target, item)]
    for item in space:
        yield item[0]
Esempio n. 6
0
def slowBackfill(channel):
    """Walk backwards through a channel's post ids and store unseen posts.

    Starts from the id returned by ``_findLastMessage(channel)`` and steps
    down one id at a time.  Ids whose key is already in ``index`` are not
    fetched; the walk additionally jumps back by a random 0-99 ids.  Every
    other id is fetched with ``webgram.getPost`` and passed to
    ``dbase.update`` when ``post.getIndex()`` is truthy.  The walk stops when
    the id reaches 1 or, checked after each fetch, when more than
    ``time_limit`` has elapsed.
    """
    post_id = _findLastMessage(channel)
    sendDebugMessage('slowBackfill', '@' + channel, post_id)
    # Use the monotonic clock for the time budget: time.time() follows the
    # wall clock and can jump when the system time is adjusted.
    start_time = time.monotonic()
    while post_id > 1:
        post_id -= 1
        key = channel + '/' + str(post_id)
        if index.get(key):
            # Already indexed: skip ahead by a random stride without fetching.
            post_id -= int(random.random() * 100)
            continue
        post = webgram.getPost(channel, post_id)
        if post.getIndex():
            dbase.update(post)
        if time.monotonic() - start_time > time_limit:
            break
    print('slowBackfill end', '@' + channel, post_id)
Esempio n. 7
0
def cleanupChannel(keys, keepChinese=True):
	"""Remove all but the 50 best-scored keys and return how many were removed.

	Nothing happens (0 is returned) when *keys* is empty or has at most 100
	entries.  With ``keepChinese`` true, keys whose index text is accepted by
	``isSimplified`` are excluded from the cleanup.  If 50 or fewer keys are
	left at that point, nothing is removed either.  The remaining keys are
	ranked as (score, key) pairs in descending order; everything after the
	first 50 is passed to ``dbase.removeKey``.
	"""
	if not keys or len(keys) <= 100:
		return 0
	if keepChinese:
		keys = [key for key in keys if not isSimplified(index.get(key))]
	if len(keys) <= 50:
		return 0
	# Tuples sort by score first, then by key, both descending.
	ranked = sorted(((getKeyScore(key), key) for key in keys), reverse=True)
	surplus = ranked[50:]
	for _score, key in surplus:
		dbase.removeKey(key)
	return len(surplus)
Esempio n. 8
0
def slowBackfill(channel):
    """Walk backwards through a channel's post ids for a bounded number of steps.

    Starts from the id returned by ``_findLastMessage(channel)`` and runs at
    most ``getMaxIteration(channel)`` iterations.  Ids whose key is already in
    ``index`` are not fetched; the walk additionally jumps back by a random
    0-99 ids.  A fetched post with a truthy ``getIndex()`` is stored through
    ``dbase.update``; once at least one such post has been seen, the key of
    any later fetched post without an index is passed to ``dbase.removeKey``.
    The walk ends early when the id drops to 1 or below, or when
    ``postTooOld`` accepts the fetched post.
    """
    cursor = _findLastMessage(channel)
    seen_new = False
    for _attempt in range(getMaxIteration(channel)):
        cursor -= 1
        if cursor <= 1:
            return
        key = '/'.join((channel, str(cursor)))
        if index.get(key):
            # Already indexed: skip ahead by a random stride without fetching.
            cursor -= int(random.random() * 100)
            continue
        fetched = webgram.getPost(channel, cursor)
        if fetched.getIndex():
            seen_new = True
            dbase.update(fetched)
        elif seen_new:
            dbase.removeKey(key)
        if postTooOld(fetched):
            return
Esempio n. 9
0
def cleanupChannel(keys):
    """Hand the keys whose index text fails ``containCN`` to ``cleanKeys``.

    Returns 0 for an empty *keys*; otherwise returns whatever
    ``cleanKeys(filtered_keys, 10)`` returns.
    """
    if not keys:
        return 0
    candidates = []
    for key in keys:
        if containCN(index.get(key)):
            continue
        candidates.append(key)
    return cleanKeys(candidates, 10)
Esempio n. 10
0
def getKeyScore(key):
	"""Return 1 when the index text for *key* matches 'hasFile' or 'hasLink', else 0."""
	has_attachment = matchKey(index.get(key), ['hasFile', 'hasLink'])
	return 1 if has_attachment else 0