def shouldFlipFirst(key):
    """Return True when the entry stored under *key* should be flipped first.

    *key* has the form ``'<channel>/<post id>'``; the part before the first
    ``'/'`` is looked up in ``channels``.

    Decision order:
      1. channel value ``-1``          -> False
      2. channel value in ``0..2``     -> True
      3. no text in ``index`` for key  -> False
      4. text shorter than 20 characters without a ``'hasFile'`` /
         ``'hasLink'`` match           -> False
      5. otherwise True unless the text matches an item of ``blocklist``.

    NOTE(review): the meaning of the individual channel values is not visible
    here; confirm against where ``channels`` is populated.
    """
    channel = key.split('/')[0]
    level = channels.get(channel)
    if level == -1:
        return False
    if 0 <= level <= 2:
        return True
    text = index.get(key)
    if text is None:
        # The key is not in index: len(None) would raise TypeError, and there
        # is no text to match, so treat it like a short entry with no match.
        return False
    if len(text) < 20 and not matchKey(text, ['hasFile', 'hasLink']):
        return False
    return not matchKey(text, blocklist.items())
def searchRaw(targets, searchCore=False):
    """Return the keys of index entries that satisfy every search target.

    Args:
        targets: iterable of search terms; an entry must pass ``searchHit``
            for each of them.
        searchCore: when true, only keys listed in ``coreIndex`` are searched;
            otherwise all of ``index.items()`` is.

    Returns:
        A list of keys. With no targets, at most the first 1000 keys of
        ``coreIndex`` that still have text in ``index`` are returned,
        regardless of ``searchCore``.
    """
    if not targets:
        # optimization of /s and /sc
        return [name for name in list(coreIndex)[:1000] if index.get(name)]

    if searchCore:
        candidates = []
        for name in list(coreIndex):
            # after index cleanup, a core key might not be in index anymore
            if index.get(name):
                candidates.append((name, index.get(name)))
    else:
        candidates = index.items()

    # Narrow the candidate set one target at a time.
    for target in targets:
        candidates = [entry for entry in candidates if searchHit(target, entry)]

    return [entry[0] for entry in candidates]
def shouldRemove(key):
    """Return True when the entry stored under *key* should be removed.

    *key* has the form ``'<channel>/<post id>'``. Checks run in this order,
    the first one that applies decides:
      1. key ends with ``'/0'``                          -> keep
      2. no ``maintext`` for the key                     -> remove
      3. channel value is ``-2`` or ``100``              -> remove
      4. channel value is ``0`` or ``1``                 -> keep
      5. ``noCNnoEN`` is true for the indexed text       -> remove
      6. indexed text matches ``'hasFile'``/``'hasLink'`` -> keep
      7. otherwise remove only if the key's timestamp (0 when missing) is
         below ``dbase.getRetain(channel)``.

    NOTE(review): the meaning of the individual channel values is not visible
    here; confirm against where ``channels`` is populated.
    """
    if key.endswith('/0'):
        return False
    if not maintext.get(key):
        return True

    channel = key.split('/')[0]
    level = channels.get(channel)
    if level in [-2, 100]:
        return True
    if 0 <= level < 2:
        return False

    text = index.get(key)
    if noCNnoEN(text):
        return True
    if matchKey(text, ['hasFile', 'hasLink']):
        return False

    return timestamp.get(key, 0) < dbase.getRetain(channel)
def isCNIndex(key):
    """Return True if any character of the text indexed under *key* passes ``isCN``.

    Returns False when ``index`` holds no (or empty) text for *key*.
    """
    content = index.get(key)
    if not content:
        return False
    return any(isCN(char) for char in content)
def searchRaw(targets, searchCore=False):
    """Yield the keys of index entries that satisfy every search target.

    This is a generator: nothing is evaluated until it is iterated.

    Args:
        targets: iterable of search terms; an entry must pass ``searchHit``
            for each of them.
        searchCore: when true, only keys listed in ``coreIndex`` are searched;
            otherwise all of ``index.items()`` is.

    Yields:
        The key of each matching ``(key, text)`` entry.
    """
    if searchCore:
        # A key listed in coreIndex may have no text in index; skip it rather
        # than handing searchHit a (key, None) pair.
        space = []
        for name in list(coreIndex):
            text = index.get(name)
            if text:
                space.append((name, text))
    else:
        space = index.items()

    # Narrow the candidate set one target at a time.
    for target in targets:
        space = [item for item in space if searchHit(target, item)]

    for item in space:
        yield item[0]
def slowBackfill(channel):
    """Walk backwards through *channel*'s post ids and store posts not yet indexed.

    Starts at the id returned by ``_findLastMessage`` and counts down towards
    1. Ids already present in ``index`` are skipped, and the cursor jumps back
    by a further random 0-99 ids. Other posts are fetched with
    ``webgram.getPost`` and passed to ``dbase.update`` when ``getIndex()`` is
    truthy. The walk stops once ``time_limit`` seconds have elapsed (checked
    after each fetch) or the cursor reaches 1.
    """
    cursor = _findLastMessage(channel)
    sendDebugMessage('slowBackfill', '@' + channel, cursor)
    began = time.time()
    while cursor > 1:
        cursor -= 1
        key = channel + '/' + str(cursor)
        if index.get(key):
            # Already indexed: skip ahead by a random stride.
            cursor -= int(random.random() * 100)
        else:
            post = webgram.getPost(channel, cursor)
            if post.getIndex():
                dbase.update(post)
            if time.time() - began > time_limit:
                break
    print('slowBackfill end', '@' + channel, cursor)
def cleanupChannel(keys, keepChinese=True):
    """Remove low-scoring entries of a channel and return how many were removed.

    Nothing is removed when *keys* is empty or has 100 entries or fewer.
    Candidates are ranked by ``(getKeyScore(key), key)`` in descending order;
    the top 50 are kept and the rest are passed to ``dbase.removeKey``.

    Args:
        keys: the keys belonging to one channel.
        keepChinese: when true, keys whose indexed text passes
            ``isSimplified`` are never candidates for removal.

    Returns:
        The number of keys removed (0 when 50 or fewer candidates remain).
    """
    if not keys or len(keys) <= 100:
        return 0
    if keepChinese:
        keys = [key for key in keys if not isSimplified(index.get(key))]
    if len(keys) <= 50:
        return 0

    ranked = sorted(((getKeyScore(key), key) for key in keys), reverse=True)
    surplus = ranked[50:]
    for scored in surplus:
        dbase.removeKey(scored[1])
    return len(surplus)
def slowBackfill(channel):
    """Walk backwards through *channel*'s post ids and store posts not yet indexed.

    Starts at the id returned by ``_findLastMessage`` and runs for at most
    ``getMaxIteration(channel)`` steps, stopping early when the cursor reaches
    1 or ``postTooOld`` is true for a fetched post. Ids already present in
    ``index`` are skipped, and the cursor jumps back by a further random 0-99
    ids. A fetched post with a truthy ``getIndex()`` is passed to
    ``dbase.update``. Once at least one such post has been seen, a later post
    without an index has its key passed to ``dbase.removeKey``.
    """
    cursor = _findLastMessage(channel)
    seen_new = False
    for _ in range(getMaxIteration(channel)):
        cursor -= 1
        if cursor <= 1:
            break
        key = channel + '/' + str(cursor)
        if index.get(key):
            # Already indexed: skip ahead by a random stride.
            cursor -= int(random.random() * 100)
        else:
            post = webgram.getPost(channel, cursor)
            if post.getIndex():
                seen_new = True
                dbase.update(post)
            elif seen_new:
                dbase.removeKey(key)
            if postTooOld(post):
                break
def cleanupChannel(keys):
    """Clean up the keys whose indexed text does not pass ``containCN``.

    Returns 0 for empty *keys*; otherwise returns whatever
    ``cleanKeys(candidates, 10)`` returns for the filtered keys.
    """
    if not keys:
        return 0
    candidates = list(filter(lambda key: not containCN(index.get(key)), keys))
    return cleanKeys(candidates, 10)
def getKeyScore(key):
    """Return 1 if the text indexed under *key* matches ``'hasFile'`` or ``'hasLink'``, else 0."""
    return 1 if matchKey(index.get(key), ['hasFile', 'hasLink']) else 0