def memoize(f=None, key_fn=hashkey):
    """An in-memory cache wrapper that can be used on any function, including
    coroutines.
    """
    __cache = LRUCacheDict(max_size=65536, expiration=86400)

    if f is None:
        return partial(memoize, key_fn=key_fn)

    @wraps(f)
    def wrapper(*args, **kwargs):
        # Simple key generation. Notice that there are no guarantees that the
        # key will be the same when using dict arguments.
        key = f.__module__ + '#' + f.__name__ + '#' + json.dumps(
            key_fn(*args, **kwargs))
        try:
            val = __cache[key]
            if asyncio.iscoroutinefunction(f):
                return _wrap_value_in_coroutine(val)
            return val
        except KeyError:
            val = f(*args, **kwargs)
            if asyncio.iscoroutine(val):
                # If the value returned by the function is a coroutine, wrap
                # the future in a new coroutine that stores the actual result
                # in the cache.
                return _wrap_coroutine_storage(__cache, key, val)
            # Otherwise just store and return the value directly
            __cache[key] = val
            return val

    return wrapper
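# A minimal usage sketch for the memoize decorator above (hypothetical
# functions; assumes the snippet's own imports of asyncio, json, partial,
# wraps, hashkey and LRUCacheDict are in scope). Repeated calls with the same
# arguments within the 86400-second expiration window are served from the
# LRUCacheDict instead of re-running the function.
@memoize
def slow_square(x):
    # Stand-in for an expensive computation.
    return x * x

@memoize
async def fetch_square(x):
    # Coroutines are supported: on a cache hit the stored value is re-wrapped
    # in a coroutine so callers can still await it.
    return x * x

slow_square(4)  # computed and stored under 'module#slow_square#...'
slow_square(4)  # served from __cache
# await fetch_square(4)  # inside an event loop; second await hits the cache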
import stackdriver_logging  #pylint: disable=relative-import
import process  #pylint: disable=relative-import

HOME = os.path.expanduser('~')
CLUSTERFUZZ_DIR = os.path.join(HOME, '.clusterfuzz')
CLUSTERFUZZ_CACHE_DIR = os.path.join(CLUSTERFUZZ_DIR, 'cache')
AUTH_FILE_LOCATION = os.path.join(CLUSTERFUZZ_CACHE_DIR, 'auth_header')
CHROMIUM_SRC = os.path.join(HOME, 'chromium', 'src')
CHROMIUM_OUT = os.path.join(CHROMIUM_SRC, 'out')
RELEASE_ENV = os.path.join(HOME, 'RELEASE_ENV')
DEPOT_TOOLS = os.path.join(HOME, 'depot_tools')
SANITY_CHECKS = '/python-daemon/daemon/sanity_checks.yml'
BINARY_LOCATION = '/python-daemon-data/clusterfuzz'
TOOL_SOURCE = os.path.join(HOME, 'clusterfuzz-tools')
TESTCASE_CACHE = LRUCacheDict(max_size=1000, expiration=172800)

# The number of seconds to sleep after each test run to avoid DDOS.
SLEEP_TIME = 30

Testcase = collections.namedtuple('Testcase', ['id', 'job_type'])

# Configuring backoff retrying because sending a request to ClusterFuzz
# might fail during a deployment.
http = requests.Session()
http.mount(
    'https://',
    adapters.HTTPAdapter(
        # backoff_factor is 0.5. Therefore, the max wait time is 16s.
        retry.Retry(total=5, backoff_factor=0.5,
def __init__(self, limit: int, expiration: int = 0):
    from lru import LRUCacheDict

    self._client = LRUCacheDict(max_size=limit, expiration=expiration)
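# A rough sketch of how the backend above is presumably used (the surrounding
# class is not shown, so the class and method names here are hypothetical).
# The underlying LRUCacheDict behaves like a dict: assignment stores a value,
# lookup raises KeyError once the entry is evicted or expired.
class LocalCacheBackend:
    def __init__(self, limit: int, expiration: int = 0):
        from lru import LRUCacheDict
        self._client = LRUCacheDict(max_size=limit, expiration=expiration)

    def set(self, key, value):
        self._client[key] = value

    def get(self, key, default=None):
        try:
            return self._client[key]
        except KeyError:
            return default

backend = LocalCacheBackend(limit=128, expiration=60)
backend.set('user:1', {'name': 'alice'})
backend.get('user:1')   # -> {'name': 'alice'} while cached
backend.get('missing')  # -> None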
def learn_model(self, X, types, type_hierarchy=None, domains=None, ranges=None):
    hash_id = (sum([xi.nnz for xi in X]) + bool(types is None) + bool(type_hierarchy is None) +
               bool(domains is None) + bool(ranges is None)) * len(X)
    # Bounded in-memory cache of path adjacency matrices; the full set lives in path_matrices.
    self.path_matrices_cache = LRUCacheDict(max_size=self.max_feats * 2)
    # self.path_matrices = {} if not self.dump_mem else Cache("/tmp/pracache-%d" % hash_id, timeout=float("inf"))
    self.path_matrices = {} if not self.dump_mem else shelve.open("pracache-%d" % hash_id)
    self.n_instances = X[0].shape[0]
    self.n_relations = len(X)
    self.shape = (self.n_instances, self.n_instances)
    for slice in X:
        assert slice.shape == self.shape
    self.syms = []
    for r in range(self.n_relations):
        if is_symmetric(X[r]):
            self.syms.append(r)
    self.all_pos_pairs = set()
    for xi in X:
        self.all_pos_pairs = self.all_pos_pairs.union(set(zip(xi.row, xi.col)))
    self.all_pos_pairs = list(self.all_pos_pairs)
    self.X = [coo_matrix(Xi).astype(bool).tocsr() for Xi in X]
    if types is not None and not isinstance(types, csr_matrix):
        types = csr_matrix(coo_matrix(types))
    self.types = types
    self.n_types = types.shape[1] if types is not None else 1
    self.domains = domains if domains is not None else {}
    self.ranges = ranges if ranges is not None else {}
    for r in range(self.n_relations):
        if r not in self.domains:
            self.domains[r] = None
        if r not in self.ranges:
            self.ranges[r] = None
    self.type_hierarchy = type_hierarchy
    min_sup = float(self.n_instances) * self.min_sup
    print(min_sup)
    # Append transposed matrices so that inverse relations can be traversed as well.
    X = [self.X[r] for r in range(self.n_relations)] + [self.X[r].transpose() for r in range(self.n_relations)]
    self.relevant_relations = list(range(2 * self.n_relations))
    inverses = {r: (r + self.n_relations) for r in range(self.n_relations)}
    inverses.update({k: v for v, k in inverses.items()})
    # The inverse of a symmetric relation is redundant, so drop it.
    for r in self.syms:
        del self.relevant_relations[self.relevant_relations.index(r + self.n_relations)]
    gc.collect()
    domains = self.domains
    ranges = self.ranges
    domains.update({inverses[r]: t for r, t in self.ranges.items()})
    ranges.update({inverses[r]: t for r, t in self.domains.items()})
    self.domains = domains
    self.ranges = ranges
    self.path_rowscols = {}
    singletons = []
    all_paths = []
    lp1_paths = set()
    l_paths = set()
    t1 = datetime.now()
    print("Computing paths adjacency matrices")
    matrices_size = 0
    # Level 1: adjacency matrices of single relations that meet the minimum support.
    for r in range(self.n_relations):
        if self.X[r].getnnz() and self.X[r].getnnz() >= min_sup:
            singletons.append(r)
            l_paths.add(tuple([r]))
            if inverses[r] in self.relevant_relations:
                singletons.append(inverses[r])
                l_paths.add(tuple([inverses[r]]))
            m = self.X[r].astype(bool)
            matrices_size += asizeof.asizeof(m)
            sys.stdout.write('\r%d' % matrices_size)
            sys.stdout.flush()
            self.add_path_matrix([r], m)
            self.add_path_matrix([inverses[r]], m.transpose())
            rows = set(np.where(self.X[r].indptr[1:] > self.X[r].indptr[:-1])[0])
            cols = set(self.X[r].indices)
            self.path_rowscols[tuple([r])] = (rows, cols)
            self.path_rowscols[tuple([inverses[r]])] = (cols, rows)
    depth = 1
    num_paths = len(singletons)
    computed_paths = self.n_relations * 2
    all_paths.append(list(l_paths))
    # Grow paths one relation at a time up to max_depth, keeping only candidates
    # that respect the domain/range constraints.
    while depth < self.max_depth and l_paths:
        candidates = {}
        for path in l_paths:
            path_last_r = path[-1]
            for r2 in self.relevant_relations:
                if path_last_r != inverses[r2] and (path_last_r != r2 or r2 not in self.syms) and \
                        self.check_domain_range(path_last_r, r2, domains, ranges, self.type_hierarchy):
                    new_path = list(path) + [r2]
                    if not tuple(new_path) in candidates and not tuple(
                            [inverses[i] for i in reversed(new_path)]) in candidates:
                        candidates[tuple(new_path)] = self.path_relevance(path, r2)
        if self.max_paths_per_level < len(candidates):
            if self.path_selection_mode == "random":
                sorted_ids = np.random.choice(len(candidates), len(candidates), replace=False)
            else:
                sorted_ids = np.argsort(list(candidates.values()))
            candidate_keys = list(candidates.keys())
            selected = [candidate_keys[sorted_ids[-(i + 1)]]
                        for i in range(min(self.max_paths_per_level, len(sorted_ids)))]
            print("top-%d paths selected out of %d candidates" % (len(selected), len(candidates)))
        else:
            selected = list(candidates.keys())
        pbar = tqdm(total=len(selected))
        for new_path in selected:
            path = new_path[:-1]
            r2 = new_path[-1]
            computed_paths += 2
            A1 = self.get_path_matrix(path)
            A2 = X[r2]
            prod = A1.dot(A2)
            if prod.getnnz() and min_sup <= prod.getnnz() < self.max_nnz:
                matrices_size += asizeof.asizeof(prod)
                sys.stdout.write('\r%d' % matrices_size)
                sys.stdout.flush()
                new_path = list(path) + [r2]
                lp1_paths.add(tuple(new_path))
                lp1_paths.add(tuple([inverses[i] for i in reversed(new_path)]))
                self.add_path_matrix(new_path, prod)
                self.add_path_matrix([inverses[i] for i in reversed(new_path)], prod.T)
                rows = set(np.where(self.X[r].indptr[1:] > self.X[r].indptr[:-1])[0])
                cols = set(self.X[r].indices)
                self.path_rowscols[tuple(new_path)] = (rows, cols)
                self.path_rowscols[tuple([inverses[i] for i in reversed(new_path)])] = (cols, rows)
                num_paths += 1
            pbar.update(1)
        pbar.close()
        all_paths.append(list(lp1_paths))
        l_paths = lp1_paths
        lp1_paths = set()
        depth += 1
    del X
    gc.collect()
    t2 = datetime.now()
    print("total paths = %d out of %d [computed in %f s]" % (
        sum(len(l) for l in all_paths), computed_paths, (t2 - t1).total_seconds()))
    if self.convert_to_sok:
        print("converting to sok_matrix")
        for p in self.matrix_paths:
            self.add_path_matrix(p, sok_matrix(self.get_path_matrix(p).tocoo()))
            # m = bcsr_matrix(self.get_path_matrix(p))
            # if len(p) > 1:
            #     m.data = None
            # self.add_path_matrix(p, m)
    self.path_domains = {}
    self.path_ranges = {}
    for paths in all_paths:
        for p in paths:
            self.path_domains[p] = domains[p[0]]
            self.path_ranges[p] = ranges[p[len(p) - 1]]
    self.domain_paths = {None: []}
    self.range_paths = {None: []}
    self.domain_paths.update({t: [] for t in range(self.n_types)})
    self.range_paths.update({t: [] for t in range(self.n_types)})
    for path, path_domain in self.path_domains.items():
        self.domain_paths[path_domain].append(path)
    for path, path_range in self.path_ranges.items():
        self.range_paths[path_range].append(path)
    print("Training relations local classifiers")
    self.learn_feature_weights()
    n_path_feats = sum([len(p) for p in self.selected_paths.values()])
    n_type_feats = sum([len(p) for p in self.selected_s_types.values()]) + sum(
        [len(p) for p in self.selected_o_types.values()])
    total_feats = n_path_feats + n_type_feats
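# add_path_matrix / get_path_matrix are not shown above; the following is only
# a plausible sketch (an assumption, not the original implementation, and
# PathMatrixStore is a hypothetical name) of how they could pair the bounded
# LRUCacheDict with the persistent path_matrices store: every matrix is written
# to the durable dict/shelve store, while the LRU cache keeps recently used
# matrices in memory.
class PathMatrixStore:
    def __init__(self, max_feats):
        from lru import LRUCacheDict
        self.path_matrices = {}  # durable store (dict or shelve)
        self.path_matrices_cache = LRUCacheDict(max_size=max_feats * 2)

    def add_path_matrix(self, path, matrix):
        key = str(tuple(path))
        self.path_matrices[key] = matrix
        self.path_matrices_cache[key] = matrix

    def get_path_matrix(self, path):
        key = str(tuple(path))
        try:
            return self.path_matrices_cache[key]   # fast path: LRU hit
        except KeyError:
            matrix = self.path_matrices[key]       # fall back to the backing store
            self.path_matrices_cache[key] = matrix
            return matrix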
class GraphCache:
    def __init__(self, dataLayer):
        self.dataCluster = dataLayer.getSNRedis()
        self.userDao = dataLayer.getCachedCrawlUserDao()
        self.snDao = dataLayer.getCachedCrawlSNDao()
        self.nodeAdj = LRUCacheDict(102400, 10)      # adjacency list: nodeID -> [nodeID]
        self.nodeProfile = LRUCacheDict(102400, 10)  # profiles
        self.edgeNum = 0

    def close(self):
        self.userDao.close()
        self.snDao.close()

    # insert a single edge
    def addEdge(self, start, end):
        if not self.nodeAdj.has_key(start):
            self.nodeAdj[start] = set()
        self.nodeAdj[start].add(end)
        self.edgeNum += 1

    def addEdges(self, start, nodes):
        if not self.nodeAdj.has_key(start):
            self.nodeAdj[start] = set()
        self.nodeAdj[start].update(nodes)
        self.edgeNum += len(nodes)

    def delNode(self, nodeID):
        """Delete nodeID and its outgoing edges."""
        self.edgeNum -= len(self.nodeAdj[nodeID])
        del self.nodeAdj[nodeID]

    def existNode(self, nodeID):
        if not self.nodeAdj.has_key(nodeID):
            self.fetchNode(nodeID)
        return self.nodeAdj.has_key(nodeID)

    # get the set of nodes
    def nodes(self):
        return self.nodeAdj.keys()

    # get the nodes adjacent to a node; returns (node, edgeID)
    def neighbours(self, node):
        return self.nodeAdj[node]

    def nodeSize(self):
        return len(self.nodeAdj)

    def egoNetwork(self, nodeID):
        """Extract the ego-centric network of nodeID."""
        nodeID = strToUnicode(nodeID)
        rtnGraph = Graph()
        edgeID = 0
        if self.existNode(nodeID):
            neighbours = self.loadNeighbours(nodeID)
            for neighbour in neighbours:
                neighbour = strToUnicode(neighbour)
                rtnGraph.addEdge(edgeID, nodeID, neighbour, 1.0)
                edgeID += 1
                cNeighbours = self.loadNeighbours(neighbour)
                for cNeighbour in cNeighbours:
                    if cNeighbour in neighbours:
                        rtnGraph.addEdge(edgeID, neighbour, cNeighbour, 1.0)
                        edgeID += 1
        return rtnGraph

    def loadNodesName(self, nodes):
        profiles = {}
        logger.info("searching for nodes:%s" % str(nodes))
        for node in nodes:
            rec = self.userDao.getUserProfile(node)
            profiles[node] = rec['name']
        return profiles

    def loadProfiles(self, graph):
        profiles = {}
        for node in graph.nodes():
            rec = self.userDao.getUserProfile(node)
            if rec:
                profiles[node] = rec
        return profiles

    def loadTags(self, graph):
        tags = {}
        for node in graph.nodes():
            rec = self.userDao.getUserTags(node)
            if rec:
                tags[node] = rec
        graph.tags = tags

    def loadNeighbours(self, nodeID):
        """Get the neighbours of nodeID."""
        if not self.existNode(nodeID):
            self.fetchNode(nodeID)
        if not self.existNode(nodeID):
            return set()
        return self.neighbours(nodeID)

    def fetchNode(self, nodeID):
        """Read the neighbours of nodeID from the Redis cluster."""
        neighbours = self.snDao.getUserFriendsID(nodeID)
        if len(neighbours) > 0:
            self.addEdges(nodeID, neighbours)
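# The two LRUCacheDict(102400, 10) caches above take positional arguments that,
# matching the keyword usage in the other snippets, are presumably
# max_size=102400 and expiration=10 seconds, so adjacency lists are evicted
# quickly and transparently re-read from Redis by fetchNode. A self-contained
# sketch of that miss-then-fetch pattern (fake_fetch_friends is a hypothetical
# stand-in for snDao.getUserFriendsID):
from lru import LRUCacheDict

node_adj = LRUCacheDict(102400, 10)

def fake_fetch_friends(node_id):
    # Stand-in for snDao.getUserFriendsID(node_id).
    return {'friend-a', 'friend-b'}

def exist_node(node_id):
    if not node_adj.has_key(node_id):
        node_adj[node_id] = set(fake_fetch_friends(node_id))
    return node_adj.has_key(node_id)

exist_node('u123')  # miss -> fetched and cached
node_adj['u123']    # cached adjacency set; refetched after the ~10 s expiration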
from lru import LRUCacheDict

_dircache = LRUCacheDict(max_size=30, expiration=30*60)
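# A hedged sketch of how a directory-listing cache like _dircache might be
# used (list_dir is a hypothetical helper, not part of the snippet above):
# listings are kept for up to 30 minutes, with at most 30 paths cached.
import os

def list_dir(path):
    try:
        return _dircache[path]
    except KeyError:
        entries = os.listdir(path)
        _dircache[path] = entries
        return entries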
CLUSTERFUZZ_LOG_PATH = os.path.join(CLUSTERFUZZ_DIR, 'logs', 'output.log')
CLUSTERFUZZ_CACHE_DIR = os.path.join(CLUSTERFUZZ_DIR, 'cache')
AUTH_FILE_LOCATION = os.path.join(CLUSTERFUZZ_CACHE_DIR, 'auth_header')
CHROMIUM_SRC = os.path.join(HOME, 'chromium', 'src')
CHROMIUM_OUT = os.path.join(CHROMIUM_SRC, 'out')
DEPOT_TOOLS = os.path.join(HOME, 'depot_tools')
SANITY_CHECKS = os.path.join(os.path.dirname(__file__), 'sanity_checks.yml')
BINARY_LOCATION = '/python-daemon-data/clusterfuzz'
TOOL_SOURCE = os.path.join(HOME, 'clusterfuzz-tools')
MAX_PREVIEW_LOG_BYTE_COUNT = 100000

MAX_AGE = 90 * 24 * 60 * 60  # 90 days.
MIN_AGE = 12 * 60 * 60  # 12 hours.
REPRODUCE_TOOL_TIMEOUT = 3 * 60 * 60

# Every testcase (including the failed ones) will be run again after 2 days.
PROCESSED_TESTCASE_IDS = LRUCacheDict(max_size=1000, expiration=172800)

# The options that will be tested on the CI.
TEST_OPTIONS = ['', '--current --skip-deps -i 20']

# The dir to be removed and checked out. See:
# https://github.com/google/clusterfuzz-tools/issues/429
CLEAN_CHROMIUM_SUBDIRS = ['testing', 'third_party', 'tools']

# The number of seconds to sleep after each test run to avoid DDOS and git's
# rate limit.
SLEEP_TIME = 5 * 60
MINIMIZATION_ERROR_SLEEP_TIME = 15 * 60

Testcase = collections.namedtuple('Testcase', ['id', 'job_type'])
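# A rough sketch (should_run is a hypothetical helper, not the daemon's actual
# code) of how PROCESSED_TESTCASE_IDS can gate repeated runs: an id stays in
# the cache for 172800 seconds (2 days), after which the LRUCacheDict expires
# it and the testcase becomes eligible again.
def should_run(testcase):
    if PROCESSED_TESTCASE_IDS.has_key(testcase.id):
        return False
    PROCESSED_TESTCASE_IDS[testcase.id] = True
    return True

# Hypothetical id/job_type values: True on the first call, False for 2 days after.
should_run(Testcase(id=1234, job_type='linux_asan_job'))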
def wrapper(func):
    return LRUCachedFunction(func, LRUCacheDict(max_size, expiration))
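# The wrapper above reads max_size and expiration from an enclosing scope, so
# it is presumably the inner function of a decorator factory along these lines
# (a sketch under that assumption, not necessarily the original enclosing code):
def lru_cache_function(max_size=1024, expiration=15 * 60):
    def wrapper(func):
        return LRUCachedFunction(func, LRUCacheDict(max_size, expiration))
    return wrapper

@lru_cache_function(max_size=1024, expiration=15 * 60)
def expensive_lookup(key):
    # Stand-in for a slow computation; results are cached for 15 minutes.
    return key.upper()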