Example #1
def __init__(self, dataLayer):
    self.dataCluster = dataLayer.getSNRedis()
    self.userDao = dataLayer.getCachedCrawlUserDao()
    self.snDao = dataLayer.getCachedCrawlSNDao()
    self.nodeAdj = LRUCacheDict(102400, 10)  # adjacency list: nodeID -> [nodeID]
    self.nodeProfile = LRUCacheDict(102400, 10)  # profiles
    self.edgeNum = 0
Example #2
def memoize(f=None, key_fn=hashkey):
    """An in-memory cache wrapper that can be used on any function, including
    coroutines.

    """
    __cache = LRUCacheDict(max_size=65536, expiration=86400)
    if f is None:
        return partial(memoize, key_fn=key_fn)

    @wraps(f)
    def wrapper(*args, **kwargs):
        # Simple key generation. Notice that there are no guarantees that the
        # key will be the same when using dict arguments.
        key = f.__module__ + '#' + f.__name__ + '#' + json.dumps(
            key_fn(*args, **kwargs))
        try:
            val = __cache[key]
            if asyncio.iscoroutinefunction(f):
                return _wrap_value_in_coroutine(val)
            return val
        except KeyError:
            val = f(*args, **kwargs)

            if asyncio.iscoroutine(val):
                # If the value returned by the function is a coroutine, wrap
                # the future in a new coroutine that stores the actual result
                # in the cache.
                return _wrap_coroutine_storage(__cache, key, val)

            # Otherwise just store and return the value directly
            __cache[key] = val
            return val

    return wrapper
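A minimal usage sketch of this decorator (slow_lookup and its argument are hypothetical, and it assumes the default key_fn produces a JSON-serialisable key for these arguments):

@memoize
def slow_lookup(user_id):
    # expensive work would happen here
    return {"id": user_id}

slow_lookup(42)  # computed and stored in the decorator's LRUCacheDict
slow_lookup(42)  # served from the cache until the 86400 s expiration elapses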
Example #3
import stackdriver_logging  #pylint: disable=relative-import
import process  #pylint: disable=relative-import

HOME = os.path.expanduser('~')
CLUSTERFUZZ_DIR = os.path.join(HOME, '.clusterfuzz')
CLUSTERFUZZ_CACHE_DIR = os.path.join(CLUSTERFUZZ_DIR, 'cache')
AUTH_FILE_LOCATION = os.path.join(CLUSTERFUZZ_CACHE_DIR, 'auth_header')
CHROMIUM_SRC = os.path.join(HOME, 'chromium', 'src')
CHROMIUM_OUT = os.path.join(CHROMIUM_SRC, 'out')
RELEASE_ENV = os.path.join(HOME, 'RELEASE_ENV')
DEPOT_TOOLS = os.path.join(HOME, 'depot_tools')
SANITY_CHECKS = '/python-daemon/daemon/sanity_checks.yml'
BINARY_LOCATION = '/python-daemon-data/clusterfuzz'
TOOL_SOURCE = os.path.join(HOME, 'clusterfuzz-tools')
TESTCASE_CACHE = LRUCacheDict(max_size=1000, expiration=172800)

# The number of seconds to sleep after each test run to avoid DDOS.
SLEEP_TIME = 30

Testcase = collections.namedtuple('Testcase', ['id', 'job_type'])

# Configuring backoff retrying because sending a request to ClusterFuzz
# might fail during a deployment.
http = requests.Session()
http.mount(
    'https://',
    adapters.HTTPAdapter(
        # backoff_factor is 0.5. Therefore, the max wait time is 16s.
        retry.Retry(total=5,
                    backoff_factor=0.5,
Example #4
def __init__(self, limit: int, expiration: int = 0):
    from lru import LRUCacheDict
    self._client = LRUCacheDict(max_size=limit, expiration=expiration)
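For context, a small illustrative sketch (not taken from the example) of how an LRUCacheDict built this way behaves: entries are dropped either when the size limit is exceeded or once they are older than the expiration period, and missing keys raise KeyError.

from lru import LRUCacheDict

cache = LRUCacheDict(max_size=2, expiration=60)  # keep at most 2 entries, each for at most 60 s
cache['a'] = 1
cache['b'] = 2
cache['c'] = 3        # 'a' should be evicted: only the 2 most recently used keys are kept
try:
    cache['a']
except KeyError:
    pass              # evicted or expired keys raise KeyError, like a normal dict miss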
Example #5
    def learn_model(self, X, types, type_hierarchy=None, domains=None, ranges=None):
        hash_id = (sum([xi.nnz for xi in X]) + bool(types is None) + bool(type_hierarchy is None) + bool(
            domains is None) + bool(ranges is None)) * len(X)
        self.path_matrices_cache = LRUCacheDict(max_size=self.max_feats * 2)
        # self.path_matrices = {} if not self.dump_mem else Cache("/tmp/pracache-%d"%hash_id, timeout=float("inf"))
        self.path_matrices = {} if not self.dump_mem else shelve.open("pracache-%d" % hash_id)

        self.n_instances = X[0].shape[0]
        self.n_relations = len(X)
        self.shape = (self.n_instances, self.n_instances)
        for Xi in X:
            assert Xi.shape == self.shape

        self.syms = []
        for r in range(self.n_relations):
            if is_symmetric(X[r]):
                self.syms.append(r)

        self.all_pos_pairs = set()
        for xi in X:
            self.all_pos_pairs = self.all_pos_pairs.union(set(zip(xi.row, xi.col)))
        self.all_pos_pairs = list(self.all_pos_pairs)
        self.X = [coo_matrix(Xi).astype(bool).tocsr() for Xi in X]

        if types is not None and not isinstance(types, csr_matrix):
            types = csr_matrix(coo_matrix(types))

        self.types = types
        self.n_types = types.shape[1] if types is not None else 1
        self.domains = domains if domains is not None else {}
        self.ranges = ranges if ranges is not None else {}
        for r in range(self.n_relations):
            if r not in self.domains:
                self.domains[r] = None
            if r not in self.ranges:
                self.ranges[r] = None
        self.type_hierarchy = type_hierarchy

        min_sup = float(self.n_instances) * self.min_sup
        print(min_sup)

        X = [self.X[r] for r in range(self.n_relations)] + [self.X[r].transpose() for r in range(self.n_relations)]

        self.relevant_relations = list(range(2 * self.n_relations))  # materialised so entries can be removed below
        inverses = {r: (r + self.n_relations) for r in range(self.n_relations)}
        inverses.update({k: v for v, k in inverses.items()})

        for r in self.syms:
            self.relevant_relations.remove(r + self.n_relations)
        gc.collect()

        domains = self.domains
        ranges = self.ranges
        domains.update({inverses[r]: t for r, t in self.ranges.items()})
        ranges.update({inverses[r]: t for r, t in self.domains.items()})
        self.domains = domains
        self.ranges = ranges

        self.path_rowscols = {}
        singletons = []

        all_paths = []
        lp1_paths = set()
        l_paths = set()

        t1 = datetime.now()

        print("Computing paths adjacency matrices")
        matrices_size = 0
        for r in range(self.n_relations):
            if self.X[r].getnnz() and self.X[r].getnnz() >= min_sup:
                singletons.append(r)
                l_paths.add(tuple([r]))
                if inverses[r] in self.relevant_relations:
                    singletons.append(inverses[r])
                    l_paths.add(tuple([inverses[r]]))
            m = self.X[r].astype(bool)
            matrices_size += asizeof.asizeof(m)
            sys.stdout.write('\r%d' % matrices_size)
            sys.stdout.flush()
            self.add_path_matrix([r], m)
            self.add_path_matrix([inverses[r]], m.transpose())
            rows = set(np.where(self.X[r].indptr[1:] > self.X[r].indptr[:-1])[0])
            cols = set(self.X[r].indices)
            self.path_rowscols[tuple([r])] = (rows, cols)
            self.path_rowscols[tuple([inverses[r]])] = (cols, rows)

        depth = 1
        num_paths = len(singletons)

        computed_paths = self.n_relations * 2

        all_paths.append(list(l_paths))

        while depth < self.max_depth and l_paths:
            candidates = {}
            for path in l_paths:
                path_last_r = path[-1]

                for r2 in self.relevant_relations:
                    if path_last_r != inverses[r2] and (path_last_r != r2 or r2 not in self.syms) and \
                            self.check_domain_range(path_last_r, r2, domains, ranges, self.type_hierarchy):
                        new_path = list(path) + [r2]
                        if not tuple(new_path) in candidates and not tuple(
                                [inverses[i] for i in reversed(new_path)]) in candidates:
                            candidates[tuple(new_path)] = self.path_relevance(path, r2)

            if self.max_paths_per_level < len(candidates):
                if self.path_selection_mode == "random":
                    sorted_ids = np.random.choice(len(candidates), len(candidates), replace=False)
                else:
                    sorted_ids = np.argsort(list(candidates.values()))
                selected = [list(candidates.keys())[sorted_ids[-(i + 1)]] for i in
                            range(min(self.max_paths_per_level, len(sorted_ids)))]
                print("top-%d paths selected out of %d candidates" % (len(selected), len(candidates)))
            else:
                selected = candidates.keys()

            pbar = tqdm(total=len(selected))
            for new_path in selected:
                path = new_path[:-1]
                r2 = new_path[-1]
                computed_paths += 2
                A1 = self.get_path_matrix(path)
                A2 = X[r2]
                prod = A1.dot(A2)

                if prod.getnnz() and min_sup <= prod.getnnz() < self.max_nnz:
                    matrices_size += asizeof.asizeof(prod)
                    sys.stdout.write('\r%d' % matrices_size)
                    sys.stdout.flush()
                    new_path = list(path) + [r2]
                    lp1_paths.add(tuple(new_path))
                    lp1_paths.add(tuple([inverses[i] for i in reversed(new_path)]))
                    self.add_path_matrix(new_path, prod)
                    self.add_path_matrix([inverses[i] for i in reversed(new_path)], prod.T)
                    # row/column support of the newly composed path matrix
                    rows = set(np.where(prod.indptr[1:] > prod.indptr[:-1])[0])
                    cols = set(prod.indices)
                    self.path_rowscols[tuple(new_path)] = (rows, cols)
                    self.path_rowscols[tuple([inverses[i] for i in reversed(new_path)])] = (cols, rows)
                    num_paths += 1

                pbar.update(1)

            pbar.close()
            all_paths.append(list(lp1_paths))
            l_paths = lp1_paths
            lp1_paths = set()
            depth += 1

        del X
        gc.collect()

        t2 = datetime.now()

        print("total paths = %d out of %d     [computed in %f s]" % (
            sum(len(l) for l in all_paths), computed_paths, (t2 - t1).total_seconds()))

        # print("converting to sok_matrix")
        if self.convert_to_sok:
            print("converting to sok_matrix")
            for p in self.matrix_paths:
                self.add_path_matrix(p, sok_matrix(self.get_path_matrix(p).tocoo()))
                # m = bcsr_matrix(self.get_path_matrix(p))
                # if len(p) > 1:
                #    m.data = None
                # self.add_path_matrix(p, m)

        self.path_domains = {}
        self.path_ranges = {}
        for paths in all_paths:
            for p in paths:
                self.path_domains[p] = domains[p[0]]
                self.path_ranges[p] = ranges[p[-1]]

        self.domain_paths = {None: []}
        self.range_paths = {None: []}
        self.domain_paths.update({t: [] for t in range(self.n_types)})
        self.range_paths.update({t: [] for t in range(self.n_types)})
        for path, path_domain in self.path_domains.items():
            self.domain_paths[path_domain].append(path)
        for path, path_range in self.path_ranges.items():
            self.range_paths[path_range].append(path)

        print("Training relations local classifiers")
        self.learn_feature_weights()

        n_path_feats = sum([len(p) for p in self.selected_paths.values()])
        n_type_feats = sum([len(p) for p in self.selected_s_types.values()]) + sum(
            [len(p) for p in self.selected_o_types.values()])
        total_feats = n_path_feats + n_type_feats
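The central step in learn_model composes relation paths by multiplying boolean sparse adjacency matrices (prod = A1.dot(A2) above) and keeps a path only if its support clears min_sup. A tiny self-contained sketch of that composition, with made-up toy matrices:

import numpy as np
from scipy.sparse import csr_matrix

# Two toy relations over 3 entities: r0 contains the edge 0->1, r1 the edge 1->2.
r0 = csr_matrix(np.array([[0, 1, 0], [0, 0, 0], [0, 0, 0]], dtype=bool))
r1 = csr_matrix(np.array([[0, 0, 0], [0, 0, 1], [0, 0, 0]], dtype=bool))

# The path (r0, r1) connects 0 -> 2; its adjacency matrix is the boolean product.
path_matrix = r0.dot(r1)
print(path_matrix.toarray())  # only entry [0, 2] is True
print(path_matrix.getnnz())   # 1, the support that gets compared against min_sup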
Example #6
class GraphCache:
    def __init__(self, dataLayer):
        self.dataCluster = dataLayer.getSNRedis()
        self.userDao = dataLayer.getCachedCrawlUserDao()
        self.snDao = dataLayer.getCachedCrawlSNDao()
        self.nodeAdj = LRUCacheDict(102400, 10)  # adjacency list: nodeID -> [nodeID]
        self.nodeProfile = LRUCacheDict(102400, 10)  # profiles
        self.edgeNum = 0

    def close(self):
        self.userDao.close()
        self.snDao.close()
    
    # insert an edge
    def addEdge(self, start, end):
        if not self.nodeAdj.has_key(start):
            self.nodeAdj[start] = set()
        self.nodeAdj[start].add(end)
        self.edgeNum += 1
    
    def addEdges(self, start, nodes):
        if not self.nodeAdj.has_key(start):
            self.nodeAdj[start] = set()
        self.nodeAdj[start].update(nodes)
        self.edgeNum += len(nodes)
        
    """
    删除nodeID及其出边
    """
    def delNode(self, nodeID):
        self.edgeNum -= len(self.nodeAdj[nodeID])
        del self.nodeAdj[nodeID]
    
    def existNode(self, nodeID):
        if not self.nodeAdj.has_key(nodeID):
            self.fetchNode(nodeID)
        return self.nodeAdj.has_key(nodeID)
    
    # get the set of nodes
    def nodes(self):
        return self.nodeAdj.keys()

    # get the nodes adjacent to a node, returns (node, edgeID)
    def neighbours(self, node):
        return self.nodeAdj[node]

    def nodeSize(self):
        return len(self.nodeAdj)
    """
    extract the ego-centric network of nodeID
    """    
    def egoNetwork(self, nodeID):
        nodeID = strToUnicode(nodeID)
        rtnGraph = Graph()
        edgeID = 0
        if self.existNode(nodeID):
            neighbours = self.loadNeighbours(nodeID)
            for neighbour in neighbours:
                neighbour = strToUnicode(neighbour)
                rtnGraph.addEdge(edgeID, nodeID, neighbour, 1.0)
                edgeID += 1
                cNeighbours = self.loadNeighbours(neighbour)
                for cNeighbour in cNeighbours:
                    if cNeighbour in neighbours:
                        rtnGraph.addEdge(edgeID, neighbour, cNeighbour, 1.0)
                        edgeID += 1
        
        return rtnGraph

    def loadNodesName(self, nodes):
        profiles = {}
        logger.info("searching for nodes:%s" % str(nodes))
        for node in nodes:
            rec = self.userDao.getUserProfile(node)
            profiles[node] = rec['name']
        return profiles

    def loadProfiles(self, graph):
        profiles = {}
        for node in graph.nodes():
            rec = self.userDao.getUserProfile(node)
            if rec:
                profiles[node] = rec
        return profiles

    def loadTags(self, graph):
        tags = {}
        for node in graph.nodes():
            rec = self.userDao.getUserTags(node)
            if rec:
                tags[node] = rec
        graph.tags = tags
        
    """
    get the neighbours of nodeID
    """
    def loadNeighbours(self, nodeID):
        if not self.existNode(nodeID):
            self.fetchNode(nodeID)
            
        if not self.existNode(nodeID):
            return set()
        else:
            return self.neighbours(nodeID)
        
    """
    read neighbours of nodeID from redis cluster
    """
    def fetchNode(self, nodeID):
        neighbours = self.snDao.getUserFriendsID(nodeID)
        if len(neighbours) > 0:
            self.addEdges(nodeID, neighbours)
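Note that both caches above are built with a 10 second expiration (LRUCacheDict(102400, 10), assuming the positional arguments are max_size and expiration, as the keyword calls elsewhere on this page suggest), so adjacency entries quietly disappear and existNode falls back to fetchNode. A small illustrative sketch of that behaviour, with made-up keys and timings:

import time
from lru import LRUCacheDict

node_adj = LRUCacheDict(102400, 10)  # up to 102400 nodes, each cached for 10 s
node_adj['u1'] = {'u2', 'u3'}
print(node_adj.has_key('u1'))        # True while the entry is fresh
time.sleep(11)
print(node_adj.has_key('u1'))        # expected False: expired, so a caller would re-fetch from Redis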
Example #7
from lru import LRUCacheDict

_dircache = LRUCacheDict(max_size=30, expiration=30*60)
CLUSTERFUZZ_LOG_PATH = os.path.join(CLUSTERFUZZ_DIR, 'logs', 'output.log')
CLUSTERFUZZ_CACHE_DIR = os.path.join(CLUSTERFUZZ_DIR, 'cache')
AUTH_FILE_LOCATION = os.path.join(CLUSTERFUZZ_CACHE_DIR, 'auth_header')
CHROMIUM_SRC = os.path.join(HOME, 'chromium', 'src')
CHROMIUM_OUT = os.path.join(CHROMIUM_SRC, 'out')
DEPOT_TOOLS = os.path.join(HOME, 'depot_tools')
SANITY_CHECKS = os.path.join(os.path.dirname(__file__), 'sanity_checks.yml')
BINARY_LOCATION = '/python-daemon-data/clusterfuzz'
TOOL_SOURCE = os.path.join(HOME, 'clusterfuzz-tools')
MAX_PREVIEW_LOG_BYTE_COUNT = 100000
MAX_AGE = 90 * 24 * 60 * 60  # 90 days.
MIN_AGE = 12 * 60 * 60  # 12 hours.
REPRODUCE_TOOL_TIMEOUT = 3 * 60 * 60

# Every testcase (including the failed ones) will be run again after 2 days.
PROCESSED_TESTCASE_IDS = LRUCacheDict(max_size=1000, expiration=172800)

# The options that will be tested on the CI.
TEST_OPTIONS = ['', '--current --skip-deps -i 20']

# The dir to be removed and checked out. See:
# https://github.com/google/clusterfuzz-tools/issues/429
CLEAN_CHROMIUM_SUBDIRS = ['testing', 'third_party', 'tools']

# The number of seconds to sleep after each test run to avoid DDOS and git's
# rate limit.
SLEEP_TIME = 5 * 60
MINIMIZATION_ERROR_SLEEP_TIME = 15 * 60

Testcase = collections.namedtuple('Testcase', ['id', 'job_type'])

def wrapper(func):
    return LRUCachedFunction(func, LRUCacheDict(max_size, expiration))
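The trailing fragment wires a plain function into an LRUCachedFunction backed by an LRUCacheDict. A hedged sketch of how such a wrapper is typically used as a decorator factory (fetch_profile and the max_size/expiration values below are made up for illustration):

from lru import LRUCacheDict, LRUCachedFunction

def lru_cached(max_size=1024, expiration=15 * 60):
    def wrapper(func):
        # wrap func so repeated calls with the same arguments hit the LRU cache
        return LRUCachedFunction(func, LRUCacheDict(max_size, expiration))
    return wrapper

@lru_cached(max_size=100, expiration=60)
def fetch_profile(user_id):
    return {"id": user_id}  # stands in for an expensive lookup

fetch_profile(1)  # computed and cached
fetch_profile(1)  # returned from the cache within the expiration window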