Example #1
class SVCompIndex(Task):
    out_dir = Parameter("./index/")
    bench_prefix = Parameter("../sv-benchmarks/c/")

    def __init__(self, svcomp_name, directory, csv=False):
        self.directory = directory
        self.svcomp_name = svcomp_name
        self.csv = csv

    def require(self):
        pass

    def __taskid__(self):
        return "SVCompIndex_%s" % self.svcomp_name

    def output(self):
        return LocalTarget(
            self.out_dir.value+self.__taskid__()+".json", service=JsonService
        )

    def run(self):

        results = index_svcomp.parse(self.directory, self.bench_prefix.value,
                                     self.csv)

        with self.output() as o:
            o.emit(results)
Example #2
class CategoryLookupTask(Task):

    graphPaths = Parameter('./graphs/')
    out_dir = Parameter('./graphs/')
    max_size = Parameter(10000)
    svcomp = Parameter('svcomp15')
    benchmark_prefix = Parameter('../../..')

    def __init__(self, category):
        self.category = category

    def require(self):
        pass

    def output(self):
        return ManagedTarget(self)

    def __taskid__(self):
        return 'CategoryLookupTask_%s' % self.category

    def run(self):
        svcomp = select_svcomp(self.svcomp.value)
        results = svcomp.read_category(
                                            self.graphPaths.value,
                                            self.category,
                                            self.max_size.value,
                                            prefix=self.benchmark_prefix.value
                                        )

        assert results

        with self.output() as o:
            o.emit((self.category, results))
Example #3
class PCAFeatureTask(Task):
    out_dir = Parameter('./gram/')
    components = Parameter(0.99)
    whiten = Parameter(False)

    def __init__(self, h, D, category=None, task_type=None, kernel=None):
        self.h = h
        self.D = D
        self.category = category
        self.task_type = task_type
        self.kernel = kernel

    def require(self):
        if self.kernel is None:
            return PreparedFeatureTask(self.h, self.D, self.category,
                                       self.task_type)
        h = list(range(self.h + 1))
        return BagNormalizeGramTask(h, self.D, self.category, self.task_type)

    def __taskid__(self):
        s = 'PCAFeatureTask_%d_%d' % (self.h, self.D)
        if self.category is not None:
            s += '_' + str(containerHash(self.category))
        if self.task_type is not None:
            s += '_' + str(self.task_type)
        if self.kernel is not None:
            s += '_' + str(self.kernel)
        return s

    def output(self):
        path = self.out_dir.value + self.__taskid__() + '.json'
        return CachedTarget(LocalTarget(path, service=JsonService))

    def run(self):
        with self.input()[0] as i:
            D = i.query()

        graphIndex = D['graphIndex']

        if self.kernel is None:
            X = coo_matrix((D['data'], (D['rows'], D['columns'])),
                           shape=(D['row_shape'], D['column_shape']),
                           dtype=np.float64).todense()
            pca = PCA(n_components=self.components.value,
                      whiten=self.whiten.value)
        else:
            X = np.array(D['data'])
            pca = KernelPCA(n_components=500,
                            kernel='precomputed',
                            n_jobs=-1,
                            remove_zero_eig=True)

        X = pca.fit_transform(X)
        print('Reduced features: %s' % str(X.shape))

        with self.output() as o:
            o.emit({'graphIndex': graphIndex, 'matrix': X.tolist()})
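
A minimal standalone sketch of the two reduction paths chosen in run() above: plain PCA on an explicit feature matrix versus KernelPCA on a precomputed Gram matrix. The toy data, seed, and component counts are assumptions for illustration only.

# Sketch only: PCA vs. precomputed-kernel KernelPCA, as in PCAFeatureTask.run().
import numpy as np
from sklearn.decomposition import PCA, KernelPCA

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 20))          # toy feature matrix (graphs x features)

# Explicit features: keep enough components to explain 99% of the variance.
pca = PCA(n_components=0.99, whiten=False)
X_pca = pca.fit_transform(X)

# Precomputed kernel: build a linear Gram matrix and reduce that instead.
K = X @ X.T
kpca = KernelPCA(n_components=10, kernel='precomputed', remove_zero_eig=True)
X_kpca = kpca.fit_transform(K)

print(X_pca.shape, X_kpca.shape)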
Example #4
class BagFeatureTask(Task):
    """DEPRECATED."""

    out_dir = Parameter('./gram/')
    svcomp = Parameter('svcomp15')

    def __init__(self, h, D, category=None, task_type=None):
        self.h = h
        self.D = D
        self.category = category
        self.task_type = task_type

    def require(self):
        return [BagGraphIndexTask(self.h,
                                  self.D,
                                  self.category, self.task_type),
                BagFilterTask(self.h, self.D,
                              self.category, self.task_type)]

    def __taskid__(self):
        cat = 'all'
        if self.category is not None:
            cat = str(containerHash(self.category))

        tt = ''
        if self.task_type is not None:
            tt = '_'+str(self.task_type)

        return 'BagFeatureTask_%d_%d_%s' % (self.h, self.D, cat)\
               + tt

    def output(self):
        path = self.out_dir.value + self.__taskid__() + '.json'
        return CachedTarget(
            LocalTarget(path, service=FeatureJsonService)
        )

    def run(self):
        with self.input()[0] as i:
            graphIndex = i.query()

        with self.input()[1] as i:
            bag = ProgramBags(content=i.query(), svcomp=self.svcomp.value)

        bag.graphIndex = graphIndex

        features = bag.features()

        out = {
            'graphIndex': bag.graphIndex,
            'nodeIndex': bag.nodeIndex,
            'features': features
        }

        with self.output() as o:
            o.emit(out)
Example #5
class SVGraphTask(Task):
    out_dir = Parameter("./dfs")
    cpaChecker = Parameter("")
    bench_path = Parameter("")
    localize = Parameter(None)
    # Assumption: 'heap' is used in run() below but was never declared; a JVM
    # heap size default is supplied here so the task can run.
    heap = Parameter("10000M")

    def __init__(self, task):
        self.task = task

    def _localize(self, path):
        if self.localize.value is not None:
            return path.replace(self.localize.value[0], self.localize.value[1])
        return path

    def require(self):
        return None

    def __taskid__(self):
        tid = self.task
        tid = tid.replace("/", "_")
        tid = tid.replace(".", "_")
        return tid

    def output(self):
        return FileTarget("%s/%s.dfs" %
                          (self.out_dir.value, self.__taskid__()))

    def run(self):
        out_path = self.output().path

        path_to_source = self._localize(
            abspath("%s/%s" % (self.bench_path.value, self.task)))

        __path_to_cpachecker__ = self.cpaChecker.value
        cpash_path = join(__path_to_cpachecker__, 'scripts', 'cpa.sh')

        if not isdir(__path_to_cpachecker__):
            raise ValueError('CPAChecker directory not found')
        if not (isfile(path_to_source) and (path_to_source.endswith('.i')
                                            or path_to_source.endswith('.c'))):
            raise ValueError('path_to_source is no valid filepath. [%s]' %
                             path_to_source)

        proc = subprocess.run([
            cpash_path, '-graphgen', '-heap', self.heap.value, path_to_source,
            '-setprop',
            "graphGen.output=%s" % out_path
        ],
                              check=False,
                              stdout=PIPE,
                              stderr=PIPE)
        match_vresult = re.search(r'Verification\sresult:\s([A-Z]+)\.',
                                  str(proc.stdout))
        if match_vresult is None:
            raise ValueError('Invalid output of CPAChecker.')
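
A minimal sketch of the subprocess-and-regex pattern used in run() above, with a harmless echo command standing in for cpa.sh (assumes a Unix-like environment; the real task passes '-graphgen' and '-setprop' to CPAchecker).

# Sketch only: run an external command and parse its verdict from stdout.
import re
import subprocess
from subprocess import PIPE

proc = subprocess.run(['echo', 'Verification result: TRUE.'],
                      check=False, stdout=PIPE, stderr=PIPE)
match = re.search(r'Verification\sresult:\s([A-Z]+)\.',
                  proc.stdout.decode('utf-8', errors='replace'))
verdict = match.group(1) if match else None
print(verdict)  # -> 'TRUE'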
Example #6
class GraphPruningTask(Task):
    out_dir = Parameter('./out/graph/')
    allowedTypes = Parameter([1, 2, 3, 4])
    timeout = Parameter(None)

    def __get_allowed(self):
        try:
            return [EdgeType(t) for t in self.allowedTypes.value]
        except ValueError as err:
            raise ValueError('Unknown edge type detected') from err

    def __init__(self, name, maxDepth):
        self.name = name
        self.maxDepth = maxDepth

    def require(self):
        return GraphTask(self.name)

    def run(self):
        with self.input()[0] as i:
            graph = i.query()

        remV = set([])
        remE = set([])

        types = self.__get_allowed()

        for n, nbrs in graph.adjacency():
            depth = graph.node[n]['depth']
            if depth > self.maxDepth:
                remV.add(n)
            for nbr, keydict in nbrs.items():
                for key, eattr in keydict.items():
                    tick(self)
                    if eattr['type'] not in types:
                        remE.add((n, nbr))

        graph.remove_nodes_from(remV)
        graph.remove_edges_from(remE)

        with self.output() as o:
            o.emit(graph)

    def output(self):
        path = self.out_dir.value + self.__taskid__() + '.pickle'
        return CachedTarget(LocalTarget(path, service=NetworkXService))

    def __taskid__(self):
        return 'GraphPruneTask_' +\
                self.name +\
                ('_prune_%d' % self.maxDepth) +\
                ('_types_%s' % '_'.join(
                            [str(v) for v in self.allowedTypes.value]))
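
A standalone sketch of the pruning idea in run(): drop nodes beyond a depth limit and edges whose type is not allowed. The tiny MultiDiGraph and its 'depth'/'type' attributes are assumptions standing in for the graph produced by GraphTask.

# Sketch only: depth/edge-type pruning on a hand-built MultiDiGraph.
import networkx as nx

G = nx.MultiDiGraph()
G.add_node('a', depth=0)
G.add_node('b', depth=1)
G.add_node('c', depth=5)
G.add_edge('a', 'b', type=1)
G.add_edge('a', 'c', type=9)

max_depth, allowed = 2, {1, 2, 3, 4}
rem_v = {n for n, d in G.nodes(data='depth') if d > max_depth}
rem_e = {(u, v) for u, v, t in G.edges(data='type') if t not in allowed}

G.remove_edges_from(rem_e)
G.remove_nodes_from(rem_v)
print(G.nodes(), G.edges())   # node 'c' and the type-9 edge are gone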
Example #7
class hDGridTask(Task):
    out_dir = Parameter('./evaluation/')
    timeout = Parameter(None)

    def __init__(self, graphs, train_index, h_Set, D_Set):
        self.graphs = graphs
        self.train_index = train_index
        self.h_Set = h_Set
        self.D_Set = D_Set

    def require(self):
        param_grid = {'h': self.h_Set, 'D': self.D_Set}
        out = []
        for params in ParameterGrid(param_grid):
            out.append(
                Optional(
                    CGridTask(self.graphs, self.train_index, params['h'],
                              params['D'])))
        return out

    def output(self):
        path = self.out_dir.value + self.__taskid__() + '.json'
        return CachedTarget(LocalTarget(path, service=JsonService))

    def __taskid__(self):
        return "hDGrid_%s_%s_%s_%s" %\
                    (str(containerHash(self.graphs)),
                     str(containerHash(self.train_index)),
                     str(containerHash(self.h_Set)),
                     str(containerHash(self.D_Set)))

    def __repr__(self):
        return 'hDGrid(h_Set: %s, D_Set: %s)' % (str(self.h_Set),
                                                 str(self.D_Set))

    def run(self):
        max_param = None
        max_mean = -math.inf

        for inp in self.input():

            tick(self)

            if inp is not None:
                with inp as i:
                    param = i.query()
                    if param['mean'] > max_mean:
                        max_mean = param['mean']
                        max_param = param
                    del param

        with self.output() as o:
            o.emit(max_param)
Example #8
class BagNodeIndexTask(Task):
    out_dir = Parameter('./gram/')
    max_features = Parameter(10000)

    def __init__(self, h, D, category=None, task_type=None):
        self.h = h
        self.D = D
        self.category = category
        self.task_type = task_type

    def require(self):
        return [
            BagCountTask(h, self.D, self.category, self.task_type)
            for h in range(self.h + 1)
        ]

    def __taskid__(self):
        s = 'BagNodeIndexTask_%d_%d' % (self.h, self.D)
        if self.category is not None:
            s += '_' + str(containerHash(self.category))
        if self.task_type is not None:
            s += '_' + str(self.task_type)
        return s

    def output(self):
        path = self.out_dir.value + self.__taskid__() + '.json'
        return CachedTarget(LocalTarget(path, service=JsonService))

    def run(self):
        count = {}
        for inp in self.input():
            with inp as i:
                C = i.query()
            for n, c in C.items():
                if n in count:
                    count[n] += c
                else:
                    count[n] = c

        index = [
            x[0] for x in sorted(
                list(count.items()), key=lambda k: k[1], reverse=True)
        ]

        limit = min(len(index), self.max_features.value)
        if limit < 0:
            limit = len(index)
        index = index[:limit]
        index = {k: i for i, k in enumerate(index)}
        with self.output() as o:
            o.emit(index)
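
The merge-and-cap logic in run() can be summarized with collections.Counter; this sketch uses invented toy counts and a small max_features purely for illustration.

# Sketch only: merge per-iteration label counts, then index the most frequent.
from collections import Counter

per_iteration_counts = [{'foo': 3, 'bar': 1}, {'foo': 2, 'baz': 5}]

total = Counter()
for c in per_iteration_counts:
    total.update(c)

max_features = 2
index = {label: i for i, (label, _) in enumerate(total.most_common(max_features))}
print(index)  # the two most frequent labels, mapped to ranks 0..1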
Example #9
class BenchSpecTask(Task):
    out_dir = Parameter('./out/graph/')

    def __init__(self, programs, config, spec):
        self.programs = programs
        self.config = config
        self.spec = spec

    def require(self):
        return [RunCPATask(p, self.config, self.spec) for p in self.programs]

    def __taskid__(self):
        return "BenchSpecTask_" + self.spec[-10:]

    def output(self):
        path = self.out_dir.value + self.__taskid__() + '.json'
        return CachedTarget(LocalTarget(path, service=JsonService))

    def run(self):
        out = {}
        for inp in self.input():
            with inp as result:
                out[result['program']] = result
                del out[result['program']]['program']

        with self.output() as o:
            o.emit(out)
Example #10
class WLCollectorTask(Task):
    out_dir = Parameter('./gram/')

    def __init__(self, graphs, h, D):
        self.graphs = graphs
        self.h = h
        self.D = D

    def require(self):
        task = []
        if self.h > 0:
            task.append(WLCollectorTask(self.graphs, self.h - 1, self.D))
        for g in self.graphs:
            task.append(Optional(PrepareKernelTask(g, self.h, self.D)))
        return task

    def output(self):
        return ManagedTarget(self)

    def __taskid__(self):
        return "WLCollector_%d_%d_%s" %\
                    (self.h, self.D,
                     str(containerHash(self.graphs, large=True)))

    def __repr__(self):
        return 'WLCollector(h: %d, D: %d)' % (self.h, self.D)

    def run(self):
        M = {}
        s = 0
        if self.h > 0:
            with self.input()[0] as i:
                M = i.query()
            s = 1

        paths = {}

        for i in range(s, len(self.input())):
            inputDep = self.input()[i]
            g = self.graphs[i - s]
            if inputDep is None:
                if g in M:
                    del M[g]
            else:
                paths[g] = inputDep

        for g, p in paths.items():
            if g not in M:
                M[g] = {}
            if self.h not in M[g]:
                M[g][self.h] = {}
            with p as pin:
                G = pin.query()
            count = G.graph['label_count']
            del G
            for n, c in count.items():
                M[g][self.h][n] = c

        with self.output() as o:
            o.emit(M)
Example #11
class ExtractKernelBagTask(Task):
    out_dir = Parameter('./gram/')

    def __init__(self, graph, h, D):
        self.graph = graph
        self.h = h
        self.D = D

    def require(self):
        return PrepareKernelTask(self.graph, self.h, self.D)

    def output(self):
        path = self.out_dir.value + self.__taskid__() + '.json'
        return CachedTarget(LocalTarget(path, service=JsonService))

    def __taskid__(self):
        return 'ExtractKernelBagTask_%s_%d_%d' % (self.graph, self.h, self.D)

    def __repr__(self):
        return 'ExtractKernelBagTask(graph: %s, h: %d, D: %d)' %\
                (self.graph, self.h, self.D)

    def run(self):
        with self.input()[0] as i:
            G = i.query()

        with self.output() as o:
            o.emit(G.graph['label_count'])
Example #12
class MongoSetupTask(Task):
    collection = Parameter('graph_sim')

    def __init__(self, graphs, h, maxDepth):
        self.graphs = graphs
        self.h = h
        self.maxDepth = maxDepth

    def require(self):
        out = [
                MongoWLTask(g, self.h, self.maxDepth)
                for g in self.graphs
            ]

        out.append(MongoGraphLabelTask(self.graphs))
        return out

    def __taskid__(self):
        return 'MongoSetupTask_%d_%d' % (
            self.h, self.maxDepth
        )

    def output(self):
        return MongoResourceTarget(
            self.collection.value,
            'graph_id', 'graphs'
        )

    def run(self):
        pass
Example #13
class PescoWLTransformerTask(Task):
    out_dir = Parameter("./wlj/")

    def __init__(self, path, max_i, max_D):
        self.path = path
        self.max_i = max_i
        self.max_D = max_D

    def require(self):
        return PescoGraphTask(self.path)

    def __taskid__(self):
        return "PescoWLTransformerTask_%s_%d_%d" % (self.path.replace("/", "_").replace(".", "_"), self.max_i, self.max_D)

    def output(self):
        return LocalTarget(
            self.out_dir.value + self.__taskid__() + ".json", service=JsonService
        )

    def run(self):

        with self.input()[0] as i:
            G = generate_bag.parse_dfs_nx(
                json.load(i)
            )

        stats = {
            "nodes": G.number_of_nodes(),
            "edges": G.size(),
            "max_indegree": max(G.in_degree()),
            "max_outdegree": max(G.out_degree())
        }

        bags = {}

        for d in range(self.max_D, 0, -1):
            generate_bag.truncate(G, d)

            relabel = {}
            for i in range(self.max_i + 1):

                if i not in bags:
                    bags[i] = {}

                bags[i][d] = generate_bag.labelCount(G, relabel)
                relabel = generate_bag.wlGraphRelabel(G, relabel)

        with self.output() as o:
            o.emit({
                'statistics': stats,
                'kernel_bag': bags
            })
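
generate_bag is project-specific, so as an illustration only, here is a simplified Weisfeiler-Lehman relabelling and label-counting loop on a small networkx graph; it mirrors the structure of run() but is not the project's implementation.

# Sketch only: simplified WL relabelling/counting in the spirit of
# generate_bag.wlGraphRelabel / labelCount (not the project's code).
from collections import Counter
import networkx as nx

def wl_relabel(G, labels):
    # New label = old label plus the sorted multiset of neighbour labels.
    new_labels = {}
    for n in G:
        neigh = sorted(labels[m] for m in G.neighbors(n))
        new_labels[n] = labels[n] + '|' + ','.join(neigh)
    return new_labels

G = nx.path_graph(4)
labels = {n: 'x' for n in G}           # initial node labels (assumed uniform)
bags = {0: Counter(labels.values())}   # iteration 0: raw label counts
for i in range(1, 3):
    labels = wl_relabel(G, labels)
    bags[i] = Counter(labels.values())
print(bags[1])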
Example #14
class BagMDSTask(Task):
    """DEPRECATED."""

    out_dir = Parameter('./gram/')

    def __init__(self, h, D, category=None,
                 task_type=None, kernel='linear'):
        self.h = h
        self.D = D
        self.category = category
        self.task_type = task_type
        self.kernel = kernel

    def require(self):
        h = list(range(self.h + 1))
        return [BagNormalizeGramTask(h, self.D, self.category,
                                     self.task_type,
                                     self.kernel)]

    def __taskid__(self):
        cat = 'all'
        if self.category is not None:
            cat = '_'.join(enumerateable(self.category))

        tt = ''
        if self.task_type is not None:
            tt = '_'+str(self.task_type)

        return 'BagMDSTask_%d_%d_%s_%s' % (self.h, self.D, self.kernel,
                                           cat
                                           )\
            + tt

    def output(self):
        path = self.out_dir.value + self.__taskid__() + '.json'
        return CachedTarget(
            LocalTarget(path, service=JsonService)
        )

    def run(self):
        with self.input()[0] as i:
            D = i.query()
            graphIndex = D['graphIndex']
            X = np.array(D['data'])
            del D

        dis = np.ones(X.shape, dtype=X.dtype) - X

        mds = MDS(n_components=2, dissimilarity="precomputed", n_init=10)
        X_r = mds.fit_transform(dis)
        stress = mds.stress_

        with self.output() as o:
            o.emit({
                'graphIndex': graphIndex,
                'data': X_r.tolist(),
                'stress': stress
            })
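
A minimal sketch of the embedding step in run(): convert a normalized similarity matrix into dissimilarities and run metric MDS with a precomputed dissimilarity. The 3x3 similarity matrix is an invented example.

# Sketch only: 2-D MDS embedding from a precomputed dissimilarity matrix.
import numpy as np
from sklearn.manifold import MDS

sim = np.array([[1.0, 0.8, 0.1],
                [0.8, 1.0, 0.2],
                [0.1, 0.2, 1.0]])      # normalized kernel / similarity
dis = 1.0 - sim                        # turn similarity into dissimilarity

mds = MDS(n_components=2, dissimilarity='precomputed', n_init=10,
          random_state=0)
coords = mds.fit_transform(dis)
print(coords.shape, mds.stress_)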
Example #15
class BagGraphIndexTask(Task):
    """Generate a index for graph id."""
    out_dir = Parameter('./gram/')

    def __init__(self, h, D, category=None, task_type=None):
        """
        Init Task.

        h: iteration depth for loading task
        D: AST depth for loading task
        category: Task category (e.g. array-examples)
        task_type: type of task (e.g. reach for reachability problems)

        Caution: Ids between iterations should be the same. They can change
        between AST depths. Standard usage: Generate graph index only for
        one iteration depth (e.g. h=0) and use the index for all
        remaining cases.
        """
        self.h = h
        self.D = D
        self.category = category
        self.task_type = task_type

    def require(self):
        """Load a filter task."""
        return BagFilterTask(self.h, self.D,
                             self.category, self.task_type)

    def __taskid__(self):
        """Task id."""
        s = 'BagGraphIndexTask_%d' % (self.D)
        if self.category is not None:
            s += '_'+str(containerHash(self.category))
        if self.task_type is not None:
            s += '_'+str(self.task_type)
        return s

    def output(self):
        """Output as Json."""
        path = self.out_dir.value + self.__taskid__() + '.json'
        return CachedTarget(
            LocalTarget(path, service=JsonService)
        )

    def run(self):
        """Index the ids of the given bag."""
        with self.input()[0] as i:
            D = i.query()

        out = {}

        for k in D:
            indexMap(k, out)

        with self.output() as o:
            o.emit(out)
Example #16
class FSKernelTask(Task):
    out_dir = Parameter('./gram/')

    def __init__(self,
                 transform_expression,
                 kernel,
                 h,
                 D,
                 category=None,
                 task_type=None):
        self.h = h
        self.D = D
        self.category = category
        self.task_type = task_type
        self.transform_expression = transform_expression
        self.kernel = kernel

    def require(self):
        return FSFeatureTransformTask(self.transform_expression, self.h,
                                      self.D, self.category, self.task_type)

    def __taskid__(self):
        return 'FSKernelTask_%s' % (str(
            containerHash(list(self.get_params().items()))))

    def output(self):
        path = self.out_dir.value + self.__taskid__() + '.json'
        return CachedTarget(LocalTarget(path, service=JsonService))

    def run(self):
        with self.input()[0] as inp:
            D = inp.query()

        graphIndex = D['graphIndex']
        X_train = D['X_train']
        X_test = D['X_test']
        train_l = X_train.shape[0]
        kernel = select_full(self.kernel)

        if kernel is None:
            raise ValueError('Unknown kernel %s' % self.kernel)

        X = vstack([X_train, X_test])

        X = normalize_gram(kernel(X))

        X_train = X[:train_l, :train_l]
        X_test = X[train_l:, :train_l]

        with self.output() as o:
            o.emit({
                'params': self.get_params(),
                'graphIndex': graphIndex,
                'data': X_train.tolist(),
                'test': X_test.tolist()
            })
Example #17
class SVDFeatureTask(Task):
    out_dir = Parameter('./gram/')
    components = Parameter(1000)

    def __init__(self, h, D, category=None, task_type=None):
        self.h = h
        self.D = D
        self.category = category
        self.task_type = task_type

    def require(self):
        return PreparedFeatureTask(self.h, self.D, self.category,
                                   self.task_type)

    def __taskid__(self):
        s = 'SVDFeatureTask_%d_%d' % (self.h, self.D)
        if self.category is not None:
            s += '_' + str(containerHash(self.category))
        if self.task_type is not None:
            s += '_' + str(self.task_type)
        return s

    def output(self):
        path = self.out_dir.value + self.__taskid__() + '.json'
        return CachedTarget(LocalTarget(path, service=JsonService))

    def run(self):
        with self.input()[0] as i:
            D = i.query()

        graphIndex = D['graphIndex']

        X = coo_matrix((D['data'], (D['rows'], D['columns'])),
                       shape=(D['row_shape'], D['column_shape']),
                       dtype=np.float64).tocsr()
        svd = TruncatedSVD(n_components=self.components.value)

        X = svd.fit_transform(X)
        print('Reduced features: %s' % str(X.shape))

        with self.output() as o:
            o.emit({'graphIndex': graphIndex, 'matrix': X.tolist()})
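
A standalone sketch of the reduction in run(): TruncatedSVD applied directly to a sparse matrix, which avoids the densification that PCAFeatureTask needs. The random sparse matrix and component count are assumptions.

# Sketch only: TruncatedSVD on sparse input, mirroring SVDFeatureTask.run().
import numpy as np
from scipy.sparse import random as sparse_random
from sklearn.decomposition import TruncatedSVD

X = sparse_random(100, 500, density=0.05, format='csr', dtype=np.float64,
                  random_state=0)
svd = TruncatedSVD(n_components=20, random_state=0)
X_red = svd.fit_transform(X)
print('Reduced features: %s' % str(X_red.shape))  # (100, 20)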
Example #18
class MongoResourceTarget:
    auth = Parameter(None)
    url = Parameter('example.org')
    database = Parameter('db')

    def __init__(self, collection_name, index, key):
        self.collection_name = collection_name
        self.index = index
        self.key = key

    def __enter__(self):
        self._connection = setup_client(self.url.value, self.auth.value)
        self.db = self._connection[self.database.value]
        self.collection = self.db[self.collection_name]
        resource = MongoResource(self.collection, self.index, self.key)
        resource.db = self.db
        return resource

    def __exit__(self, type, value, tb):
        self.collection = None

    def exists(self):
        return False

    def __getstate__(self):
        return {
            'auth': self.auth.value,
            'url': self.url.value,
            'db': self.database.value,
            'collection': self.collection_name,
            'index': self.index,
            'key': self.key
        }

    def __setstate__(self, state):
        self.auth.value = state['auth']
        self.url.value = state['url']
        self.database.value = state['db']
        self.collection_name = state['collection']
        self.index = state['index']
        self.key = state['key']
Example #19
class SVCompRanking(Task):
    out_dir = Parameter("./index/")

    def __init__(self, svcomp_name, directory, csv=False):
        self.directory = directory
        self.svcomp_name = svcomp_name
        self.csv = csv

    def require(self):
        return SVCompIndex(
            self.svcomp_name, self.directory,
            self.csv
        )

    def __taskid__(self):
        return "SVCompRanking_%s" % self.svcomp_name

    def output(self):
        return LocalTarget(
            self.out_dir.value+self.__taskid__()+".json", service=JsonService
        )

    @staticmethod
    def common_tools(L):
        tools = {}
        counter = 0

        for k, props in L.items():
            for prop, tool_results in props.items():
                counter += 1
                for tool in tool_results.keys():
                    if tool == 'name':
                        continue
                    if tool not in tools:
                        tools[tool] = 0
                    tools[tool] += 1
        result = []
        for tool, c in tools.items():
            if c >= counter-5 and 'cpa-bam' not in tool:
                result.append(tool)
        return result

    def run(self):
        with self.input()[0] as i:
            L = i.query()

        tools = SVCompRanking.common_tools(L)

        ranks = rank_tools(L, tools)

        with self.output() as o:
            o.emit(ranks)
Example #20
class PescoSumGramTask(Task):
    out_dir = Parameter("./gram/")

    def __init__(self, paths, i, d, max_i, max_D,
                 svcomp_name, directory, csv=False,
                 kernel='linear'):
        self.svcomp_name = svcomp_name
        self.paths = paths
        self.i = i
        self.d = d
        self.max_i = max_i
        self.max_D = max_D
        self.directory = directory
        self.csv = csv
        self.kernel = kernel

    def require(self):
        return [
            PescoGramTask(
                self.paths, i, self.d, self.max_i,
                self.max_D, self.svcomp_name, self.directory,
                self.csv, self.kernel
            ) for i in range(self.i+1)
        ]

    def __taskid__(self):
        return "PescoSumGramTask_%s_%s_%d_%d" % (self.svcomp_name, self.kernel, self.i, self.d)

    def output(self):
        return LocalTarget(
            self.out_dir.value+self.__taskid__()+".json", service=JsonService
        )

    def run(self):
        GR = None

        for inp in self.input():
            with inp as i:
                gram = np.array(
                    i.query()
                )
            if GR is None:
                GR = gram
            else:
                GR += gram
            del gram

        data = GR.tolist()

        with self.output() as o:
            o.emit(data)
Example #21
class EvaluationAndSettingTask(Task):
    out_dir = Parameter('./evaluation/')

    def __init__(self, graphs, h_Set, D_Set):
        self.graphs = graphs
        self.h_Set = h_Set
        self.D_Set = D_Set

    def require(self):
        return [
            EvaluationTask(self.graphs, self.h_Set, self.D_Set),
            GraphIndexTask()
        ]

    def output(self):
        path = self.out_dir.value + self.__taskid__() + '.json'
        return CachedTarget(LocalTarget(path, service=JsonService))

    def __taskid__(self):
        return "EvaluationAndSetting_%s_%s_%s" %\
                    (str(containerHash(self.graphs)),
                     str(containerHash(self.h_Set)),
                     str(containerHash(self.D_Set)))

    def __repr__(self):
        return 'EvaluationAndSetting(h_Set: %s, D_Set: %s)' %\
                    (str(self.h_Set), str(self.D_Set))

    def run(self):
        with self.input()[0] as i:
            param = i.query()

        with self.input()[1] as i:
            index = i.query()

        categories = []
        for c, D in index['categories'].items():
            for g in self.graphs:
                if g in D:
                    categories.append(c)
                    break

        D = {}
        D['categories'] = categories
        D['h_Set'] = self.h_Set
        D['D_Set'] = self.D_Set

        param['setting'] = D

        with self.output() as o:
            o.emit(param)
Example #22
class NormalizedWLKernelTask(Task):
    out_dir = Parameter('./gram/')
    custom_kernel = Parameter(None)

    def __init__(self, graphs, h, D):
        self.graphs = graphs
        self.h = h
        self.D = D

    def require(self):
        if self.custom_kernel.value is None:
            return WLKernelTask(self.graphs, self.h, self.D)
        else:
            return CustomKernelTask(self.custom_kernel.value, self.graphs,
                                    self.h, self.D)

    def output(self):
        return ManagedTarget(self)

    def __taskid__(self):
        return "NormWLKernel_%d_%d_%s" %\
                    (self.h, self.D,
                     str(containerHash(self.graphs, large=True)))

    def __repr__(self):
        return 'NormalizedKernel(h: %d, D: %d)' % (self.h, self.D)

    def run(self):
        with self.input()[0] as i:
            graphIndex, GR = i.query()

        D = diags(1 / np.sqrt(GR.diagonal()))

        GR = D * GR * D

        with self.output() as o:
            o.emit((graphIndex, GR))
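
The normalization in run() is the usual cosine normalization of a Gram matrix; a minimal sketch on an invented 2x2 kernel:

# Sketch only: normalize a Gram matrix by its diagonal, as in run() above.
import numpy as np
from scipy.sparse import csr_matrix, diags

GR = csr_matrix(np.array([[4.0, 2.0],
                          [2.0, 9.0]]))
D = diags(1.0 / np.sqrt(GR.diagonal()))
GR_norm = D * GR * D                    # K(i,j) / sqrt(K(i,i) * K(j,j))
print(GR_norm.toarray())                # diagonal is now all ones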
Example #23
class WVSimilarWords(Task):
    out_dir = Parameter('./w2v/')

    def __init__(self, graph_list, length, h, D):
        self.graph_list = graph_list
        self.h = h
        self.D = D
        self.length = length

    def require(self):
        out = [
            WVVocabulary(self.graph_list, self.length, self.h, self.D),
            WVEmbeddingTask(self.graph_list, self.length, self.h, self.D)
        ]

        return out

    def output(self):
        path = self.out_dir.value + self.__taskid__() + '.json'
        return CachedTarget(LocalTarget(path, service=JsonService))

    def __taskid__(self):
        return 'W2VSimilarWords_%d_%d_%d' % (self.h, self.D,
                                             containerHash(self.graph_list))

    def run(self):
        with self.input()[0] as i:
            vocab = i.query()
        with self.input()[1] as i:
            embedding = np.array(i.query())

        inv_vocab = [None] * len(vocab)
        for k, v in vocab.items():
            inv_vocab[v[0]] = k

        dis = cdist(embedding, embedding, 'cosine')
        arg_sort = np.argsort(dis, axis=1)[:, 1:6]

        near = {}

        for i, k in enumerate(inv_vocab):
            row = arg_sort[i]
            near[k] = []
            for j in range(row.shape[0]):
                near[k].append([inv_vocab[row[j]], 1 - dis[i, row[j]]])

        with self.output() as o:
            o.emit(near)
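
A standalone sketch of the neighbour lookup in run(): cosine distances via cdist, argsort per row, and self-matches skipped. The random embeddings and five-word vocabulary are assumptions.

# Sketch only: cosine nearest-neighbour lookup over an embedding matrix.
import numpy as np
from scipy.spatial.distance import cdist

rng = np.random.default_rng(0)
embedding = rng.normal(size=(5, 16))           # one row per vocabulary word
inv_vocab = ['a', 'b', 'c', 'd', 'e']

dis = cdist(embedding, embedding, 'cosine')
arg_sort = np.argsort(dis, axis=1)[:, 1:3]     # two nearest, self excluded

near = {}
for i, word in enumerate(inv_vocab):
    near[word] = [(inv_vocab[j], 1.0 - dis[i, j]) for j in arg_sort[i]]
print(near['a'])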
Example #24
class WVGraphSentenceTask(Task):
    out_dir = Parameter('./w2v/sentences/')

    def __init__(self, name, h, D):
        self.name = name
        self.h = h
        self.D = D

    def require(self):
        return PrepareKernelTask(self.name, self.h, self.D)

    def output(self):
        path = self.out_dir.value + self.__taskid__() + '.txt'
        return FileTarget(path)

    def __taskid__(self):
        return 'W2VGraphSentence_%s_%d_%d' % (self.name, self.h, self.D)

    def run(self):
        with self.input()[0] as i:
            G = i.query()

        with self.output() as output:
            for node in G:

                in_nodes = []
                ast_nodes = []

                for in_node, _, _, d in G.in_edges(node,
                                                   keys=True,
                                                   data='type'):
                    if d is EdgeType.se:
                        ast_nodes.append(in_node)
                    elif d is EdgeType.de:
                        in_nodes.append(in_node)

                in_nodes.extend(collect_ast(G, ast_nodes))

                if len(in_nodes) == 0:
                    continue

                in_nodes = [G.node[n]['label'] for n in in_nodes]

                output.write(
                    str(G.node[node]['label']) + ' ' + ' '.join(in_nodes) +
                    '\n')
Example #25
class ExtractInfoTask(Task):
    out_dir = Parameter('./ranking/')

    def __init__(self, graphs):
        self.graphs = graphs

    def require(self):
        return GraphIndexTask()

    def output(self):
        path = self.out_dir.value + self.__taskid__() + '.json'
        return CachedTarget(LocalTarget(path, service=JsonService))

    def __taskid__(self):
        return "ExtractInfoTask_%s" % containerHash(self.graphs)

    def run(self):
        with self.input()[0] as i:
            index = i.query()
            GraphIndexTask.convert_b64(index)

        tools = index['stats']

        ranking = {}
        for g in self.graphs:
            gKey = index['index'][g]
            score_rank = {}
            for cat, cat_tools in tools.items():
                for t, frame in cat_tools.items():
                    if gKey not in frame.index:
                        continue
                    d = frame.loc[gKey]
                    status = d['status']
                    expected = d['expected_status']
                    state = 'false'
                    if (status is Status.false and expected is Status.false)\
                       or (status is Status.true and expected is Status.true):
                        state = 'correct'

                    time = d['cputime']

                    score_rank[t] = {'solve': state, 'time': time}

            ranking[g] = score_rank

        with self.output() as pdfile:
            pdfile.emit(ranking)
Example #26
class ExtractKernelEntitiesTask(Task):
    out_dir = Parameter('./gram/')

    def __init__(self, graphs, h, D):
        self.graphs = graphs
        self.h = h
        self.D = D

    def require(self):
        out = [ExtractInfoTask(self.graphs), GraphIndexTask()]

        for g in self.graphs:
            out.append(Optional(ExtractKernelBagTask(g, self.h, self.D)))

        return out

    def output(self):
        path = self.out_dir.value + self.__taskid__() + '.json'
        return CachedTarget(LocalTarget(path, service=JsonService))

    def __taskid__(self):
        return 'ExtractKernelEntitiesTask_%d_%d_%s' % (
            self.h, self.D, str(containerHash(self.graphs)))

    def run(self):
        with self.input()[0] as i:
            info = i.query()

        with self.input()[1] as i:
            index = i.query()['index']

        out_dict = {}

        for i, g in enumerate(self.graphs):
            name = index[g]

            if self.input()[i + 2] is None:
                continue

            with self.input()[i + 2] as bag_input:
                bag = bag_input.query()

            out_dict[g] = {'file': name, 'kernel_bag': bag, 'label': info[g]}

        with self.output() as out_file:
            out_file.emit(out_dict)
Example #27
class WVVocabulary(Task):
    out_dir = Parameter('./w2v/')

    def __init__(self, graph_list, length, h, D):
        self.graph_list = graph_list
        self.h = h
        self.D = D
        self.length = length

    def require(self):
        return [
            WVGraphSentenceTask(name, self.h, self.D)
            for name in self.graph_list
        ]

    def output(self):
        path = self.out_dir.value + self.__taskid__() + '.json'
        return CachedTarget(LocalTarget(path, service=JsonService))

    def __taskid__(self):
        return 'W2VVocabulary_%d_%d_%d' % (self.h, self.D,
                                           containerHash(self.graph_list))

    def run(self):
        vocab = {}
        overall = 0
        for inp in self.input():
            with inp as i:
                for line in i.readlines():
                    for w in line.split():
                        if w not in vocab:
                            vocab[w] = 0
                        vocab[w] += 1
                    overall += 1
        vocab = sorted(vocab.items(), key=lambda x: x[1],
                       reverse=True)[:self.length]
        vocab = {k[0]: (v, k[1]) for v, k in enumerate(vocab)}

        print('### Parsed %s samples ###' % overall)

        with self.output() as o:
            o.emit(vocab)
Example #28
class DatasetLabelTask(Task):
    out_dir = Parameter("./dataset/")

    def __init__(self, path, svcomp_name, directory, csv=False):
        self.path = path
        self.svcomp_name = svcomp_name
        self.directory = directory
        self.csv = csv

    def require(self):
        return [
            PescoGraphTask(self.path),
            SVCompRanking(self.svcomp_name, self.directory,
                          self.csv)
        ]

    def __taskid__(self):
        return "DatasetLabelTask_%s_%s" % (self.svcomp_name, self.path.replace("/", "_").replace(".", "_"))

    def output(self):
        return LocalTarget(
            self.out_dir.value+self.__taskid__()+".json", service=JsonService
        )

    def run(self):
        with self.input()[1] as inp:
            rankings = inp.query()

        if self.path not in rankings:
            raise ValueError("Unknown path %s" % self.path)
        ranking = rankings[self.path]

        with self.input()[0] as inp:
            G = json.load(inp)

        with self.output() as o:
            o.emit({
                'file': self.path,
                'svcomp': self.svcomp_name,
                'rankings': ranking,
                'graph': G
            })
Example #29
class SVCompGraphIndexTask(Task):
    out_dir = Parameter("./index/")

    def __init__(self, svcomp_name, directory, csv=False):
        self.directory = directory
        self.svcomp_name = svcomp_name
        self.csv = csv

    def require(self):
        return SVCompIndex(
            self.svcomp_name, self.directory,
            self.csv
        )

    def __taskid__(self):
        return "SVCompGraphIndexTask_%s" % self.svcomp_name

    def output(self):
        return LocalTarget(
            self.out_dir.value+self.__taskid__()+".json", service=JsonService
        )

    def run(self):
        with self.input()[0] as i:
            L = i.query()

        graphIndex = {'counter': 0}
        categories = {}

        for k, V in L.items():
            if k not in graphIndex:
                graphIndex[k] = graphIndex['counter']
                graphIndex['counter'] += 1
            for prop in V.keys():
                if prop not in categories:
                    categories[prop] = []
                categories[prop].append(graphIndex[k])

        with self.output() as o:
            o.emit({
                'index': graphIndex,
                'categories': categories
            })
Example #30
class PescoNormGramTask(Task):
    out_dir = Parameter("./gram/")

    def __init__(self, paths, i, d, max_i, max_D,
                 svcomp_name, directory, csv=False,
                 kernel='linear'):
        self.svcomp_name = svcomp_name
        self.paths = paths
        self.i = i
        self.d = d
        self.max_i = max_i
        self.max_D = max_D
        self.directory = directory
        self.csv = csv
        self.kernel = kernel

    def require(self):
        return PescoSumGramTask(
            self.paths, self.i, self.d,
            self.max_i, self.max_D,
            self.svcomp_name, self.directory,
            self.csv, self.kernel
        )

    def __taskid__(self):
        return "PescoNormGramTask_%s_%s_%d_%d" % (self.svcomp_name, self.kernel, self.i, self.d)

    def output(self):
        return LocalTarget(
            self.out_dir.value+self.__taskid__()+".json", service=JsonService
        )

    def run(self):
        with self.input()[0] as i:
            gram = np.array(
                i.query()
            )

        data = normalize_gram(gram).tolist()

        with self.output() as o:
            o.emit(data)