Example #1
def get_context():
    """Lazily create a single shared TkContext, guarded by a lock."""
    global global_tc
    with lock:
        if global_tc is None:
            sparktkconf_dict = {
                'spark.driver.memory': "3712m",
                'spark.executor.instances': '5',
                'spark.driver.cores': '1',
                'spark.driver.extraJavaOptions': '-Xmx2688m',
                'spark.driver.maxPermSize': '512m',
                'spark.driver.maxResultSize': '2g',
                'spark.dynamicAllocation.enabled': 'true',
                'spark.dynamicAllocation.maxExecutors': '116',
                'spark.dynamicAllocation.minExecutors': '1',
                'spark.executor.cores': '1',
                'spark.executor.extraJavaOptions': '-Xmx2688m',
                'spark.executor.memory': '3200m',
                'spark.shuffle.io.preferDirectBufs': 'false',
                'spark.shuffle.service.enabled': 'true',
                'spark.yarn.am.waitTime': '1000000',
                'spark.yarn.driver.memoryOverhead': '384',
                'spark.yarn.executor.memoryOverhead': '384',
                'spark.eventLog.enabled': 'false',
                'spark.sql.shuffle.partitions': '6'
            }
            if config.run_mode:
                global_tc = stk.TkContext(master='yarn-client',
                                          extra_conf_dict=sparktkconf_dict,
                                          other_libs=[daaltk])
            else:
                global_tc = stk.TkContext(other_libs=[daaltk])
    return global_tc
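This and the later get_context variants assume some module-level setup that the snippets do not show. A minimal sketch of what that scaffolding likely looks like, assuming sparktk is imported as stk and daaltk is installed (the names lock, global_tc, config, and udf_files are taken from the snippets themselves):

import threading
import sparktk as stk
import daaltk

lock = threading.Lock()   # guards the one-time TkContext creation
global_tc = None          # shared TkContext singleton, built on first use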
Example #2
def get_context():
    global global_tc
    with lock:
        if global_tc is None:
            sparktkconf_dict = spark_context_config.get_spark_conf()
            if config.run_mode:
                global_tc = stk.TkContext(master='yarn-client',
                                          extra_conf_dict=sparktkconf_dict,
                                          py_files=udf_files)
            else:
                global_tc = stk.TkContext(py_files=udf_files)

        return global_tc
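Here the Spark properties come from a helper rather than an inline literal. A hypothetical sketch of spark_context_config.get_spark_conf(), assuming it simply returns a dict of Spark property strings like the sparktkconf_dict in Example #1:

def get_spark_conf():
    # Hypothetical helper: returns Spark properties as plain strings,
    # mirroring the inline dict from Example #1.
    return {
        'spark.executor.instances': '5',
        'spark.executor.memory': '3200m',
        'spark.dynamicAllocation.enabled': 'true',
        'spark.shuffle.service.enabled': 'true',
    }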
Example #3
    def test_graph_example(self):
        """Documentation test for classifiers"""
        # Get a context from the spark-tk library
        tc = sparktk.TkContext()
        # Graphs are composed of two sets: one of vertices, and one of edges
        # that each connect exactly two (possibly not distinct) vertices.
        # The degree of a vertex is the number of edges attached to it.

        # Below we build a frame using a vertex list and an edge list.

        vertex_frame = tc.frame.create(
            [["vertex1"], ["vertex2"], ["vertex3"], ["vertex4"], ["vertex5"]],
            [("id", str)])
        edge_frame = tc.frame.create(
            [["vertex2", "vertex3"], ["vertex2", "vertex1"],
             ["vertex2", "vertex4"], ["vertex2", "vertex5"]],
            [("src", str), ("dst", str)])

        # The graph is centered on vertex2, with 4 vertices each attached
        # to the center vertex. This is known as a star graph; in this
        # configuration it can be visualized as a plus sign.

        # To create a graph, first define the vertices, and then the edges.

        graph = tc.graph.create(vertex_frame, edge_frame)

        # get the degrees, which have known values
        degrees = graph.degrees()

        degree_list = degrees.take(5)
        known_list = [[u'vertex4', 1], [u'vertex1', 1], [u'vertex5', 1],
                      [u'vertex2', 4], [u'vertex3', 1]]

        self.assertItemsEqual(known_list, degree_list)
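The expected degrees can be checked independently of spark-tk: in a star graph the center vertex touches every edge, so its degree equals the number of edges, while each leaf has degree 1. A minimal sketch with plain Python:

from collections import Counter

edges = [("vertex2", "vertex3"), ("vertex2", "vertex1"),
         ("vertex2", "vertex4"), ("vertex2", "vertex5")]
degree = Counter()
for src, dst in edges:
    degree[src] += 1   # each edge adds one to both of its endpoints
    degree[dst] += 1
# degree["vertex2"] == 4; every other vertex has degree 1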
Example #4
def get_context():
    global global_tc
    with lock:
        if global_tc is None:
            global_tc = stk.TkContext()
    return global_tc
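A short usage sketch for the pattern above (assuming the module-level lock and global_tc shown after Example #1): repeated calls return the same shared context, because the TkContext is built only on the first call through the lock.

tc_a = get_context()
tc_b = get_context()
assert tc_a is tc_b   # later calls reuse the singleton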