Example 1
    def __init__(self, language, gateway, **kwargs):
        '''
        Constructor
        @param language: The language code for the proper initialization of this language-dependent tool
        @type language: string
        @param gateway: An already initialized Py4j java gateway
        @type gateway: py4j.java_gateway.JavaGateway
        '''
        self.language = language
        #self.jvm = JVM(java_classpath)
        #socket_no = self.jvm.socket_no
        #gatewayclient = GatewayClient('localhost', socket_no)
        #gateway = JavaGateway(gatewayclient, auto_convert=True, auto_field=True)
        #sys.stderr.write("Initialized local Java gateway with pid {} in socket {}\n".format(self.jvm.pid, socket_no))
    
        self.meteor_view = gateway.new_jvm_view()
        #import necessary java packages from meteor jar
        java_import(self.meteor_view, 'edu.cmu.meteor.scorer.*')
        java_import(self.meteor_view, 'edu.cmu.meteor.util.*')
#        java_import(self.meteor_view, '')
        
        #pass the language setting into the meteor configuration object
        config = self.meteor_view.MeteorConfiguration()
        config.setLanguage(language)
        #initialize object with the given config
        sys.stderr.write("If the next line displays an error, it is not critical, but the METEOR language-specific transducer must be installed.\n")
        self.scorer = self.meteor_view.MeteorScorer(config)
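
A minimal usage sketch for the constructor above, assuming the enclosing class is named MeteorWrapper (the real class name is not shown in the snippet) and a GatewayServer with the METEOR jar on its classpath is already running:

from py4j.java_gateway import JavaGateway

gateway = JavaGateway()  # connect to the already running GatewayServer on the default port
wrapper = MeteorWrapper("en", gateway)  # class name is hypothetical
stats = wrapper.scorer.getMeteorStats("Test sentence", "Test sentence !")
print(stats.score)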
Example 2
def start_gateway_server():
    classPath = compute_classpath(DDF_HOME)
    # launch GatewayServer in a new process
    javaopts = os.getenv('JAVA_OPTS')
    if javaopts is not None:
        javaopts = javaopts.split()
    else:
        javaopts = []
    #command = ["java", "-classpath", classPath] + ["-Dlog4j.configuration=file:"+ DDF_HOME + "/core/conf/local/ddf-local-log4j.properties"] + ["py4j.GatewayServer", "--die-on-broken-pipe", "0"]
    command = ["java", "-classpath", classPath] + javaopts + ["py4j.GatewayServer", "--die-on-broken-pipe", "0"]
    
    proc = Popen(command, stdout = PIPE, stdin = PIPE, preexec_fn = preexec_func)
    # get the port of the GatewayServer
    port = int(proc.stdout.readline())

    class JavaOutputThread(Thread):
        def __init__(self, stream):
            Thread.__init__(self)
            self.daemon = True
            self.stream = stream

        def run(self):
            while True:
                line = self.stream.readline()
                sys.stderr.write(line)
    JavaOutputThread(proc.stdout).start()
    # connect to the gateway server
    gateway = JavaGateway(GatewayClient(port = port), auto_convert = False)
    java_import(gateway.jvm, "io.ddf.*")
    java_import(gateway.jvm, "io.ddf.spark.*")
    return gateway
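
A hedged sketch of the calling side; the helper both launches the GatewayServer process and returns the connected gateway, so a caller only needs:

gateway = start_gateway_server()
jvm = gateway.jvm
# classes under io.ddf and io.ddf.spark imported above are now addressable by their
# short names on jvm (e.g. jvm.DDFManager); the exact entry class is project-specific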
Example 3
 def __init__(self, mrgeo):
     self._mrgeo = mrgeo
     jvm = self._mrgeo._get_jvm()
     # Import the raster map op test support class and all other needed classes
     java_import(jvm, "org.mrgeo.mapalgebra.InlineCsvMapOp")
     self._jvm = jvm
     self._sparkContext = mrgeo.sparkContext
 def _do_init(self, *args, **kwargs):
     # Modifies base _do_init to add a Java-Cassandra SparkContext (jcsc)
     # to the instance
     super(CassandraSparkContext, self)._do_init(*args, **kwargs)
     java_import(self._jvm, "com.datastax.spark.connector.CassandraJavaUtil")
     java_import(self._jvm, "com.datastax.spark.connector.RowConvertingIterator")
     self._jcsc = self._jvm.CassandraJavaUtil.javaFunctions(self._jsc)
Example 5
def singlethread(java_classpath):
    print "Thread starting"
    
    jvm = JVM(java_classpath, dir_path)
    socket_no = jvm.socket_no
    gatewayclient = GatewayClient('localhost', socket_no)
    gateway = JavaGateway(gatewayclient, auto_convert=True, auto_field=True)
    sys.stderr.write("Initialized global Java gateway with pid {} in socket {}\n".format(jvm.pid, socket_no))

    
    gatewayclient = GatewayClient('localhost', socket_no)
    print "Gclient started"
    gateway = JavaGateway(gatewayclient, auto_convert=True, auto_field=True)
    print "Java Gateway started"
    #create a new view for the jvm
    meteor_view = gateway.new_jvm_view()
    #import required packages
    java_import(meteor_view, 'edu.cmu.meteor.scorer.*')
    java_import(meteor_view, 'edu.cmu.meteor.util.*')
    print "Modules imported"
    #pass the language setting into the meteor configuration object
    config = meteor_view.MeteorConfiguration()
    config.setLanguage("en")
    #initialize the java object with the given config
    scorer = meteor_view.MeteorScorer(config)
    print "object initialized"
    #run the scoring function
    stats = scorer.getMeteorStats("Test sentence", "Test sentence !")
    print stats.score
    return 1
def main():
    if len(sys.argv) != 3:
        print >> sys.stderr, "Usage: example <keyspace_name> <column_family_name>"
        sys.exit(-1)

    keyspace_name = sys.argv[1]
    column_family_name = sys.argv[2]

    # Valid config options here https://github.com/datastax/spark-cassandra-connector/blob/master/doc/1_connecting.md
    conf = SparkConf().set("spark.cassandra.connection.host", "127.0.0.1")

    sc = SparkContext(appName="Spark + Cassandra Example",
                      conf=conf)

    # import time; time.sleep(30)
    java_import(sc._gateway.jvm, "com.datastax.spark.connector.CassandraJavaUtil")
    print sc._jvm.CassandraJavaUtil

    users = (
        ["Mike", "Sukmanowsky"],
        ["Andrew", "Montalenti"],
        ["Keith", "Bourgoin"],
    )
    rdd = sc.parallelize(users)
    print rdd.collect()
Example 7
def scala_set_to_set(ctx, x):
    from py4j.java_gateway import java_import

    # import scala
    java_import(ctx._jvm, 'scala')

    # grab Scala's set converter and convert to a Python set
    return set(ctx._jvm.scala.collection.JavaConversions.setAsJavaSet(x))
    def get_smoothing_method(self, spark_context):
        java_import(spark_context._jvm, ClassNames.WEIGHTS)
        java_import(spark_context._jvm, ClassNames.WEIGHTED_MOVING_AVERAGE)
        java_weights = spark_context._jvm.Weights(self.__python_weights.limit())
        for index in range(self.__window_size):
            java_weights.add(self.__python_weights.get(index))

        return spark_context._jvm.WeightedMovingAverageMethod(self.__window_size, java_weights)
 def _connect(self, gateway, grammarfile):        
     module_view = gateway.new_jvm_view()      
     java_import(module_view, 'BParser')
     
     # get the application instance
     log.info("Grammar file: {}".format(grammarfile))
     self.bp_obj = module_view.BParser(grammarfile)
     sys.stderr.write("got BParser object\n")
Example 10
def launch_gateway():
    if "MRGEO_GATEWAY_PORT" in os.environ:
        gateway_port = int(os.environ["MRGEO_GATEWAY_PORT"])
    else:
        # Launch the Py4j gateway using the MrGeo command so that we pick up the proper classpath

        script = find_script()

        # Start a socket that will be used by PythonGatewayServer to communicate its port to us
        callback_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        callback_socket.bind(('127.0.0.1', 0))
        callback_socket.listen(1)
        callback_host, callback_port = callback_socket.getsockname()
        env = dict(os.environ)
        env['_MRGEO_DRIVER_CALLBACK_HOST'] = callback_host
        env['_MRGEO_DRIVER_CALLBACK_PORT'] = str(callback_port)

        command = [script, "python", "-v", "-h", callback_host, "-p", str(callback_port)]

        # Launch the Java gateway.
        # We open a pipe to stdin so that the Java gateway can die when the pipe is broken
        # Don't send ctrl-c / SIGINT to the Java gateway:
        def preexec_func():
            signal.signal(signal.SIGINT, signal.SIG_IGN)

        proc = Popen(command, stdin=PIPE, preexec_fn=preexec_func, env=env)

        gateway_port = None
        # We use select() here in order to avoid blocking indefinitely if the subprocess dies
        # before connecting
        while gateway_port is None and proc.poll() is None:
            timeout = 1  # (seconds)
            readable, _, _ = select.select([callback_socket], [], [], timeout)
            if callback_socket in readable:
                gateway_connection = callback_socket.accept()[0]
                # Determine which ephemeral port the server started on:
                gateway_port = read_int(gateway_connection.makefile(mode="rb"))
                gateway_connection.close()
                callback_socket.close()

        if gateway_port is None:
            raise Exception("Java gateway process exited before sending the driver its port number")

    print("Talking with MrGeo on port " + str(gateway_port))

    # Connect to the gateway
    gateway = JavaGateway(GatewayClient(port=gateway_port), auto_convert=True)

    # Import the classes used by MrGeo
    java_import(gateway.jvm, "org.mrgeo.python.*")

    # Import classes used by Spark
    java_import(gateway.jvm, "org.apache.spark.SparkConf")
    java_import(gateway.jvm, "org.apache.spark.api.java.*")
    java_import(gateway.jvm, "org.apache.spark.api.python.*")
    java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*")

    return gateway
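
For context, a hedged sketch of the calling side, using only classes the snippet imports:

gateway = launch_gateway()  # starts (or attaches to) the MrGeo gateway and connects
jvm = gateway.jvm
conf = jvm.SparkConf()  # resolvable by short name because org.apache.spark.SparkConf was imported above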
Example 11
    def _getSome(self, value):
        java_import(self._jvm, "scala.Some")
        return self._jvm.Some(value)




# suite = TestLoader().loadTestsFromTestCase(MrGeoLocalIntegrationTests)
# TextTestRunner(verbosity=2).run(suite)
Example 12
 def setUp(self):
     conf = SparkConf().setAppName('test').setMaster('local[*]')
     pwd = os.path.dirname(os.path.realpath(__file__))
     metastore_dir = os.path.abspath(os.path.join(pwd, '..',
                                                  'metastore_db'))
     silentremove(os.path.join(metastore_dir, "dbex.lck"))
     silentremove(os.path.join(metastore_dir, "db.lck"))
     self.sc = SparkContext(conf=conf)
     self.jvm = self.sc._gateway.jvm
     java_import(self.jvm, "org.apache.spark.sql.*")
Example 13
def createColor(r, g, b):    

    global _gateway
    if _gateway is None:
        _gateway = JavaGateway()
        
    jvm = _gateway.jvm

    java_import(jvm, 'org.eclipse.swt.graphics.*')
    
    return jvm.Color(None, r, g, b)
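
A one-line usage sketch (it assumes the Eclipse/SWT-side gateway this helper expects is already listening on the default py4j port):

red = createColor(255, 0, 0)  # returns a proxy for an org.eclipse.swt.graphics.Color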
Example 14
def createHistogramBound(position, r, g, b):    

    global _gateway
    if _gateway is None:
        _gateway = JavaGateway()
        
    jvm = _gateway.jvm

    java_import(jvm, 'org.eclipse.dawnsci.plotting.api.histogram.*')
    
    return jvm.HistogramBound(position, r, g, b)
Example 15
def getService(serviceClass):
    
    global _gateway
    if _gateway is None:
        _gateway = JavaGateway()
        
    jvm = _gateway.jvm
    
    java_import(jvm, 'org.dawb.common.services.*')
    
    return jvm.Activator.getService(serviceClass)
Example 16
    def __init__(self, _jvm, smvconfig):
        self._jvm = _jvm

        self.smvconfig = smvconfig
        self.dsRepoFactories = []

        from py4j.java_gateway import java_import
        java_import(self._jvm, "org.tresamigos.smv.python.SmvPythonHelper")
        java_import(self._jvm, "org.tresamigos.smv.DataSetRepoFactoryPython")

        self.helper = self._jvm.SmvPythonHelper
Example 17
def getPlottingSystem(plottingSystemName):
    
    global _gateway
    if _gateway is None:
        _gateway = JavaGateway()
        
    jvm = _gateway.jvm
    
    java_import(jvm, 'org.eclipse.dawnsci.plotting.api.*')
    
    return jvm.PlottingFactory.getPlottingSystem(plottingSystemName, True)
Example 18
def new_gateway_client():
    global __gateway_server_port

    if __gateway_server_port is None:
        __gateway_server_port = start_gateway_server()

    # connect to the gateway server
    gateway = JavaGateway(GatewayClient(port=__gateway_server_port), auto_convert=False)
    java_import(gateway.jvm, 'io.ddf.*')
    java_import(gateway.jvm, 'io.ddf.spark.*')
    return gateway
Example 19
    def _create_job(self):
        if not self._job:
            jvm = self._get_jvm()
            java_import(jvm, "org.mrgeo.job.*")

            appname = "PyMrGeo"

            self._job = jvm.JobArguments()
            java_gateway.set_field(self._job, "name", appname)

            # Yarn is the default
            self.useyarn()
Example 20
 def __set_file_type(self, jvm, file_type):
     java_import(jvm, ClassNames.FileType)
     file_types = {
         'CSV': jvm.FileType.CSV,
         'TSV': jvm.FileType.TSV
     }
     if file_type in file_types.values():
         self.__file_type = file_type
     elif file_type.upper() in file_types:
         self.__file_type = file_types[file_type.upper()]
     else:
         raise ValueError('"%s" is not a valid file type\nValid file types are CSV and TSV' % file_type)
Example 21
    def __init__(self, language, gateway, **kwargs):
        '''
        Constructor
        '''
        self.language = language
        ltool_view = gateway.new_jvm_view()
        java_import(ltool_view, 'org.languagetool.Languages')
        java_import(ltool_view, 'org.languagetool.JLanguageTool')

        if language=='ru':
            language = 'ru-RU' 
        
        tool_language = ltool_view.Languages.getLanguageForShortName(language)
        self.ltool = ltool_view.JLanguageTool(tool_language)        
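
A hedged usage sketch, assuming the enclosing class is named LanguageToolWrapper (hypothetical) and the gateway's classpath contains the LanguageTool jars:

from py4j.java_gateway import JavaGateway

gateway = JavaGateway()  # connect to the already running GatewayServer
checker = LanguageToolWrapper("en", gateway)  # class name is hypothetical
matches = checker.ltool.check("This are a test")  # JLanguageTool.check returns the rule matches found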
Example 22
 def run(self):
     input = self.setupInputStreams(self.options.num_streams)
     output = self.setupOutputStream(input)
     output.count().pprint()
     sc = self.ssc.sparkContext
     java_import(sc._jvm, "org.apache.spark.streaming.scheduler.StatsReportListener")
     numBatches = int(self.options.total_duration / self.options.batch_duration)
     listener = sc._jvm.StatsReportListener(numBatches)
     self.ssc._jssc.addStreamingListener(listener)
     self.ssc.start()
     startTime = time.time()
     time.sleep(self.options.total_duration)
     self.ssc.stop(False, True)
     return self.processResults(listener)
Example 23
    def _ensure_initialized(cls):
        SparkContext._ensure_initialized()
        gw = SparkContext._gateway

        java_import(gw.jvm, "org.apache.spark.streaming.*")
        java_import(gw.jvm, "org.apache.spark.streaming.api.*")
        java_import(gw.jvm, "org.apache.spark.streaming.api.java.*")
        java_import(gw.jvm, "org.apache.spark.streaming.api.python.*")

        # start callback server
        # getattr will fallback to JVM, so we cannot test by hasattr()
        if "_callback_server" not in gw.__dict__ or gw._callback_server is None:
            gw.callback_server_parameters.eager_load = True
            gw.callback_server_parameters.daemonize = True
            gw.callback_server_parameters.daemonize_connections = True
            gw.callback_server_parameters.port = 0
            gw.start_callback_server(gw.callback_server_parameters)
            cbport = gw._callback_server.server_socket.getsockname()[1]
            gw._callback_server.port = cbport
            # gateway with real port
            gw._python_proxy_port = gw._callback_server.port
            # get the GatewayServer object in JVM by ID
            jgws = JavaObject("GATEWAY_SERVER", gw._gateway_client)
            # update the port of CallbackClient with real port
            jgws.resetCallbackClient(jgws.getCallbackClient().getAddress(), gw._python_proxy_port)

        # register serializer for TransformFunction
        # it happens before creating SparkContext when loading from checkpointing
        cls._transformerSerializer = TransformFunctionSerializer(
            SparkContext._active_spark_context, CloudPickleSerializer(), gw)
Example 24
def test_dir_jvmview_two():
    with example_app_process():
        with gateway() as g:
            view1 = g.new_jvm_view()
            view2 = g.new_jvm_view()
            helper_dir_jvmview(view1)
            helper_dir_jvmview(view2)

            # now give them different contents
            java_import(view1, "com.fourth.Class4")
            java_import(view2, "com.fifth.Class5")

            assert sorted(dir(view1)) == [UserHelpAutoCompletion.KEY, "Class1", "Class2", "Class3", "Class4"]
            assert sorted(dir(view2)) == [UserHelpAutoCompletion.KEY, "Class1", "Class2", "Class3", "Class5"]
Example 25
    def copy(cls, srcfile, srcpath=None, dstpath=None, dstfile=None):
        jvm = cls.mrgeo._get_jvm()
        java_import(jvm, "org.mrgeo.hdfs.utils.HadoopFileUtils")
        java_import(jvm, "org.apache.hadoop.fs.Path")

        if srcpath is not None:
            src = srcpath
            if not src.endswith('/'):
                src += '/'
            src += srcfile
        else:
            src = srcfile

        if not os.path.exists(src):
            if os.path.exists(cls.inputdir + src):
                src = cls.inputdir + src

        if not os.path.exists(src):
            raise Exception("Source (" + src + ") is not a file or directory")

        if dstfile is not None:
            dst = dstfile
            if not dst.endswith('/'):
                dst += '/'
            dst += dstfile

            if not os.path.isfile(src):
                raise Exception("Source (" + src + ") is must be a file")

            if jvm.HadoopFileUtils.exists(dst):
                jvm.HadoopFileUtils.delete(dst)

            jvm.HadoopFileUtils.copyFileToHdfs(src, dst)

            return dst
        elif dstpath is not None:
            dst = dstpath
        else:
            dst = cls.inputhdfs

        basefile = os.path.basename(src)
        dstfile = dst + basefile

        if jvm.HadoopFileUtils.exists(dstfile):
            jvm.HadoopFileUtils.delete(dstfile)

        jvm.HadoopFileUtils.copyToHdfs(src, dst)

        return dstfile
Example 26
def create_gateway():
    """
    Initialize a gateway with default port and address
    :return: JavaGateway
    """
    gateway = None
    try:
        gateway = JavaGateway(GatewayClient(), auto_convert=True)
        # import spaceship code
        java_import(gateway.jvm, "edu.gatech.sunlab.spaceship.api.py.*")
    except Py4JNetworkError as err:
        LOG.error("Failed to connect. Please make sure Spaceship gateway is running: %r", err)
    return gateway
Example 27
    def comparevector(self, vector, testname):
        if self.GENERATE_BASELINE_DATA:
            self.savevector(vector, str(testname))
        else:
            jvm = self.mrgeo._get_jvm()
            # test = raster.mapop.toDataset(False)
            java_import(jvm, "org.mrgeo.hdfs.vector.DelimitedVectorReader")

            testvector = str(self.outputhdfs + testname + ".tsv")
            vector.ssave(testvector)
            expectedvector = str(self.inputdir + testname + ".tsv")
            vdp_expected = jvm.DataProviderFactory.getVectorDataProvider(
                expectedvector,
                jvm.DataProviderFactory.AccessMode.READ,
                jvm.HadoopUtils.createConfiguration())
            expected_geom_reader = vdp_expected.getVectorReader().get()

            vdp = jvm.DataProviderFactory.getVectorDataProvider(
                testvector,
                jvm.DataProviderFactory.AccessMode.READ,
                jvm.HadoopUtils.createConfiguration())
            self.assertTrue(vdp is not None)
            vector_reader = vdp.getVectorReader()
            self.assertTrue(vector_reader is not None)
            self.assertTrue(is_instance_of(self.mrgeo.gateway, vector_reader, jvm.DelimitedVectorReader))
            self.assertEquals(vdp_expected.getVectorReader().count(), vector_reader.count())
            geom_reader = vector_reader.get()
            self.assertTrue(geom_reader is not None)

            while expected_geom_reader.hasNext():
                expected_geom = expected_geom_reader.next()
                geom = geom_reader.next()
                self.assertTrue(geom is not None)
                self.assertEquals(expected_geom.type(), geom.type())
                self.assertAlmostEquals(float(expected_geom.getAttribute("COST_S")),
                                        float(geom.getAttribute("COST_S")), delta=0.001)
                self.assertAlmostEquals(float(expected_geom.getAttribute("DISTANCE_M")),
                                        float(geom.getAttribute("DISTANCE_M")), delta=0.001)
                self.assertAlmostEquals(float(expected_geom.getAttribute("MINSPEED_MPS")),
                                        float(geom.getAttribute("MINSPEED_MPS")), delta=0.001)
                self.assertAlmostEquals(float(expected_geom.getAttribute("MAXSPEED_MPS")),
                                        float(geom.getAttribute("MAXSPEED_MPS")), delta=0.001)
                self.assertAlmostEquals(float(expected_geom.getAttribute("AVGSPEED_MPS")),
                                        float(geom.getAttribute("AVGSPEED_MPS")), delta=0.001)

            # Should not be any more geometries in the actual output
            self.assertFalse(geom_reader.hasNext())
            jvm.HadoopFileUtils.delete(testvector)
Example 28
def java_gateway():
    from py4j.java_gateway import java_import, JavaGateway
    gateway = JavaGateway(auto_convert=True)
    jvm = gateway.jvm
    java_import(jvm, 'com.netflix.hystrix.util.HystrixRollingNumber')
    java_import(jvm, 'com.netflix.hystrix.util.HystrixRollingNumberEvent')
    java_import(jvm, 'com.netflix.hystrix.HystrixCommandProperties')
    java_import(jvm, 'com.netflix.hystrix.HystrixCommandProperties.Setter')
    return gateway
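
A hedged note on how the returned gateway might be used; after the imports above, the Hystrix classes resolve by their short names on gateway.jvm:

gateway = java_gateway()
# usage sketch, not from the original source: static members such as enum constants
# are reachable directly on the imported class
success_event = gateway.jvm.HystrixRollingNumberEvent.SUCCESS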
Example 29
    def createStream(ssc, zkQuorum, groupId, topics, kafkaParams={},
                     storageLevel=StorageLevel.MEMORY_AND_DISK_SER_2,
                     keyDecoder=utf8_decoder, valueDecoder=utf8_decoder):
        """
        Create an input stream that pulls messages from a Kafka Broker.

        :param ssc:  StreamingContext object
        :param zkQuorum:  Zookeeper quorum (hostname:port,hostname:port,..).
        :param groupId:  The group id for this consumer.
        :param topics:  Dict of (topic_name -> numPartitions) to consume.
                        Each partition is consumed in its own thread.
        :param kafkaParams: Additional params for Kafka
        :param storageLevel:  RDD storage level.
        :param keyDecoder:  A function used to decode key (default is utf8_decoder)
        :param valueDecoder:  A function used to decode value (default is utf8_decoder)
        :return: A DStream object
        """
        java_import(ssc._jvm, "org.apache.spark.streaming.kafka.KafkaUtils")

        kafkaParams.update({
            "zookeeper.connect": zkQuorum,
            "group.id": groupId,
            "zookeeper.connection.timeout.ms": "10000",
        })
        if not isinstance(topics, dict):
            raise TypeError("topics should be dict")
        jtopics = MapConverter().convert(topics, ssc.sparkContext._gateway._gateway_client)
        jparam = MapConverter().convert(kafkaParams, ssc.sparkContext._gateway._gateway_client)
        jlevel = ssc._sc._getJavaStorageLevel(storageLevel)

        def getClassByName(name):
            return ssc._jvm.org.apache.spark.util.Utils.classForName(name)

        try:
            array = getClassByName("[B")
            decoder = getClassByName("kafka.serializer.DefaultDecoder")
            jstream = ssc._jvm.KafkaUtils.createStream(ssc._jssc, array, array, decoder, decoder,
                                                       jparam, jtopics, jlevel)
        except Py4JError, e:
            # TODO: use --jar once it also work on driver
            if not e.message or 'call a package' in e.message:
                print "No kafka package, please put the assembly jar into classpath:"
                print " $ bin/spark-submit --driver-class-path external/kafka-assembly/target/" + \
                      "scala-*/spark-streaming-kafka-assembly-*.jar"
            raise e
Example 30
def start_gateway_server():
    classpath = compute_classpath(DDF_HOME)

    java_opts = os.getenv('JAVA_OPTS')
    if java_opts is not None:
        java_opts = java_opts.split()
    else:
        java_opts = []

    # set log options and memory configuration
    if not any([s.startswith('-Dlog4j.configuration') for s in java_opts]):
        java_opts += ['-Dlog4j.configuration=file:{}/core/conf/local/ddf-local-log4j.properties'.format(DDF_HOME)]
    if not any([s.startswith('-Xms') for s in java_opts]):
        java_opts += ['-Xms128m']
    if not any([s.startswith('-Xmx') for s in java_opts]):
        java_opts += ['-Xmx512m']
    if not any([s.startswith('-XX:MaxPermSize') for s in java_opts]):
        java_opts += ['-XX:MaxPermSize=512m']

    command = ['java', '-classpath', classpath] + java_opts + ['py4j.GatewayServer', '--die-on-broken-pipe', '0']

    # launch GatewayServer in a new process
    process = Popen(command, stdout=PIPE, stdin=PIPE, preexec_fn=pre_exec_func)

    # get the port of the GatewayServer
    port = int(process.stdout.readline())

    class JavaOutputThread(Thread):
        def __init__(self, stream):
            Thread.__init__(self)
            self.daemon = True
            self.stream = stream

        def run(self):
            while True:
                line = self.stream.readline()
                sys.stderr.write(line)

    JavaOutputThread(process.stdout).start()

    # connect to the gateway server
    gateway = JavaGateway(GatewayClient(port=port), auto_convert=False)
    java_import(gateway.jvm, 'io.ddf.*')
    java_import(gateway.jvm, 'io.ddf.spark.*')
    return gateway
Example 31
def import_flink_view(gateway):
    """
    import the classes used by PyFlink.
    :param gateway:gateway connected to JavaGateWayServer
    """
    # Import the classes used by PyFlink
    java_import(gateway.jvm, "org.apache.flink.table.api.*")
    java_import(gateway.jvm, "org.apache.flink.table.api.java.*")
    java_import(gateway.jvm, "org.apache.flink.table.api.dataview.*")
    java_import(gateway.jvm, "org.apache.flink.table.descriptors.*")
    java_import(gateway.jvm, "org.apache.flink.table.sources.*")
    java_import(gateway.jvm, "org.apache.flink.table.sinks.*")
    java_import(gateway.jvm, "org.apache.flink.table.types.*")
    java_import(gateway.jvm, "org.apache.flink.table.types.logical.*")
    java_import(gateway.jvm, "org.apache.flink.table.util.python.*")
    java_import(gateway.jvm, "org.apache.flink.api.common.python.*")
    java_import(gateway.jvm, "org.apache.flink.api.common.typeinfo.TypeInformation")
    java_import(gateway.jvm, "org.apache.flink.api.common.typeinfo.Types")
    java_import(gateway.jvm, "org.apache.flink.api.java.ExecutionEnvironment")
    java_import(gateway.jvm,
                "org.apache.flink.streaming.api.environment.StreamExecutionEnvironment")
Example 32
def launch_gateway():
    if "PYSPARK_GATEWAY_PORT" in os.environ:
        gateway_port = int(os.environ["PYSPARK_GATEWAY_PORT"])
    else:
        SPARK_HOME = os.environ["SPARK_HOME"]
        # Launch the Py4j gateway using Spark's run command so that we pick up the
        # proper classpath and settings from spark-env.sh
        on_windows = platform.system() == "Windows"
        script = "./bin/spark-submit.cmd" if on_windows else "./bin/spark-submit"
        submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS", "pyspark-shell")
        if os.environ.get("SPARK_TESTING"):
            submit_args = "--conf spark.ui.enabled=false " + submit_args
        command = [os.path.join(SPARK_HOME, script)] + shlex.split(submit_args)

        # Start a socket that will be used by PythonGatewayServer to communicate its port to us
        callback_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        callback_socket.bind(('127.0.0.1', 0))
        callback_socket.listen(1)
        callback_host, callback_port = callback_socket.getsockname()
        env = dict(os.environ)
        env['_PYSPARK_DRIVER_CALLBACK_HOST'] = callback_host
        env['_PYSPARK_DRIVER_CALLBACK_PORT'] = str(callback_port)

        # Launch the Java gateway.
        # We open a pipe to stdin so that the Java gateway can die when the pipe is broken
        if not on_windows:
            # Don't send ctrl-c / SIGINT to the Java gateway:
            def preexec_func():
                signal.signal(signal.SIGINT, signal.SIG_IGN)

            proc = Popen(command, stdin=PIPE, preexec_fn=preexec_func, env=env)
        else:
            # preexec_fn not supported on Windows
            proc = Popen(command, stdin=PIPE, env=env)

        gateway_port = None
        # We use select() here in order to avoid blocking indefinitely if the subprocess dies
        # before connecting
        while gateway_port is None and proc.poll() is None:
            timeout = 1  # (seconds)
            readable, _, _ = select.select([callback_socket], [], [], timeout)
            if callback_socket in readable:
                gateway_connection = callback_socket.accept()[0]
                # Determine which ephemeral port the server started on:
                gateway_port = read_int(gateway_connection.makefile(mode="rb"))
                gateway_connection.close()
                callback_socket.close()
        if gateway_port is None:
            raise Exception(
                "Java gateway process exited before sending the driver its port number"
            )

        # In Windows, ensure the Java child processes do not linger after Python has exited.
        # In UNIX-based systems, the child process can kill itself on broken pipe (i.e. when
        # the parent process' stdin sends an EOF). In Windows, however, this is not possible
        # because java.lang.Process reads directly from the parent process' stdin, contending
        # with any opportunity to read an EOF from the parent. Note that this is only best
        # effort and will not take effect if the python process is violently terminated.
        if on_windows:
            # In Windows, the child process here is "spark-submit.cmd", not the JVM itself
            # (because the UNIX "exec" command is not available). This means we cannot simply
            # call proc.kill(), which kills only the "spark-submit.cmd" process but not the
            # JVMs. Instead, we use "taskkill" with the tree-kill option "/t" to terminate all
            # child processes in the tree (http://technet.microsoft.com/en-us/library/bb491009.aspx)
            def killChild():
                Popen([
                    "cmd", "/c", "taskkill", "/f", "/t", "/pid",
                    str(proc.pid)
                ])

            atexit.register(killChild)

    # Connect to the gateway
    gateway = JavaGateway(GatewayClient(port=gateway_port), auto_convert=True)

    # Import the classes used by PySpark
    java_import(gateway.jvm, "org.apache.spark.SparkConf")
    java_import(gateway.jvm, "org.apache.spark.api.java.*")
    java_import(gateway.jvm, "org.apache.spark.api.python.*")
    java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*")
    # TODO(davies): move into sql
    java_import(gateway.jvm, "org.apache.spark.sql.*")
    java_import(gateway.jvm, "org.apache.spark.sql.hive.*")
    java_import(gateway.jvm, "scala.Tuple2")

    return gateway
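
A hedged sketch of the calling side, loosely mirroring how PySpark wires the result into its driver (details simplified):

gateway = launch_gateway()
jvm = gateway.jvm
jconf = jvm.SparkConf(False)  # loadDefaults=False; SparkConf is addressable by short name thanks to the import above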
Example 33
from pyspark.serializers import MarshalSerializer, PickleSerializer

from time import sleep

# for back compatibility
from pyspark.sql import SQLContext, HiveContext, SchemaRDD, Row

client = GatewayClient(port=int(sys.argv[1]))
sparkVersion = sys.argv[2]

if sparkVersion.startswith("1.4"):
    gateway = JavaGateway(client, auto_convert=True)
else:
    gateway = JavaGateway(client)

java_import(gateway.jvm, "org.apache.spark.SparkEnv")
java_import(gateway.jvm, "org.apache.spark.SparkConf")
java_import(gateway.jvm, "org.apache.spark.api.java.*")
java_import(gateway.jvm, "org.apache.spark.api.python.*")
java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*")

bridge = gateway.entry_point
state = bridge.state()
state.markReady()

#jsc = bridge.javaSparkContext()

if sparkVersion.startswith("1.2"):
    java_import(gateway.jvm, "org.apache.spark.sql.SQLContext")
    java_import(gateway.jvm, "org.apache.spark.sql.hive.HiveContext")
    java_import(gateway.jvm, "org.apache.spark.sql.hive.LocalHiveContext")
Example 34
# -*- coding: utf-8 -*-
# nlg.py
import random
from datetime import datetime
from py4j_server import launch_py4j_server
from py4j.java_gateway import java_import

gateway = launch_py4j_server()

# Import the SimpleNLG classes
java_import(gateway.jvm, "simplenlg.features.*")
java_import(gateway.jvm, "simplenlg.realiser.*")

# Define aliases so that we don't have to use the gateway.jvm prefix.
NPPhraseSpec = gateway.jvm.NPPhraseSpec
PPPhraseSpec = gateway.jvm.PPPhraseSpec
SPhraseSpec = gateway.jvm.SPhraseSpec
InterrogativeType = gateway.jvm.InterrogativeType
Realiser = gateway.jvm.Realiser
TextSpec = gateway.jvm.TextSpec
Tense = gateway.jvm.Tense
Form = gateway.jvm.Form

date_endings = {
    "0": "0th",
    "1": "1st",
    "2": "2nd",
    "3": "3rd",
    "4": "4th",
    "5": "5th",
    "6": "6th",
Example 35
def import_flink_view(gateway):
    """
    import the classes used by PyFlink.
    :param gateway:gateway connected to JavaGateWayServer
    """
    # Import the classes used by PyFlink
    java_import(gateway.jvm, "org.apache.flink.table.api.*")
    java_import(gateway.jvm, "org.apache.flink.table.api.java.*")
    java_import(gateway.jvm, "org.apache.flink.table.api.bridge.java.*")
    java_import(gateway.jvm, "org.apache.flink.table.api.dataview.*")
    java_import(gateway.jvm, "org.apache.flink.table.catalog.*")
    java_import(gateway.jvm, "org.apache.flink.table.descriptors.*")
    java_import(gateway.jvm, "org.apache.flink.table.descriptors.python.*")
    java_import(gateway.jvm, "org.apache.flink.table.expressions.*")
    java_import(gateway.jvm, "org.apache.flink.table.sources.*")
    java_import(gateway.jvm, "org.apache.flink.table.sinks.*")
    java_import(gateway.jvm, "org.apache.flink.table.sources.*")
    java_import(gateway.jvm, "org.apache.flink.table.types.*")
    java_import(gateway.jvm, "org.apache.flink.table.types.logical.*")
    java_import(gateway.jvm, "org.apache.flink.table.util.python.*")
    java_import(gateway.jvm, "org.apache.flink.api.common.python.*")
    java_import(gateway.jvm,
                "org.apache.flink.api.common.typeinfo.TypeInformation")
    java_import(gateway.jvm, "org.apache.flink.api.common.typeinfo.Types")
    java_import(gateway.jvm, "org.apache.flink.api.java.ExecutionEnvironment")
    java_import(
        gateway.jvm,
        "org.apache.flink.streaming.api.environment.StreamExecutionEnvironment"
    )
    java_import(
        gateway.jvm,
        "org.apache.flink.api.common.restartstrategy.RestartStrategies")
    java_import(gateway.jvm,
                "org.apache.flink.python.util.PythonDependencyUtils")
    java_import(gateway.jvm, "org.apache.flink.python.PythonOptions")
    java_import(gateway.jvm,
                "org.apache.flink.client.python.PythonGatewayServer")
    java_import(gateway.jvm,
                "org.apache.flink.streaming.api.functions.python.*")
    java_import(gateway.jvm,
                "org.apache.flink.streaming.api.operators.python.*")
    java_import(gateway.jvm,
                "org.apache.flink.streaming.api.typeinfo.python.*")