def trainModel():
    # Transfer package
    transfer = Transfer(master)
    transferKafka = Transfer(kafka)
    # Transfer datagenerator
    transferKafka.put('./kafkaProducer.py')
    # start kafka
    startKafka()
    # start spark cluster
    startSparkCluster()
    # Create Package
    os.system('sbt package')
    # Transfer files to master
    transferKafka.get('/home/ronald/random_centers.csv')
    transfer.put('./random_centers.csv')
    transferKafka.get('/home/ronald/centers.csv')
    transfer.put('./centers.csv')
    transferKafka.get('/home/ronald/data.csv')
    transfer.put('./data.csv')
    # Transfer spark application
    transfer.put(
        './target/scala-2.12/streamingkmeansmodeltrained_2.12-0.1.jar')
    master.run(
        'source /etc/profile && cd $SPARK_HOME && bin/spark-submit '
        '--packages org.apache.spark:spark-streaming-kafka-0-10_2.12:3.0.0 '
        '--class example.stream.StreamingKMeansModelTraining '
        '--master spark://' + str(masterHost) + ':7077 --executor-memory 2g '
        '~/streamingkmeansmodeltrained_2.12-0.1.jar '
        '192.168.122.121:9092 '
        'consumer-group '
        'test')
    runChecker()
    stop()

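# NOTE: startKafka(), startSparkCluster() and stop() used above are defined
# elsewhere in this fabfile. The two sketches below only illustrate what
# startSparkCluster() and startKafka() are assumed to do (bring up the
# standalone Spark master/workers and a Kafka broker); the script paths,
# config names and tmux session names are assumptions, not the project's
# actual values.
def startSparkClusterSketch():
    # Start the standalone master on the master node ...
    master.run('source /etc/profile && cd $SPARK_HOME && sbin/start-master.sh')
    # ... and attach every slave VM as a worker of that master.
    for connection in slaveConnections:
        connection.run('source /etc/profile && cd $SPARK_HOME && '
                       'sbin/start-slave.sh spark://' + str(masterHost) + ':7077')


def startKafkaSketch():
    # Run ZooKeeper and the Kafka broker in detached tmux sessions so the
    # task can return while both keep serving.
    kafka.run('tmux new -d -s zookeeper')
    kafka.run('tmux send -t zookeeper '
              '"~/kafka/bin/zookeeper-server-start.sh ~/kafka/config/zookeeper.properties" ENTER')
    kafka.run('tmux new -d -s kafkabroker')
    kafka.run('tmux send -t kafkabroker '
              '"~/kafka/bin/kafka-server-start.sh ~/kafka/config/server.properties" ENTER')
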
def transferLogs():
    counter = 1
    for connection in slaveConnections:
        transfer = Transfer(connection)
        transfer.get('logs/log.csv', 'log_slave' + str(counter) + '.csv')
        counter += 1
    transfer = Transfer(master)
    transfer.get('logs/log.csv', 'log_master.csv')
    transfer = Transfer(producer)
    transfer.get('logs/log.csv', 'log_producer.csv')

def transfer_monitor():
    for connection in all_connections:
        connection.run('rm monitor.py')
        connection.run('rm -rf logs')
        transfer = Transfer(connection)
        transfer.put('monitor.py')
        connection.run('mkdir logs')

def example_streaming_kmeans():
    transfer = Transfer(c2)
    transfer.put(
        '/Users/ronnie/Documents/spark_example/target/scala-2.12/spark_example_2.12-0.1.jar'
    )
    c2.run('source /etc/profile && cd $SPARK_HOME && bin/spark-submit '
           '--class com.example.kmeans.KMeansExample '
           '--master spark://' + str(remote_host) + ':7077 '
           '--executor-memory 2g ~/spark_example_2.12-0.1.jar')

def stopProducer():
    try:
        producer.run('tmux kill-session -t socket')
        transfer = Transfer(producer)
        transfer.put('./retrieveProducerOutput.py')
        producer.run('python3 ~/retrieveProducerOutput.py')
        transfer.get('producerResult.txt')
        producer.run('rm ~/data/_*')
    except Exception:
        print('Socket already closed!')

def transferFile(clusters='1'):
    transfer = []
    for i in range(int(clusters)):
        transfer.append(Transfer(slaveConnections[i]))
    for connection in transfer:
        connection.put('./transferFile.py')
    for i in range(int(clusters)):
        slaveConnections[i].run('tmux new -d -s transferFile')
        slaveConnections[i].run(
            'tmux send -t transferFile python3\ ~/transferFile.py ENTER')

def pgdump(c):
    cid = host.run(
        'docker container ls | grep awsdemo_db | head -c 12').stdout.strip()
    host.run(
        '''docker container exec %s sh -c "pg_dump -U awsdemo awsdemo | gzip > '/var/lib/postgresql/backups/awsdemo.gz'"'''
        % cid)
    host.run(
        'docker cp %s:/var/lib/postgresql/backups/awsdemo.gz /tmp/awsdemo.gz'
        % cid)
    t = Transfer(host)
    t.get('/tmp/awsdemo.gz')

def example_uber():  # temporary
    transfer = Transfer(c2)
    transfer.put(
        '/Users/ronnie/Documents/spark_example/target/scala-2.12/spark_example_2.12-0.1.jar'
    )
    c2.run('source /etc/profile && cd $SPARK_HOME && bin/spark-submit '
           '--class uber.KMeansUber '
           '--master spark://' + str(remote_host) + ':7077 '
           '--executor-memory 2g ~/spark_example_2.12-0.1.jar')

def example_streaming():  # temporary
    transfer = Transfer(c2)
    transfer.put(
        '/Users/ronnie/Documents/StreamingModeSpark/target/scala-2.12/streamingmodespark_2.12-0.1.jar'
    )
    c2.run('source /etc/profile && cd $SPARK_HOME && bin/spark-submit '
           '--class example.stream.StructureStreaming '
           '--master spark://' + str(remote_host) + ':7077 '
           '--deploy-mode cluster '
           '--executor-memory 100g '
           '~/streamingmodespark_2.12-0.1.jar')

def example_datagenerator():
    transfer = Transfer(c2)
    transfer.put(
        '/Users/ronnie/Documents/StreamingModeSpark/target/scala-2.12/streamingmodespark_2.12-0.1.jar'
    )
    c2.run('source /etc/profile && cd $SPARK_HOME && bin/spark-submit '
           '--packages org.apache.spark:spark-sql-kafka-0-10_2.12:3.0.0 '
           '--class example.stream.DataGenerator '
           '~/streamingmodespark_2.12-0.1.jar '
           '10000 '
           '~/100-bytes-lines.txt '
           '100')

def example_kafka_trial():
    transfer = Transfer(c2)
    transfer.put(
        '/Users/ronnie/Documents/spark_example/target/scala-2.12/spark_example_2.12-0.1.jar'
    )
    # transfer.put('/Users/ronnie/Documents/datagenerator/kafka_producer_example.py')
    c2.run('source /etc/profile && cd $SPARK_HOME && bin/spark-submit '
           '--packages org.apache.spark:spark-streaming-kafka-0-10_2.12:3.0.0 '
           '--class example.stream.DirectKafkaWordCount '
           '~/spark_example_2.12-0.1.jar '
           'localhost:9092 '
           'consumer-group '
           'test')

def runExperiment(clusters='3', numPorts='2', time='60000', executorMem='2g',
                  batchDuration='1'):
    # transfer file
    transfer = Transfer(master)
    producerTransfer = Transfer(producer)
    # Start Monitors
    transferMonitor()
    startMonitor()
    # Transfer Producer
    producerTransfer.put('./producer.py')
    startProducer(numPorts)
    # SBT packaging
    os.system('sbt package')
    # start spark cluster
    startSparkCluster(clusters)
    # transfer jar
    transfer.put(
        './target/scala-2.12/socketstreamingkmeansexperiment_2.12-0.1.jar')
    try:
        master.run('source /etc/profile && cd $SPARK_HOME && bin/spark-submit '
                   '--class Experiment '
                   '--master spark://' + str(masterHost) + ':7077 '
                   '--executor-memory ' + executorMem + ' '
                   '~/socketstreamingkmeansexperiment_2.12-0.1.jar '
                   '192.168.122.153 '
                   '10000 ' + numPorts + ' ' + time + ' ' + batchDuration)
    except Exception:
        print('Spark crashed while running')
        print('Application stopped at: {}'.format(
            datetime.now().strftime("%H:%M:%S.%f")))
    finally:
        # transfer logs
        stopMonitor()
        transferLogs()
        # Restart all VMs
        stop()

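# NOTE: transferMonitor(), startMonitor() and stopMonitor() used above live
# elsewhere in the repo. The sketch below is only an assumed illustration of
# the monitoring pattern implied by transfer_monitor() and closeMonitorPs():
# run monitor.py detached on every node, then kill it when the experiment
# ends. The tmux session name is an assumption.
def startMonitorSketch():
    for connection in all_connections:
        connection.run('tmux new -d -s monitor')
        connection.run('tmux send -t monitor "python3 ~/monitor.py" ENTER')


def stopMonitorSketch():
    for connection in all_connections:
        connection.run('tmux kill-session -t monitor')
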
def example_streaming_kmeans():
    # transfer package
    transfer = Transfer(c2)
    transfer.put(
        '/Users/ronnie/Documents/spark_example/target/scala-2.12/spark_example_2.12-0.1.jar'
    )
    # transfer sample files
    c2.run('source /etc/profile && cd $SPARK_HOME && bin/spark-submit '
           '--packages org.apache.spark:spark-streaming-kafka-0-10_2.12:3.0.0 '
           '--class example.stream.StreamingKMeansModelExample '
           '~/spark_example_2.12-0.1.jar '
           'localhost:9092 '
           'consumer-group '
           'test')

def test_networkwordcount():
    # Transfer package
    transfer = Transfer(master)
    # transfer.put('/Users/ronnie/Documents/spark_example/target/scala-2.12/spark_example_2.12-0.1.jar')
    # master.run('rm -rf kmeansModel')
    transfer.put('./socketProducerExample.py')
    start_datagenerator()
    master.run(
        'source /etc/profile && cd $SPARK_HOME && bin/run-example '
        # '--packages org.apache.spark:spark-streaming-kafka-0-10_2.12:3.0.0 '
        'org.apache.spark.examples.streaming.NetworkWordCount '
        # '--master spark://' + str(master_host) + ':7077 --executor-memory 2g '
        # '~/spark_example_2.12-0.1.jar '
        'localhost '
        '9999'
        # 'test'
    )

def testStructuredNetworkWordCount():
    # Transfer package
    transfer = Transfer(master)
    # transfer.put('/Users/ronnie/Documents/spark_example/target/scala-2.12/spark_example_2.12-0.1.jar')
    # master.run('rm -rf kmeansModel')
    transfer.put('./socketProducerExample.py')
    start_spark_cluster()
    start_datagenerator()
    master.run(
        'source /etc/profile && cd $SPARK_HOME && bin/spark-submit '
        '--class org.apache.spark.examples.sql.streaming.StructuredNetworkWordCount '
        '--master spark://' + str(master_host) + ':7077 '
        # '--deploy-mode cluster '
        # '--supervise '
        '--executor-memory 2g '
        'examples/jars/spark-examples_2.12-3.0.0.jar '
        'localhost '
        '9999'
        # 'test'
    )

def streaming_kmeans():
    # Create Package
    os.system('sbt package')
    # Transfer package
    transfer = Transfer(master)
    transfer.put('./target/scala-2.12/spark_example_2.12-0.1.jar')
    # Transfer datagenerator
    transfer.put('./socketProducerExample.py')
    # start spark cluster
    start_spark_cluster()
    start_datagenerator()
    # start kafka
    # start_kafka()
    master.run('rm -rf kmeansModel')
    master.run(
        'source /etc/profile && cd $SPARK_HOME && bin/spark-submit '
        # '--packages org.apache.spark:spark-streaming-kafka-0-10_2.12:3.0.0 '
        '--class example.stream.StreamingKMeansModelExample '
        # '--master spark://' + str(master_host) + ':7077 --executor-memory 2g '
        '~/spark_example_2.12-0.1.jar '
        'localhost '
        '9999'
        # 'test'
    )

def runChecker():
    # transfer checker
    transfer = Transfer(master)
    transfer.put('./checker.py')
    master.run('source /etc/profile && cd $SPARK_HOME && bin/spark-submit '
               '~/checker.py')

def transferToKafka(filename):
    transfer = Transfer(kafka)
    transfer.put(filename)

def transferToProducer(filename):
    transfer = Transfer(producer)
    transfer.put(filename)

def pgdump(c):
    cid = host.run('docker container ls | grep ' + APP_NAME.lower() +
                   '_postgres | head -c 12').stdout.strip()
    host.run('''docker container exec %s sh -c "pg_dump -U %s %s | gzip > '/var/lib/postgresql/backups/%s.gz'"'''
             % (cid, DB_USER, DB_DB, DB_DB))
    host.run('docker cp %s:/var/lib/postgresql/backups/%s.gz /tmp/%s.gz'
             % (cid, DB_DB, DB_DB))
    t = Transfer(host)
    t.get('/tmp/%s.gz' % DB_DB)

def testKafka(n='100'):
    transfer = Transfer(kafka)
    transfer.put('./kafkaProducer.py')
    transfer.put('./kafkaConsumer.py')
    startKafka(n)

def createFiles():
    transfer = Transfer(producer)
    transfer.put('createFiles.py')
    producer.run('python3 createFiles.py 2500 20000 6')

def transfer_to_all(filename):
    for connection in all_connections:
        transfer = Transfer(connection)
        transfer.put(filename)

def transferFromSlave(filename):
    transfer = Transfer(slaveConnections[0])
    transfer.get(filename)

def createFilesWeibull():
    transfer = Transfer(producer)
    transfer.put('createFilesWeibull.py')
    producer.run('python3 createFilesWeibull.py 10. 50000 6 300')

def transfer_file_to(filename):
    transfer = Transfer(c2)
    transfer.put(filename)

def retrieveProducerOutput():
    transfer = Transfer(master)
    transfer.get('/home/ronald/tmp/spark-events/app-20201105205551-0000')

def transferFromServer(filename):
    transfer = Transfer(conn)
    transfer.get(filename)

def transfer_logs_out():
    counter = 1
    for connection in all_connections:
        transfer = Transfer(connection)
        transfer.get('logs/log.csv', 'log' + str(counter) + '.csv')
        counter += 1

def closeMonitorPs():
    for connection in slaveConnections + [master, producer]:
        transfer = Transfer(connection)
        transfer.put('closeMonitorPs.sh')
        connection.run('chmod u+x closeMonitorPs.sh')
        connection.run('./closeMonitorPs.sh')
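
# closeMonitorPs.sh ships with the repo but its contents are not shown here.
# A minimal equivalent (an assumption, not the actual script) would simply
# terminate the monitor process on the node, e.g.:
#
#   #!/bin/sh
#   pkill -f monitor.py || true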