def test_session(integration_url, capsys, session_kind, params):
    assert _livy_available(integration_url)

    with LivySession.create(integration_url, kind=session_kind) as session:
        assert session.state == SessionState.IDLE

        session.run(params.print_foo_code)
        assert capsys.readouterr() == (params.print_foo_output, "")

        session.run(params.create_dataframe_code)
        capsys.readouterr()

        session.run(params.dataframe_count_code)
        assert capsys.readouterr() == (params.dataframe_count_output, "")

        with pytest.raises(SparkRuntimeError):
            session.run(params.error_code)

        assert session.download("df").equals(RANGE_DATAFRAME)

        session.upload("uploaded", RANGE_DATAFRAME)
        session.run(params.dataframe_multiply_code)
        assert session.download("multiplied").equals(RANGE_DATAFRAME * 2)

        session.upload("text", TEXT_DATAFRAME)
        session.run(params.dataframe_trim_code)
        assert session.download("trimmed").equals(
            TEXT_DATAFRAME.applymap(lambda s: s.strip())
        )

    assert _session_stopped(integration_url, session.session_id)
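
# The integration tests in this collection rely on small helpers such as
# _livy_available() and _session_stopped() that are not shown here. A minimal
# sketch of what they could look like, assuming plain `requests` calls against
# the Livy REST API (the implementations below are illustrative assumptions,
# not the tests' actual helpers):
import requests


def _livy_available(livy_url):
    """Return True if the Livy server answers on its /sessions endpoint."""
    return requests.get(f"{livy_url}/sessions").ok


def _session_stopped(livy_url, session_id):
    """Return True once the given session is gone or no longer running."""
    response = requests.get(f"{livy_url}/sessions/{session_id}")
    if response.status_code == 404:
        return True
    return response.json()["state"] in {"shutting_down", "dead", "killed", "success"}
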
def test_sql_session():
    with LivySession(LIVY_URL, kind=SessionKind.PYSPARK, auth=auth) as session:
        # assert session.state == SessionState.IDLE
        session.run(PYSPARK_CREATE_DF)
        # session.run(SQL_CREATE_VIEW)
        # session.run("df = spark.sql('SELECT COUNT(*) FROM view')")
        df = session.read('df')
        count = df.count()['value']
        assert count == 100
def test_sql_session():
    assert livy_available()

    with LivySession(LIVY_URL, kind=SessionKind.SQL) as session:
        assert session.state == SessionState.IDLE

        session.run(SQL_CREATE_VIEW)
        output = session.run('SELECT COUNT(*) FROM view')
        assert output.json['data'] == [[100]]

        with pytest.raises(SparkRuntimeError):
            session.run('not valid SQL!')

        expected = pandas.DataFrame({'id': range(100)})
        assert session.read_sql('SELECT * FROM view').equals(expected)

    assert session_stopped(session.session_id)
def test_sql_session(integration_url):
    assert _livy_available(integration_url)

    with LivySession.create(integration_url, kind=SessionKind.SQL) as session:
        assert session.state == SessionState.IDLE

        session.run(SQL_CREATE_VIEW)
        output = session.run("SELECT COUNT(*) FROM view")
        assert output.json["data"] == [[100]]

        with pytest.raises(SparkRuntimeError):
            session.run("not valid SQL!")

        assert session.download_sql("SELECT * FROM view").equals(RANGE_DATAFRAME)

    assert _session_stopped(integration_url, session.session_id)
def test_sql_session(integration_url):
    assert livy_available(integration_url)

    with LivySession(integration_url, kind=SessionKind.SQL) as session:
        assert session.state == SessionState.IDLE

        session.run(SQL_CREATE_VIEW)
        output = session.run("SELECT COUNT(*) FROM view")
        assert output.json["data"] == [[100]]

        with pytest.raises(SparkRuntimeError):
            session.run("not valid SQL!")

        expected = pandas.DataFrame({"id": range(100)})
        assert session.read_sql("SELECT * FROM view").equals(expected)

    assert session_stopped(integration_url, session.session_id)
def exec_query(self, query, params=None, **kwargs):
    """
    Executes a query on the remote cluster through Livy.

    :param query: str - query to be executed.
    :param params: list - parameters to substitute into the statement, if any.
    :return: result of the query as a pandas DataFrame.
    """
    df_variable_name = kwargs.get('df_variable_name', 'df')
    with LivySession.create(**self.params) as session:
        # Run some code on the remote cluster.
        # NOTE: this snippet runs a fixed statement; `query` is not interpolated here.
        session.run(dedent("""
            spark.sql('use default')
            df = spark.sql('show databases')
        """).format(*(params or [])))
        # Retrieve the result as a pandas DataFrame
        df = session.read(df_variable_name)
    return df
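
# For context, a minimal self-contained sketch of the same pattern outside the
# class: run Spark SQL through pylivy and pull the result back as a pandas
# DataFrame. The server URL below is an assumption for illustration only.
from textwrap import dedent

from livy import LivySession

with LivySession.create("http://livy-server:8998") as session:
    session.run(dedent("""
        df = spark.sql('show databases')
    """))
    databases = session.read("df")  # pandas.DataFrame with the query result
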
def test_session(capsys, session_kind, params):
    with LivySession(LIVY_URL, kind=session_kind, auth=auth) as session:
        assert session.state == SessionState.IDLE

        session.run(params.print_foo_code)
        assert capsys.readouterr() == (params.print_foo_output, '')

        session.run(params.create_dataframe_code)
        capsys.readouterr()

        session.run(params.dataframe_count_code)
        assert capsys.readouterr() == (params.dataframe_count_output, '')

        with pytest.raises(SparkRuntimeError):
            session.run(params.error_code)

        expected = pandas.DataFrame({'value': range(100)}, dtype='int64')
        received = session.read('df')
        assert received.equals(expected)
def test_session(integration_url, capsys, session_kind, params):
    assert _livy_available(integration_url)

    with LivySession.create(integration_url, kind=session_kind) as session:
        assert session.state == SessionState.IDLE

        session.run(params.print_foo_code)
        assert capsys.readouterr() == (params.print_foo_output, "")

        session.run(params.create_dataframe_code)
        capsys.readouterr()

        session.run(params.dataframe_count_code)
        assert capsys.readouterr() == (params.dataframe_count_output, "")

        with pytest.raises(SparkRuntimeError):
            session.run(params.error_code)

        assert session.read("df").equals(RANGE_EXPECTED_DATAFRAME)

    assert _session_stopped(integration_url, session.session_id)
def test_session(capsys, session_kind, params):
    assert livy_available()

    with LivySession(LIVY_URL, kind=session_kind) as session:
        assert session.state == SessionState.IDLE

        session.run(params.print_foo_code)
        assert capsys.readouterr() == (params.print_foo_output, '')

        session.run(params.create_dataframe_code)
        capsys.readouterr()

        session.run(params.dataframe_count_code)
        assert capsys.readouterr() == (params.dataframe_count_output, '')

        with pytest.raises(SparkRuntimeError):
            session.run(params.error_code)

        expected = pandas.DataFrame({'value': range(100)})
        assert session.read('df').equals(expected)

    assert session_stopped(session.session_id)
def test_session(integration_url, capsys, session_kind, params):
    assert livy_available(integration_url)

    with LivySession(integration_url, kind=session_kind) as session:
        assert session.state == SessionState.IDLE

        session.run(params.print_foo_code)
        assert capsys.readouterr() == (params.print_foo_output, "")

        session.run(params.create_dataframe_code)
        capsys.readouterr()

        session.run(params.dataframe_count_code)
        assert capsys.readouterr() == (params.dataframe_count_output, "")

        with pytest.raises(SparkRuntimeError):
            session.run(params.error_code)

        expected = pandas.DataFrame({"value": range(100)})
        assert session.read("df").equals(expected)

    assert session_stopped(integration_url, session.session_id)
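
# The parametrized tests above receive a `params` object carrying per-language
# code snippets and their expected output. A minimal sketch of how such a
# fixture could be shaped; the field names follow the attribute accesses above,
# while the PySpark snippet bodies are illustrative assumptions:
from dataclasses import dataclass


@dataclass
class SessionParams:
    print_foo_code: str
    print_foo_output: str
    create_dataframe_code: str
    dataframe_count_code: str
    dataframe_count_output: str
    error_code: str


PYSPARK_PARAMS = SessionParams(
    print_foo_code='print("foo")',
    print_foo_output="foo\n",
    create_dataframe_code="df = spark.range(100)",
    dataframe_count_code="print(df.count())",
    dataframe_count_output="100\n",
    error_code="raise ValueError('test error')",
)
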
            self.modelId) + "/contents?name=" + str(self.modelFilename)

        with open(self.modelFilename, 'rb') as pklFile:
            r = requests.post(url, data=pklFile, headers=headers)
        fileResult = r.json()
        if 'creationTimeStamp' in fileResult:
            self.log("Uploaded model file to server.")
        else:
            self.log("Error!")
            self.log(fileResult)


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Not enough arguments! Usage: python " + str(sys.argv[0]) + " <command>")
        exit()
    else:
        core = Core()
        if sys.argv[1] == 'train':
            with LivySession(LIVY_URL) as session:
                session.run(train_code)
            os.system(
                'scp [email protected]:/tmp/models/logit [email protected]:/home/sasdemo/Viya_Spark_orchestration/'
            )
            core.createNewVersion()
            core.uploadModelFile()
        elif sys.argv[1] == 'score':
            with LivySession(LIVY_URL) as session:
                session.run(score_code)