def setup_method(self, method): """ setup any state tied to the execution of the given method in a class. setup_method is invoked for every test method of a class. """ sparkConf = init_spark_conf().setMaster("local[4]").setAppName( "test feature set") self.sc = init_nncontext(sparkConf)
def create_sc(self, submit_args, conf):
    # Append "pyspark-shell" and expose the spark-submit style arguments to PySpark
    # before the JVM gateway is launched.
    submit_args = submit_args + " pyspark-shell"
    os.environ["PYSPARK_SUBMIT_ARGS"] = submit_args
    spark_conf = init_spark_conf(conf)
    sc = init_nncontext(conf=spark_conf,
                        spark_log_level=self.spark_log_level,
                        redirect_spark_log=self.redirect_spark_log)
    return sc
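# A minimal standalone sketch of the PYSPARK_SUBMIT_ARGS pattern that create_sc above relies
# on, using plain PySpark instead of the Zoo helpers. The memory settings, core count, and app
# name are placeholder assumptions, not values taken from the source.
import os
from pyspark import SparkConf, SparkContext

# The variable must be set before the first SparkContext is created, because PySpark reads it
# when launching the JVM gateway; the "pyspark-shell" token is kept at the end.
os.environ["PYSPARK_SUBMIT_ARGS"] = \
    "--driver-memory 2g --executor-memory 2g pyspark-shell"

conf = SparkConf().setMaster("local[2]").setAppName("submit_args_demo")
sc = SparkContext(conf=conf)
print(sc.getConf().get("spark.driver.memory", "not set"))
sc.stop()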
def setup_method(self, method): """ setup any state tied to the execution of the given method in a class. setup_method is invoked for every test method of a class. """ sparkConf = init_spark_conf().setMaster("local[1]").setAppName("testEstimator") self.sc = init_nncontext(sparkConf) self.sqlContext = SQLContext(self.sc) assert(self.sc.appName == "testEstimator")
def init_spark_on_local(self, cores, conf=None, python_location=None):
    print("Start to getOrCreate SparkContext")
    if "PYSPARK_PYTHON" not in os.environ:
        # Fall back to the auto-detected Python executable when none is given.
        os.environ["PYSPARK_PYTHON"] = \
            python_location if python_location else detect_python_location()
    master = "local[{}]".format(cores)
    zoo_conf = init_spark_conf(conf).setMaster(master)
    sc = init_nncontext(conf=zoo_conf,
                        spark_log_level=self.spark_log_level,
                        redirect_spark_log=self.redirect_spark_log)
    print("Successfully got a SparkContext")
    return sc
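# A minimal local sketch of the init_spark_conf / init_nncontext flow used throughout these
# snippets, assuming the helpers are importable from zoo.common.nncontext as in Analytics Zoo;
# the core count and app name are arbitrary placeholders.
from zoo.common.nncontext import init_spark_conf, init_nncontext

conf = init_spark_conf().setMaster("local[2]").setAppName("nncontext_local_demo")
sc = init_nncontext(conf)  # SparkContext configured with the BigDL/Zoo defaults
print(sc.master, sc.appName)
sc.stop()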
print(args)
app_name = args.app_name
data_source_path = args.data_source_path
model_file_name = app_name + '.h5'
save_model_dir = args.model_dir + model_file_name
u_limit = int(args.u_limit)
m_limit = int(args.m_limit)
neg_rate = int(args.neg_rate)
sliding_length = int(args.sliding_length)
u_output = int(args.u_output)
m_output = int(args.m_output)
max_epoch = int(args.max_epoch)
batch_size = int(args.batch_size)
predict_output_path = args.inference_output_path

sparkConf = init_spark_conf()
sc = init_nncontext(sparkConf)
spark = SparkSession \
    .builder \
    .appName(app_name) \
    .getOrCreate()

start = time.time()
uDF, mDF, tDF = ncf_features.load_csv(spark, data_source_path, u_limit, m_limit)
trainingDF = ncf_features.genData(tDF, sc, spark, args.train_start, args.train_end,
                                  neg_rate, sliding_length, u_limit, m_limit)
# trainingDF.show(5)
validationDF = ncf_features.genData(tDF, sc, spark, args.validation_start, args.validation_end,
                                    neg_rate,