class Classifiers_cluster():
    """Launches Spark ML classifier training/prediction jobs via spark-submit.

    Each public method validates its local input file and then submits the
    corresponding PySpark application to the cluster master.
    """

    def __init__(self):
        self.operations = Operations()
        # Trailing space is significant: commands are assembled by plain
        # string concatenation.
        self.master = "spark://golum:7077 "

    def _check_input(self, input_file):
        """Return True if input_file exists locally; else report and return False."""
        if not os.path.exists(input_file):
            print("Invalid input file")
            return False
        return True

    def _submit(self, spark_application, *args):
        """Build and run a spark-submit command for spark_application.

        spark_application must carry its trailing space (it is concatenated
        directly after the master URL); args are appended space-separated.
        """
        command = ("$SPARK_HOME/bin/spark-submit --master " + self.master
                   + spark_application + " ".join(args))
        self.operations.runProcess(command)

    def Decision_Tree(self, input_file, model_name):
        """Train a decision tree on input_file, persisting it as model_name.

        Returns False if input_file does not exist, otherwise None.
        """
        if not self._check_input(input_file):
            return False
        self._submit("../src/Spark_Decision_Tree.py ", input_file, model_name)

    def Naive_Bayes(self, input_file):
        """Train a Naive Bayes model on input_file. False if the file is missing."""
        if not self._check_input(input_file):
            return False
        self._submit("../src/Spark_Naive_Bayes.py ", input_file)

    def Gradient_Boosted_Tree(self, input_file):
        """Train a gradient-boosted tree on input_file. False if the file is missing."""
        if not self._check_input(input_file):
            return False
        self._submit("../src/Spark_Gradient_Boosted_Tree.py ", input_file)

    def MultiLayerPerceptron(self, input_file, num_features):
        """Train a multilayer perceptron on input_file with num_features inputs.

        num_features is passed through as a command-line token (a string).
        Returns False if input_file does not exist, otherwise None.
        """
        if not self._check_input(input_file):
            return False
        self._submit("../src/Spark_MultiLayer_Perceptron.py ", input_file, num_features)

    def PredictDecision_Tree(self, input_file):
        """Run prediction with a saved decision-tree model on input_file."""
        if not self._check_input(input_file):
            return False
        self._submit("../src/Predict_DTModel.py ", input_file)
class Classifiers_Prediction():
    """Launches Spark breast-cancer prediction jobs (batch and streaming)."""

    def __init__(self):
        self.operations = Operations()
        # Trailing space is significant: commands are assembled by concatenation.
        self.master = "spark://golum:7077 "
        # hadoopOperation = HadoopOperations()

    def Breast_Cancer_Prediction(self, input_file, hdfs=False):
        """Run the batch prediction job on input_file.

        When hdfs is False, input_file is a local path and must exist; when
        True it is assumed to live on HDFS and is passed through unchecked.
        Returns False on a missing local file, otherwise None.
        """
        # BUG FIX: original condition was
        #   hdfs == False & os.path.exists(input_file) == False
        # `&` binds tighter than `==`, so it reduced to `hdfs == False` and
        # rejected every non-HDFS call even when the file existed.
        if not hdfs and not os.path.exists(input_file):
            print("Invalid input file")
            return False
        spark_application = "../src/Breast_Cancer_Prediction.py "
        command = "$SPARK_HOME/bin/spark-submit --master " + self.master + spark_application + input_file
        # Job stdout is captured in out.txt, relative to the working directory.
        self.operations.runProcess(command + " >out.txt")

    def Breast_Cancer_Pred_stream(self):
        """Launch the streaming prediction job (takes no input file)."""
        spark_application = "../src/Breast_Cancer_Prediction_Streaming.py "
        command = "$SPARK_HOME/bin/spark-submit --master " + self.master + spark_application
        self.operations.runProcess(command)
def add(self, x, y):
    """Add x and y via Operations, cache the value on self, and return it."""
    total = Operations.addition(x, y)
    self.result = total
    return total
def squareRoot(self, x):
    """Take the square root of x via Operations, cache it on self, and return it."""
    root = Operations.squareRoot(x)
    self.result = root
    return root
def multiply(self, x, y):
    """Multiply x by y via Operations, cache the value on self, and return it."""
    product = Operations.multiplication(x, y)
    self.result = product
    return product
def divide(self, x, y):
    """Divide x by y via Operations, cache the value on self, and return it."""
    quotient = Operations.division(x, y)
    self.result = quotient
    return quotient
def subtract(self, x, y):
    """Subtract y from x via Operations, cache the value on self, and return it."""
    difference = Operations.subtraction(x, y)
    self.result = difference
    return difference
def calculateDiff(self, a, b):
    """Build an Operations record for the subtraction a - b and return its JSON form."""
    record = Operations(a, b, "-", a - b)
    return record.toJson()
def calculateSum(self, a, b):
    """Build an Operations record for the addition a + b and return its JSON form."""
    record = Operations(a, b, "+", a + b)
    return record.toJson()
def calculateDiv(self, a, b):
    """Build an Operations record for the division a / b and return its JSON form."""
    record = Operations(a, b, '/', a / b)
    return record.toJson()
def calculateProduct(self, a, b):
    """Build an Operations record for the product a * b and return its JSON form."""
    record = Operations(a, b, '*', a * b)
    return record.toJson()
def __init__(self):
    """Create the Operations helper and record the Spark master URL."""
    # Trailing space is intentional: callers concatenate this string
    # directly into spark-submit command lines.
    self.master = "spark://golum:7077 "
    self.operations = Operations()
# Spark driver script: trains a multinomial Naive Bayes model on a libsvm file
# given as the single command-line argument.
# NOTE(review): this fragment appears truncated — it ends right after fitting
# the model ("select example rows to display."); the evaluation section is not
# visible here.
if len(sys.argv) != 2:
    print("Usage: Naive Bayes_Spark <file>", file=sys.stderr)
    exit(-1)
spark = SparkSession\
    .builder\
    .appName("NaiveBayesExample")\
    .getOrCreate()
# Load training data
data = spark.read.format("libsvm") \
    .load(sys.argv[1])
# Index the string "label" column into numeric "indexedLabel" for the trainer.
indexer = StringIndexer(inputCol="label", outputCol="indexedLabel")
indexed_df = indexer.fit(data).transform(data)
# Record the label -> index mapping through the project's Operations helper.
# NOTE(review): "indexedlabel" differs in case from the "indexedLabel" column
# created above — confirm stringIndexerMapping treats column names
# case-insensitively.
operations=Operations()
operations.stringIndexerMapping(indexed_df,"label","indexedlabel")
# Split the data into train and test
splits = indexed_df.randomSplit([0.6, 0.4], 1234)  # fixed seed 1234 for reproducibility
train = splits[0]
test = splits[1]
# create the trainer and set its parameters
nb = NaiveBayes(labelCol="indexedLabel",smoothing=1.0, modelType="multinomial")
# train the model
model = nb.fit(train)
# select example rows to display.