class KafkaClassificationModelConsumerExecutor(object):
    """Runs the Kafka classification-model consumer as a child process."""

    def __init__(self, config):
        """Store the configuration to run the consumer with.

        :param config: a pre-built configuration object (must expose
            ``toJson()``), or ``None`` to load ``acm.config.dev.yml``
            through ``ConfigManager``.
        """
        if config is None:
            # No config supplied: fall back to the dev configuration file.
            self.configFile = "acm.config.dev.yml"
            self.configManager = ConfigManager(
                params={"config.file": self.configFile})
            self.config = self.configManager.read()
        else:
            # BUG FIX: the original never stored a caller-supplied config,
            # so start() raised AttributeError on self.config.
            self.config = config

    def start(self):
        """Start the consumer process and block until it terminates."""
        k = KafkaClassificationModelConsumerProcess(self.config.toJson())
        k.start()
        k.join()
class MulticlassClassifierExecutor(object):
    """Fans multiclass-classifier training out to child processes."""

    def __init__(self, config=None):
        """Store the configuration to train with.

        :param config: a pre-built configuration object (must expose
            ``toJson()``), or ``None`` to load ``acm.config.dev.yml``
            through ``ConfigManager``.
        """
        if config is None:
            self.configFile = "acm.config.dev.yml"
            self.configManager = ConfigManager(
                params={"config.file": self.configFile})
            self.config = self.configManager.read()
        else:
            # BUG FIX: the original never stored a caller-supplied config,
            # so start() raised AttributeError on self.config.
            self.config = config

    # this method runs as a child process
    # lr: logistic regression
    def lrModelTrainer(self, q, parentEnv, configJsonStr):
        """Child-process entry point: train an LR model, then report back
        to the parent on queue *q*."""
        classifier = MulticlassLogisticRegressionModelTrainer()
        classifier.start(q, parentEnv, configJsonStr)
        q.put(["hello parent!", "I am lrModelTrainer"])

    # this is the start point of parent process
    def start(self):
        """Spawn one child process per trainer function, collect their
        queue messages, and wait for them all to finish."""
        fs = [
            self.lrModelTrainer,
            self.lrModelTrainer,
        ]  # trainer functions
        ps = []  # processes
        qs = []  # queues
        # passing objects to child process via queue
        env_ = json.dumps(os.environ.copy())
        for f in fs:
            q = Queue()
            configJsonStr = self.config.toJson()
            p = Process(target=f, args=(q, env_, configJsonStr))
            qs.append(q)
            p.start()
            ps.append(p)
        # BUG FIX: the original called p.join() inside the spawn loop,
        # serializing the children, and drained the queues only after
        # joining.  Drain BEFORE joining: a child blocked on a full queue
        # pipe never exits, so join()-before-get() can deadlock (see the
        # multiprocessing programming guidelines).
        for q in qs:
            print(q.get())
        for p in ps:
            p.join()
class AcmTextLogisticRegressionClassifierView(APIView):
    """
    List all snippets, or create a new snippet.
    """

    def hdfsizePath(self, path):
        """Prefix *path* with the configured ``hdfs://host:port`` URL."""
        return self.hdfsServerUrl + path

    def classify(self, inputJson):
        """Score *inputJson* rows with the persisted LR pipeline + model.

        :param inputJson: rows matching the 9-column schema below
            (Category ... Y) — presumably the SF-crime test layout;
            TODO(review) confirm against the request producer.
        :returns: a two-element list: a greeting string and the JSON-encoded
            rows whose prediction equals 7.
        """
        self.hdfs = PyWebHdfsClient(
            host=self.config.acm.servers.hdfs.host,
            port=self.config.acm.servers.hdfs.restPort,
            user_name=self.config.acm.servers.hdfs.fileOwner)
        self.hdfsServerUrl = ("hdfs://" + self.config.acm.servers.hdfs.host +
                              ":" + str(self.config.acm.servers.hdfs.port))
        # Idiom fix: `hasattr(...) == False` -> `not hasattr(...)`.
        # Create the Spark contexts lazily, once per view instance.
        if not hasattr(self, 'sc'):
            self.sc = SparkContext()
        if not hasattr(self, 'sqlContext'):
            self.sqlContext = SQLContext(self.sc)
        schema = StructType([
            StructField('Category', StringType(), True),
            StructField('Descript', StringType(), True),
            StructField('Dates', StringType(), True),
            StructField('DayOfWeek', StringType(), True),
            StructField('PdDistrict', StringType(), True),
            StructField('Resolution', StringType(), True),
            StructField('Address', StringType(), True),
            StructField('X', DoubleType(), True),
            StructField('Y', DoubleType(), True),
        ])
        test = self.sqlContext.createDataFrame(inputJson, schema)
        # Feature pipeline and model are loaded from HDFS paths prepared
        # by get() via hdfsizePath().
        pipeline = PipelineModel.load(self.pipelineHdfsPath)
        testData = pipeline.transform(test)
        print("Test Dataset Count: " + str(testData.count()))
        ##########################################################
        ################## Train/load the model ##################
        ##########################################################
        lrModel = LogisticRegressionModel.load(self.modelHdfsPath)
        predictions = lrModel.transform(testData)
        # Debug view of the top predictions for class 7.
        predictions.filter(predictions['prediction'] == 7) \
            .select("Descript", "Category", "probability", "label", "prediction") \
            .orderBy("probability", ascending=False) \
            .show(n=10, truncate=30)
        resultJson = predictions.filter(predictions['prediction'] == 7) \
            .select("prediction") \
            .orderBy("probability", ascending=False) \
            .toJSON().collect()
        # NOTE(review): stopping the context here means a second classify()
        # call on the same instance would reuse a stopped SparkContext
        # (hasattr still sees self.sc) — fine for one request per view
        # instance, verify if instances are ever reused.
        self.sc.stop()
        return ["al sana ML!", resultJson]

    def get(self, request, format=None):
        """Load config + test fixture JSON, classify it, return the result."""
        print("django-root:", settings.BASE_DIR)
        self.configFile = os.path.join(settings.BASE_DIR, "acm.config.dev.yml")
        self.configManager = ConfigManager(params={"config.file": self.configFile})
        self.config = self.configManager.read()
        self.hdfsServerUrl = ("hdfs://" + self.config.acm.servers.hdfs.host +
                              ":" + str(self.config.acm.servers.hdfs.port))
        # Paths to the test request fixture and the persisted model/pipeline
        # come from the test001 section of the config.
        jsonFile = os.path.join(settings.BASE_DIR,
                                self.config.acm.test.test001.jsonRequestPath)
        self.modelHdfsPath = self.hdfsizePath(
            self.config.acm.test.test001.modelHdfsPath)
        self.pipelineHdfsPath = self.hdfsizePath(
            self.config.acm.test.test001.pipelineHdfsPath)
        with open(jsonFile, 'r') as f:
            json_ = json.loads(f.read())
        ret_ = self.classify(json_)
        return Response(["hello", "world!", ret_])

    def post(self, request, format=None):
        # POST is handled identically to GET (input comes from the fixture
        # file, not the request body).
        return self.get(request, format)