def handleImbalanceProblem(self, dataset, labels, model, epochs):
    """Fit ``model`` through a ``DatasetSequence`` and log test metrics.

    The data is split and prepared with ``DataProcessor``, wrapped in a
    ``DatasetSequence`` that is handed to ``fit``, and the fitted network
    is then scored on the sequence's own ``test_data`` / ``test_labels``.

    Args:
        dataset: Samples passed to ``DataProcessor.trainTestSplit``.
        labels: Targets matching ``dataset``.
        model: Wrapper exposing ``getCompiledModel()`` and a ``model``
            attribute holding the network that is actually fitted.
        epochs: Epoch count given to both the sequence and ``fit``.

    Returns:
        The value returned by ``fit``.
    """
    model.getCompiledModel()
    network = model.model

    x_train, x_test, y_train, y_test = DataProcessor.trainTestSplit(
        dataset, labels)
    x_train, x_test, y_train, y_test = DataProcessor.prepareDatasetForFit(
        x_train, x_test, y_train, y_test)

    sequence = DatasetSequence(x_train, y_train, x_test, y_test, network,
                               epochs)
    history = network.fit(sequence,
                          epochs=epochs,
                          validation_data=(x_test, y_test))

    predictions = network.predict(sequence.test_data)
    if predictions.shape[1] <= 1:
        # Single output column: threshold the score at 0.5.
        expected = sequence.test_labels
        predicted = np.where(predictions > 0.5, 1, 0)
    else:
        # One column per class: pick the highest-scoring column.
        expected = np.argmax(sequence.test_labels, axis=1)
        predicted = np.argmax(predictions, axis=1)

    text_report = classification_report(y_true=expected,
                                        y_pred=predicted,
                                        zero_division=1)
    logger.info(text_report)
    weighted_scores = precision_recall_fscore_support(y_true=expected,
                                                      y_pred=predicted,
                                                      average='weighted')
    logger.info(weighted_scores)
    return history
def handleImbalanceProblem(self, dataset, labels, model, epochs):
    """Fit ``model`` with ``self.meanFalseError`` as loss and log metrics.

    The wrapper is compiled with ``loss=self.meanFalseError`` and
    ``runEagerly=True``, trained on the prepared train split, and then
    evaluated on the prepared test split.

    Args:
        dataset: Samples passed to ``DataProcessor.trainTestSplit``.
        labels: Targets matching ``dataset``.
        model: Wrapper exposing ``getCompiledModel(...)`` and a ``model``
            attribute holding the network that is actually fitted.
        epochs: Number of epochs passed to ``fit``.

    Returns:
        The value returned by ``fit``.
    """
    model.getCompiledModel(loss=self.meanFalseError, runEagerly=True)
    network = model.model

    x_train, x_test, y_train, y_test = DataProcessor.trainTestSplit(
        dataset, labels)
    x_train, x_test, y_train, y_test = DataProcessor.prepareDatasetForFit(
        x_train, x_test, y_train, y_test)

    history = network.fit(x_train,
                          y_train,
                          epochs=epochs,
                          validation_data=(x_test, y_test))

    scores = network.predict(x_test)
    multi_column = scores.shape[1] > 1
    # Several columns: take the arg-max class; one column: threshold at 0.5.
    expected = np.argmax(y_test, axis=1) if multi_column else y_test
    if multi_column:
        predicted = np.argmax(scores, axis=1)
    else:
        predicted = np.where(scores > 0.5, 1, 0)

    logger.info(classification_report(y_true=expected, y_pred=predicted))
    logger.info(
        precision_recall_fscore_support(y_true=expected,
                                        y_pred=predicted,
                                        average='weighted'))
    return history
def __init__(self, topology_model):
    """Create a workspace for ``topology_model`` and write its build files.

    A fresh workspace is generated, the topology is serialised to
    ``topology.json`` inside it, and a POM file is generated from the
    topology name and its dependencies.

    Args:
        topology_model: Topology description; it is passed to
            ``build_backend_json`` and indexed with
            ``TopologyConsts.FIELD_NAME``.

    Raises:
        TopologyWriteException: If building or writing ``topology.json``
            fails with an ``OSError``.

    Any exception raised by ``generate_pom_file`` propagates unchanged.
    """
    self.topology = topology_model
    # create a workspace
    self.base_path = Compiler.generate_basic_workspace()
    # code file path
    file_name = os.path.join(self.base_path, 'topology.json')

    try:
        # Serialise before opening so a failure here cannot leave an
        # empty, truncated topology.json behind.
        t_json = json.dumps(self.build_backend_json(self.topology))
        logger.info(t_json)
        with open(file_name, 'w') as text_file:
            text_file.write(t_json)
    except OSError as e:
        # Exceptions have no ``.message`` attribute on Python 3; str(e)
        # carries the errno/strerror text on every version.
        raise TopologyWriteException(str(e))

    self.generate_pom_file(
        base_path=self.base_path,
        name=self.topology[TopologyConsts.FIELD_NAME],
        version='1.0',
        dependencies=self.build_dependencies())
def get_log(topology_name, num_lines=10):
    """Collect the worker logs of a topology as one text blob.

    For every worker file reported by ``StormUI.getWorkersByTopologyName``
    the file is fetched with ``StormUI.getFile``, the
    ``<pre id="logContent">`` element is extracted and its text is
    appended under a banner naming the worker.

    Args:
        topology_name: Name used to look up the worker log files.
        num_lines: Multiplied by 100 and passed to ``StormUI.getFile``.

    Returns:
        The concatenated log text (empty when no worker file is found),
        or an error string starting with
        ``"Error parsing data from Storm UI:"`` as soon as processing one
        of the files raises.
    """
    worker_files = StormUI.getWorkersByTopologyName(topology_name)
    collected = ""
    if not worker_files:
        return collected

    logger.debug("Filenames for topology " + topology_name + " -> " +
                 str(worker_files))
    for worker_file in worker_files:
        logger.info("Topology :" + topology_name + " - LogFilename: " +
                    worker_file)
        # get log file from storm cluster
        raw_page = StormUI.getFile(worker_file, int(num_lines) * 100)
        try:
            # Remove HTML tags from Storm Log 8000 port
            soup = BeautifulSoup(raw_page, "lxml")
            pre_tag = soup.find("pre", {"id": "logContent"})
            if not pre_tag:
                continue
            collected += "\n###################################################\n"
            hostname, _port, name = LogViewer.parse_worker_info(worker_file)
            collected += "Log from worker: " + hostname + " - " + name + "\n"
            collected += "###################################################\n"
            collected += pre_tag.getText()
            line_total = len(collected.splitlines())
            logger.debug("Getting " + str(line_total) + " lines.")
        except Exception as e:
            return "Error parsing data from Storm UI:" + str(e)
    return collected
def copyID(self, keyModel, username=None, host=None):
    """Obtain an API token and post the public key of ``keyModel``.

    Args:
        keyModel: Object providing ``getPubKey()``; stored on the
            instance as ``self.keyModel``.
        username: Not used by this method.
        host: Not used by this method.

    Any exception raised by ``self.postKey`` propagates to the caller.
    """
    from logger.Logger import logger

    self.keyModel = keyModel
    self.pubkey = self.keyModel.getPubKey()

    # Use of a singleton here means that we should be able to do SSO on
    # any AAF/Shibboleth web service. However we might have to guess the
    # IdP.
    session_holder = cvlsshutils.RequestsSessionSingleton.RequestsSessionSingleton()
    self.session = session_holder.GetSession()

    destURL = 'https://portal.synchrotron.org.au:443/api/v1/oauth/token'
    auth = cvlsshutils.ASyncAuth.ASyncAuth(
        self.session,
        destURL,
        parent=self.parent,
        extraParams=self.extraParams,
        **self.kwargs)
    apitoken = auth.gettoken()
    self.updateDict = auth.getUpdateDict()

    self.postKey(apitoken)
    message = 'copied pub key %s to user account %s' % (self.pubkey,
                                                        self.username)
    logger.info(message)
def balanceDataset(self, dataset, labels):
    """Oversample minority classes until all classes are equally large.

    Every class with fewer samples than the largest one is topped up with
    randomly chosen (with replacement) copies of its own samples; the
    copies are appended after the existing rows.

    Args:
        dataset: Array of samples, indexed along its first axis.
        labels: Array of class ids aligned with ``dataset``.

    Returns:
        A ``(dataset, labels)`` tuple including the appended copies.
    """
    class_ids, class_sizes = np.unique(labels, return_counts=True)
    logger.info(class_sizes)
    target_size = np.amax(class_sizes)

    for class_id, class_size in zip(class_ids, class_sizes):
        if not class_size < target_size:
            continue
        missing = target_size - class_size
        logger.info("Oversampling class {} with {} samples".format(
            class_id, missing))
        candidates = np.where(labels == class_id)[0].tolist()
        picked = random.choices(candidates, k=missing)
        extra_samples = np.copy(dataset[picked, ])
        extra_labels = np.copy(labels[picked, ])
        dataset = np.concatenate((dataset, extra_samples))
        labels = np.concatenate((labels, extra_labels))

    return dataset, labels
def add_java_module(module_name, module_type):
    """Add a module's compiled classes to the JAR and clean up temp files.

    The module's main ``.class`` file and every class listed in the
    companion ``.info`` file are added to ``conf.SINFONIER_LAST_JAR`` with
    ``jar uf``. Each file is removed from ``conf.CLASSES_TMP_PATH`` only
    after the command that adds it has run.

    Args:
        module_name: Base name of the module's ``.class`` / ``.info`` files.
        module_type: Prefix prepended to the file names.
    """
    tmp_prefix = conf.CLASSES_TMP_PATH + module_type

    def jar_update_command(entry):
        # Same command shape for every class added to the JAR.
        return [
            "cd", conf.CLASSES_TMP_PATH + ";", conf.JAVA_JAR_BIN, "uf",
            conf.SINFONIER_LAST_JAR, entry
        ]

    main_class = module_name + ".class"
    # Update the JAR first: deleting the .class beforehand would leave
    # ``jar uf`` with nothing to add.
    CommandExecutor.execute(jar_update_command(module_type + main_class),
                            capture_out=True)
    os.remove(tmp_prefix + main_class)

    info_path = tmp_prefix + module_name + ".info"
    with open(info_path) as info_file:
        listclasses = info_file.read().splitlines()

    for classjava in listclasses:
        logger.info("Updating JAR with class " + classjava)
        # Inner-class names contain '$'; a backslash is put in front of
        # each one before the name goes on the command line.
        escaped = module_type + classjava.replace("$", "\\$")
        CommandExecutor.execute(jar_update_command(escaped),
                                capture_out=True)
        os.remove(tmp_prefix + classjava)

    os.remove(info_path)
def request_job():
    """Run ``/STREAM/run.sh`` for the posted job and store its result.

    The request JSON must contain ``priority`` and ``cpu``. When
    ``priority`` is the string ``'None'`` the script runs without
    arguments and ``-1`` is stored as the priority; otherwise the priority
    is passed to the script as its single argument.

    Returns:
        A JSON string ``{"status": "success"}``.

    Raises:
        subprocess.CalledProcessError: If the script exits non-zero
            (raised by ``check_output``).
    """
    start_time = time.time()
    content = request.get_json(silent=True)
    priority = content['priority']
    cpu = content['cpu']
    logger.info('Priority : %s, CPU : %s' % (priority, cpu))

    # ``priority`` comes straight from the request body, so the command is
    # built as an argument list and run without a shell. This keeps
    # metacharacters in the value from being interpreted as shell syntax.
    command = ['bash', '/STREAM/run.sh']
    if priority == 'None':
        print('Priority is None.')
        priority = -1
    else:
        command.append(str(priority))

    output = check_output(command, stderr=STDOUT)
    output = output.decode('utf-8').replace('\n', '')
    end_time = time.time()
    store_result(priority, output, cpu, start_time, end_time)
    return json.dumps({'status': 'success'})
def handleImbalanceProblem(self, dataset, labels, model, epochs):
    """Fit ``model`` with balanced class weights and log test metrics.

    Class weights are computed with ``compute_class_weight('balanced')``
    over the full, flattened ``labels`` and passed to ``fit`` as a mapping
    from position (0, 1, ...) to weight.

    Args:
        dataset: Samples passed to ``DataProcessor.trainTestSplit``.
        labels: Array of targets; it must support ``flatten()``.
        model: Wrapper exposing ``getCompiledModel()`` and a ``model``
            attribute holding the network that is actually fitted.
        epochs: Number of epochs passed to ``fit``.

    Returns:
        The value returned by ``fit``.
    """
    model.getCompiledModel()
    network = model.model

    flat_labels = labels.flatten()
    weights = class_weight.compute_class_weight('balanced',
                                                classes=np.unique(labels),
                                                y=flat_labels)
    # Keys are the positions of the weights, not the raw label values.
    weight_by_index = dict(enumerate(weights))

    x_train, x_test, y_train, y_test = DataProcessor.trainTestSplit(
        dataset, labels)
    x_train, x_test, y_train, y_test = DataProcessor.prepareDatasetForFit(
        x_train, x_test, y_train, y_test)

    history = network.fit(x_train,
                          y_train,
                          epochs=epochs,
                          validation_data=(x_test, y_test),
                          class_weight=weight_by_index)

    scores = network.predict(x_test)
    if scores.shape[1] <= 1:
        # Single output column: threshold the score at 0.5.
        expected = y_test
        predicted = np.where(scores > 0.5, 1, 0)
    else:
        # One column per class: pick the highest-scoring column.
        expected = np.argmax(y_test, axis=1)
        predicted = np.argmax(scores, axis=1)

    logger.info(classification_report(y_true=expected, y_pred=predicted))
    logger.info(
        precision_recall_fscore_support(y_true=expected,
                                        y_pred=predicted,
                                        average='weighted'))
    return history
def on_epoch_end(self):
    """Oversample minority classes with their highest-loss samples.

    Does nothing beyond logging ``"return"`` on the tenth call. Otherwise
    the sigmoid focal cross-entropy of every training sample is computed
    and, for each class smaller than the largest one, the
    ``self.hardMiningSample`` highest-loss samples of that class are
    appended repeatedly to ``self.train_data`` / ``self.train_labels``.
    """
    self.counter += 1
    if self.counter == 10:
        logger.info("return")
        return

    predictions = self.model.predict(self.train_data)
    targets = tf.cast(self.train_labels, tf.float32)
    loss = tfa.losses.sigmoid_focal_crossentropy(targets, predictions)

    if predictions.shape[1] <= 1:
        y_true = self.train_labels
        y_pred = np.where(predictions > 0.5, 1, 0)
    else:
        y_true = np.argmax(self.train_labels, axis=1)
        y_pred = np.argmax(predictions, axis=1)

    class_ids, class_sizes = np.unique(y_true, return_counts=True)
    largest = np.amax(class_sizes)

    for class_id, class_size in zip(class_ids, class_sizes):
        if not class_size < largest:
            continue
        # hard mining
        shortfall = largest - class_size
        members = np.where(y_true == class_id)[0].tolist()
        # Stable ascending sort by loss; the tail holds the hardest samples.
        by_loss = sorted(((idx, loss[idx]) for idx in members),
                         key=lambda pair: pair[1])
        hardest = [pair[0] for pair in by_loss[-self.hardMiningSample:]]
        rounds = int(shortfall / self.hardMiningSample)
        for _ in range(rounds):
            extra_data = np.copy(self.train_data[hardest, ])
            extra_labels = np.copy(self.train_labels[hardest, ])
            self.train_data = np.concatenate((self.train_data, extra_data))
            self.train_labels = np.concatenate(
                (self.train_labels, extra_labels))

    logger.info(np.unique(self.train_data)[1])
    logger.info(self.train_data.shape)
    logger.info(self.train_labels.shape)
def on_epoch_end(self):
    """Resample the training set according to per-class F1 scores.

    Does nothing beyond logging ``"return"`` on the call where
    ``self.counter`` reaches ``self.epochs``. Otherwise the model is
    scored on the current training data and every class gets a target
    size proportional to ``1 - f1`` (normalised by the mean of that value
    and scaled by the mean class size). Classes above their target are
    randomly undersampled; classes below it are randomly oversampled with
    replacement. ``self.train_data`` and ``self.train_labels`` are
    replaced by the resampled arrays.

    When every class already has an F1 score of 1 the normaliser is zero
    and no target size can be derived, so the data is left untouched.
    """
    self.counter += 1
    if self.counter == self.epochs:
        logger.info("return")
        return

    predictions = self.model.predict(self.train_data)
    if self.train_labels.shape[1] > 1:
        y_true = np.argmax(self.train_labels, axis=1)
        y_pred = np.argmax(predictions, axis=1)
    else:
        y_true = self.train_labels
        y_pred = np.where(predictions > 0.5, 1, 0)

    report = classification_report(y_true=y_true,
                                   y_pred=y_pred,
                                   output_dict=True)
    if self.counter == self.epochs - 1:
        logger.info(classification_report(y_true=y_true, y_pred=y_pred))

    classes, classesCount = np.unique(y_true, return_counts=True)
    averageClassSize = math.ceil(np.mean(classesCount))

    # Every report key except the aggregate rows is a class label.
    summaryRows = ("accuracy", "macro avg", "weighted avg")
    classLabels = [label for label in report if label not in summaryRows]
    for ix, label in enumerate(classLabels):
        logger.info("{} : {}".format(ix, report[label]['f1-score']))

    f1Deficit = {
        label: 1 - report[label]['f1-score']
        for label in classLabels
    }
    f1ScoreTotal = np.sum(list(f1Deficit.values())) / len(classesCount)
    if f1ScoreTotal == 0:
        # All classes are predicted perfectly: dividing by zero below
        # would yield NaN sizes and int(NaN) raises ValueError.
        logger.info("All classes have f1-score 1.0; skipping resampling")
        return

    updatedSampleSize = {
        int(label): int((deficit / f1ScoreTotal) * averageClassSize)
        for label, deficit in f1Deficit.items()
    }
    logger.info(updatedSampleSize)
    logger.info("mean class size: {}".format(averageClassSize))
    logger.info(classesCount)

    for classId, classCount in zip(classes, classesCount):
        targetSize = updatedSampleSize[classId]
        if classCount > targetSize:
            logger.info("Undersample class {}".format(classId))
            amountToRemove = int(classCount - targetSize)
            classIndexes = np.where(y_true == classId)[0].tolist()
            indexesToRemove = random.sample(classIndexes, amountToRemove)
            self.train_data = np.delete(self.train_data, indexesToRemove, 0)
            self.train_labels = np.delete(self.train_labels,
                                          indexesToRemove, 0)
        elif classCount < targetSize:
            logger.info("Oversample class {}".format(classId))
            amountToCopy = int(targetSize - classCount)
            classIndexes = np.where(y_true == classId)[0].tolist()
            indexesToCopy = random.choices(classIndexes, k=amountToCopy)
            copiedDataset = np.copy(self.train_data[indexesToCopy, ])
            copiedLabels = np.copy(self.train_labels[indexesToCopy, ])
            self.train_data = np.concatenate(
                (self.train_data, copiedDataset))
            self.train_labels = np.concatenate(
                (self.train_labels, copiedLabels))

        # Row positions shift after every resampling step, so refresh
        # y_true before looking up the next class's indexes.
        if self.train_labels.shape[1] > 1:
            y_true = np.argmax(self.train_labels, axis=1)
        else:
            y_true = self.train_labels
parser.add_argument('--production', action='store_true') parser.add_argument('--docker', action='store_true') args = parser.parse_args() if args.production: os.environ[EnvConst.SINFONIER_ENV_KEY] = EnvConst.PROD_ENVIRONMENT elif args.docker: os.environ[EnvConst.SINFONIER_ENV_KEY] = EnvConst.DOCKER_ENVIRONMENT else: os.environ[EnvConst.SINFONIER_ENV_KEY] = EnvConst.DEVELOP_ENVIRONMENT from config.config import conf from config.Routes import Routes from logger.Logger import logger routes = Routes() api = routes.api from wsgiref import simple_server httpd = simple_server.make_server(conf.SINFONIER_API_HOST, conf.SINFONIER_API_PORT, api) logger.info('Server up! running in ' + conf.SINFONIER_API_HOST + ':' + str(conf.SINFONIER_API_PORT)) logger.info(os.environ[EnvConst.SINFONIER_ENV_KEY].upper() + ' mode') httpd.serve_forever() else: from utils.SinfonierConstants import Environment os.environ[Environment.SINFONIER_ENV_KEY] = Environment.PROD_ENVIRONMENT app = running_server()
def log(self):
    """Log ``self.stdout`` at info level and a non-empty ``self.stderr`` at error level."""
    logger.info(self.stdout)
    if not self.stderr:
        return
    logger.error(self.stderr)