def trainAgent(configDir, exitOnFail=False):
    """
    Run one complete training sequence for the Kwola configuration built from configDir.

    The agent is loaded and saved once, a throwaway web environment is started and shut down,
    a fresh TrainingSequence record is written, random initialization runs when no completed
    testing step exists, and finally the main training loop runs. The sequence record is
    marked "completed" and saved at the end.

    :param configDir: Value handed to KwolaCoreConfiguration to build the configuration.
    :param exitOnFail: Forwarded unchanged to runRandomInitialization and runMainTrainingLoop.
    :return: None
    """
    try:
        multiprocessing.set_start_method('spawn')
    except RuntimeError:
        # Deliberately ignored; the function carries on with whatever start method is active.
        pass

    config = KwolaCoreConfiguration(configDir)

    # Ask for the bugs directory purely for its effect. This is just temporary.
    config.getKwolaUserDataDirectory("bugs")

    # Round-trip the agent through load/save to make sure all training subprocesses are synced.
    syncAgent = DeepLearningAgent(config=config, whichGpu=None)
    syncAgent.initialize(enableTraining=False)
    syncAgent.load()
    syncAgent.save()
    del syncAgent

    # Start and immediately stop an environment. This forces a lot of the initial javascript in
    # the application to be loaded and translated, and verifies that the target URL is reachable
    # before a full sequence is attempted.
    warmupEnvironment = WebEnvironment(config, sessionLimit=1)
    warmupEnvironment.shutdown()
    del warmupEnvironment

    newSequenceId = CustomIDField.generateNewUUID(TrainingSequence, config)
    trainingSequence = TrainingSequence(id=newSequenceId)
    trainingSequence.startTime = datetime.now()
    trainingSequence.status = "running"
    trainingSequence.trainingStepsCompleted = 0
    trainingSequence.saveToDisk(config)

    completedTestingSteps = [
        testingStep
        for testingStep in TrainingManager.loadAllTestingSteps(config)
        if testingStep.status == "completed"
    ]
    if not completedTestingSteps:
        # Nothing has been completed yet, so run the random initialization first.
        runRandomInitialization(config, trainingSequence, exitOnFail=exitOnFail)
        trainingSequence.saveToDisk(config)

    runMainTrainingLoop(config, trainingSequence, exitOnFail=exitOnFail)

    trainingSequence.status = "completed"
    trainingSequence.endTime = datetime.now()
    trainingSequence.saveToDisk(config)
def trainAgent(configDir, exitOnFail=False):
    """
    Run (or resume) a training sequence for the Kwola configuration built from configDir.

    The agent is loaded and saved once and a throwaway web environment is started and shut down
    using the first available browser. If the "training_sequences" user-data directory holds no
    file other than lock files, a new TrainingSequence is created; otherwise the first listed
    file is loaded. Random initialization runs when no completed testing step exists, then the
    main training loop runs, charts are generated, the sequence is marked "completed", and every
    folder named in config['train_agent_loop_delete_folders_on_finish'] is removed.

    :param configDir: Value handed to KwolaCoreConfiguration to build the configuration.
    :param exitOnFail: Forwarded unchanged to runRandomInitialization and runMainTrainingLoop.
    :return: None
    """
    try:
        multiprocessing.set_start_method('spawn')
    except RuntimeError:
        # Deliberately ignored; the function carries on with whatever start method is active.
        pass

    config = KwolaCoreConfiguration(configDir)

    # Ask for the bugs directory purely for its effect. This is just temporary.
    config.getKwolaUserDataDirectory("bugs")

    # Round-trip the agent through load/save to make sure all training subprocesses are synced.
    syncAgent = DeepLearningAgent(config=config, whichGpu=None)
    syncAgent.initialize(enableTraining=False)
    syncAgent.load()
    syncAgent.save()
    del syncAgent

    availableBrowsers = getAvailableBrowsers(config)

    # Start and immediately stop an environment. This forces a lot of the initial javascript in
    # the application to be loaded and translated, and verifies that the target URL is reachable
    # before a full sequence is attempted.
    warmupEnvironment = WebEnvironment(config, sessionLimit=1, browser=availableBrowsers[0])
    warmupEnvironment.shutdown()
    del warmupEnvironment

    # Every entry of the training_sequences directory whose name does not contain ".lock".
    sequenceFiles = [
        entry
        for entry in os.listdir(config.getKwolaUserDataDirectory("training_sequences"))
        if ".lock" not in entry
    ]

    if sequenceFiles:
        # Resume from the first listed sequence file: its id is the file name with the known
        # storage suffixes removed.
        firstFile = sequenceFiles[0]
        sequenceId = firstFile
        for suffix in (".pickle", ".json", ".gz"):
            sequenceId = sequenceId.replace(suffix, "")

        sequencePath = os.path.join(config.getKwolaUserDataDirectory("training_sequences"), firstFile)
        LockedFile.clearLockFile(sequencePath)

        trainingSequence = TrainingSequence.loadFromDisk(sequenceId, config)
    else:
        # No stored sequence: start a new one with all counters at zero.
        newSequenceId = CustomIDField.generateNewUUID(TrainingSequence, config)
        trainingSequence = TrainingSequence(id=newSequenceId)
        trainingSequence.startTime = datetime.now()
        trainingSequence.status = "running"
        trainingSequence.trainingLoopsCompleted = 0
        trainingSequence.trainingStepsLaunched = 0
        trainingSequence.testingStepsLaunched = 0
        trainingSequence.saveToDisk(config)

    completedTestingSteps = [
        testingStep
        for testingStep in TrainingManager.loadAllTestingSteps(config)
        if testingStep.status == "completed"
    ]
    if not completedTestingSteps:
        # Nothing has been completed yet, so run the random initialization first.
        runRandomInitialization(config, trainingSequence, exitOnFail=exitOnFail)
        trainingSequence.saveToDisk(config)

    runMainTrainingLoop(config, trainingSequence, exitOnFail=exitOnFail)

    generateAllCharts(config, enableCumulativeCoverage=True)

    trainingSequence.status = "completed"
    trainingSequence.endTime = datetime.now()
    trainingSequence.saveToDisk(config)

    # Remove the folders the configuration asks to be deleted once training has finished.
    for folderName in config['train_agent_loop_delete_folders_on_finish']:
        shutil.rmtree(config.getKwolaUserDataDirectory(folderName))
def runTesting(self):
    """
    Execute one testing step from start to finish and report the outcome.

    The step is marked "running", execution sessions and testing subprocesses are created, and
    actions are executed until self.stepsRemaining reaches zero or no execution sessions remain.
    Afterwards sessions are saved, the environment is shut down, the step is marked "completed"
    and the local trace pickle files are deleted. Any exception is logged at error level and
    reported in the result instead of being raised.

    :return: dict with 'success', and when reached: 'testingStepId',
             'successfulExecutionSessions' and (on failure) 'exception' holding the traceback.
    """
    getLogger().info(f"[{os.getpid()}] Starting New Testing Sequence")

    resultValue = {'success': True}

    try:
        try:
            multiprocessing.set_start_method('spawn')
        except RuntimeError:
            # Deliberately ignored; testing proceeds with whatever start method is active.
            pass

        self.testStep.startTime = datetime.now()
        self.testStep.status = "running"
        self.testStep.saveToDisk(self.config)

        resultValue["testingStepId"] = str(self.testStep.id)

        self.createExecutionSessions()
        self.createTestingSubprocesses()

        for startPlugin in self.testingStepPlugins:
            startPlugin.testingStepStarted(self.testStep, self.executionSessions)

        self.environment = WebEnvironment(config=self.config, executionSessions=self.executionSessions)

        self.loopTime = datetime.now()
        while self.stepsRemaining > 0:
            self.stepsRemaining -= 1

            self.removeBadSessions()
            if not self.executionSessions:
                # Every session has been removed, so there is nothing left to act on.
                break

            self.executeSingleAction()

            # Restart a subprocess on every step when the period is 1, otherwise whenever the
            # remaining step count sits one below a multiple of the period.
            resetPeriod = self.config['testing_reset_agent_period']
            if resetPeriod == 1 or self.stepsRemaining % resetPeriod == (resetPeriod - 1):
                self.restartOneTestingSubprocess()

            self.step += 1

        self.killAndJoinTestingSubprocesses()
        self.removeBadSessions()

        # Ensure all the trace objects get saved to disc
        self.traceSaveExecutor.shutdown()

        self.environment.runSessionCompletedHooks()

        self.savePlainVideoFiles()

        for finishedSession in self.executionSessions:
            finishedSession.endTime = datetime.now()
            finishedSession.saveToDisk(self.config)

        # The environment is shut down before generating the annotated videos in order to
        # conserve memory, since it is no longer needed after this point.
        self.shutdownEnvironment()

        self.testStep.status = "completed"
        self.testStep.endTime = datetime.now()
        self.testStep.executionSessions = [keptSession.id for keptSession in self.executionSessions]

        for finishPlugin in self.testingStepPlugins:
            finishPlugin.testingStepFinished(self.testStep, self.executionSessions)

        self.testStep.saveToDisk(self.config)

        resultValue['successfulExecutionSessions'] = len(self.testStep.executionSessions)
        resultValue['success'] = True

        # Delete every local trace pickle file, session by session.
        localPickleFiles = (
            pickleFile
            for sessionFiles in self.executionSessionTraceLocalPickleFiles
            for pickleFile in sessionFiles
        )
        for pickleFile in localPickleFiles:
            os.unlink(pickleFile)

    except Exception:
        getLogger().error(
            f"[{os.getpid()}] Unhandled exception occurred during testing sequence:\n{traceback.format_exc()}"
        )
        resultValue['success'] = False
        resultValue['exception'] = traceback.format_exc()

    # This print statement will trigger the parent manager process to kill this process.
    getLogger().info(f"[{os.getpid()}] Finished Running Testing Sequence!")

    return resultValue
# -*- coding: utf-8 -*- import tempfile,os,sys,pickle if sys.platform == 'win32': from multiprocessing import Process else: import billiard billiard.set_start_method('spawn') from billiard import Process import matplotlib matplotlib.use('Qt5Agg') from hydroid.HYDROIDexp import assign_peaks_interactive,call_peaks_interactive,fit_peaks,plot_prof_on_seq class hydroidConfig(object): def __init__(self,laneList): self.laneList=laneList #we use it like that as regular temfile can not be opend multiple times in windows self.fd,self.configFile=tempfile.mkstemp() with os.fdopen(self.fd,'w') as file: file.write('''# #column - name of column with an array of profile values in lane_profile.xls #lanme - arbitrary name of the gel lane (e.g. experiment label). #leftlim, rightlim - indexes of values in profile array that specify the datarange that will be analyzed, in NaN dataset is not truncated #peakthresh - parameter for automatic peak identification algorithm, use lower values to identify more subtle peaks. However, this may lead to false positives. #min_dist_left - minimal distance between the individual peaks allowed at the left side of the data range. #min_dist_right - minimal distance between the individual peaks allowed at the right side of the data range. #segments - number of segment the data range is split into for linear interpolation of the min_dist value for the automatic peak identification algorithm. #base - try to substract linear baseline from the profile before peak identification attempt. #interpolate - try to guess the location of unidentified peaks from the local average spacing betwee the nearby peaks.
def trainAgent(config, exitOnFail=False):
    """
    Run (or resume) a training sequence for the given Kwola configuration.

    Optionally waits for other "kwola" processes to exit, round-trips the agent through
    load/save, then either creates a new TrainingSequence or loads the first one listed in the
    "training_sequences" user-data directory. When no completed testing step exists, a throwaway
    web environment is started and shut down. Random initialization runs while fewer completed
    testing steps exist than config['training_random_initialization_sequences']. After the main
    training loop, charts are generated, the sequence is marked "completed", results are
    optionally emailed, and the configured folders are deleted.

    :param config: Value handed to KwolaCoreConfiguration; the name is rebound to the result.
    :param exitOnFail: Forwarded unchanged to runRandomInitialization and runMainTrainingLoop.
    :return: None
    """
    try:
        multiprocessing.set_start_method('spawn')
    except RuntimeError:
        # Deliberately ignored; the function carries on with whatever start method is active.
        pass

    config = KwolaCoreConfiguration(config)

    shouldWaitForOthers = config['train_agent_loop_wait_for_other_kwola_processes_to_exit']
    if shouldWaitForOthers and checkIfProcessRunning("kwola"):
        getLogger().info("Waiting for the other Kwola process to finish running. If you want to run multiple Kwola processes at once, please change the train_agent_loop_wait_for_other_kwola_processes_to_exit configuration variable.")
        # Poll once per second until no other process is reported as running.
        while checkIfProcessRunning("kwola"):
            time.sleep(1)

    # Round-trip the agent through load/save to make sure all training subprocesses are synced.
    syncAgent = DeepLearningAgent(config=config, whichGpu=None)
    syncAgent.initialize(enableTraining=False)
    syncAgent.load()
    syncAgent.save()
    del syncAgent

    # Every entry of the training_sequences directory whose name does not contain ".lock".
    sequenceFiles = [
        entry
        for entry in os.listdir(config.getKwolaUserDataDirectory("training_sequences"))
        if ".lock" not in entry
    ]

    if sequenceFiles:
        # Resume from the first listed sequence file: its id is the file name with the known
        # storage suffixes removed.
        sequenceId = sequenceFiles[0]
        for suffix in (".pickle", ".json", ".gz", ".enc"):
            sequenceId = sequenceId.replace(suffix, "")

        trainingSequence = TrainingSequence.loadFromDisk(sequenceId, config)
    else:
        # No stored sequence: start a new one with all counters at zero.
        newSequenceId = CustomIDField.generateNewUUID(TrainingSequence, config)
        trainingSequence = TrainingSequence(id=newSequenceId)
        trainingSequence.startTime = datetime.now()
        trainingSequence.status = "running"
        trainingSequence.trainingLoopsCompleted = 0
        trainingSequence.trainingStepsLaunched = 0
        trainingSequence.testingStepsLaunched = 0
        trainingSequence.saveToDisk(config)

    completedTestingSteps = [
        testingStep
        for testingStep in TrainingManager.loadAllTestingSteps(config)
        if testingStep.status == "completed"
    ]
    completedCount = len(completedTestingSteps)

    if completedCount == 0:
        availableBrowsers = getAvailableBrowsers(config)

        # Start and immediately stop an environment. This forces a lot of the initial javascript
        # in the application to be loaded and translated, and verifies that the target URL is
        # reachable before a full sequence is attempted.
        warmupEnvironment = WebEnvironment(config, sessionLimit=1, browser=availableBrowsers[0])
        warmupEnvironment.shutdown()
        del warmupEnvironment

    if completedCount < config['training_random_initialization_sequences']:
        runRandomInitialization(config, trainingSequence, exitOnFail=exitOnFail)
        trainingSequence.saveToDisk(config)

    loopsCompleted = runMainTrainingLoop(config, trainingSequence, exitOnFail=exitOnFail)

    generateAllCharts(config, applicationId=None, enableCumulativeCoverage=True)

    trainingSequence.status = "completed"
    trainingSequence.endTime = datetime.now()
    trainingSequence.saveToDisk(config)

    # Results are only mailed when enabled and at least one loop actually completed.
    if config['train_agent_loop_email_results'] and loopsCompleted > 0:
        from ..components.utils.email import sendExperimentResults
        sendExperimentResults(config)

    # Remove the folders the configuration asks to be deleted once training has finished.
    for folderName in config['train_agent_loop_delete_folders_on_finish']:
        shutil.rmtree(config.getKwolaUserDataDirectory(folderName))
def runTesting(self):
    """
    Execute one testing step from start to finish and report the outcome.

    The step is marked "running", execution sessions (and, when enabled by
    config['testing_enable_prediction_subprocess'], the testing subprocesses) are created, and
    actions are executed until self.stepsRemaining reaches zero or no execution sessions remain.
    Code prevalence scores are then computed over all traces, the traces are submitted for
    saving, sessions are saved and the environment is shut down. The step ends as "failed" when
    no execution sessions are left and "completed" otherwise. Exceptions are logged and reported
    in the result instead of being raised.

    :return: dict with 'success', and when reached: 'testingStepId',
             'successfulExecutionSessions' and (on failure) 'exception' holding the traceback.
    """
    if self.config['print_configuration_on_startup']:
        getLogger().info(f"Starting New Testing Sequence with configuration:\n{pformat(self.config.configData)}")
    else:
        getLogger().info(f"Starting New Testing Sequence")

    resultValue = {'success': True}

    try:
        try:
            multiprocessing.set_start_method('spawn')
        except RuntimeError:
            # Deliberately ignored; testing proceeds with whatever start method is active.
            pass

        self.testStep.startTime = datetime.now()
        self.testStep.status = "running"
        self.testStep.saveToDisk(self.config)

        resultValue["testingStepId"] = str(self.testStep.id)

        self.createExecutionSessions()
        if self.config['testing_enable_prediction_subprocess']:
            self.createTestingSubprocesses()

        for startPlugin in self.testingStepPlugins:
            startPlugin.testingStepStarted(self.testStep, self.executionSessions)

        self.environment = WebEnvironment(
            config=self.config,
            executionSessions=self.executionSessions,
            plugins=self.webEnvironmentPlugins,
            browser=self.browser,
            windowSize=self.windowSize,
        )

        self.loopTime = datetime.now()
        while self.stepsRemaining > 0:
            self.stepsRemaining -= 1

            self.removeBadSessions()
            if not self.executionSessions:
                # Every session has been removed, so there is nothing left to act on.
                break

            self.executeSingleAction()

            if self.config['testing_enable_prediction_subprocess']:
                # Restart a subprocess on every step when the period is 1, otherwise whenever
                # the remaining step count sits one below a multiple of the period.
                resetPeriod = self.config['testing_reset_agent_period']
                if resetPeriod == 1 or self.stepsRemaining % resetPeriod == (resetPeriod - 1):
                    self.restartOneTestingSubprocess()

            self.step += 1

        if self.config['testing_enable_prediction_subprocess']:
            self.killAndJoinTestingSubprocesses()
        self.removeBadSessions()

        # Compute the code prevalence scores for all of the traces
        allTraces = [
            singleTrace
            for sessionTraces in self.executionSessionTraces
            for singleTrace in sessionTraces
        ]
        self.agent.symbolMapper.load()
        self.agent.symbolMapper.computeCodePrevalenceScores(allTraces)

        # Ensure all the trace objects get saved to disc
        for singleTrace in allTraces:
            self.traceSaveExecutor.submit(TestingStepManager.saveTrace, singleTrace, self.config)

        self.traceSaveExecutor.shutdown()

        self.environment.runSessionCompletedHooks()

        for finishedSession in self.executionSessions:
            finishedSession.endTime = datetime.now()
            finishedSession.saveToDisk(self.config)

        # The environment is shut down before generating the annotated videos in order to
        # conserve memory, since it is no longer needed after this point.
        self.shutdownEnvironment()

        if self.executionSessions:
            self.testStep.status = "completed"
            self.testStep.browser = self.browser
            self.testStep.userAgent = self.executionSessions[0].userAgent
        else:
            self.testStep.status = "failed"

        self.testStep.endTime = datetime.now()
        self.testStep.executionSessions = [keptSession.id for keptSession in self.executionSessions]

        if len(self.executionSessions) > 0:
            for finishPlugin in self.testingStepPlugins:
                finishPlugin.testingStepFinished(self.testStep, self.executionSessions)

            for keptSession in self.executionSessions:
                keptSession.status = "completed"
                keptSession.saveToDisk(self.config)

        self.testStep.saveToDisk(self.config)

        sessionCount = len(self.testStep.executionSessions)
        resultValue['successfulExecutionSessions'] = sessionCount
        resultValue['success'] = sessionCount != 0

        # Delete every local trace pickle file, session by session.
        localPickleFiles = (
            pickleFile
            for sessionFiles in self.executionSessionTraceLocalPickleFiles
            for pickleFile in sessionFiles
        )
        for pickleFile in localPickleFiles:
            os.unlink(pickleFile)

    except (selenium.common.exceptions.WebDriverException, ProxyVerificationFailed):
        # Both of these are reported at warning level rather than error level.
        #
        # WebDriverException just happens sometimes. It has something to do with the chrome
        # process failing to interact correctly with mitmproxy. It is not at all clear what
        # causes it, but the system can't auto retry from it unless the whole container is
        # killed. It is caught explicitly so no error level log message (which gets sent to
        # slack) is triggered. The manager process will safely restart this testing step.
        #
        # ProxyVerificationFailed happens more often when our own servers go down than when the
        # proxy is actually not functioning.
        getLogger().warning(
            f"Unhandled exception occurred during testing sequence:\n{traceback.format_exc()}"
        )
        resultValue['success'] = False
        resultValue['exception'] = traceback.format_exc()
    except Exception:
        getLogger().error(
            f"Unhandled exception occurred during testing sequence:\n{traceback.format_exc()}"
        )
        resultValue['success'] = False
        resultValue['exception'] = traceback.format_exc()

    # This print statement will trigger the parent manager process to kill this process.
    getLogger().info(f"Finished Running Testing Sequence!")

    return resultValue
def runTraining(self):
    """
    Run one training step: set up the agent and subprocesses, iterate over batches, then save.

    Setup failures (including the case where no testing steps are available) return early
    without a training step id. Exceptions raised inside the training loop are logged at error
    level and reported in the returned dict. The batch directory is always emptied and removed,
    and self.agent is always deleted, once the training loop section has been entered.

    :return: dict. For an early exit during setup: {"success": False, "exception": <message>}.
             Otherwise {"trainingStepId": <str>, "success": <bool>} plus an "exception" entry
             with the formatted traceback when the training loop raised.
    """
    success = True
    exception = None

    try:
        try:
            multiprocessing.set_start_method('spawn')
        except RuntimeError:
            # Deliberately ignored; training proceeds with whatever start method is active.
            pass

        if self.config['print_configuration_on_startup']:
            getLogger().info(f"Starting Training Step with configuration:\n{pformat(self.config.configData)}")
        else:
            getLogger().info(f"Starting Training Step")

        self.initializeGPU()
        self.createTrainingStep()
        self.loadTestingSteps()

        if len(self.testingSteps) == 0:
            errorMessage = f"Error, no test sequences to train on for training step."
            getLogger().warning(f"{errorMessage}")
            getLogger().info(f"==== Training Step Completed ====")
            return {"success": False, "exception": errorMessage}

        self.agent = DeepLearningAgent(config=self.config, whichGpu=self.gpu)
        self.agent.initialize()
        self.agent.load()

        self.createSubproccesses()

        for plugin in self.plugins:
            plugin.trainingStepStarted(self.trainingStep)

    except Exception:
        errorMessage = f"Error occurred during initiation of training! {traceback.format_exc()}"
        getLogger().warning(f"{errorMessage}")
        return {"success": False, "exception": errorMessage}

    try:
        self.threadExecutor = concurrent.futures.ThreadPoolExecutor(
            max_workers=self.config['training_max_batch_prep_thread_workers'] * self.config['training_batch_prep_subprocesses'])

        self.queueBatchesForPrecomputation()

        while self.trainingStep.numberOfIterationsCompleted < self.config['iterations_per_training_step']:
            loopStart = datetime.now()

            self.updateBatchPrepStarvedState()
            batches = self.fetchBatchesForIteration()

            success = self.learnFromBatches(batches)
            if not success:
                # Learning failed for this batch set; stop iterating and fall through to the
                # save/shutdown code below with success left as False.
                break

            if self.trainingStep.numberOfIterationsCompleted % self.config['training_update_target_network_every'] == (self.config['training_update_target_network_every'] - 1):
                getLogger().info(f"Updating the target network weights to the current primary network weights.")
                self.agent.updateTargetNetwork()

            self.trainingStep.numberOfIterationsCompleted += 1

            if self.trainingStep.numberOfIterationsCompleted % self.config['print_loss_iterations'] == (self.config['print_loss_iterations'] - 1):
                # Progress is only printed when no GPU is assigned or this is GPU 0.
                if self.gpu is None or self.gpu == 0:
                    getLogger().info(f"Completed {self.trainingStep.numberOfIterationsCompleted + 1} batches. Overall average time per batch: {numpy.average(self.loopTimes[-25:]):.3f}. Core learning time: {numpy.average(self.coreLearningTimes[-25:]):.3f}")
                    self.printMovingAverageLosses()
                    if self.config['print_cache_hit_rate']:
                        getLogger().info(f"Batch cache hit rate {100 * numpy.mean(self.recentCacheHits[-self.config['print_cache_hit_rate_moving_average_length']:]):.0f}%")

            if self.trainingStep.numberOfIterationsCompleted % self.config['iterations_between_db_saves'] == (self.config['iterations_between_db_saves'] - 1):
                # The periodic save is likewise limited to the no-GPU / GPU 0 case.
                if self.gpu is None or self.gpu == 0:
                    self.trainingStep.saveToDisk(self.config)

            for plugin in self.plugins:
                plugin.iterationCompleted(self.trainingStep)

            loopEnd = datetime.now()
            self.loopTimes.append((loopEnd - loopStart).total_seconds())

        getLogger().info(f"Finished the core training loop. Saving the training step {self.trainingStep.id}")
        self.trainingStep.endTime = datetime.now()

        # The loop can end with zero completed iterations (learnFromBatches failing on the very
        # first batch, or no iterations configured). Dividing by zero here would raise and skip
        # the executor shutdown, agent save and subprocess join below, so guard the average.
        iterationsCompleted = self.trainingStep.numberOfIterationsCompleted
        elapsedSeconds = (self.trainingStep.endTime - self.trainingStep.startTime).total_seconds()
        if iterationsCompleted > 0:
            self.trainingStep.averageTimePerIteration = elapsedSeconds / iterationsCompleted
        else:
            self.trainingStep.averageTimePerIteration = 0.0

        self.trainingStep.averageLoss = float(numpy.mean(self.trainingStep.totalLosses))
        self.trainingStep.status = "completed"

        for plugin in self.plugins:
            plugin.trainingStepFinished(self.trainingStep)

        self.trainingStep.saveToDisk(self.config)

        self.threadExecutor.shutdown(wait=True)

        self.saveAgent()
        self.shutdownAndJoinSubProcesses()

    except Exception:
        getLogger().error(f"Error occurred while learning sequence!\n{traceback.format_exc()}")
        success = False
        exception = traceback.format_exc()
    finally:
        # Always clear out and remove the batch directory, then drop the agent reference.
        files = os.listdir(self.batchDirectory)
        for file in files:
            os.unlink(os.path.join(self.batchDirectory, file))
        shutil.rmtree(self.batchDirectory)

        del self.agent

    # This print statement will trigger the parent manager process to kill this process.
    getLogger().info(f"==== Training Step Completed ====")
    returnData = {"trainingStepId": str(self.trainingStep.id), "success": success}
    if exception is not None:
        returnData['exception'] = exception

    return returnData