def setUp(self):
    super().setUp()
    self.patch('app.util.fs.create_dir')
    self.patch('app.util.fs.async_delete')
    self.patch('os.makedirs')
    self.mock_slave_allocator = self.patch('app.master.cluster_master.SlaveAllocator').return_value
    self.mock_scheduler_pool = self.patch('app.master.cluster_master.BuildSchedulerPool').return_value

    # Two threads are run every time we start up the ClusterMaster. We redirect the calls to
    # `ThreadPoolExecutor.submit` through a mock proxy so we can capture events.
    self.thread_pool_executor = ThreadPoolExecutor(max_workers=2)
    self._thread_pool_executor_cls = self.patch('app.master.cluster_master.ThreadPoolExecutor')
    self._thread_pool_executor_cls.return_value.submit.side_effect = self.thread_pool_executor.submit

    Configuration['pagination_offset'] = self._PAGINATION_OFFSET
    Configuration['pagination_limit'] = self._PAGINATION_LIMIT
    Configuration['pagination_max_limit'] = self._PAGINATION_MAX_LIMIT
    Configuration['database_name'] = TEST_DB_NAME
    Configuration['database_url'] = TEST_DB_URL
    Connection.create(Configuration['database_url'])
    BuildStore._cached_builds_by_id.clear()
def setUp(self):
    super().setUp()
    self.patch('app.master.build.util.create_project_type').return_value = self._create_mock_project_type()

    Configuration['database_name'] = TEST_DB_NAME
    Configuration['database_url'] = TEST_DB_URL
    Connection.create(Configuration['database_url'])
    BuildStore._cached_builds_by_id.clear()
def __init__(self, verbose=False):
    """
    :param verbose: If true, output from the master and slave processes is allowed to pass
        through to stdout.
    :type verbose: bool
    """
    self._verbose = verbose
    self._logger = log.get_logger(__name__)

    self.master = None
    self.slaves = []

    self._master_eventlog_name = None
    self._slave_eventlog_names = []
    self._next_slave_port = self._SLAVE_START_PORT

    self._clusterrunner_repo_dir = dirname(dirname(dirname(dirname(realpath(__file__)))))
    self._app_executable = join(self._clusterrunner_repo_dir, 'main.py')

    self._master_app_base_dir = None
    self._slaves_app_base_dirs = []

    Connection.create(TEST_DB_URL)
def clean_up(cls):
    """
    Save current state of all cached builds.
    """
    with Connection.get() as session:
        cls._logger.notice('Executing clean up process.')
        for build_id in cls._cached_builds_by_id:
            build = cls._cached_builds_by_id[build_id]
            # As master shuts down, mark any uncompleted builds as failed
            if build._status() != BuildState.FINISHED:
                build.mark_failed('Master service was shut down before this build could complete.')
            build.save()
def create_app():
    """Construct the core application."""
    app = application.getApp(os.getenv('APP_ENV'))

    # Imports
    # from . import routes

    # Create the database connection for our models
    with app.app_context():
        application.db = Connection(MYSQL_HOST, MYSQL_PORT, MYSQL_USER, MYSQL_PASS)
        application.db.connect()

    return app
def __init__(self):
    self._logger = get_logger(__name__)
    self._master_results_path = Configuration['results_directory']
    self._all_slaves_by_url = {}
    self._scheduler_pool = BuildSchedulerPool()
    self._build_request_handler = BuildRequestHandler(self._scheduler_pool)
    self._build_request_handler.start()
    self._slave_allocator = SlaveAllocator(self._scheduler_pool)
    self._slave_allocator.start()

    # Initialize the database connection before we initialize a BuildStore
    Connection.create(Configuration['database_url'])
    UnhandledExceptionHandler.singleton().add_teardown_callback(BuildStore.clean_up)

    # The best practice for determining the number of threads to use is the number of threads
    # per core multiplied by the number of physical cores. For example, with 2 sockets,
    # 10 cores per socket, and 2 threads per core, the maximum would be 40.
    #
    # Currently we use threads for incrementing/decrementing slave executor counts (lock
    # acquisition) and tearing down slaves (network I/O). 32 threads should be plenty for
    # these tasks. Under heavy load the bottleneck will be the number of executors, not the
    # time it takes to lock/unlock the executor counts or the number of teardown requests.
    # Tweak the number to find the sweet spot if you feel this is the case.
    self._thread_pool_executor = ThreadPoolExecutor(max_workers=32)

    # Asynchronously delete (but immediately rename) all old builds when the master starts.
    # Remove this if/when build numbers are unique across master starts/stops.
    # TODO: We can remove this code since we persist builds across master restarts
    # if os.path.exists(self._master_results_path):
    #     fs.async_delete(self._master_results_path)
    # fs.create_dir(self._master_results_path)

    SlavesCollector.register_slaves_metrics_collector(lambda: self.all_slaves_by_id().values())
def execute(app):
    # cate_trans = CategoryTranslate(application.db)
    # rows = cate_trans.getCategories()
    # workbook = xlsxwriter.Workbook('categories.freeze.xlsx')
    # worksheet = workbook.add_worksheet()
    # worksheet.write('A1', 'Categories')
    #
    # worksheet.freeze_panes(1, 0)
    # worksheet.freeze_panes(1, 1)
    # i = 3
    # for row in rows:
    #     worksheet.write('B'+str(i), row[3])
    #     i += 1
    # workbook.close()

    # Begin JPM
    tunnelJPM = SSHTunnel()
    tunnelJPM.forwarder(
        {
            "host": os.getenv('THPM_SSH_HOST'),
            "port": os.getenv('THPM_SSH_PORT')
        },
        ssh_username=os.getenv('THPM_SSH_USER'),
        ssh_password=os.getenv('THPM_SSH_PASSWORD'),
        remote={
            "bind_address": os.getenv('LOCAL'),
            "bind_port": os.getenv('THPM_MYSQL_PORT')
        })
    try:
        tunnelJPM.start()
        with app.app_context():
            application.db = Connection(
                host=os.getenv('LOCAL'),
                port=tunnelJPM.get_local_bind_port(),
                user=os.getenv('THPM_MYSQL_USER'),
                password=os.getenv('THPM_MYSQL_PASSWORD'),
                dbname='')
            try:
                application.db.connect()
                # cate_trans = CategoryTranslate(application.db)
                # rows = cate_trans.getCategories()
                egProduct = EgProduct(application.db)
                # 1: rows = egProduct.getTotalProductByType()
                # 2: rows = egProduct.getTotalBrand()
                # 3: rows = egProduct.getProductTop10Brand()
                # 4: rows = egProduct.getTotalMoto()
                # 5: rows = egProduct.getProductTop10Moto()
                # 6: rows = egProduct.getTotalCategories()
                # 7: rows = egProduct.getTotalProductTop10Cate()
                # 8: rows = egProduct.getModelIncorrect()
                # 9: rows = egProduct.getBrandincorrect()
                # 10: rows = egProduct.getInsertUpdateModel()
                rows = egProduct.compareShippingPoint()
                print(rows)
            finally:
                application.db.closed()
    except Exception as e:
        print("Error connecting SSH", e)
    finally:
        tunnelJPM.stop()
def __init__(self):
    self.connection = Connection()
    self.query = QSqlQuery(self.connection.connection)
def load_from_db(cls, build_id):
    """
    Given a build_id, fetch all the stored information from the database to reconstruct
    a Build object to represent that build.
    :param build_id: The id of the build to recreate.
    """
    with Connection.get() as session:
        build_schema = session.query(BuildSchema).filter(BuildSchema.build_id == build_id).first()
        failed_artifact_directories_schema = session.query(FailedArtifactDirectoriesSchema) \
            .filter(FailedArtifactDirectoriesSchema.build_id == build_id) \
            .all()
        failed_subjob_atom_pairs_schema = session.query(FailedSubjobAtomPairsSchema) \
            .filter(FailedSubjobAtomPairsSchema.build_id == build_id) \
            .all()
        atoms_schema = session.query(AtomsSchema).filter(AtomsSchema.build_id == build_id).all()
        subjobs_schema = session.query(SubjobsSchema).filter(SubjobsSchema.build_id == build_id).all()

        # If the query returns None, then we know the build wasn't found in the database
        if not build_schema:
            return None

        build_parameters = json.loads(build_schema.build_parameters)

        # Generate a BuildRequest object with our query response
        build_request = BuildRequest(build_parameters)

        # Create the initial Build object; we will be altering its state as we get more data
        build = Build(build_request)
        build._build_id = build_id

        # Manually generate the ProjectType object for the build and create a `job_config`,
        # since this is usually done in `prepare()`
        build.generate_project_type()
        job_config = build.project_type.job_config()

        # Manually update the build data
        build._artifacts_tar_file = build_schema.artifacts_tar_file
        build._artifacts_zip_file = build_schema.artifacts_zip_file
        build._error_message = build_schema.error_message
        build._postbuild_tasks_are_finished = bool(int(build_schema.postbuild_tasks_are_finished))
        build.setup_failures = build_schema.setup_failures
        build._timing_file_path = build_schema.timing_file_path

        # Manually set the state machine timestamps
        build._state_machine._transition_timestamps = {
            BuildState.QUEUED: build_schema.queued_ts,
            BuildState.FINISHED: build_schema.finished_ts,
            BuildState.PREPARED: build_schema.prepared_ts,
            BuildState.PREPARING: build_schema.preparing_ts,
            BuildState.ERROR: build_schema.error_ts,
            BuildState.CANCELED: build_schema.canceled_ts,
            BuildState.BUILDING: build_schema.building_ts
        }
        build._state_machine._fsm.current = BuildState[build_schema.state]

        build_artifact = BuildArtifact(build_schema.build_artifact_dir)

        directories = []
        for directory in failed_artifact_directories_schema:
            directories.append(directory.failed_artifact_directory)
        build_artifact._failed_artifact_directories = directories

        pairs = []
        for pair in failed_subjob_atom_pairs_schema:
            pairs.append((pair.subjob_id, pair.atom_id))
        build_artifact._q_failed_subjob_atom_pairs = pairs

        build._build_artifact = build_artifact

        atoms_by_subjob_id = {}
        for atom in atoms_schema:
            atoms_by_subjob_id.setdefault(atom.subjob_id, [])
            atoms_by_subjob_id[atom.subjob_id].append(Atom(
                atom.command_string,
                atom.expected_time,
                atom.actual_time,
                atom.exit_code,
                atom.state,
                atom.atom_id,
                atom.subjob_id
            ))

        subjobs = OrderedDict()
        for subjob in subjobs_schema:
            atoms = atoms_by_subjob_id[subjob.subjob_id]
            # Add atoms after the subjob is created so we don't alter their state on initialization
            subjob_to_add = Subjob(build_id, subjob.subjob_id, build.project_type, job_config, [])
            subjob_to_add._atoms = atoms
            subjob_to_add.completed = subjob.completed
            subjobs[subjob.subjob_id] = subjob_to_add
        build._all_subjobs_by_id = subjobs

        # Place subjobs into the correct queues within the build
        build._unstarted_subjobs = Queue(maxsize=len(subjobs))
        build._finished_subjobs = Queue(maxsize=len(subjobs))
        for _, subjob in subjobs.items():
            if subjob.completed:
                build._finished_subjobs.put(subjob)
            else:
                build._unstarted_subjobs.put(subjob)

        return build
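# --- Illustrative usage (not part of the original source) ---------------------------------
# A minimal sketch of the persistence round trip implied by load_from_db() above and
# _store_build() further below. It assumes the database connection has already been
# initialized via Connection.create(Configuration['database_url']) (as in
# ClusterMaster.__init__) and that both methods are classmethods on the same BuildStore
# class; the wrapper function name here is hypothetical.
def _round_trip_example(build):
    build_id = BuildStore._store_build(build)     # commits the rows and returns the generated id
    restored = BuildStore.load_from_db(build_id)  # reconstructs the Build, or None if the id is unknown
    return restored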
def save(self):
    """Serialize the Build object and update all of its parts in the database."""
    with Connection.get() as session:
        build_schema = session.query(BuildSchema).filter(BuildSchema.build_id == self._build_id).first()
        failed_artifact_directories_schema = session.query(FailedArtifactDirectoriesSchema) \
            .filter(FailedArtifactDirectoriesSchema.build_id == self._build_id) \
            .all()
        failed_subjob_atom_pairs_schema = session.query(FailedSubjobAtomPairsSchema) \
            .filter(FailedSubjobAtomPairsSchema.build_id == self._build_id) \
            .all()
        atoms_schema = session.query(AtomsSchema).filter(AtomsSchema.build_id == self._build_id).all()
        subjobs_schema = session.query(SubjobsSchema).filter(SubjobsSchema.build_id == self._build_id).all()

        # If this wasn't found, it's safe to assume that the build doesn't exist within the database
        if build_schema is None:
            raise ItemNotFoundError('Unable to find build (id: {}) in database.'.format(self._build_id))

        build_schema.artifacts_tar_file = self._artifacts_tar_file
        build_schema.artifacts_zip_file = self._artifacts_zip_file
        build_schema.error_message = self._error_message
        build_schema.postbuild_tasks_are_finished = self._postbuild_tasks_are_finished
        build_schema.setup_failures = self.setup_failures
        build_schema.timing_file_path = self._timing_file_path

        build_artifact_dir = None
        if self._build_artifact is not None:
            build_artifact_dir = self._build_artifact.build_artifact_dir
        build_schema.build_artifact_dir = build_artifact_dir

        if self._build_artifact is not None:
            # Clear all old failed artifact directories
            session.query(FailedArtifactDirectoriesSchema) \
                .filter(FailedArtifactDirectoriesSchema.build_id == self._build_id) \
                .delete()

            # Commit changes so we don't delete the newly added rows later
            session.commit()

            # Add all the updated versions of the directories
            for directory in self._build_artifact._get_failed_artifact_directories():
                failed_artifact_directory = FailedArtifactDirectoriesSchema(
                    build_id=self._build_id,
                    failed_artifact_directory=directory
                )
                session.add(failed_artifact_directory)

        if self._build_artifact is not None:
            # Clear all old failed subjob/atom pairs
            session.query(FailedSubjobAtomPairsSchema) \
                .filter(FailedSubjobAtomPairsSchema.build_id == self._build_id) \
                .delete()

            # Commit changes so we don't delete the newly added rows later
            session.commit()

            # Add all the updated versions of the data
            for subjob_id, atom_id in self._build_artifact.get_failed_subjob_and_atom_ids():
                failed_subjob_and_atom_ids = FailedSubjobAtomPairsSchema(
                    build_id=self._build_id,
                    subjob_id=subjob_id,
                    atom_id=atom_id
                )
                session.add(failed_subjob_and_atom_ids)

        build_schema.build_parameters = json.dumps(self._build_request.build_parameters())

        fsm_timestamps = {state.lower(): timestamp
                          for state, timestamp in self._state_machine.transition_timestamps.items()}
        build_schema.state = self._status()
        build_schema.queued_ts = fsm_timestamps['queued']
        build_schema.finished_ts = fsm_timestamps['finished']
        build_schema.prepared_ts = fsm_timestamps['prepared']
        build_schema.preparing_ts = fsm_timestamps['preparing']
        build_schema.error_ts = fsm_timestamps['error']
        build_schema.canceled_ts = fsm_timestamps['canceled']
        build_schema.building_ts = fsm_timestamps['building']

        # Subjobs
        # Clear all old Subjobs and Atoms
        session.query(SubjobsSchema) \
            .filter(SubjobsSchema.build_id == self._build_id) \
            .delete()
        session.query(AtomsSchema) \
            .filter(AtomsSchema.build_id == self._build_id) \
            .delete()

        # Commit changes so we don't delete the newly added rows later
        session.commit()

        # Add all the updated versions of Subjobs and Atoms
        subjobs = self._all_subjobs_by_id
        for subjob_id in subjobs:
            subjob = self._all_subjobs_by_id[subjob_id]
            subjob_schema = SubjobsSchema(
                subjob_id=subjob_id,
                build_id=self._build_id,
                completed=subjob.completed
            )
            session.add(subjob_schema)

            # Atoms
            for atom in subjob._atoms:
                atom_schema = AtomsSchema(
                    atom_id=atom.id,
                    build_id=self._build_id,
                    subjob_id=subjob_id,
                    command_string=atom.command_string,
                    expected_time=atom.expected_time,
                    actual_time=atom.actual_time,
                    exit_code=atom.exit_code,
                    state=atom.state
                )
                session.add(atom_schema)
def _store_build(cls, build: Build) -> int:
    """
    Serialize a Build object and commit all of the parts to the database, and then return
    the build_id that was assigned after committing.
    :param build: The build to store into the database.
    """
    with Connection.get() as session:
        build_params = build._build_request._build_parameters
        fsm_timestamps = {state.lower(): timestamp
                          for state, timestamp in build._state_machine.transition_timestamps.items()}
        build_artifact_dir = None
        if build._build_artifact is not None:
            build_artifact_dir = build._build_artifact.build_artifact_dir

        build_schema = BuildSchema(
            artifacts_tar_file=build._artifacts_tar_file,
            artifacts_zip_file=build._artifacts_zip_file,
            error_message=build._error_message,
            postbuild_tasks_are_finished=bool(build._postbuild_tasks_are_finished),
            setup_failures=build.setup_failures,
            timing_file_path=build._timing_file_path,
            build_artifact_dir=build_artifact_dir,
            build_parameters=json.dumps(build._build_request.build_parameters()),
            state=build._status(),
            queued_ts=fsm_timestamps['queued'],
            finished_ts=fsm_timestamps['finished'],
            prepared_ts=fsm_timestamps['prepared'],
            preparing_ts=fsm_timestamps['preparing'],
            error_ts=fsm_timestamps['error'],
            canceled_ts=fsm_timestamps['canceled'],
            building_ts=fsm_timestamps['building']
        )
        session.add(build_schema)

        # Commit this first to get the build_id created by the database.
        # We use this build_id to store the other parts of a Build object.
        session.commit()
        build_id = build_schema.build_id

        # FailedArtifactDirectories
        if build._build_artifact is not None:
            for directory in build._build_artifact._get_failed_artifact_directories():
                failed_artifact_directories_schema = FailedArtifactDirectoriesSchema(
                    build_id=build_id,
                    failed_artifact_directory=directory
                )
                session.add(failed_artifact_directories_schema)

        # FailedSubjobAtomPairs
        if build._build_artifact is not None:
            for subjob_id, atom_id in build._build_artifact.get_failed_subjob_and_atom_ids():
                failed_subjob_atom_pairs_schema = FailedSubjobAtomPairsSchema(
                    build_id=build_id,
                    subjob_id=subjob_id,
                    atom_id=atom_id
                )
                session.add(failed_subjob_atom_pairs_schema)

        # Subjobs
        subjobs = build._all_subjobs_by_id
        for subjob_id in subjobs:
            subjob = build._all_subjobs_by_id[subjob_id]
            subjob_schema = SubjobsSchema(
                subjob_id=subjob_id,
                build_id=build_id,
                completed=subjob.completed
            )
            session.add(subjob_schema)

            # Atoms
            for atom in subjob._atoms:  # pylint: disable=protected-access
                atom_schema = AtomsSchema(
                    atom_id=atom.id,
                    build_id=build_id,
                    subjob_id=subjob_id,
                    command_string=atom.command_string,
                    expected_time=atom.expected_time,
                    actual_time=atom.actual_time,
                    exit_code=atom.exit_code,
                    state=atom.state,
                )
                session.add(atom_schema)

        return build_id
def count_all_builds(cls) -> int:
    """
    Return the total number of builds stored in the database.
    """
    with Connection.get() as session:
        return session.query(BuildSchema).count()
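# --- Illustrative sketch (not part of the original source) --------------------------------
# Shows how count_all_builds() might be combined with the pagination settings configured in
# the test setUp above to clamp a requested page. The helper name and its parameters are
# hypothetical; only the Configuration keys and count_all_builds() come from the code shown.
def _clamped_page(requested_offset=None, requested_limit=None):
    offset = Configuration['pagination_offset'] if requested_offset is None else requested_offset
    limit = Configuration['pagination_limit'] if requested_limit is None else requested_limit
    limit = min(limit, Configuration['pagination_max_limit'])
    # Never start a page beyond the number of builds actually stored.
    offset = min(offset, BuildStore.count_all_builds())
    return offset, limit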