Beispiel #1
0
 def clean_oltp(self):
     """
     Run the OLTPBench git-clean command.

     When the command does not report ErrorCode.SUCCESS, the contents of
     the returned stdout/stderr streams are logged and the interpreter
     exits with the command's return code.
     """
     exit_code, out_pipe, err_pipe = run_command(
         constants.OLTPBENCH_GIT_CLEAN_COMMAND,
         "Error: unable to clean OLTP repo")
     if exit_code == ErrorCode.SUCCESS:
         return
     LOG.info(out_pipe.read())
     LOG.error(err_pipe.read())
     sys.exit(exit_code)
Beispiel #2
0
def kill_processes_listening_on_db_port(db_port):
    """Send SIGKILL to every PID that check_port yields for db_port, logging each one first."""
    template = "Killing existing server instance listening on port {} [PID={}]"
    for listener_pid in check_port(db_port):
        LOG.info(template.format(db_port, listener_pid))
        os.kill(listener_pid, signal.SIGKILL)
Beispiel #3
0
 def print_db_logs(self):
     """
     Emit whatever output remains on the DB process, framed by a start and
     an end banner in the log. The actual dumping is delegated to
     print_pipe on self.db_instance.db_process.
     """
     opening_banner = "************ DB Logs Start ************"
     closing_banner = "************* DB Logs End *************"
     LOG.info(opening_banner)
     print_pipe(self.db_instance.db_process)
     LOG.info(closing_banner)
    def run_benchmarks(self, enable_perf):
        """ Run every benchmark listed in self.config.benchmarks.

        Each benchmark is executed through self.run_single_benchmark; a
        truthy return value from it counts as a failure.

        :param enable_perf: forwarded unchanged to run_single_benchmark
        :return: 0 if all benchmarks succeed (or if none are configured, in
            which case an error is logged); otherwise the return value of
            the last benchmark that failed
        """
        benchmarks = self.config.benchmarks
        total = len(benchmarks)
        if not total:
            # Adjacent literals instead of a backslash continuation, which
            # would embed the source indentation in the logged message.
            LOG.error("Invalid benchmarks were specified to execute. "
                      "Try not specifying a benchmark and it will execute all.")
            return 0

        ret_val = 0
        benchmark_fail_count = 0

        # Count from 1 so the progress indicator reads [1/N] .. [N/N]
        for benchmark_count, bench_name in enumerate(benchmarks, start=1):
            LOG.info("Running '{}' with {} threads [{}/{}]".format(
                bench_name, self.config.num_threads, benchmark_count, total))
            benchmark_ret_val = self.run_single_benchmark(
                bench_name, enable_perf)
            if benchmark_ret_val:
                # Remember the most recent failure code
                ret_val = benchmark_ret_val
                benchmark_fail_count += 1

        LOG.info("{PASSED}/{TOTAL} benchmarks passed".format(
            PASSED=total - benchmark_fail_count, TOTAL=total))

        return ret_val
Beispiel #5
0
def run_command(command,
                error_msg="",
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                cwd=None,
                printable=True,
                silent_start=False):
    """
    General purpose wrapper for running a subprocess.

    :param command: command line string; split into argv with shlex.split
    :param error_msg: logged at error level when the command exits with a
        non-zero return code (ignored when empty)
    :param stdout: passed to subprocess.Popen as its stdout argument
    :param stderr: passed to subprocess.Popen as its stderr argument
    :param cwd: passed to subprocess.Popen as its working directory
    :param printable: when True and stdout is subprocess.PIPE, stdout lines
        are logged at info level while the process is running
    :param silent_start: when True, the command itself is not logged
    :return: tuple (return code, p.stdout, p.stderr). Output that was not
        consumed while streaming is still readable from the returned streams.
    """
    if not silent_start:
        LOG.info(f'Running subproccess: {command}')
    p = subprocess.Popen(shlex.split(command),
                         stdout=stdout,
                         stderr=stderr,
                         cwd=cwd)

    if printable and stdout == subprocess.PIPE:
        # readline() blocks until a line or EOF arrives, so this loop does
        # not spin while the child is quiet.
        while p.poll() is None:
            out = p.stdout.readline()
            if out:
                LOG.info(out.decode("utf-8", errors="replace").rstrip("\n"))
        rc = p.returncode
    else:
        # Nothing to stream: block until exit instead of busy-polling.
        # NOTE: as before, a child that fills an unread pipe will stall here.
        rc = p.wait()

    if rc != 0 and error_msg:
        LOG.error(error_msg)
    return rc, p.stdout, p.stderr
Beispiel #6
0
    def run_db(self):
        """ Start the DB server, retrying up to constants.DB_START_ATTEMPTS times.

        Every attempt kills the PIDs that check_port reports for
        self.db_port, launches self.db_path with stdout/stderr redirected to
        self.db_output_file, and then calls self.wait_for_db(). If that
        raises, the DB is stopped and the output file is logged before the
        next attempt; the exception of the final attempt is re-raised.
        """
        # Allow ourselves to try to restart the DBMS multiple times
        for attempt in range(constants.DB_START_ATTEMPTS):
            # Kill any other processes that are listening on our target port
            for other_pid in check_port(self.db_port):
                LOG.info(
                    "Killing existing server instance listening on port {} [PID={}]"
                    .format(self.db_port, other_pid))
                os.kill(other_pid, signal.SIGKILL)

            self.db_output_fd = open(self.db_output_file, "w+")
            self.db_process = subprocess.Popen(self.db_path,
                                               stdout=self.db_output_fd,
                                               stderr=self.db_output_fd)
            try:
                self.wait_for_db()
                break
            except Exception:
                # Catch Exception rather than everything, so Ctrl-C and
                # SystemExit are not swallowed and turned into a retry.
                self.stop_db()
                LOG.error("+" * 100)
                LOG.error("DATABASE OUTPUT")
                self.print_output(self.db_output_file)
                if attempt + 1 == constants.DB_START_ATTEMPTS:
                    raise
                traceback.print_exc(file=sys.stdout)
Beispiel #7
0
 def download_oltp(self):
     """
     Run the OLTPBench git command (constants.OLTPBENCH_GIT_COMMAND).

     On any result other than ErrorCode.SUCCESS the returned stdout/stderr
     streams are logged and the interpreter exits with the return code.
     """
     exit_code, out_pipe, err_pipe = run_command(
         constants.OLTPBENCH_GIT_COMMAND,
         "Error: unable to git clone OLTP source code")
     if exit_code == ErrorCode.SUCCESS:
         return
     LOG.info(out_pipe.read())
     LOG.error(err_pipe.read())
     sys.exit(exit_code)
Beispiel #8
0
 def print_output(self, filename):
     """ Log every line of the file at `filename` at info level.

     Leading and trailing whitespace is stripped from each line.

     :param filename: path of the file to read
     """
     # The context manager closes the file even if logging raises
     with open(filename) as fd:
         for line in fd:
             LOG.info(line.strip())
Beispiel #9
0
 def build_oltp(self):
     """
     Run each command in constants.OLTPBENCH_ANT_COMMANDS in order.

     The first command that does not report ErrorCode.SUCCESS has its
     stdout/stderr streams logged and terminates the interpreter with that
     command's return code.
     """
     for ant_command in constants.OLTPBENCH_ANT_COMMANDS:
         failure_text = "Error: unable to run \"{}\"".format(ant_command)
         exit_code, out_pipe, err_pipe = run_command(ant_command,
                                                     failure_text)
         if exit_code == ErrorCode.SUCCESS:
             continue
         LOG.info(out_pipe.read())
         LOG.error(err_pipe.read())
         sys.exit(exit_code)
Beispiel #10
0
def print_file(filename):
    """
    Log the contents of `filename` line by line at info level, with each
    line stripped of surrounding whitespace. A missing file is reported at
    error level instead of raising.
    """
    try:
        with open(filename) as handle:
            stripped_lines = [raw.strip() for raw in handle.readlines()]
        for entry in stripped_lines:
            LOG.info(entry)
    except FileNotFoundError:
        LOG.error("file not exists: '{}'".format(filename))
Beispiel #11
0
def start_db(db_path, db_output_file):
    """
    Start the DB process and redirect its stdout and stderr to
    db_output_file.

    :param db_path: command line for the DB; split into argv with shlex.split
    :param db_output_file: path of the file (opened in "w+" mode) that
        receives the process's stdout and stderr
    :return: tuple (db_output_fd, db_process): the open output file object
        and the subprocess.Popen object
    :raises: whatever open() or subprocess.Popen raise; the output file is
        closed before the exception propagates
    """
    db_output_fd = open(db_output_file, "w+")
    try:
        LOG.info("Server start: {PATH}".format(PATH=db_path))
        db_process = subprocess.Popen(shlex.split(db_path),
                                      stdout=db_output_fd,
                                      stderr=db_output_fd)
    except Exception:
        # Do not leak the file handle when the process cannot be launched
        db_output_fd.close()
        raise
    return db_output_fd, db_process
Beispiel #12
0
def handle_db_connection_status(is_db_running, attempt_number, db_pid):
    """
    Report the outcome of a DB connection attempt.

    When is_db_running is truthy, the elapsed time (attempt_number times
    constants.DB_CONNECT_SLEEP) is logged. Otherwise the failure is logged,
    check_db_process_exists(db_pid) is called, and a RuntimeError is raised.
    """
    if is_db_running:
        elapsed = attempt_number * constants.DB_CONNECT_SLEEP
        LOG.info("Connected to server in {} seconds [PID={}]".format(
            elapsed, db_pid))
        return

    failure = "Failed to connect to DB server [Attempt #{ATTEMPT}/{TOTAL_ATTEMPTS}]"
    LOG.error(
        failure.format(ATTEMPT=attempt_number,
                       TOTAL_ATTEMPTS=constants.DB_CONNECT_ATTEMPTS))
    check_db_process_exists(db_pid)
    raise RuntimeError('Unable to connect to DBMS.')
Beispiel #13
0
def print_pipe(p):
    """ Log whatever output remains in a subprocess's pipes.

    Calls p.communicate() and logs the returned stdout line by line at info
    level and the returned stderr line by line at error level.

    :param p: object whose communicate() returns a (stdout, stderr) pair of
        bytes (or None), e.g. a subprocess.Popen created with pipes
    """
    try:
        stdout, stderr = p.communicate()
    except ValueError:
        # This is a dirty workaround
        LOG.error("Error in subprocess communicate")
        LOG.error(
            "Known issue in CPython https://bugs.python.org/issue35182. Please upgrade the Python version."
        )
        return

    # Decoding happens outside the try block and with errors="replace":
    # UnicodeDecodeError is a ValueError and would otherwise be misreported
    # as the CPython issue above.
    if stdout:
        for line in stdout.decode("utf-8",
                                  errors="replace").rstrip("\n").split("\n"):
            LOG.info(line)
    if stderr:
        # stderr lines come from the stderr buffer, not from stdout
        for line in stderr.decode("utf-8",
                                  errors="replace").rstrip("\n").split("\n"):
            LOG.error(line)
Beispiel #14
0
    def _load_data(self) -> np.ndarray:
        """
        Read the CSV at self._query_trace_file into a numpy array.

        Every CSV record contributes one row [query_id, timestamp], both
        converted with int(). The values are read from the columns named
        'query_id' and ' timestamp' (the latter with a leading space).
        :return: numpy array built from those rows
        :raises ValueError: if the file yields no records
        """
        LOG.info(f"Loading data from {self._query_trace_file}")
        with open(self._query_trace_file, newline='') as trace:
            rows = [[int(record['query_id']),
                     int(record[' timestamp'])]
                    for record in csv.DictReader(trace)]

        data = np.array(rows)
        if len(data) == 0:
            raise ValueError("Empty trace file")
        return data
Beispiel #15
0
 def create_and_load_db(self):
     """
     Build and run the OLTPBench command that creates and loads the
     database, using this object's xml_config, benchmark, db_create and
     db_load values. The command runs in self.test_command_cwd.

     Raises RuntimeError (after logging the command's stdout/stderr
     streams) when the command does not report ErrorCode.SUCCESS.
     """
     template = "{BIN} -c {XML} -b {BENCHMARK} --create={CREATE} --load={LOAD}"
     cmd = template.format(BIN=constants.OLTPBENCH_DEFAULT_BIN,
                           XML=self.xml_config,
                           BENCHMARK=self.benchmark,
                           CREATE=self.db_create,
                           LOAD=self.db_load)
     error_msg = "Error: unable to create and load the database"
     exit_code, out_pipe, err_pipe = run_command(
         cmd, error_msg=error_msg, cwd=self.test_command_cwd)
     if exit_code == ErrorCode.SUCCESS:
         return
     LOG.info(out_pipe.read())
     LOG.error(err_pipe.read())
     raise RuntimeError(error_msg)
Beispiel #16
0
    def eval(self, cid: int, model: ForecastModel) -> None:
        """
        Score a fitted model against the labelled test sequences of a cluster
        and log the result.
        :param cid: Cluster id passed to self._cluster_seqs
        :param model: Model whose predict() is called on every sequence
        """
        samples = self._cluster_seqs(cid, test_mode=True, with_label=True)
        # One (prediction, ground truth) pair per labelled sequence
        pairs = [(model.predict(seq), label.item()) for seq, label in samples]
        predictions = np.array([predicted for predicted, _ in pairs])
        ground_truth = np.array([truth for _, truth in pairs])

        # FIXME:
        # simple L2 norm for comparing the prediction and results
        l2norm = np.linalg.norm(predictions - ground_truth)
        LOG.info(
            f"[{model.name}] has L2 norm(prediction, ground truth) = {l2norm}")
Beispiel #17
0
    def wait_for_db(self):
        """ Wait for the db server to come up.

        Checks that the DB process PID exists, sleeps for
        constants.DB_START_WAIT, then tries up to
        constants.DB_CONNECT_ATTEMPTS times to open a TCP connection to
        (self.db_host, self.db_port), sleeping constants.DB_CONNECT_SLEEP
        after each failed attempt.

        :raises RuntimeError: if the PID cannot be found up front, or if no
            connection attempt succeeds
        :raises ValueError: if self.db_port cannot be converted to int
        """
        db_pid = self.db_process.pid

        # Check that PID is running
        if not check_pid(db_pid):
            raise RuntimeError("Unable to find DBMS PID {}".format(db_pid))

        # Wait a bit before checking if we can connect to give the system time to setup
        time.sleep(constants.DB_START_WAIT)

        address = (self.db_host, int(self.db_port))

        # flag to check if the db is running
        is_db_running = False

        # Keep trying to connect to the DBMS until we run out of attempts or we succeed
        for i in range(constants.DB_CONNECT_ATTEMPTS):
            try:
                # The context manager closes the socket on success and on
                # failure, so failed attempts do not leak descriptors.
                with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                    s.connect(address)
            except OSError:
                # Only network-level failures are retried; report every
                # 20th one to keep the log readable.
                if i > 0 and i % 20 == 0:
                    LOG.error(
                        "Failed to connect to DB server [Attempt #{}/{}]".
                        format(i, constants.DB_CONNECT_ATTEMPTS))
                    traceback.print_exc(file=sys.stdout)
                time.sleep(constants.DB_CONNECT_SLEEP)
                continue

            LOG.info("Connected to server in {} seconds [PID={}]".format(
                i * constants.DB_CONNECT_SLEEP, db_pid))
            is_db_running = True
            break

        if not is_db_running:
            status = "RUNNING" if check_pid(db_pid) else "NOT RUNNING"
            raise RuntimeError(
                "Unable to connect to DBMS [PID={} / {}]".format(
                    db_pid, status))
Beispiel #18
0
    def stop_db(self, is_dry_run=False):
        """ Stop the DB server.

        Does nothing (apart from a debug log) when there is no DB process or
        when is_dry_run is truthy. If the process has already exited, its
        return code is logged, the DB logs are printed and a RuntimeError is
        raised. Otherwise the process is terminated and self.db_process is
        cleared.
        """
        process = self.db_process
        if not process or is_dry_run:
            LOG.debug('DB has already been stopped.')
            return

        # poll() refreshes returncode if the process has exited
        process.poll()
        exit_code = process.returncode
        if exit_code is None:
            # still (correctly) running, terminate it
            process.terminate()
            LOG.info("Stopped DB successfully")
            self.db_process = None
            return

        # DB already terminated
        msg = f'DB terminated with return code {exit_code}'
        LOG.info(msg)
        self.print_db_logs()
        raise RuntimeError(msg)
    def create_local_dirs(self):
        """
        Create a new, numbered result directory inside LOCAL_REPO_DIR and
        copy the benchmark results into it.

        Each time the microbenchmark script is run it creates another
        directory by incrementing the highest existing number. If the script
        is run 3 times, LOCAL_REPO_DIR will have directories named 001 002
        003. Directory names that are not purely numeric are ignored when
        determining the next number.

        Side effects: sets self.last_build to the new directory's name and
        calls copy_benchmark_result for every entry in
        self.config.benchmarks.
        """
        build_dirs = next(os.walk(LOCAL_REPO_DIR))[1]
        # Compare as integers: a string max() is lexicographic ('1000' sorts
        # before '999'), and int() would crash on a non-numeric name.
        build_numbers = [int(name) for name in build_dirs if name.isdecimal()]
        next_number = max(build_numbers, default=0) + 1
        next_build = os.path.join(LOCAL_REPO_DIR,
                                  "{:03}".format(next_number))
        LOG.info(
            "Creating new result directory in local data repository {}".format(
                next_build))
        os.mkdir(next_build)

        self.last_build = os.path.basename(next_build)

        for bench_name in self.config.benchmarks:
            copy_benchmark_result(bench_name, next_build)
Beispiel #20
0
def run_command(command,
                error_msg="",
                stdout=subprocess.PIPE,
                stderr=subprocess.PIPE,
                cwd=None):
    """
    General purpose wrapper for running a subprocess.

    :param command: command line string; split into argv with shlex.split
    :param error_msg: logged at error level when the command exits with a
        non-zero return code (ignored when empty)
    :param stdout: passed to subprocess.Popen as its stdout argument; when
        it is subprocess.PIPE, stdout lines are logged at info level while
        the process is running
    :param stderr: passed to subprocess.Popen as its stderr argument
    :param cwd: passed to subprocess.Popen as its working directory
    :return: tuple (return code, p.stdout, p.stderr). Output that was not
        consumed while streaming is still readable from the returned streams.
    """
    p = subprocess.Popen(shlex.split(command),
                         stdout=stdout,
                         stderr=stderr,
                         cwd=cwd)

    if stdout == subprocess.PIPE:
        # readline() blocks until a line or EOF arrives, so this loop does
        # not spin while the child is quiet.
        while p.poll() is None:
            out = p.stdout.readline()
            if out:
                LOG.info(out.decode("utf-8", errors="replace").rstrip("\n"))
        rc = p.returncode
    else:
        # Nothing to stream: block until exit instead of busy-polling
        rc = p.wait()

    if rc != 0 and error_msg:
        LOG.error(error_msg)
    return rc, p.stdout, p.stderr
Beispiel #21
0
    def stop_db(self):
        """ Stop the DB server and print its logs.

        Returns immediately when there is no DB process. If the process has
        already exited, its return code is logged, the DB logs are printed
        and a RuntimeError is raised. Otherwise the process is terminated,
        the DB logs are printed and self.db_process is cleared.
        """
        process = self.db_process
        if not process:
            return

        # poll() refreshes returncode if the process has exited
        process.poll()
        exit_code = process.returncode
        if exit_code is None:
            # still (correctly) running, terminate it
            process.terminate()
            LOG.info("DB stops normally")
            self.print_db_logs()
            self.db_process = None
            return

        # DB terminated already
        msg = "DB terminated with return code {}".format(exit_code)
        LOG.info("DB exited with return code {}".format(exit_code))
        self.print_db_logs()
        raise RuntimeError(msg)
Beispiel #22
0
    def run_benchmarks(self, enable_perf):
        """ Runs all the microbenchmarks in self.config.benchmarks.

        Parameters
        ----------
        enable_perf : bool
            Whether perf should be enabled for all the benchmarks. Forwarded
            unchanged to run_single_benchmark.

        Returns
        -------
        ret_val : int
            the return value for the last failed benchmark. If no benchmarks
            fail (or none are configured, in which case an error is logged)
            it will return 0.
        """
        benchmarks = self.config.benchmarks
        total = len(benchmarks)
        if not total:
            # Adjacent literals instead of a backslash continuation, which
            # would embed the source indentation in the logged message.
            LOG.error('Invalid benchmarks were specified to execute. '
                      'Try not specifying a benchmark and it will execute all.')
            return 0

        ret_val = 0
        benchmark_fail_count = 0

        # Count from 1 so the progress indicator reads [1/N] .. [N/N]
        for benchmark_count, bench_name in enumerate(benchmarks, start=1):
            LOG.info(
                f"Running '{bench_name}' with {self.config.num_threads} threads [{benchmark_count}/{total}]"
            )
            benchmark_ret_val = self.run_single_benchmark(
                bench_name, enable_perf)
            if benchmark_ret_val:
                # Remember the most recent failure code
                ret_val = benchmark_ret_val
                benchmark_fail_count += 1

        LOG.info("{PASSED}/{TOTAL} benchmarks passed".format(
            PASSED=total - benchmark_fail_count, TOTAL=total))

        return ret_val
Beispiel #23
0
    def run_db(self, is_dry_run=False):
        """ Start the DB server, retrying up to DB_START_ATTEMPTS times.

        The start command is built from self.build_path and the string that
        generate_server_args_str produces for self.server_args.

        :param is_dry_run: when truthy, only log the start command and return
            without launching anything
        :raises RuntimeError: if no attempt produces a stdout line for which
            has_db_started returns a truthy value
        """
        # Reference point for the elapsed times reported in the log / error
        attempt_to_start_time = time.perf_counter()
        server_args_str = generate_server_args_str(self.server_args)
        db_run_command = f'{self.build_path} {server_args_str}'
        if is_dry_run:
            LOG.info(f'Server start command: {db_run_command}')
            return
        # Allow ourselves to try to restart the DBMS multiple times
        for attempt in range(DB_START_ATTEMPTS):
            # Kill any other noisepage processes that are listening on our target port.
            # Any exception raised by run_kill_server propagates and ends run_db early.
            run_kill_server(self.db_port)

            # use memory buffer to hold db logs
            self.db_process = subprocess.Popen(shlex.split(db_run_command),
                                               stdout=subprocess.PIPE,
                                               stderr=subprocess.PIPE)
            LOG.info(
                f'Server start: {db_run_command} [PID={self.db_process.pid}]')

            if not run_check_pids(self.db_process.pid):
                LOG.info(
                    f'{self.db_process.pid} does not exist. Trying again.')
                # The DB process does not exist, try starting it again
                # NOTE(review): this path skips the backoff sleep below
                continue

            # Read stdout line by line until a line confirms the DB started
            while True:
                raw_db_log_line = self.db_process.stdout.readline()
                if not raw_db_log_line:
                    # Empty read: stdout reached EOF without a start
                    # confirmation, so give up on this attempt
                    break
                if has_db_started(raw_db_log_line, self.db_port,
                                  self.db_process.pid):
                    db_start_time = time.perf_counter()
                    LOG.info(
                        f'DB process is verified as running in {round(db_start_time - attempt_to_start_time,2)} sec'
                    )
                    return
            time.sleep(2**attempt)  # exponential backoff
        # Every attempt failed
        db_failed_to_start_time = time.perf_counter()
        raise RuntimeError(
            f'Failed to start DB after {DB_START_ATTEMPTS} attempts and {round(db_failed_to_start_time - attempt_to_start_time,2)} sec'
        )
Beispiel #24
0
    def _do_fit(self, train_seqs: List[Tuple[np.ndarray, np.ndarray]]) -> None:
        """
        Perform training on the time series trace data.

        Uses MSE loss and the Adam optimizer with learning rate self._lr for
        self._epochs epochs. The hidden cell state is reset before every
        sample. When there are no samples or no epochs, nothing is trained
        and a warning is logged.
        :param train_seqs: Training sequences of (seq, label)
        :return: None
        """
        epochs = self._epochs
        lr = self._lr

        # Training specifics
        loss_function = nn.MSELoss()
        optimizer = torch.optim.Adam(self.parameters(), lr=lr)
        LOG.info(f"Training with {len(train_seqs)} samples, {epochs} epochs:")

        # Without samples or epochs no loss is ever computed, and the
        # progress logging below would fail on an unbound `single_loss`.
        if len(train_seqs) == 0 or epochs <= 0:
            LOG.warning("[LSTM FIT]no training samples or epochs, skipping fit")
            return

        for i in range(epochs):
            for seq, labels in train_seqs:
                optimizer.zero_grad()

                # Fresh zeroed hidden state for every sample
                self._hidden_cell = (torch.zeros(1, 1,
                                                 self._hidden_layer_size),
                                     torch.zeros(1, 1,
                                                 self._hidden_layer_size))

                seq = torch.FloatTensor(seq).view(-1)
                labels = torch.FloatTensor(labels).view(-1)

                y_pred = self(seq)

                single_loss = loss_function(y_pred, labels)
                single_loss.backward()
                optimizer.step()

            # Report the loss of the epoch's last sample every 25 epochs
            if i % 25 == 0:
                LOG.info(
                    f'[LSTM FIT]epoch: {i+1:3} loss: {single_loss.item():10.8f}'
                )

        LOG.info(
            f'[LSTM FIT]epoch: {epochs:3} loss: {single_loss.item():10.10f}')
Beispiel #25
0
    def run_db(self):
        """ Launch the DB server, retrying up to constants.DB_START_ATTEMPTS times.

        An attempt succeeds when a line on the process's stdout ends with the
        expected "Listening on Unix domain socket" message for this port and
        PID. Every stdout line read on the way is logged. Raises RuntimeError
        when all attempts are used up.
        """
        for _ in range(constants.DB_START_ATTEMPTS):
            # Kill any other noisepage processes that are listening on our
            # target port; exceptions from run_kill_server end run_db early
            run_kill_server(self.db_port)

            # use memory buffer to hold db logs
            self.db_process = subprocess.Popen(shlex.split(self.db_path),
                                               stdout=subprocess.PIPE,
                                               stderr=subprocess.PIPE)
            LOG.info("Server start: {PATH} [PID={PID}]".format(
                PATH=self.db_path, PID=self.db_process.pid))

            if not run_check_pids(self.db_process.pid):
                # The DB process does not exist, try starting it again
                continue

            expected_suffix = "[info] Listening on Unix domain socket with port {PORT} [PID={PID}]".format(
                PORT=self.db_port, PID=self.db_process.pid)
            # iter() stops at the empty bytes object readline() returns on EOF
            for raw_line in iter(self.db_process.stdout.readline, b""):
                log_line = raw_line.decode("utf-8").rstrip("\n")
                LOG.info(log_line)
                if log_line.endswith(expected_suffix):
                    LOG.info("DB process is verified as running")
                    return

        raise RuntimeError("Failed to start DB after {} attempts".format(
            constants.DB_START_ATTEMPTS))
Beispiel #26
0
                        choices=PERFORMANCE_STORAGE_SERVICE_API.keys(),
                        help="Environment in which to store performance results")

    parser.add_argument("--publish-username",
                        type=str,
                        help="Performance Storage Service Username")

    parser.add_argument("--publish-password",
                        type=str,
                        help="Performance Storage Service password")

    args = parser.parse_args()

    if args.debug:
        LOG.setLevel(logging.DEBUG)

    # Get the BaseBinaryMetricsCollector subclasses imported from binary_metrics.binary_metrics_collectors
    # Effectively this adds each binary metric collector class into an array to be instantiated later.
    collectors = [obj for obj in BaseArtifactStatsCollector.__subclasses__()]
    exit_code, aggregated_metrics = collect_artifact_stats(collectors)

    if not exit_code:
        LOG.info(f'Artifact stats: {aggregated_metrics}')

        if args.publish_results != 'none':
            report_artifact_stats_result(args.publish_results, aggregated_metrics,
                                         args.publish_username, args.publish_password)

    logging.shutdown()
    sys.exit(exit_code)
Beispiel #27
0
                                eval_size=args.eval_size,
                                horizon_len=args.horizon_len)

        models = forecaster.train(models_kwargs)

        # Save the model
        if args.model_save_path:
            with open(args.model_save_path, "wb") as f:
                pickle.dump(models, f)
    else:
        # Do inference on a trained model
        with open(args.model_load_path, "rb") as f:
            models = pickle.load(f)

        forecaster = Forecaster(trace_file=args.test_file,
                                test_mode=True,
                                interval_us=INTERVAL_MICRO_SEC,
                                seq_len=args.seq_len,
                                eval_size=args.eval_size,
                                horizon_len=args.horizon_len)

        # FIXME:
        # Assuming all the queries in the current trace file are from
        # the same cluster for now
        query_pred = forecaster.predict(0, models[0][args.test_model])

        # TODO:
        # How are we consuming predictions?
        for qid, ts in query_pred.items():
            LOG.info(f"[Query: {qid}] pred={ts[:10]}")
Beispiel #28
0
def print_output(filename):
    """ Log each line of `filename` at info level, stripped of surrounding whitespace. """
    with open(filename) as source:
        content = source.readlines()
    for raw in content:
        LOG.info(raw.strip())
Beispiel #29
0
def check_db_process_exists(db_pid):
    """ Log the PID when check_pid(db_pid) is truthy; raise RuntimeError otherwise. """
    if check_pid(db_pid):
        LOG.info("DBMS running on PID {}".format(db_pid))
        return
    raise RuntimeError("Unable to find DBMS PID {}".format(db_pid))
Beispiel #30
0
 def print_output(self, filename):
     """
     Log the whole file at `filename` as a single info message: the text
     "Output:" followed by every line, each stripped of surrounding
     whitespace.
     """
     with open(filename, "r") as fd:
         stripped = [raw.strip() for raw in fd]
         LOG.info("Output:\n" + "\n".join(stripped))