def docker_check_impl() -> None:
    """Check the local Docker containers and report the outcome.

    Prints a green confirmation when ``check_local_docker_containers()``
    reports nothing; otherwise hands the reported issues to
    ``_print_issue_list_and_exit``.
    """
    detected = check_local_docker_containers()
    if detected:
        _print_issue_list_and_exit(detected, "The following issues were detected:")
        return

    click.secho("✔ No issues detected", fg="green")
def ingest_sample_data(path: Optional[str]) -> None:
    """Ingest sample data into a running DataHub instance."""
    # NOTE(review): the docstring above is presumably surfaced as CLI help
    # text, so it is kept verbatim; implementation notes live in comments.
    #
    # path: file to ingest. When None, the bootstrap MCE file is downloaded
    #   from GITHUB_BOOTSTRAP_MCES_URL into a temporary ``.json`` file (left
    #   on disk) and that file is ingested instead.
    # Always ends with ``sys.exit(...)`` using the value returned by the
    # pipeline's ``pretty_print_summary()``.

    if path is None:
        click.echo("Downloading sample data...")

        # Download the bootstrap MCE file from GitHub *before* creating the
        # temp file, so a failed download does not leave an empty orphan file
        # behind. The timeout (applied to connect and to each socket read)
        # keeps the CLI from hanging forever on a stalled connection.
        mce_json_download_response = requests.get(
            GITHUB_BOOTSTRAP_MCES_URL, timeout=30
        )
        mce_json_download_response.raise_for_status()

        # delete=False: the file must outlive this `with` block because the
        # ingestion pipeline below reads it by name.
        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as tmp_file:
            path = str(pathlib.Path(tmp_file.name))
            tmp_file.write(mce_json_download_response.content)
        click.echo(f"Downloaded to {path}")

    # Verify that docker is up.
    issues = check_local_docker_containers()
    if issues:
        # Presumably terminates the process (per the helper's name).
        _print_issue_list_and_exit(
            issues,
            header="Docker is not ready:",
            footer="Try running `datahub docker quickstart` first",
        )

    # Run ingestion.
    click.echo("Starting ingestion...")
    pipeline = Pipeline.create(
        {
            "source": {
                "type": "file",
                "config": {
                    "filename": path,
                },
            },
            "sink": {
                "type": "datahub-rest",
                "config": {"server": "http://localhost:8080"},
            },
        }
    )
    pipeline.run()
    ret = pipeline.pretty_print_summary()
    sys.exit(ret)
def quickstart(
    version: str,
    build_locally: bool,
    quickstart_compose_file: List[pathlib.Path],
    dump_logs_on_failure: bool,
) -> None:
    """Start an instance of DataHub locally using docker-compose.

    This command will automatically download the latest docker-compose configuration
    from GitHub, pull the latest images, and bring up the DataHub system.
    There are options to override the docker-compose config file, build the containers
    locally, and dump logs to the console or to a file if something goes wrong.
    """
    # NOTE(review): the docstring above reads like CLI help text, so it is
    # kept verbatim; implementation notes live in comments.
    #
    # version: exported to child processes as the DATAHUB_VERSION env var.
    # build_locally: additionally run `docker-compose build --pull`.
    # quickstart_compose_file: compose files passed via `-f`; when empty, the
    #   file at GITHUB_QUICKSTART_COMPOSE_URL is downloaded to a temp file.
    # dump_logs_on_failure: echo the docker-compose logs if startup times out.

    # Run pre-flight checks.
    issues = check_local_docker_containers(preflight_only=True)
    if issues:
        _print_issue_list_and_exit(issues, "Unable to run quickstart:")

    quickstart_compose_file = list(quickstart_compose_file)  # convert to list from tuple
    if not quickstart_compose_file:
        click.echo("Fetching docker-compose file from GitHub")

        # Download the quickstart docker-compose file from GitHub *before*
        # creating the temp file, so a failed download leaves nothing behind.
        # The timeout (connect / per-read) prevents an indefinite hang.
        quickstart_download_response = requests.get(
            GITHUB_QUICKSTART_COMPOSE_URL, timeout=30
        )
        quickstart_download_response.raise_for_status()

        # delete=False: docker-compose reads this file by name after the
        # `with` block has closed (and flushed) it.
        with tempfile.NamedTemporaryFile(suffix=".yml", delete=False) as tmp_file:
            tmp_file.write(quickstart_download_response.content)
        quickstart_compose_file.append(pathlib.Path(tmp_file.name))

    # set version
    os.environ["DATAHUB_VERSION"] = version

    base_command: List[str] = [
        "docker-compose",
        *itertools.chain.from_iterable(
            ("-f", f"{path}") for path in quickstart_compose_file
        ),
        "-p",
        "datahub",
    ]

    # Pull and possibly build the latest containers.
    subprocess.run(
        [
            *base_command,
            "pull",
        ],
        check=True,
    )
    if build_locally:
        subprocess.run(
            [
                *base_command,
                "build",
                "--pull",
            ],
            check=True,
            env={
                **os.environ,
                "DOCKER_BUILDKIT": "1",
            },
        )

    # Start it up! (with retries)
    max_wait_time = datetime.timedelta(minutes=6)
    start_time = datetime.datetime.now()
    sleep_interval = datetime.timedelta(seconds=2)
    up_interval = datetime.timedelta(seconds=30)
    up_attempts = 0
    while (datetime.datetime.now() - start_time) < max_wait_time:
        # (Re-)run `docker-compose up` once per `up_interval` (30 seconds).
        # `>=` guarantees the first attempt fires immediately even when the
        # clock has not advanced since `start_time` was taken.
        if (datetime.datetime.now() - start_time) >= up_attempts * up_interval:
            click.echo()
            # Deliberately no check=True: a failed `up` is retried later.
            subprocess.run(base_command + ["up", "-d"])
            up_attempts += 1

        # Check docker health every few seconds.
        issues = check_local_docker_containers()
        if not issues:
            break

        # Wait until next iteration.
        click.echo(".", nl=False)
        time.sleep(sleep_interval.total_seconds())
    else:
        # Falls through if the while loop doesn't exit via break.
        click.echo()

        # Persist the logs to a file that survives this process so the user
        # can attach it to a bug report.
        with tempfile.NamedTemporaryFile(suffix=".log", delete=False) as log_file:
            ret = subprocess.run(
                base_command + ["logs"],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True,
            )
            log_file.write(ret.stdout)

        if dump_logs_on_failure:
            # Decode explicitly and leniently: with the locale's default
            # encoding, a stray byte in the container logs would raise
            # UnicodeDecodeError and hide the actual issue report below.
            with open(
                log_file.name, "r", encoding="utf-8", errors="replace"
            ) as logs:
                click.echo("Dumping docker-compose logs:")
                click.echo(logs.read())
                click.echo()

        _print_issue_list_and_exit(
            issues,
            header="Unable to run quickstart - the following issues were detected:",
            footer="If you think something went wrong, please file an issue at https://github.com/linkedin/datahub/issues\n"
            "or send a message in our Slack https://slack.datahubproject.io/\n"
            f"Be sure to attach the logs from {log_file.name}",
        )

    # Handle success condition.
    click.echo()
    click.secho("✔ DataHub is now running", fg="green")
    click.secho(
        "Ingest some demo data using `datahub docker ingest-sample-data`,\n"
        "or head to http://localhost:9002 (username: datahub, password: datahub) to play around with the frontend.",
        fg="green",
    )