def download_apache_avro():
    """Copy the Apache Avro jar into flink-avro's target directory via Maven.

    Currently we need to download the Apache Avro manually to avoid test
    failure caused by the avro format sql jar. See
    https://issues.apache.org/jira/browse/FLINK-17417. If the issue is fixed,
    this method could be removed. Using maven command copy the jars in
    repository to avoid accessing external network.
    """
    flink_source_root = _find_flink_source_root()
    avro_jar_pattern = os.path.join(
        flink_source_root, "flink-formats", "flink-avro", "target", "avro*.jar")
    if len(glob.glob(avro_jar_pattern)) > 0:
        # the avro jar already existed, just return.
        return
    mvn = "mvn.cmd" if on_windows() else "mvn"
    # Ask Maven for the project's avro.version property; the value appears on
    # its own line among the usual Maven log output.
    avro_version_output = check_output(
        [mvn, "help:evaluate", "-Dexpression=avro.version"],
        cwd=flink_source_root).decode("utf-8")
    lines = avro_version_output.replace("\r", "").split("\n")
    avro_version = None
    for line in lines:
        stripped = line.strip()
        if stripped != "" and re.match(r'^[0-9]+\.[0-9]+(\.[0-9]+)?$', stripped):
            # Bug fix: assign the *stripped* line. The regex was matched
            # against line.strip(), but the raw line was stored, so any
            # surrounding whitespace could leak into the -Dartifact
            # coordinate below.
            avro_version = stripped
            break
    if avro_version is None:
        raise Exception(
            "The Apache Avro version is not found in the maven command output:\n %s"
            % avro_version_output)
    # dependency:copy pulls the jar from the local repository (no network).
    check_output([
        mvn,
        "org.apache.maven.plugins:maven-dependency-plugin:2.10:copy",
        "-Dartifact=org.apache.avro:avro:%s:jar" % avro_version,
        "-DoutputDirectory=%s/flink-formats/flink-avro/target" % flink_source_root
    ], cwd=flink_source_root)
def construct_test_classpath():
    """Collect the test jars needed by the tests and join them into a
    platform-specific classpath string."""
    test_jar_patterns = [
        "flink-runtime/target/flink-runtime*tests.jar",
        "flink-streaming-java/target/flink-streaming-java*tests.jar",
        "flink-formats/flink-sql-avro/target/flink-sql-avro*.jar",
        "flink-formats/flink-sql-parquet/target/flink-sql-parquet*.jar",
        "flink-formats/flink-json/target/flink-json*.jar",
        "flink-connectors/flink-sql-connector-kafka/target/flink-sql-connector-kafka*.jar",
        "flink-connectors/flink-sql-connector-elasticsearch7/target/flink-sql-connector-*.jar",
        "flink-connectors/flink-sql-connector-pulsar/target/flink-sql-connector-*.jar",
        "flink-connectors/flink-sql-connector-rabbitmq/target/flink-sql-connector-*.jar",
        "flink-connectors/flink-sql-connector-kinesis/target/flink-sql-connector-*.jar",
        "flink-connectors/flink-sql-connector-aws-kinesis-firehose/target/flink-sql-connector*.jar",
        "flink-connectors/flink-connector-jdbc/target/flink-connector-*.jar",
        "flink-connectors/flink-connector-files/target/flink-connector-*.jar",
        "flink-connectors/flink-connector-sink-common/target/flink-connector-*.jar",
        "flink-connectors/flink-connector-cassandra/target/flink-connector-*.jar",
        "flink-python/target/artifacts/testDataStream.jar",
        "flink-python/target/flink-python*-tests.jar",
        ("flink-state-backends/flink-statebackend-rocksdb/target/"
         "flink-statebackend-rocksdb*tests.jar"),
    ]
    source_root = _find_flink_source_root()
    # Expand every pattern (with native path separators) relative to the
    # source root and flatten the matches into one list.
    matched_jars = [
        jar
        for jar_pattern in test_jar_patterns
        for jar in glob.glob(
            os.path.join(source_root, jar_pattern.replace("/", os.path.sep)))
    ]
    return os.path.pathsep.join(matched_jars)
def test_add_classpaths(self):
    """Add the kafka connector jars to the pipeline classpaths and verify the
    job graph can be built (a missing jar would raise ClassNotFoundException)."""
    # find kafka connector jars
    flink_source_root = _find_flink_source_root()
    jars_abs_path = flink_source_root + '/flink-connectors/flink-sql-connector-kafka'
    specific_jars = ['file://' + jar
                     for jar in glob.glob(jars_abs_path + '/target/flink*.jar')]
    self.env.add_classpaths(*specific_jars)

    source_topic = 'test_source_topic'
    props = {'bootstrap.servers': 'localhost:9092', 'group.id': 'test_group'}
    type_info = Types.ROW([Types.INT(), Types.STRING()])

    # Test for kafka consumer
    deserialization_schema = JsonRowDeserializationSchema.builder() \
        .type_info(type_info=type_info).build()

    # It Will raise a ClassNotFoundException if the kafka connector is not added into the
    # pipeline classpaths.
    kafka_consumer = FlinkKafkaConsumer(source_topic, deserialization_schema, props)
    self.env.add_source(kafka_consumer).print()
    self.env.get_execution_plan()
def _ensure_initialized(cls):
    """Import the Java test sink classes exactly once, skipping the tests if
    the planner tests jar has not been built."""
    if TestTableSink._inited:
        return

    source_root = _find_flink_source_root()
    filename_pattern = ("flink-table/flink-table-planner/target/"
                        "flink-table-planner*-tests.jar")
    if not glob.glob(os.path.join(source_root, filename_pattern)):
        raise unittest.SkipTest(
            "'flink-table-planner*-tests.jar' is not available. Will skip the related tests."
        )

    gateway = get_gateway()
    # Make the test sink classes visible in the gateway JVM.
    for java_class in (
            "org.apache.flink.table.runtime.stream.table.TestAppendSink",
            "org.apache.flink.table.runtime.stream.table.TestRetractSink",
            "org.apache.flink.table.runtime.stream.table.TestUpsertSink",
            "org.apache.flink.table.runtime.stream.table.RowCollector"):
        java_import(gateway.jvm, java_class)

    TestTableSink._inited = True
def _load_specific_flink_module_jars(jars_relative_path):
    """Load every flink*.jar under <module>/target into the context class loader.

    :param jars_relative_path: module path relative to the Flink source root,
                               e.g. '/flink-connectors/flink-sql-connector-kafka'.
    """
    module_dir = _find_flink_source_root() + jars_relative_path
    jar_urls = ['file://' + jar_path
                for jar_path in glob.glob(module_dir + '/target/flink*.jar')]
    add_jars_to_context_class_loader(jar_urls)
def get_jar_path(self, jar_path_pattern):
    """Return the single jar matching the pattern under the Flink source root,
    failing the test if zero or multiple jars match."""
    matches = glob.glob(os.path.join(_find_flink_source_root(), jar_path_pattern))
    if not matches:
        self.fail("'%s' is not available. Please compile the test jars first."
                  % jar_path_pattern)
    if len(matches) > 1:
        self.fail("There are multiple jars matches the pattern: %s, the jars are: %s"
                  % (jar_path_pattern, matches))
    return matches[0]
def construct_test_classpath():
    """Join the flink-python test jars into a platform-specific classpath string."""
    test_jar_patterns = [
        "flink-python/target/test-dependencies/*",
        "flink-python/target/artifacts/testDataStream.jar",
        "flink-python/target/flink-python*-tests.jar",
    ]
    source_root = _find_flink_source_root()
    collected = []
    for jar_pattern in test_jar_patterns:
        # Patterns are written with '/' — convert to the native separator.
        native_pattern = jar_pattern.replace("/", os.path.sep)
        collected.extend(glob.glob(os.path.join(source_root, native_pattern)))
    return os.path.pathsep.join(collected)
def ensure_jar_not_loaded(self, func_class_name, jar_filename_pattern):
    """Assert that ``func_class_name`` cannot be registered, which proves the
    given jar is not on the PythonGatewayServer classpath."""
    matches = glob.glob(os.path.join(_find_flink_source_root(), jar_filename_pattern))
    if not matches:
        self.fail("'%s' is not available. Please compile the test jars first."
                  % jar_filename_pattern)
    try:
        self.t_env.register_java_function("func", func_class_name)
    except Py4JJavaError:
        # Expected: the class must not be loadable.
        pass
    else:
        self.fail("The scalar function '%s' should not be able to be loaded. Please remove "
                  "the '%s' from the classpath of the PythonGatewayServer process."
                  % (func_class_name, jar_filename_pattern))
def _ensure_initialized(cls):
    """Load the flink-ml api and lib jars exactly once per test run."""
    if MLTestCase._inited:
        return

    source_root = _find_flink_source_root()
    for relative_pattern in (
            "flink-ml-parent/flink-ml-api/target/flink-ml-api*-SNAPSHOT.jar",
            "flink-ml-parent/flink-ml-lib/target/flink-ml-lib*-SNAPSHOT.jar"):
        MLTestCase._ensure_path(os.path.join(source_root, relative_pattern))

    MLTestCase._inited = True
def construct_test_classpath():
    """Gather the runtime/format/python test jars into one classpath string."""
    test_jar_patterns = [
        "flink-runtime/target/flink-runtime*tests.jar",
        "flink-streaming-java/target/flink-streaming-java*tests.jar",
        "flink-formats/flink-csv/target/flink-csv*.jar",
        "flink-formats/flink-avro/target/flink-avro*.jar",
        "flink-formats/flink-avro/target/avro*.jar",
        "flink-formats/flink-json/target/flink-json*.jar",
        "flink-python/target/artifacts/testDataStream.jar",
        "flink-python/target/flink-python*-tests.jar",
    ]
    source_root = _find_flink_source_root()
    matched_jars = [
        jar
        for jar_pattern in test_jar_patterns
        for jar in glob.glob(
            os.path.join(source_root, jar_pattern.replace("/", os.path.sep)))
    ]
    return os.path.pathsep.join(matched_jars)
def construct_test_classpath():
    """Build the classpath string of planner/runtime/format/ml test jars."""
    test_jar_patterns = (
        "flink-table/flink-table-planner/target/flink-table-planner*-tests.jar",
        "flink-runtime/target/flink-runtime*tests.jar",
        "flink-streaming-java/target/flink-streaming-java*tests.jar",
        "flink-formats/flink-csv/target/flink-csv*.jar",
        "flink-formats/flink-avro/target/flink-avro*.jar",
        "flink-formats/flink-avro/target/avro*.jar",
        "flink-formats/flink-json/target/flink-json*.jar",
        "flink-ml-parent/flink-ml-api/target/flink-ml-api*.jar",
        "flink-ml-parent/flink-ml-lib/target/flink-ml-lib*.jar",
        "flink-python/target/data-stream-test/flink*.jar",
    )
    source_root = _find_flink_source_root()
    collected = []
    for jar_pattern in test_jar_patterns:
        native_pattern = jar_pattern.replace("/", os.path.sep)
        collected += glob.glob(os.path.join(source_root, native_pattern))
    return os.path.pathsep.join(collected)
def _ensure_initialized(cls):
    """Import the Java test sink classes exactly once, skipping the related
    tests when the planner tests jar is not available."""
    if TestTableSink._inited:
        return

    source_root = _find_flink_source_root()
    filename_pattern = (
        "flink-table/flink-table-planner/target/"
        "flink-table-planner*-tests.jar")
    if not glob.glob(os.path.join(source_root, filename_pattern)):
        raise unittest.SkipTest(
            "'flink-table-planner*-tests.jar' is not available. Will skip the related tests.")

    gateway = get_gateway()
    for java_class in (
            "org.apache.flink.table.runtime.stream.table.TestAppendSink",
            "org.apache.flink.table.runtime.stream.table.TestRetractSink",
            "org.apache.flink.table.runtime.stream.table.TestUpsertSink",
            "org.apache.flink.table.runtime.stream.table.RowCollector"):
        java_import(gateway.jvm, java_class)

    TestTableSink._inited = True
def _ensure_initialized(cls):
    """Import the Java testing sink classes exactly once, skipping the related
    tests when the flink-python tests jar has not been built."""
    if TestTableSink._inited:
        return

    source_root = _find_flink_source_root()
    filename_pattern = "flink-python/target/flink-python*-tests.jar"
    if not glob.glob(os.path.join(source_root, filename_pattern)):
        raise unittest.SkipTest(
            "'flink-python*-tests.jar' is not available. Will skip the related tests."
        )

    gateway = get_gateway()
    java_import(gateway.jvm,
                "org.apache.flink.table.utils.TestingSinks$TestAppendingSink")
    java_import(gateway.jvm,
                "org.apache.flink.table.utils.TestingSinks$RowCollector")

    TestTableSink._inited = True
def get_jar_url(jar_filename_pattern):
    """Return file:// URIs for every jar matching the pattern under the
    Flink source root."""
    matched = glob.glob(
        os.path.join(_find_flink_source_root(), jar_filename_pattern))
    return [pathlib.Path(jar).as_uri() for jar in matched]