Example 1
import logging
import os
import tempfile

from distutils.dir_util import copy_tree


def package_spark(source_dir: str, target_file: str) -> int:
    if not os.path.isdir(source_dir):
        eprint(source_dir + " is not a valid directory")
        return 1

    req_file = os.path.join(source_dir, "requirements.txt")
    if not os.path.exists(req_file):
        eprint("requirements.txt not found in " + source_dir)
        return 1

    tmp_dir = tempfile.mkdtemp()

    logging.debug("copying app files to " + tmp_dir)
    copy_tree(source_dir, tmp_dir)

    logging.debug("resolving pip dependencies into " + tmp_dir)
    pip_out = _resolve_pip_dependencies(tmp_dir)
    if pip_out != 0:
        eprint("there was an error processing " + req_file)
        return 1

    logging.debug("creating package " + target_file)
    _create_zip(tmp_dir, target_file)
    print("spark app generated successfully: " + os.path.abspath(target_file))

    return 0
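
The helpers eprint, _resolve_pip_dependencies, and _create_zip are not part of the snippet. A minimal sketch of what they might look like, assuming pip is invoked as a subprocess against the copied requirements.txt and the package is a plain zip archive; only the names and signatures come from the call sites above, the bodies are illustrative:

import os
import subprocess
import sys
import zipfile


def eprint(*args, **kwargs):
    # Write error messages to stderr so they do not mix with stdout.
    print(*args, file=sys.stderr, **kwargs)


def _resolve_pip_dependencies(target_dir: str) -> int:
    # Install the requirements into target_dir itself so the
    # dependencies travel inside the generated package.
    return subprocess.call([
        sys.executable, '-m', 'pip', 'install',
        '-r', os.path.join(target_dir, 'requirements.txt'),
        '--target', target_dir,
    ])


def _create_zip(source_dir: str, target_file: str) -> None:
    # Archive every file under source_dir with paths relative to it,
    # so the zip root holds the app and its dependencies directly.
    with zipfile.ZipFile(target_file, 'w', zipfile.ZIP_DEFLATED) as zf:
        for root, _dirs, files in os.walk(source_dir):
            for name in files:
                path = os.path.join(root, name)
                zf.write(path, os.path.relpath(path, source_dir))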
Example 2
from typing import List, Optional


def transform(runner: Optional[str], stream_dtc_file: Optional[str],
              window_dtc_file: Optional[str], data_processor: Optional[str],
              raw_json_files: List[str]) -> int:
    if stream_dtc_file is None and window_dtc_file is None:
        stream_dtc_file, window_dtc_file = get_stream_window_dtc_files(
            get_valid_yml_files(get_yml_files()))

    if stream_dtc_file is None:
        eprint('Streaming DTC file not provided and could not be found in '
               'the current directory.')
        return 1

    if not runner:
        runner = 'local'

    if not data_processor:
        data_processor = 'simple'

    if runner not in RUNNER_CLASS:
        eprint('Unknown runner: \'{}\'. Possible values: {}'.format(
            runner, list(RUNNER_CLASS.keys())))
        return 1

    if data_processor not in DATA_PROCESSOR_CLASS:
        eprint('Unknown data-processor: \'{}\'. Possible values: {}'.format(
            data_processor, list(DATA_PROCESSOR_CLASS.keys())))
        return 1

    data_processor_obj = DATA_PROCESSOR_CLASS[data_processor]()
    if runner == 'local':
        return transform_local(stream_dtc_file, window_dtc_file,
                               raw_json_files, data_processor_obj)
    else:
        return transform_spark(stream_dtc_file, window_dtc_file,
                               raw_json_files, data_processor_obj)
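
RUNNER_CLASS and DATA_PROCESSOR_CLASS act as dispatch tables mapping the CLI argument strings to classes; transform validates the key, then instantiates the class. A minimal, self-contained sketch of the pattern with illustrative placeholder classes (the project's real classes are not shown in the snippet):

from typing import Dict, Type


class DataProcessor:
    # Illustrative base class for the objects stored in the table.
    pass


class SimpleDataProcessor(DataProcessor):
    # Illustrative default, selected when data_processor is not given.
    pass


# Keys are the values accepted on the command line; values are classes,
# instantiated lazily via DATA_PROCESSOR_CLASS[data_processor]().
DATA_PROCESSOR_CLASS: Dict[str, Type[DataProcessor]] = {
    'simple': SimpleDataProcessor,
}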
Example 3
import logging

import yaml


def validate_file(bts_file: str) -> int:
    print('Running validation on {}'.format(bts_file))
    try:
        with open(bts_file, 'r', encoding='utf-8') as bts_stream:
            bts_dict = yaml.safe_load(bts_stream)
        validate(bts_dict)
        print('Document is valid')
        return 0
    except yaml.YAMLError as err:
        eprint('Invalid yaml')
        eprint(str(err))
        return 1
    except SchemaError as err:
        eprint(str(err))
        return 1
    except Exception as err:
        # err is already the active exception; no need for sys.exc_info().
        logging.error(err)
        eprint('There was an error parsing the document. Error:\n' + str(err))
        return 1
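
validate raises SchemaError when the loaded dictionary does not match the expected document schema. A plausible shape for it, assuming the third-party schema package (the real BTS schema is not shown in the snippet, so the fields below are placeholders):

from schema import Schema, SchemaError


# Placeholder schema; the real BTS document defines many more fields.
BTS_SCHEMA = Schema({'Type': str, 'Version': str}, ignore_extra_keys=True)


def validate(bts_dict: dict) -> None:
    # Schema.validate raises SchemaError on mismatch, which
    # validate_file catches and reports via eprint.
    BTS_SCHEMA.validate(bts_dict)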