def _get_next_slice_of_lex_result(lex_result, current_index):
    """Return the span of the lexed entry that follows ``current_index``.

    Each entry of ``lex_result`` is indexed as ``(word, span)``; the span of
    the entry right after ``current_index`` is returned unchanged.

    Raises:
        parse_primitives.AInixParseError: if ``current_index`` is the last
            entry, so there is no following word to use as the value.
    """
    last_index = len(lex_result) - 1
    if current_index == last_index:
        # The flag was the final word, so nothing is left to act as its value.
        raise parse_primitives.AInixParseError(
            "Unable to find value for arg that requires one")
    following_entry = lex_result[current_index + 1]
    return following_entry[1]
def _get_arg_with_long_name(
    arg_list,
    long_name: str
):
    """Find the single arg in ``arg_list`` whose long name is ``long_name``.

    An arg matches when its ``arg_data`` mapping has a ``'long_name'`` entry
    equal to ``long_name``.

    Raises:
        parse_primitives.AInixParseError: if no arg matches, or if more than
            one arg shares the same long name.
    """
    # TODO (DNGros): implement equal sign syntax (like --lines=3)
    candidates = []
    for candidate in arg_list:
        if candidate.arg_data.get('long_name', None) == long_name:
            candidates.append(candidate)
    if not candidates:
        # TODO (DNGros): implement "non-ambiguous abbreviations" for args
        raise parse_primitives.AInixParseError(
            f"Unexpected long arg '{long_name}'.")
    if len(candidates) > 1:
        raise parse_primitives.AInixParseError(
            "Program has multiple args with same long_name", candidates)
    return candidates[0]
def get_arg_with_short_name(arg_list, short_name: str):
    """Find the single arg in ``arg_list`` whose short name is ``short_name``.

    An arg matches when its ``arg_data`` mapping has a ``SHORT_NAME`` entry
    equal to the given one-character ``short_name``.

    Raises:
        InvalidShortFlag: if no arg has this short name.
        parse_primitives.AInixParseError: if several args share the short name.
    """
    assert len(short_name) == 1, "unexpectedly long short_name"
    found = []
    for candidate in arg_list:
        if candidate.arg_data.get(SHORT_NAME, None) == short_name:
            found.append(candidate)
    match_count = len(found)
    if match_count == 0:
        raise InvalidShortFlag(
            f"Unexpected short_name -{short_name}")
    if match_count > 1:
        raise parse_primitives.AInixParseError(
            "Program has multiple args with same short_name", found)
    return found[0]
def CmdSeqParser(
    run: parse_primitives.ObjectParserRun,
    string: str,
    result: parse_primitives.ObjectParserResult
):
    """Split a command sequence into its program part and compound operator.

    Marks "ProgramArg" as present over the text before the operator located
    by ``_get_location_of_operator`` (trailing whitespace excluded), and
    "CompoundOp" as present from the operator to the end of ``string``. When
    no operator is found the whole string is the "ProgramArg".

    Raises:
        parse_primitives.AInixParseError: for an empty string, for strings
            containing a backtick or "$(", and for strings containing "### ".
    """
    if string == "":
        raise parse_primitives.AInixParseError("Unable to parse empty string")
    uses_subcommand = "`" in string or "$(" in string
    if uses_subcommand:
        raise parse_primitives.AInixParseError("Subcommand calls not supported")
    if "### " in string:
        raise parse_primitives.AInixParseError(
            "Hackily give up on something that looks like comment at the end"
            "which sometimes apears in the tellina dataset.")
    total_len = len(string)
    split_at = _get_location_of_operator(string)
    if split_at is None:
        # No operator: the program occupies the entire string.
        result.set_arg_present("ProgramArg", 0, total_len)
        return
    # Whitespace between the program text and the operator belongs to neither.
    program_text = string[:split_at].rstrip()
    result.set_arg_present("ProgramArg", 0, len(program_text))
    result.set_arg_present("CompoundOp", split_at, total_len)
def mod_type_parser_func(
    run: parse_primitives.TypeParserRun,
    string: str,
    result: parse_primitives.TypeParserResult
) -> None:
    """Pick the modifier implementation that matches the casing of ``string``.

    The whole string is consumed (the next slice is set to the empty span at
    its end). The implementation name is chosen by casing: all upper-case,
    all lower-case, or first character upper-case with the rest lower-case.

    Raises:
        parse_primitives.AInixParseError: if ``string`` fits none of the three
            casing patterns. This includes the empty string, which has no
            cased characters.
    """
    result.set_next_slice(len(string), len(string))
    if string.isupper():
        result.set_valid_implementation_name(MODIFIER_ALL_UPPER)
    elif string.islower():
        result.set_valid_implementation_name(MODIFIER_LOWER_NAME)
    # Slice instead of indexing so an empty string falls through to the parse
    # error below rather than raising IndexError.
    elif string[:1].isupper() and string[1:].islower():
        result.set_valid_implementation_name(MODIFIER_FIRST_CAP_NAME)
    else:
        raise parse_primitives.AInixParseError(
            f"String {string} did not match an expected modifier")
def modifier_type_unparser(
    result: parse_primitives.TypeToStringResult
):
    """Emit ``symb`` in the casing selected by the result's implementation.

    ``symb`` is not defined in this function; presumably it is bound by an
    enclosing scope. It is written lower-name as is, all-upper via
    ``upper()``, or with only its first character upper-cased, after which
    the implementation itself is unparsed.

    Raises:
        parse_primitives.AInixParseError: if the implementation name is not
            one of the three known modifier names.
    """
    impl_name = result.implementation.name
    if impl_name == MODIFIER_LOWER_NAME:
        text = symb
    elif impl_name == MODIFIER_ALL_UPPER:
        text = symb.upper()
    elif impl_name == MODIFIER_FIRST_CAP_NAME:
        text = symb[0].upper() + symb[1:]
    else:
        raise parse_primitives.AInixParseError(
            f"Unexpected impl {impl_name}")
    result.add_string(text)
    result.add_impl_unparse()
def regex_group_object_parser(
    run: parse_primitives.ObjectParserRun,
    string: str,
    result: parse_primitives.ObjectParserResult
) -> None:
    """Mark each argument present where its regex's first group matches.

    For every argument of ``run``, the pattern stored under
    ``REGEX_GROUP_LOOKUP_KEY`` in its ``arg_data`` is matched against the
    start of ``string``. On a match, the span of capture group 1 is recorded
    as the argument's location.

    Raises:
        parse_primitives.AInixParseError: if a required argument's pattern
            does not match.
    """
    for arg in run.all_arguments:
        regex: str = arg.arg_data[REGEX_GROUP_LOOKUP_KEY]
        found = re.match(regex, string)
        if found is None:
            if arg.required:
                raise parse_primitives.AInixParseError(
                    f'Arg {arg.name} with RegexRepresentation "{regex}" did not '
                    f'match on "{string}", but the arg is required.')
            # Optional argument that is simply absent.
            continue
        group_start, group_end = found.span(1)
        result.set_arg_present(arg.name, group_start, group_end)
def parser(
    run: parse_primitives.ObjectParserRun,
    string: str,
    result: parse_primitives.ObjectParserResult
):
    """Generator parser for a string that begins with ``symb``.

    ``symb`` and ``allow_modifier`` are not defined in this function;
    presumably they are bound by an enclosing scope. When ``allow_modifier``
    is truthy, the first argument is delegated the ``symb`` prefix and the
    second argument the remainder; otherwise the first argument is delegated
    the remainder. Each delegation is yielded, and the value sent back is
    accepted into ``result``.

    Raises:
        parse_primitives.AInixParseError: if the lower-cased ``string`` does
            not start with ``symb``.
    """
    if not string.lower().startswith(symb):
        raise parse_primitives.AInixParseError(
            f"Expected string {string} to start with {symb}")
    if allow_modifier:
        # Look up both arguments before the first yield.
        mod_arg = run.all_arguments[0]
        next_arg = run.all_arguments[1]
        prefix_span = (0, len(symb))
        modifier_delegation = yield run.left_fill_arg(mod_arg, prefix_span)
        result.accept_delegation(modifier_delegation)
    else:
        next_arg = run.all_arguments[0]
    remainder_span = (len(symb), len(string))
    remainder_delegation = yield run.left_fill_arg(next_arg, remainder_span)
    result.accept_delegation(remainder_delegation)
def ProgramTypeParser(
    run: parse_primitives.TypeParserRun,
    string: str,
    result: parse_primitives.TypeParserResult
):
    """Select the program implementation named by the first word of ``string``.

    The first space-delimited word is looked up against the "invoke_name"
    attribute of the run's type implementations. The first match becomes the
    valid implementation, and the next slice starts after the run of spaces
    that follows the first word (or at the end of the string if there is no
    space).

    Raises:
        parse_primitives.AInixParseError: if no implementation matches.
    """
    first_word, separator, remainder = string.partition(" ")
    matching_programs = run.match_attribute(
        run.all_type_implementations, "invoke_name", first_word)
    if not matching_programs:
        raise parse_primitives.AInixParseError(
            f"Unable to find program '{first_word}'."
            f" The input string to program parser was '{string}'"
        )
    result.set_valid_implementation(matching_programs[0])
    total_len = len(string)
    if separator:
        # Skip every space between the program name and what follows it.
        next_start_ind = total_len - len(remainder.lstrip(" "))
    else:
        next_start_ind = total_len
    result.set_next_slice(next_start_ind, total_len)
def ProgramObjectParser(
    run: parse_primitives.ObjectParserRun,
    string: str,
    result: parse_primitives.ObjectParserResult
):
    """Walk the lexed words of ``string`` and mark which args are present.

    ``string`` is lexed with ``lex_bash`` into ``(word, (start, end))``
    entries. Each word is then handled as one of:

    * ``--`` alone: ends option parsing; later words are never flags.
    * a single-dash word (``-abc``): each character is looked up as a short
      flag. A flag whose ``type`` is not None takes the rest of the word as
      its value, or the next word if no characters remain.
    * a double-dash word (``--name``): looked up as a long flag. A flag whose
      ``type`` is not None takes the next word as its value.
    * anything else: assigned to the first arg returned by
      ``get_all_positional_args``. An arg flagged ``MULTIWORD_POS_ARG`` may
      span several words.

    Every matched arg is recorded with ``result.set_arg_present`` and removed
    from the pool of remaining args.

    Raises:
        parse_primitives.AInixParseError: for an unknown long flag, a flag
            missing its value, a word with no positional arg left to take it,
            or required args left unmatched at the end.
        InvalidShortFlag: for an unknown short flag, unless the
            ``IGNORE_INVALID_FLAGS`` type data is truthy.
        ValueError: for a positional arg with a None type, a positional arg
            that also has a short or long name, or a second multiword
            positional arg.
    """
    remaining_args = list(run.all_arguments)
    parameter_end_seen = False
    already_seen_multiword_positional = False
    lex_result = lex_bash(string)
    lex_index = 0
    # Set when an invalid short flag was tolerated; makes the next pass over
    # the same word skip the single-dash branch.
    ignore_this_word_dash = False
    ignore_invalid_flags = run.get_type_data(IGNORE_INVALID_FLAGS, False)
    while lex_index < len(lex_result):
        word, (start_idx, end_idx) = lex_result[lex_index]
        if word == "--":
            parameter_end_seen = True
            lex_index += 1
            continue
        single_dash = not parameter_end_seen and len(word) >= 2 and \
            word[0] == "-" and word[1] != "-" and not ignore_this_word_dash
        double_dash = not parameter_end_seen and len(word) >= 3 and word[:2] == "--"
        if single_dash:
            # Set once a flag has consumed a value, which ends this word.
            done_with_chars = False
            for ci, char in enumerate(word[1:]):
                try:
                    shortname_match = get_arg_with_short_name(remaining_args, char)
                except InvalidShortFlag as e:
                    if ignore_invalid_flags:
                        ignore_this_word_dash = True
                        # We want to get back out to the word while loop.
                        # Decrement lex_index to cancel the increment after
                        # this for loop, so we stay on the same word.
                        lex_index -= 1
                        break
                    raise e
                requires_value = shortname_match.type is not None
                # Default for value-less flags: an empty span at the end of the word.
                use_start_idx, use_end_idx = end_idx, end_idx
                if requires_value:
                    # TODO (DNGros): handle args that consume multiple words
                    remaining_chars = ci < len(word[1:]) - 1
                    if remaining_chars:
                        # Assume rest of chars are the value
                        use_start_idx = start_idx + (ci+1) + 1  # +1 for the dash
                        use_end_idx = end_idx
                    else:
                        # No characters left in this word; the value is the next word.
                        next_slice = _get_next_slice_of_lex_result(
                            lex_result, lex_index)
                        use_start_idx, use_end_idx = next_slice
                        lex_index += 1
                    done_with_chars = True
                result.set_arg_present(shortname_match.name,
                                       use_start_idx, use_end_idx, True)
                remaining_args.remove(shortname_match)
                if done_with_chars:
                    break
            lex_index += 1
        elif double_dash:
            long_name_match = _get_arg_with_long_name(
                remaining_args, word[2:])
            if long_name_match:
                requires_value = long_name_match.type is not None
                # Value-less long flags are recorded with the span (0, 0).
                use_start_idx, use_end_idx = 0, 0
                if requires_value:
                    next_slice = _get_next_slice_of_lex_result(
                        lex_result, lex_index)
                    use_start_idx, use_end_idx = next_slice
                    lex_index += 1
                result.set_arg_present(long_name_match.name,
                                       use_start_idx, use_end_idx)
                remaining_args.remove(long_name_match)
            lex_index += 1
        else:
            # Must be a positional arg
            sorted_pos_args = get_all_positional_args(remaining_args)
            if not sorted_pos_args:
                raise AInixParseError(f"Unexpected word '{word}' with no remaing positional args."
                                      f" Input was {string}")
            arg_to_do = sorted_pos_args[0]
            if arg_to_do.type is None:
                raise ValueError(f"Positional arg {arg_to_do.name} can't have None type")
            if SHORT_NAME in arg_to_do.arg_data or LONG_NAME in arg_to_do.arg_data:
                raise ValueError("Can't be both positional and a flag")
            is_multiword = arg_to_do.arg_data.get(MULTIWORD_POS_ARG, False) is True
            if is_multiword and already_seen_multiword_positional:
                raise ValueError("Cannot parse a multiword positional argument as a previous"
                                 "argument was also specified as multiword. Parse is ambigious")
            if is_multiword:
                already_seen_multiword_positional = True
                # We can potentially consume all the remaining words, except we have to leave
                # room for any other positional args there might be.
                max_words_to_consume = len(lex_result) - lex_index - len(sorted_pos_args[1:])
                if not parameter_end_seen and not ignore_invalid_flags:
                    # If we haven't seen a "--" then we need to scan forwards to make sure we
                    # don't accidentally count an option flag as part of this positional arg.
                    words_to_consume = 1
                    while words_to_consume < max_words_to_consume:
                        lookahead_word, _ = lex_result[lex_index + words_to_consume]
                        if lookahead_word.startswith("-"):
                            # If we see an option arg, stop consuming words for this pos arg here
                            break
                        words_to_consume += 1
                else:
                    words_to_consume = max_words_to_consume
            else:
                words_to_consume = 1
            # The arg spans from the start of this word to the end of the last consumed word.
            _, use_end_idx = lex_result[lex_index+words_to_consume-1][1]
            result.set_arg_present(arg_to_do.name, start_idx, use_end_idx, True)
            remaining_args.remove(arg_to_do)
            lex_index += words_to_consume
            # The tolerated-invalid-flag override applies only to the word just consumed.
            ignore_this_word_dash = False
    remaining_required_args = [a for a in remaining_args if a.required]
    if remaining_required_args:
        raise parse_primitives.AInixParseError(
            "Unexpected unmatched args", remaining_required_args)