def parse(self, filename):
    """Parse a substitution-model report file and normalise it into
    (model, alpha, lnL, base frequencies, exchange rates).

    Uses the pyparsing grammars prepared on self (self.common, self.tstv,
    self.freq, self.rates) and fills in the canonical frequency/rate
    vectors implied by each named model.  Parse failures are logged and
    leave the corresponding values as None.
    """
    model = None
    alpha = None
    lnl = None
    freq = None
    rates = None
    with open(filename) as fl:
        s = fl.read()
    # Header common to every model: name, log-likelihood, gamma alpha.
    try:
        model, lnl, alpha = self.common.parseString(s).asList()
    except ParseException as err:
        logger.error(err)
    if model == 'JC69':
        # JC69 fixes equal frequencies and a single rate class.
        freq = [0.25, 0.25, 0.25, 0.25]
        rates = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
    elif model == 'K80':
        # K80: equal frequencies, one transition/transversion ratio.
        freq = [0.25, 0.25, 0.25, 0.25]
        try:
            tstv = self.tstv.parseString(s).asList()
        except ParseException as err:
            logger.error(err)
        rates = [1.0, tstv[0], 1.0, 1.0, tstv[0], 1.0]
    elif model == 'F81':
        # F81: free frequencies, equal rates.
        try:
            freq = self.freq.parseString(s).asList()
        except ParseException as err:
            logger.error(err)
        rates = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0]
    elif model == 'F84' or model == 'HKY85' or model == 'TN93':
        # These models report both ts/tv ratio(s) and frequencies.
        parser = Group(self.tstv) + Group(self.freq)
        try:
            tstv, freq = parser.parseString(s).asList()
        except ParseException as err:
            logger.error(err)
        if model == 'TN93':
            # TN93 has two distinct transition rates.
            rates = [1.0, tstv[0], 1.0, 1.0, tstv[1], 1.0]
        else:
            rates = [1.0, tstv[0], 1.0, 1.0, tstv[0], 1.0]
    elif model == 'GTR':
        # GTR reports all frequencies and all six exchange rates.
        parser = Group(self.freq) + Group(self.rates)
        try:
            freq, rates = parser.parseString(s).asList()
        except ParseException as err:
            logger.error(err)
    return model, alpha, lnl, freq, rates
def parseReactions(reaction):
    """Parse a reaction string of the form ``A() [+ B()] -> C() [+ D()] rate``.

    Returns a two-element list: the reactant names and the product names
    (each as a list); the trailing rate token is discarded.
    """
    def reaction_side():
        # One or two '()'-suffixed species names joined by '+'.
        return Group(
            Word(alphanums) + Suppress('()')
            + Optional(Suppress('+') + Word(alphanums) + Suppress('()'))
        )

    grammar = (reaction_side()
               + Suppress('->')
               + reaction_side()
               + Suppress(Word(alphanums + '()')))  # trailing rate/annotation token
    return grammar.parseString(reaction).asList()
def _read_solution(scenario,log,task_to_id,id_to_resource) : S = scenario # parse output from pyparsing import Keyword,Literal,Word,alphas,nums,printables,OneOrMore,ZeroOrMore,dblQuotedString,Group INT = Word( nums ) int_row = Group( INT + Literal(",").suppress() + \ INT + Literal(",").suppress() + \ INT + Literal(";").suppress() ) plan = Group( Group( ZeroOrMore(int_row) ) ) start_str, end_str = '##START_SOLUTION##', '##END_SOLUTION##' start_i, end_i = log.index(start_str)+len(start_str), log.index(end_str) opl_plan = plan.parseString(log[start_i:end_i]) int_plan = opl_plan[0][0] # get starts and resource assignments starts = dict() assign = dict() for row in int_plan : task_id = int(row[0]) starts[task_id] = int(row[2]) if task_id not in assign : assign[task_id] = list() assign[task_id].append(int(row[1])) # add to scenario for T in S.tasks() : T.start = starts[task_to_id[T]] #second column is start if T.resources is None : T.resources = list() T.resources += [ id_to_resource[j] for j in assign[task_to_id[T]] ]
def _get_infos_from_file(self, info):
    """Parse objdump-style section listings from *info.content*.

    Returns the pyparsing results under 'contents', one entry per object
    file with its section names under 'sections'.  Raises ParseException
    with the file name prepended if the text cannot be parsed.
    """
    # {object}: file format elf32-xtensa-le
    object_line = SkipTo(':').setResultsName('object') + Suppress(restOfLine)
    # Sections:
    # Idx Name ...
    section_start = Suppress(Literal('Sections:'))
    section_header = Suppress(OneOrMore(Word(alphas)))
    # 00 {section} 0000000 ...
    # CONTENTS, ALLOC, ....
    section_entry = Suppress(Word(nums)) + SkipTo(' ') + Suppress(restOfLine) + \
                    Suppress(ZeroOrMore(Word(alphas) + Literal(',')) + Word(alphas))
    content = Group(object_line + section_start + section_header +
                    Group(OneOrMore(section_entry)).setResultsName('sections'))
    parser = Group(ZeroOrMore(content)).setResultsName('contents')
    results = None
    try:
        # parseAll=True: reject trailing unparsed text rather than ignoring it.
        results = parser.parseString(info.content, parseAll=True)
    except ParseException as p:
        raise ParseException('Unable to parse section info file ' + info.filename + '. ' + p.msg)
    return results
def _get_infos_from_file(self, info):
    """Parse an objdump sections-table dump into per-object section lists.

    Expects repeated blocks of an object-file header line followed by the
    'Sections:' table; returns results under 'contents' with each object's
    section names under 'sections'.
    """
    # Object file line: '{object}: file format elf32-xtensa-le'
    object = Fragment.ENTITY.setResultsName("object") + Literal(":").suppress() + Literal("file format elf32-xtensa-le").suppress()
    # Sections table
    header = Suppress(Literal("Sections:") + Literal("Idx") + Literal("Name") + Literal("Size") + Literal("VMA") + Literal("LMA") + Literal("File off") + Literal("Algn"))
    # Row: index, name, then the 8-hex-digit size/address columns,
    # alignment (e.g. 2**2) and the uppercase flag list (CONTENTS, ALLOC, ...).
    entry = Word(nums).suppress() + Fragment.ENTITY + Suppress(OneOrMore(Word(alphanums, exact=8)) + Word(nums + "*") + ZeroOrMore(Word(alphas.upper()) + Optional(Literal(","))))
    # Content is object file line + sections table
    content = Group(object + header + Group(ZeroOrMore(entry)).setResultsName("sections"))
    parser = Group(ZeroOrMore(content)).setResultsName("contents")
    sections_info_text = info.content
    results = None
    try:
        results = parser.parseString(sections_info_text)
    except ParseException as p:
        # Re-raise with the offending file name for easier diagnosis.
        raise ParseException("Unable to parse section info file " + info.filename + ". " + p.msg)
    return results
def parse_showhdinfo(stdout, stderr):
    """Parse ``VBoxManage showhdinfo`` output into a dict.

    :param stdout: captured standard output of the command
    :param stderr: captured standard error (unused)
    :return: dict with keys uuid, accessible, logical_size, current_size,
             type, storage_format, format_variant, location
    """
    # NOTE: Word('UUID:') matches a run of those *characters*, not the
    # literal phrase — kept as-is to preserve existing behaviour.
    uuid_prefix = Suppress(Word('UUID:'))
    id_uuid = Word(alphanums + '-').setResultsName('uuid')
    accessible_prefix = Suppress(Word('Accessible:'))
    id_accessible = Word(alphas).setResultsName('accessible')
    logical_size_prefix = Suppress(Word('Logical size:'))
    id_logical_size = Word(alphanums + ' ').setResultsName('logical_size')
    current_size_prefix = Suppress(Word('Current size on disk:'))
    id_current_size = Word(alphanums + ' ').setResultsName('current_size')
    type_prefix = Suppress(Word('Type:'))
    id_type = Word(alphas + ' ()').setResultsName('type')
    prefix_storage_format = Suppress(Word('Storage format:'))
    id_storage_format = Word(alphas).setResultsName('storage_format')
    prefix_format_variant = Suppress(Word('Format variant:'))
    id_format_variant = Word(alphanums + ' ').setResultsName('format_variant')
    prefix_location = Suppress(Word('Location:'))
    id_location = Word(alphanums + ' /.').setResultsName('location')
    hd_info = Group(uuid_prefix + id_uuid + EOL +
                    accessible_prefix + id_accessible + EOL +
                    logical_size_prefix + id_logical_size + EOL +
                    current_size_prefix + id_current_size + EOL +
                    type_prefix + id_type + EOL +
                    prefix_storage_format + id_storage_format + EOL +
                    prefix_format_variant + id_format_variant + EOL +
                    prefix_location + id_location + EOL)
    out = hd_info.parseString(stdout)[0]
    return {'uuid': out.uuid,
            'accessible': out.accessible,
            'logical_size': out.logical_size,
            'current_size': out.current_size,
            'type': out.type,
            'storage_format': out.storage_format,
            # Bug fix: was `out.storage_variant`, a results name that was
            # never defined, so the value silently came back empty.
            'format_variant': out.format_variant,
            'location': out.location}
def process(path):
    """Read a rules+messages file, build a grammar from the rules, and
    report which messages match the part-2 pattern
    (one-or-more rule-42 matches followed by strictly fewer rule-31 matches).

    :param path: path to the input file (rules, then messages)
    """
    print("Input:", path)
    with open(path) as f:
        rules = []
        messages = []
        for line in f:
            if line.strip():
                spec = Line.parseString(line)
                # Lines parsed as rules carry a 'rule' results name;
                # everything else is a message to classify.
                if "rule" in spec:
                    rules.append(spec)
                else:
                    messages.append(spec[0])
    grammar = build_grammar(rules)
    valid = []
    invalid = []
    part_2_grammar = Group(OneOrMore(Group(grammar[42]))) + Group(
        OneOrMore(Group(grammar[31])))
    for message in messages:
        try:
            result = part_2_grammar.parseString(message, parseAll=True)
            # Valid only when there are more 42-matches than 31-matches.
            if len(result[1]) < len(result[0]):
                valid.append(message)
        # Bug fix: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit; narrowed to Exception.
        except Exception:
            invalid.append(message)
    print("\tNumber Valid:", len(valid))
    print(valid)
    print(invalid)
def parse_connection_str(connstr):
    """Parse a node-pipeline connection string into a parse tree.

    Syntax: ``head(outputs) -> middle(inputs|outputs) -> ... -> tail(inputs)``,
    with multiple chains separated by ';'.  The tree is validated with
    check_numconnections() before being returned.
    """
    ## Grammar for connection syntax
    digit_chars = "0123456789"
    extra_chars = "_.@"
    identifier = Word(alphas + digit_chars + extra_chars)
    node_name = identifier.setResultsName('nodename')
    outputs = delimitedList(identifier).setResultsName('outputnames')
    inputs = delimitedList(identifier).setResultsName('inputnames')

    # Middle nodes declare inputs and, optionally, outputs after '|'.
    middle = Group(
        node_name + Suppress('(') + inputs + Optional("|" + outputs) + Suppress(")")
    ).setResultsName('middlenode')

    # The first node declares only outputs; the last only inputs.
    head = (node_name + Suppress("(") + outputs + Suppress(")")).setResultsName('headnode')
    tail = (node_name + Suppress("(") + inputs + Suppress(")")).setResultsName('tailnode')

    # head -> [middle ->]* tail
    chain = Group(
        head
        + Group(ZeroOrMore(Suppress("->") + middle + FollowedBy("->"))).setResultsName('middlenodes')
        + Suppress("->")
        + tail
    ).setResultsName('nodes')
    chains = Group(chain + ZeroOrMore(Suppress(";") + chain)).setResultsName('connects')

    parsed = chains.parseString(connstr)
    check_numconnections(parsed)
    return parsed
class ListedVerses(BaseFormatClass):
    """Formatter for files whose verses are listed with a leading number,
    e.g. ``1. In the beginning ...`` — the number is the identifier."""

    def __init__(self, stream):
        BaseFormatClass.__init__(self, stream)
        logging.debug("Initializing ListedVerses...")
        # NOTE: self.verse_word is defined in the base class
        # NOTE: self.verse is defined in the base class
        # Grammers defined here
        # The verses for this formatter are listed numerically
        # The identifier this formatter will use the number.
        self.number = Word(nums + ".")
        # This is the dataset we will mainly be operating on, since
        # it will contain the verse and the identifier
        self.verse_reference = Group(self.number) + Group(self.verse)

    # Must return a dataset with these
    def get_result(self):
        """Yield self.result for each input line with 'body', 'identifier'
        and (empty) 'extras' filled in.  Note: the same dict object is
        re-yielded each iteration."""
        # Reading input stream line by line
        for line in self.stream:
            logging.debug("Parsing line {}".format(line))
            parsed_line = self.verse_reference.parseString(line)
            self.result['body'] = parsed_line[1]
            logging.debug("Body for this line is: {}".format(
                self.result['body']))
            # The identifier for each line when this file format is used
            # is the prepended number (The verse number)
            self.result['identifier'] = ''.join(parsed_line[0])
            logging.debug("Identifier for this line is {}".format(
                self.result['identifier']))
            # There are no extras for this formatter
            self.result['extras'] = {}
            yield self.result
def parse_connection_str(connstr):
    """Parse a node-pipeline connection string of the form
    ``head(outputs) -> middle(inputs|outputs) -> ... -> tail(inputs)``,
    with multiple chains separated by ';'.  Returns the pyparsing tree
    after validating it with check_numconnections()."""
    ## Grammar for connection syntax
    digits="0123456789"
    othervalid="_.@"
    identifier= Word(alphas+digits+othervalid)
    nodename=identifier.setResultsName('nodename')
    outputnames = delimitedList( identifier ).setResultsName('outputnames')
    inputnames = delimitedList( identifier ).setResultsName('inputnames')
    # middle nodes have both inputs and outputs
    middlenode= Group( nodename + Suppress('(') + inputnames
                       + Optional( "|" + outputnames)
                       + Suppress(")") ).setResultsName('middlenode')
    # first node has only outputs
    headnode = (nodename + Suppress("(") + outputnames
                + Suppress(")")).setResultsName('headnode')
    # last node has only inputs
    tailnode = (nodename + Suppress("(") + inputnames
                + Suppress(")")).setResultsName('tailnode')
    # connect head -> [middle ->] tail
    connect= Group( headnode
                    + Group(ZeroOrMore(Suppress("->") \
                                       + middlenode + FollowedBy("->") )).setResultsName('middlenodes')
                    + Suppress("->")+tailnode).setResultsName('nodes')
    connectlist = Group( connect
                         + ZeroOrMore( Suppress(";")\
                                       + connect )).setResultsName('connects')
    parsed=connectlist.parseString(connstr)
    # Semantic validation (connection counts) happens outside the grammar.
    check_numconnections(parsed)
    return parsed
class OfcParser:
    """Dirt-simple OFC parser for interpreting OFC documents."""
    def __init__(self, debug=False):
        # An aggregate is <TAG> ... </TAG> containing nested aggregates
        # and/or leaf content; Forward allows the recursive definition.
        aggregate = Forward().setResultsName("OFC")
        aggregate_open_tag, aggregate_close_tag = self._tag()
        content_open_tag = self._tag(closed=False)
        # Leaf: an open tag followed by its text value on the same line.
        content = Group(content_open_tag + CharsNotIn("<\r\n"))
        aggregate << Group(aggregate_open_tag \
                           + Dict(OneOrMore(aggregate | content)) \
                           + aggregate_close_tag)
        self.parser = Group(aggregate).setResultsName("document")
        if (debug):
            self.parser.setDebugActions(ofxtools._ofxtoolsStartDebugAction,
                                        ofxtools._ofxtoolsSuccessDebugAction,
                                        ofxtools._ofxtoolsExceptionDebugAction)

    def _tag(self, closed=True):
        """Generate parser definitions for OFX tags."""
        openTag = Literal("<").suppress() + Word(alphanums + ".") \
                  + Literal(">").suppress()
        if (closed):
            # Closing tag (plus trailing whitespace) is consumed silently.
            closeTag = Group("</" + Word(alphanums + ".") + ">"
                             + ZeroOrMore(White())).suppress()
            return openTag, closeTag
        else:
            return openTag

    def parse(self, ofc):
        """Parse a string argument and return a tree structure representing
        the parsed document."""
        return self.parser.parseString(ofc).asDict()
def _get_infos_from_file(self, info):
    """Parse an objdump sections-table dump into per-object section lists.

    :param info: object with .content (text to parse) and .filename
    :return: pyparsing results under 'contents'; each entry has 'object'
             and a 'sections' list of section names
    :raises ParseException: with the file name prepended on parse failure
    """
    # Object file line: '{object}: file format elf32-xtensa-le'
    object = Fragment.ENTITY.setResultsName("object") + Literal(":").suppress() + Literal("file format elf32-xtensa-le").suppress()
    # Sections table
    header = Suppress(Literal("Sections:") + Literal("Idx") + Literal("Name") + Literal("Size") + Literal("VMA") + Literal("LMA") + Literal("File off") + Literal("Algn"))
    # Row: index, name, 8-hex-digit size/address columns, alignment
    # (e.g. 2**2) and the uppercase flag list (CONTENTS, ALLOC, ...).
    entry = Word(nums).suppress() + Fragment.ENTITY + Suppress(OneOrMore(Word(alphanums, exact=8)) + Word(nums + "*") + ZeroOrMore(Word(alphas.upper()) + Optional(Literal(","))))
    # Content is object file line + sections table
    content = Group(object + header + Group(ZeroOrMore(entry)).setResultsName("sections"))
    parser = Group(ZeroOrMore(content)).setResultsName("contents")
    sections_info_text = info.content
    results = None
    try:
        results = parser.parseString(sections_info_text)
    except ParseException as p:
        # Bug fix: pyparsing's ParseException exposes `.msg`, not
        # `.message`; the old attribute raised AttributeError and masked
        # the real parse error.
        raise ParseException("Unable to parse section info file " + info.filename + ". " + p.msg)
    return results
def parseReactions(reaction):
    """Parse a BNGL-style species string into nested lists.

    A species is dot-separated molecules; each molecule is a name with an
    optional parenthesised, comma-separated component list; a component may
    carry a '~state' and/or a '!bond' suffix.
    """
    component = (Word(alphanums + "_")
                 + Optional(Group('~' + Word(alphanums + "_")))   # state label
                 + Optional(Group('!' + Word(alphanums + '+?')))) # bond label
    molecule = (Word(alphanums + "_")
                + Optional(Suppress('(')
                           + Group(component)
                           + ZeroOrMore(Suppress(',') + Group(component))
                           + Suppress(')')))
    species_expr = Group(molecule) + ZeroOrMore(Suppress('.') + Group(molecule))
    return species_expr.parseString(reaction).asList()
def read_tgf(path):
    """Generates an alias.ArgumentationFramework from a Trivial Graph Format (.tgf) file.

    Trivial Graph Format (TGF) is a simple text-based file format for describing graphs. \
    It consists of a list of node definitions, which map node IDs to labels, followed by \
    a list of edges, which specify node pairs and an optional edge label. \
    Node IDs can be arbitrary identifiers, whereas labels for both nodes and edges are plain strings.

    Parameters
    ----------
    path : file or string
        File, directory or filename to be read.

    Returns
    -------
    framework : alias ArgumentationFramework

    References
    ----------
    http://en.wikipedia.org/wiki/Trivial_Graph_Format
    """
    try:
        from pyparsing import Word, alphanums, ZeroOrMore, White, Suppress, Group, ParseException, Optional
    except ImportError:
        raise ImportError("read_tgf requires pyparsing")

    if not isinstance(path, str):
        return

    # Define tgf grammar
    s = White(" ")
    tag = Word(alphanums)
    arg = Word(alphanums)
    att = Group(arg + Suppress(s) + arg + Optional(Suppress(s) + tag))
    nl = Suppress(White("\n"))
    graph = Group(ZeroOrMore(arg + nl)) + Suppress("#") + nl + Group(ZeroOrMore(att + nl) + ZeroOrMore(att))

    f = open(path, 'r')
    f = f.read()

    head, tail = ntpath.split(path)
    framework = al.ArgumentationFramework(tail)
    try:
        parsed = graph.parseString(f)
    # Bug fix: `except ParseException, e:` is Python-2-only syntax and is a
    # SyntaxError on Python 3.
    except ParseException as e:
        raise al.ParsingException(e)
    # Bug fix: the parse result was previously discarded and the function
    # implicitly returned None; populate and return the framework
    # (arguments first, then attacks), mirroring the sibling implementation.
    for parsed_arg in parsed[0]:
        framework.add_argument(parsed_arg)
    for parsed_att in parsed[1]:
        framework.add_attack(parsed_att[0], parsed_att[1])
    return framework
def parseReactions(reaction):
    """Parse a BNGL-style species string into nested lists.

    A species is dot-separated molecules; each molecule is a name with an
    optional parenthesised, comma-separated component list; a component may
    carry a '~state' and/or a '!bond' suffix.

    :param reaction: species string, e.g. ``A(b~P,c!1).B(a!1)``
    :return: nested list of molecules and their components
    """
    components = (Word(alphanums + "_")
                  + Optional(Group('~' + Word(alphanums+"_")))
                  + Optional(Group('!' + Word(alphanums+'+?'))))
    # Bug fix: Optional() previously wrapped only Suppress('('), which made
    # the component list and the closing ')' mandatory — a bare molecule
    # like "A" failed to parse.  The whole parenthesised component list is
    # now optional, matching the sibling implementations.
    molecule = (Word(alphanums + "_")
                + Optional(Suppress('(') + Group(components)
                           + ZeroOrMore(Suppress(',') + Group(components))
                           + Suppress(')')))
    species = Group(molecule) + ZeroOrMore(Suppress('.') + Group(molecule))
    result = species.parseString(reaction).asList()
    return result
def parseReactions(reaction):
    """Parse a BNGL-style species string into nested lists.

    Grammar: species = molecule ('.' molecule)*;
    molecule = name ['(' component (',' component)* ')'];
    component = name ['~' state] ['!' bond].
    """
    comp_name = Word(alphanums + "_")
    state_suffix = Optional(Group("~" + Word(alphanums + "_")))
    bond_suffix = Optional(Group("!" + Word(alphanums + "+?")))
    component = comp_name + state_suffix + bond_suffix

    component_list = (Suppress("(")
                      + Group(component)
                      + ZeroOrMore(Suppress(",") + Group(component))
                      + Suppress(")"))
    molecule = Word(alphanums + "_") + Optional(component_list)

    species = Group(molecule) + ZeroOrMore(Suppress(".") + Group(molecule))
    return species.parseString(reaction).asList()
class Outline(BaseFormatClass):
    """Formatter for outline-style files: optional leading whitespace, an
    optional point identifier (e.g. ``I.``, ``a.``, ``1.``), the verse text,
    and optional trailing verse references."""

    def __init__(self, stream):
        BaseFormatClass.__init__(self, stream)
        logging.debug("Initializing Outline...")
        # Grammers defined here
        self.roman_letters = "IVX"
        self.roman_letters_small = 'ivx'
        # Outline point labels: roman numerals, letters or numbers.
        self.points = Word(self.roman_letters + alphas + nums +
                           self.roman_letters_small)
        self.point_identifier = self.points + "."
        # e.g. "3:16-18;" — chapter:verse with optional range and separator.
        self.chapter_and_verse = Word(nums) + \
            ":" + \
            Word(nums) + \
            Optional("-") + \
            Optional(Word(nums)) + \
            Optional(Word(";,"))
        # Bare verse number with optional separator.
        self.listed_verse_grammer = Word(nums) + Optional(Word(";,"))
        self.abbriv = Word(alphas + ".")
        self.verse_list = OneOrMore(self.listed_verse_grammer ^
                                    self.chapter_and_verse)
        # A reference: optional book number, book abbreviation, verse list.
        self.verses = Optional(Word(nums)) + \
            self.abbriv + \
            self.verse_list
        # Grammer to match the line on an outline
        # NOTE(review): self.verse is presumably defined in the base class
        # (as in the sibling formatters) — confirm.
        self.line_grammer = Group(Optional(White())) + \
            Group(Optional(self.point_identifier)) + \
            Group(self.verse) + \
            Optional("-") + \
            Group(ZeroOrMore(self.verses))

    def get_result(self):
        """Yield self.result per line with 'body', 'identifier' and
        'extras' (verse references + leading whitespace) filled in."""
        for line in self.stream:
            logging.debug("Parsing line {}".format(line))
            parsed_line = self.line_grammer.parseString(line)
            self.result['body'] = parsed_line[2]
            logging.debug("Body for this line is: {}".format(
                self.result['body']))
            # The identifier for each line when this file format is
            # used is the point identifier
            self.result['identifier'] = ''.join(parsed_line[1])
            logging.debug("Identifier for this line is {}".format(
                self.result['identifier']))
            self.result['extras'] = {
                'verse_references': parsed_line[-1],
                'leading_white': ''.join(parsed_line[0])
            }
            logging.debug("Extras for this line are: {}".format(
                self.result['extras']))
            yield self.result
def _extra_deps(value, entry, depends_target, exclude=None):
    """Extract bare target names from Bazel-style labels under *entry* in
    *value* and add them to the *depends_target* set.

    Labels look like ``@repo//package/path:name`` — everything before the
    final name is optional and stripped.  *exclude* skips self references.
    Unparseable entries are logged and skipped.
    """
    if entry in value:
        # Each Optional(...) strips one optional label prefix piece:
        # '@repo', '//', 'package/path', ':' — leaving only the target name.
        pattern = Group(
            Optional(Group(Literal('@') + Word(alphanums + '_-')).suppress()) +
            Optional(Literal('//').suppress()) +
            Optional(Word(alphanums + '_-/').suppress()) +
            Optional(Literal(':').suppress()) +
            Word(alphanums + '_-'))
        for dep in value.get(entry):
            try:
                extract_name = pattern.parseString(dep)[0][0]
                if extract_name != exclude:  # exclude self references
                    depends_target.update({extract_name})
            except Exception as e:
                # Best-effort: a malformed label is reported, not fatal.
                logger.warning('No valid Build content %s' % dep)
def parse_startvm(stdout, stderr):
    """Parse ``VBoxManage startvm`` output and return the started VM's UUID.

    Expects the 'Waiting for VM "<uuid>" to power on...' line followed by
    the 'VM "<uuid>" has been successfully started.' line.

    NOTE(review): Word('Waiting for VM') matches any run of those
    *characters*, not the literal phrase — Literal may have been intended;
    confirm against real command output before changing.
    """
    waiting_prefix = Word('Waiting for VM')
    waiting_uuid = UUID_STRING.setResultsName('waiting_uuid')
    waiting_postfix = Word('to power on...')
    success_prefix = Word('VM')
    success_uuid = UUID_STRING.setResultsName('success_uuid')
    success_postfix = Word("has been successfully started.")
    total = Group(waiting_prefix + DBLQUOTE + waiting_uuid + DBLQUOTE +
                  waiting_postfix + EOL +
                  success_prefix + DBLQUOTE + success_uuid + DBLQUOTE +
                  success_postfix)
    out = total.parseString(stdout)[0]
    # Only the UUID from the success line is reported back.
    return {'uuid': out.success_uuid}
def parse_config_file(filepath):
    """Parse a NetScaler configuration file line by line.

    Each line becomes a token list of command words followed by
    ``[key, value]`` option pairs (keys are '-'-prefixed in the input);
    a ['line_no', n] pair is appended to every parsed command.
    Unparseable lines are logged and skipped.

    :param filepath: path of the NetScaler input configuration
    :return: list of parsed command token lists
    """
    EOL = LineEnd().suppress()
    comment = Suppress("#") + Suppress(restOfLine) + EOL
    SOL = LineStart().suppress()
    blank_line = SOL + EOL
    result = []
    hyphen = Literal("-")
    not_hyphen_sign = ''.join(c for c in printables if c != '-')
    # A plain word must not *start* with '-' (that would be an option key).
    text = Word(not_hyphen_sign, printables)
    # Option key: leading '-' stripped from the token.
    key = Word('-', printables).setParseAction(lambda t: t[0].replace('-', '', 1))
    val = originalTextFor(Optional(ZeroOrMore(text), default=None))
    option = Group(key + val)
    multi_word_names = quotedString
    # q{...} quoted blocks are kept verbatim.
    q_obj = originalTextFor(Keyword('q{') + SkipTo(Keyword("}")))
    command = Group(
        OneOrMore(q_obj | multi_word_names | text) + ZeroOrMore(option))
    command.ignore(comment | blank_line)
    with open(filepath) as infile:
        line_no = 1
        print("Parsing Input Configuration...")
        lines = infile.readlines()
        total_lines = len(lines)
        for line in lines:
            try:
                tmp = command.parseString(line)
                tokens = tmp.asList()
                if tokens:
                    tokens[0].append(['line_no', str(line_no)])
                result += tokens
                line_no += 1
            except Exception as exception:
                # Best-effort: report the bad line and continue.
                line_no += 1
                LOG.error("Parsing error: " + line)
            msg = "Parsing started..."
            if line_no <= total_lines:
                ns_util.print_progress_bar(line_no, total_lines, msg,
                                           prefix='Progress', suffix='')
    return result
def parse_createvm(stdout, stderr):
    """Parse ``VBoxManage createvm`` output into a dict.

    :param stdout: captured standard output of the command
    :param stderr: captured standard error (unused)
    :return: dict with keys 'name', 'uuid', 'file_path'
    """
    single_quote = Suppress(Literal('\''))
    name_prefix = Suppress(Word('Virtual machine'))
    id_name = Word(alphanums).setResultsName('name')
    name_postfix = Suppress(Word('is created and registered.'))
    uuid_prefix = Suppress(Word('UUID:'))
    # Fix: made the srange pattern a raw string — '\-' is an invalid escape
    # sequence in a normal literal (SyntaxWarning on modern CPython).
    id_vm_uuid = Word(srange(r"[a-zA-Z0-9_\-]")).setResultsName('uuid')
    file_prefix = Suppress(Word('Settings file:'))
    id_file_path = Word(alphanums + " /.").setResultsName('file_path')
    vm_info = Group(name_prefix + single_quote + id_name + single_quote +
                    name_postfix + EOL +
                    uuid_prefix + id_vm_uuid + EOL +
                    file_prefix + single_quote + id_file_path +
                    single_quote + EOL)
    out = vm_info.parseString(stdout)[0]
    return {'name': out.name, 'uuid': out.uuid, 'file_path': out.file_path}
def parse_p4(spec):
    """Parse a P4 description into headers, parsers and actions.

    Sub-grammars come from p4_header()/p4_parser()/p4_action(); the '&'
    (Each) combination lets the sections appear in any order.

    :param spec: P4 source text
    :return: dict with 'headers', 'parsers', 'actions' lists
    :raises Exception: if a parsed group matches none of the categories
    """
    p4header = p4_header()
    p4parser, p4starter = p4_parser()
    p4action = p4_action()
    p4 = Group(ZeroOrMore(p4header) & ZeroOrMore(p4action) &
               ZeroOrMore(p4parser) & Optional(p4starter))
    parsed = p4.parseString(spec)[0]
    grouped = {'headers':[], 'parsers':[], 'actions':[]}
    for group in parsed:
        # NOTE(review): 'header' is tested on the group itself (results-name
        # lookup) while 'parser'/'action' are tested on asList() (token
        # membership) — confirm the asymmetry is intentional.
        if 'header' in group:
            grouped['headers'].append(group)
        elif 'parser' in group.asList():
            # Drop the leading keyword token, keep the body.
            grouped['parsers'].append(group[1:])
        elif 'action' in group.asList():
            grouped['actions'].append(group[1:])
        else:
            raise Exception("Part of P4 description not categoriased!")
    return grouped
def parsing(input): parse_time = Suppress("[") + Word("[" + "-" + nums) + Word(nums + ":" + "." + "+") + Suppress("]") # Правило для парсинга уникалного номера parse_number = Suppress("[") + Word(nums) + Suppress("]") + Suppress( Word(":")) # Правило для парсинга события, состояещего только из слов( например,pdaemon is already running) parse_only_alphas_event = OneOrMore(Word(alphas)) # Правило для парсинга события,состоящего из всякого( # например, /var/www/mooc-linux-programming/status/task exists: 9eb35974-c960-43e6-a8fe-1a8fc7d5a1bf) parse_mooc_event = Word("/" + alphas + "-" + ":") + Word(alphas + ":") + Word(alphas + nums + "-") # Правило для парсинга событий лога любого типа parse_event = ZeroOrMore(parse_only_alphas_event) + ZeroOrMore( parse_mooc_event) # Конечное правило для парсинга строк. Составляюие даты сгруппируются в 1 список, # уникальный номер во второй список,событие в третий список parse_module = Group(parse_time) + Group(parse_number) + Group(parse_event) # Результат парсинга( parse_result = parse_module.parseString(input) # Список, содержащий результаты разбора одной строки single_list = [] """ Описание цикла: Для каждого элемента,кроме последнего, каждого списка добавить пробел(при парсинге он теряется). Затем элементы каждого отдельного списка собираются в отдельную строку. Затем каждая строка(всего их 3 добавляется в список). Итог: Входные данные:строка,которую нам нужно разобрать ( например,[2016-05-14 17:25:01.642065713+03:00][4281]:pdaemon is already running.) Выходные данные: Список,состоящий из 3-х строк-элементов """ for elem in parse_result: elem[len(elem) - 1] = elem[len(elem) - 1][:15] for elem in parse_result: for i in range(len(elem) - 1): elem[i] += " " complete_string = '' for x in elem: complete_string += x single_list.append(complete_string) # single_list[0] = datetime.strptime(single_list[0], '%Y-%m-%d %H:%M:%S.%f') return single_list
class CommandLineParser(object):
    """Parse a shell-like command line: a command word followed by
    positional values, ``-key:value``/``--key=value`` arguments and bare
    ``-flag``/``--flag`` switches."""

    def __init__(self):
        dashes = Word("-", max=2)          # '-' or '--'
        separator = oneOf(": =")
        quoted_value = quotedString.setParseAction(removeQuotes)
        bare_value = Regex("[a-zA-Z0-9_\./]+")
        value = (quoted_value | bare_value)

        positional = value
        keyed_argument = Combine(dashes + Word(alphas) + separator + value)
        flag_argument = Combine(dashes + Word(alphas))

        argument_list = ZeroOrMore(positional | keyed_argument | flag_argument)
        self.parser = Group(Word(alphas) + argument_list).setResultsName("command")

    def parseString(self, querystring):
        """Return the pyparsing results for *querystring*."""
        return self.parser.parseString(querystring)
def parse_config_file(filepath):
    """Parse a NetScaler configuration file line by line (Python 2 —
    note the print statement).

    Each line becomes a token list of command words followed by
    ``[key, value]`` option pairs; a ['line_no', n] pair is appended to
    every parsed command.  Unparseable lines are logged and skipped.

    :param filepath: path of the NetScaler input configuration
    :return: list of parsed command token lists
    """
    EOL = LineEnd().suppress()
    comment = Suppress("#") + Suppress(restOfLine) + EOL
    SOL = LineStart().suppress()
    blank_line = SOL + EOL
    result = []
    hyphen = Literal("-")
    not_hyphen_sign = ''.join(c for c in printables if c != '-')
    # A plain word must not *start* with '-' (that would be an option key).
    text = Word(not_hyphen_sign, printables)
    # Option key: leading '-' stripped from the token.
    key = Word('-', printables).setParseAction(
        lambda t: t[0].replace('-', '', 1))
    val = originalTextFor(Optional(ZeroOrMore(text), default=None))
    option = Group(key + val)
    multi_word_names = quotedString
    # q{...} quoted blocks are kept verbatim.
    q_obj = originalTextFor(Keyword('q{')+SkipTo(Keyword("}")))
    command = Group(OneOrMore(q_obj | multi_word_names | text) +
                    ZeroOrMore(option))
    command.ignore(comment | blank_line)
    with open(filepath) as infile:
        line_no = 1
        print "Parsing Input Configuration..."
        lines = infile.readlines()
        total_lines = len(lines)
        for line in lines:
            try:
                tmp = command.parseString(line)
                tokens = tmp.asList()
                if tokens:
                    tokens[0].append(['line_no', str(line_no)])
                result += tokens
                line_no += 1
            except Exception as exception:
                # Best-effort: report the bad line and continue.
                line_no += 1
                LOG.error("Parsing error: " + line)
            msg = "Parsing started..."
            if line_no <= total_lines:
                ns_util.print_progress_bar(line_no, total_lines, msg,
                                           prefix='Progress', suffix='')
    return result
def _read_solution(scenario, log, task_to_id, id_to_resource, msg=0): S = scenario # parse output from pyparsing import Keyword, Literal, Word, alphas, nums, printables, OneOrMore, ZeroOrMore, dblQuotedString, Group INT = Word(nums) int_row = Group( INT + Literal(",").suppress() + \ INT + Literal(",").suppress() + \ INT + Literal(";").suppress() ) plan = Group(Group(ZeroOrMore(int_row))) try: start_str, end_str = '##START_SOLUTION##', '##END_SOLUTION##' start_i, end_i = log.index(start_str) + len(start_str), log.index( end_str) except: print(log) if msg: print('ERROR: no solution found') return 0 opl_plan = plan.parseString(log[start_i:end_i]) int_plan = opl_plan[0][0] # get starts and resource assignments starts = dict() assign = dict() for row in int_plan: task_id = int(row[0]) starts[task_id] = int(row[2]) if task_id not in assign: assign[task_id] = list() assign[task_id].append(int(row[1])) # add to scenario for T in S.tasks(): T.start_value = starts[task_to_id[T]] if T.resources is None: T.resources = list() T.resources = [id_to_resource[j] for j in assign[task_to_id[T]]] return 1
class ReferencedVerses(BaseFormatClass):
    """Formatter for files whose lines end with a verse reference, e.g.
    ``<verse text> - 1 John 3:16`` — the reference is the identifier."""

    def __init__(self, stream):
        BaseFormatClass.__init__(self, stream)
        logging.debug("Initializing ReferencedVerses...")
        # Grammers defined here
        #NOTE: self.verse_word is defined in the base class
        #NOTE: self.verse is defined in the base class
        self.book_num = Optional(Word(nums, exact=1))   # e.g. the "1" in "1 John"
        self.book_name = Word(alphas)
        self.book_chapter = Word(nums)
        self.book_verse = Word(nums)
        self.reference = (self.book_num + self.book_name +
                          Group(self.book_chapter + ":" + self.book_verse))
        # This is the dataset we will mainly be operating on, since
        # it will contain the verse and the reference seperated out
        self.verse_reference = Group(self.verse) + Suppress('-') + Group(
            self.reference)

    # Must return a dataset with these
    def get_result(self):
        """Yield self.result per line with 'body', 'identifier' and
        'extras' filled in.  Note: the same dict object is re-yielded
        each iteration."""
        # Reading input stream line by line
        for line in self.stream:
            logging.debug("Parsing line: {}".format(line))
            parsed_line = self.verse_reference.parseString(line)
            self.result['body'] = parsed_line[0]
            logging.debug("Body for this line is: {}".format(
                self.result['body']))
            # The identifier for each line when this file format is used
            # is the verse reference
            self.result['identifier'] = "{} {}".format(
                ' '.join(parsed_line[1][:-1]),
                ''.join(parsed_line[1][-1]))
            logging.debug("Identifier for this line is {}".format(
                self.result['identifier']))
            # The extra attribute 'identifier_position' is so that the
            # consumer has a way of knowing if the identifer should
            # go before the line starts, or after the line starts.
            self.result['extras'] = {'identifier_position': 'off'}
            self.result['extras']['verse_references'] = {
                self.result['identifier']
            }
            yield self.result
def parse_pabl(self, raw_pabl):
    """Parse PABL text into a parse tree.

    The grammar is indentation-sensitive: INDENT/UNDENT fire the
    instance's check_sub_indent / check_peer_indent / unindent callbacks
    to track nesting.  Statements are @item blocks, @permissions blocks,
    or ';'-terminated field lists ('var' or 'var as name').
    """
    INDENT = lineEnd.suppress() + empty + empty.copy().setParseAction(
        self.check_sub_indent)
    UNDENT = FollowedBy(empty).setParseAction(self.check_unindent)
    # NOTE(review): this second setParseAction *replaces* the
    # check_unindent action set on the line above (setParseAction is not
    # cumulative) — addParseAction may have been intended; confirm.
    UNDENT.setParseAction(self.unindent)
    terminator = Literal(';').suppress()
    comment = Literal('#') + restOfLine
    item_name = Word(alphas, alphanums + '_')
    variable = Word(alphas, alphanums + '_.')
    variable_as = (variable + 'as' + item_name)

    stmt = Forward()
    # A suite is one-or-more statements at the same indentation level.
    suite = Group(
        OneOrMore(empty + stmt.setParseAction(self.check_peer_indent)))
    suite.ignore(comment)

    item_start = Literal('@item').suppress()
    item_end = Literal(':').suppress()
    permission_start = Literal('@permissions')

    item_decl = (item_start + item_name.setResultsName('item') + item_end)
    item_defn = Group(item_decl + INDENT + suite + UNDENT)

    permission_decl = (permission_start + Group(
        delimitedList(item_name).setResultsName('permissions')) + item_end)
    permission_defn = Group(permission_decl + INDENT + suite + UNDENT)

    fieldList = delimitedList(
        Group(variable_as) | variable
    ).setResultsName('fields') + terminator

    # Recursive tie-in: a statement is an item block, a field list, or a
    # permissions block.
    stmt << (item_defn | fieldList | Group(permission_defn))

    parseTree = suite.parseString(raw_pabl)
    return parseTree
def read_tgf(path):
    """Build an ArgumentationFramework from a Trivial Graph Format (.tgf)
    file: argument ids, a '#' separator line, then 'attacker attacked
    [label]' attack lines."""
    try:
        from pyparsing import Word, alphanums, ZeroOrMore, White, Suppress, Group, ParseException, Optional
    except ImportError:
        raise ImportError("read_tgf requires pyparsing")

    if not isinstance(path, str):
        return

    # Define tgf grammar
    s = White(" ")
    tag = Word(alphanums)
    arg = Word(alphanums)
    # Attack: two argument ids with an optional trailing label.
    att = Group(arg + Suppress(s) + arg + Optional(Suppress(s) + tag))
    nl = Suppress(White("\n"))
    graph = Group(ZeroOrMore(arg + nl)) + Suppress("#") + nl + Group(
        ZeroOrMore(att + nl) + ZeroOrMore(att))

    f = open(path, 'r')
    f = f.read()

    # The framework is named after the file's base name.
    head, tail = ntpath.split(path)
    framework = ArgumentationFramework(tail)
    try:
        parsed = graph.parseString(f)
    except ParseException as e:
        raise e
    # parsed[0] holds argument ids; parsed[1] holds (attacker, attacked) pairs.
    for arg in parsed[0]:
        framework.add_argument(arg)
    for att in parsed[1]:
        framework.add_attack(att[0], att[1])
    return framework
class JSONCommandLineParser(object):
    """Parse a command line that is followed by a JSON payload: the command
    and its arguments are parsed normally, and everything from the first
    '{' or '[' onward is returned verbatim as 'filedata'."""

    def __init__(self):
        dash = Word("-",max=2)
        operator = oneOf(": =")
        argValueType1 = quotedString.setParseAction(removeQuotes)
        argValueType2 = Regex("[a-zA-Z0-9_\./]+")

        positionalArgument = (argValueType1 | argValueType2)
        regularArgument = Combine(dash + Word(alphas) + operator +
                                  (argValueType1 | argValueType2))
        novalueArgument = Combine(dash + Word(alphas))
        arguments = ZeroOrMore(positionalArgument | regularArgument |
                               novalueArgument)
        # SkipTo stops at the start of the JSON; its parse action swaps the
        # skipped text for the raw remainder of the input string.
        self.parser = Group(Word(alphas) + arguments).setResultsName("command") + SkipTo(Word("{[")).setParseAction(self.jsonData)

    def jsonData(self, commandtext, location, tokens):
        """Parse action: return the raw input from the end of the skipped
        span to the end of the string (the JSON payload)."""
        startindex = location + len("".join(tokens))
        return commandtext[startindex:]

    def parseString(self, querystring):
        """Return {'command': parsed command group, 'filedata': raw JSON}."""
        result = self.parser.parseString(querystring)
        return {"command":result.command, "filedata":result[1]}
class SExpressionParser(object):
    """Parse s-expressions into SExpression / SExpressionList objects.
    Atoms are unquoted words or quoted strings; composites are
    parenthesised sequences of expressions (recursively)."""

    def __init__(self):
        self.lpar = Literal('(')
        self.rpar = Literal(')')
        # NOTE: ('()') is a *string*, not a tuple — per-character membership
        # still excludes both parens, so the result is correct.
        self.word_chars = ''.join(c for c in printables
                                  if c not in ('()'))
        self.word = Word(self.word_chars) | quotedString
        self.atom = self.word
        # Forward enables the recursive composite definition below.
        self.expression = Forward()
        self.composite_expression = (
            Suppress(self.lpar) +
            ZeroOrMore(self.expression) +
            Suppress(self.rpar))('composite_expression')
        self.composite_expression.addParseAction(
            self._composite_expression_to_tuple)
        self.expression << (self.atom | self.composite_expression)
        self.expressions = Group(ZeroOrMore(self.expression))('expressions')
        self.expressions.addParseAction(self._expressions_to_tuple)

    def parse_expression(self, instring):
        """Parse a single expression; the whole input must be consumed."""
        return self.expression.parseString(instring, parseAll=True)[0]

    def parse_expressions(self, instring):
        """Parse a sequence of expressions; the whole input must be consumed."""
        return self.expressions.parseString(instring, parseAll=True)[0]

    @staticmethod
    def _composite_expression_to_tuple(toks):
        # Parse action: wrap a composite's tokens in an SExpression.
        return SExpression(toks.composite_expression)

    @staticmethod
    def _expressions_to_tuple(toks):
        # Parse action: wrap the top-level sequence in an SExpressionList.
        return SExpressionList(toks.expressions)
class OfcParser:
    """Dirt-simple OFC parser for interpreting OFC documents."""

    def __init__(self, debug=False):
        # An aggregate is <TAG> ... </TAG> containing aggregates or leaf
        # content lines; the grammar is recursive via Forward().
        aggregate = Forward().setResultsName("OFC")
        aggregate_open_tag, aggregate_close_tag = self._tag()
        content_open_tag = self._tag(closed=False)
        content = Group(content_open_tag + CharsNotIn("<\r\n"))
        aggregate << Group(aggregate_open_tag
                           + Dict(OneOrMore(aggregate | content))
                           + aggregate_close_tag)
        self.parser = Group(aggregate).setResultsName("document")
        if (debug):
            self.parser.setDebugActions(ofxtools._ofxtoolsStartDebugAction,
                                        ofxtools._ofxtoolsSuccessDebugAction,
                                        ofxtools._ofxtoolsExceptionDebugAction)

    def _tag(self, closed=True):
        """Generate parser definitions for OFX tags.

        Returns (open, close) parsers when *closed*, else just the open-tag
        parser for leaf content lines.
        """
        openTag = Literal("<").suppress() + Word(alphanums + ".") \
            + Literal(">").suppress()
        if (closed):
            closeTag = Group("</" + Word(alphanums + ".") + ">"
                             + ZeroOrMore(White())).suppress()
            return openTag, closeTag
        else:
            return openTag

    def parse(self, ofc):
        """Parse a string argument and return a tree structure representing
        the parsed document."""
        ofc = self.remove_inline_closing_tags(ofc)
        try:
            return self.parser.parseString(ofc).asDict()
        except ParseException:
            # best-effort repair for malformed bank output, then retry
            fixed_ofc = self.fix_ofc(ofc)
            return self.parser.parseString(fixed_ofc).asDict()

    def remove_inline_closing_tags(self, ofc):
        """Fix an OFC, by removing inline closing 'tags'."""
        return re.compile(r'(\w+.*)<\/\w+>', re.UNICODE).sub(r'\1', ofc)

    def fix_ofc(self, ofc):
        """Do some magic to fix an bad OFC."""
        ofc = self._remove_bad_tags(ofc)
        ofc = self._fill_dummy_tags(ofc)
        return self._inject_tags(ofc)

    def _remove_bad_tags(self, ofc):
        # TRNRS wrappers and CLTID values confuse the grammar; drop them
        ofc_without_trnrs = re.sub(r'<[/]*TRNRS>', '', ofc)
        return re.sub(r'<[/]*CLTID>\w+', '', ofc_without_trnrs)

    def _fill_dummy_tags(self, ofc):
        # give empty FITID/CHKNUM tags a placeholder "0" value
        expression = r'(<%s>)[^\w+]'
        replacement = r'<%s>0\n'
        ofc = re.sub(expression % 'FITID', replacement % 'FITID', ofc)
        filled_ofc = re.sub(expression % 'CHKNUM', replacement % 'CHKNUM', ofc)
        return filled_ofc

    def _inject_tags(self, ofc):
        """Wrap the document in ACCTSTMT/ACCTFROM boilerplate when the bank
        omitted it."""
        tags = ("<OFC>\n<ACCTSTMT>\n<ACCTFROM>\n<BANKID>0\n<ACCTID>0\n"
                "<ACCTTYPE>0\n</ACCTFROM>\n")
        if not re.findall(r'<OFC>\w*\s*<ACCTSTMT>', ofc):
            return ofc.replace('<OFC>', tags).replace('</OFC>',
                                                      '</ACCTSTMT>\n</OFC>')
        # BUG FIX: the original fell off the end and returned None when the
        # ACCTSTMT wrapper was already present, making fix_ofc() return None
        # and the retry parse crash. Return the input unchanged instead.
        return ofc
class OfcParser:
    """Dirt-simple OFC parser for interpreting OFC documents."""

    def __init__(self, debug=False):
        # Recursive grammar: an aggregate is <TAG> ... </TAG> containing
        # nested aggregates or leaf content lines.
        aggregate = Forward().setResultsName("OFC")
        aggregate_open_tag, aggregate_close_tag = self._tag()
        content_open_tag = self._tag(closed=False)
        content = Group(content_open_tag + CharsNotIn("<\r\n"))
        aggregate << Group(aggregate_open_tag
                           + Dict(OneOrMore(aggregate | content))
                           + aggregate_close_tag)
        self.parser = Group(aggregate).setResultsName("document")
        if (debug):
            self.parser.setDebugActions(ofxtools._ofxtoolsStartDebugAction,
                                        ofxtools._ofxtoolsSuccessDebugAction,
                                        ofxtools._ofxtoolsExceptionDebugAction)

    def _tag(self, closed=True):
        """Generate parser definitions for OFX tags.

        Returns (open, close) parsers when *closed*, else just the open-tag
        parser for leaf content lines.
        """
        openTag = Literal("<").suppress() + Word(alphanums + ".") \
            + Literal(">").suppress()
        if (closed):
            closeTag = Group("</" + Word(alphanums + ".") + ">"
                             + ZeroOrMore(White())).suppress()
            return openTag, closeTag
        else:
            return openTag

    def parse(self, ofc):
        """Parse a string argument and return a tree structure representing
        the parsed document."""
        ofc = self.add_zero_to_empty_ledger_tag(ofc)
        ofc = self.remove_inline_closing_tags(ofc)
        ofc = ofxtools.util.strip_empty_tags(ofc)
        ofc = self._translate_chknum_to_checknum(ofc)
        # if you don't have a good stomach, skip this part
        # XXX: needs better solution — deep grammar recursion on large
        # documents overruns the default limit
        import sys
        sys.setrecursionlimit(5000)
        try:
            return self.parser.parseString(ofc).asDict()
        except ParseException:
            fixed_ofc = self.fix_ofc(ofc)
            return self.parser.parseString(fixed_ofc).asDict()

    def add_zero_to_empty_ledger_tag(self, ofc):
        """Fix an OFC, by adding zero to LEDGER blank tag."""
        return re.compile(r'<LEDGER>(\D*\n)', re.UNICODE).sub(r'<LEDGER>0\1',
                                                              ofc)

    def remove_inline_closing_tags(self, ofc):
        """Fix an OFC, by removing inline closing 'tags'."""
        return re.compile(r'(\w+.*)<\/\w+>', re.UNICODE).sub(r'\1', ofc)

    def fix_ofc(self, ofc):
        """Do some magic to fix an bad OFC."""
        ofc = self._remove_bad_tags(ofc)
        ofc = self._fill_dummy_tags(ofc)
        return self._inject_tags(ofc)

    def _remove_bad_tags(self, ofc):
        # TRNRS wrappers and CLTID values confuse the grammar; drop them
        ofc_without_trnrs = re.sub(r'<[/]*TRNRS>', '', ofc)
        return re.sub(r'<[/]*CLTID>\w+', '', ofc_without_trnrs)

    def _fill_dummy_tags(self, ofc):
        # give empty FITID/CHECKNUM tags a placeholder "0" value
        expression = r'(<%s>)[^\w+]'
        replacement = r'<%s>0\n'
        ofc = re.sub(expression % 'FITID', replacement % 'FITID', ofc)
        filled_ofc = re.sub(expression % 'CHECKNUM',
                            replacement % 'CHECKNUM', ofc)
        return filled_ofc

    def _translate_chknum_to_checknum(self, ofc):
        """Some banks put an CHKNUM instead of CHECKNUM. this method
        translates CHKNUM to CHECKNUM in order to parse this information
        correctly."""
        return re.sub('CHKNUM', 'CHECKNUM', ofc)

    def _inject_tags(self, ofc):
        """Wrap the document in ACCTSTMT/ACCTFROM boilerplate when the bank
        omitted it."""
        tags = ("<OFC>\n<ACCTSTMT>\n<ACCTFROM>\n<BANKID>0\n<ACCTID>0\n"
                "<ACCTTYPE>0\n</ACCTFROM>\n")
        if not re.findall(r'<OFC>\w*\s*<ACCTSTMT>', ofc):
            return ofc.replace('<OFC>', tags).replace('</OFC>',
                                                      '</ACCTSTMT>\n</OFC>')
        # BUG FIX: the original fell off the end and returned None when the
        # ACCTSTMT wrapper was already present, making fix_ofc() return None
        # and the retry parse crash. Return the input unchanged instead.
        return ofc
def import_file(self, file):
    """Read triple statements (three literals terminated by '.') from an
    open file-like object, one per line, and add each as a triple."""
    statement = Group(_literal + _literal + _literal +
                      Literal('.').suppress())
    for raw_line in file:
        parsed = statement.parseString(raw_line)
        self.add_triples(*(tuple(token) for token in parsed))
class Parser(object):
    """A parser class for solving simple data accesses and super-indexing
    data.

    :param data: Trace Object
    :type data: instance of :mod:`trappy.ftrace.BareTrace` or a child
        class (like :mod:`trappy.ftrace.FTrace`)

    :param pvars: A dictionary of variables that need to be accessed
        from within the grammar
    :type pvars: dict

    :param method: The method to be used for reindexing data
        This can be one of the standard :mod:`pandas.DataFrame`
        methods (eg. pad, bfill, nearest). The default is pad
        or use the last valid observation.
    :type method: str

    :param limit: The number of indices a value will be propagated
        when reindexing. The default is None
    :type limit: int

    :param fill: Whether to fill the NaNs in the data.
        The default value is True.
    :type fill: bool

    :param window: A tuple ``(start, end)`` of time limiting the data
        accesses to a window inside the trace (``end=None`` means end of
        trace). By default the whole trace is used.
    :type window: tuple

    :param filters: Restrict the parsing to the rows that match the
        specified criteria. For Example:
        ::

            filters = {"pid": 3338, "cpu": [0, 2, 4]}

        will only consider rows whose pid column is 3338 and cpu is
        either 0, 2 or 4.
    :type filters: dict

    - **Operators**

        +----------------+----------------------+---------------+
        | Operation      | operator             | Associativity |
        +================+======================+===============+
        | Exponentiation | \*\*                 | Left          |
        +----------------+----------------------+---------------+
        |Unary           | \-                   | Right         |
        +----------------+----------------------+---------------+
        | Multiply/Divide| \*, /, //, %         | Left          |
        +----------------+----------------------+---------------+
        | Add/Subtract   | +, \-,               | Left          |
        +----------------+----------------------+---------------+
        | Comparison     | >, <, >=, <=, ==, != | Left          |
        +----------------+----------------------+---------------+
        | Logical        | &&, ||, \|, &        | Left          |
        +----------------+----------------------+---------------+

    - **Data Accessors**

        Data is accessed as ``<event>:<column>``, where the event may be a
        statically defined class path
        (``trappy.thermal.Thermal:temp``), an alias supplied via *pvars*
        (``THERMAL:temp``), the event's registered name
        (``thermal:temp``), or a dynamically registered event class.

    .. seealso:: :mod:`trappy.dynamic.register_dynamic_ftrace`
    """

    def __init__(self, data, pvars=None, window=(0, None), filters=None,
                 **kwargs):
        if pvars is None:
            pvars = {}
        self.data = data
        self._pvars = pvars
        # accessor grammar "<event>:<column>"; side effect builds _agg_df
        self._accessor = Group(
            FUNC_NAME + COLON + IDENTIFIER).setParseAction(self._pre_process)
        self._inspect = Group(
            FUNC_NAME + COLON + IDENTIFIER).setParseAction(
                self._parse_for_info)
        self._parse_expr = get_parse_expression(
            self._parse_func, self._parse_var_id)
        self._agg_df = pd.DataFrame()
        self._pivot_set = set()
        self._limit = kwargs.get("limit", StatConf.REINDEX_LIMIT_DEFAULT)
        self._method = kwargs.get("method", StatConf.REINDEX_METHOD_DEFAULT)
        self._fill = kwargs.get("fill", StatConf.NAN_FILL_DEFAULT)
        self._window = window
        self._filters = filters

    def solve(self, expr):
        """Parse and evaluate the input expression.

        Depending on *expr* the result may be a vector
        (``"thermal:temp * 2"``), a scalar
        (``"numpy.mean(thermal:temp)"``) or a boolean mask
        (``"thermal:temp > 65000"``).

        :param expr: The input expression
        :type expr: str
        """
        # BUG FIX: the original contained two additional, unreachable
        # copies of this body after the return statement; they are removed.
        # Pre-process accessors for indexing
        self._accessor.searchString(expr)
        return self._parse_expr.parseString(expr)[0]

    def _pivot(self, cls, column):
        """Pivot Data for concatenation"""
        data_frame = self._get_data_frame(cls)
        if data_frame.empty:
            raise ValueError("No events found for {}".format(cls.name))
        data_frame = handle_duplicate_index(data_frame)
        new_index = self._agg_df.index.union(data_frame.index)
        if hasattr(cls, "pivot") and cls.pivot:
            # one sub-frame per pivot value, reindexed onto the super-index
            pivot = cls.pivot
            pivot_vals = list(np.unique(data_frame[pivot].values))
            data = {}
            for val in pivot_vals:
                data[val] = data_frame[data_frame[pivot] == val][[column]]
                if len(self._agg_df):
                    data[val] = data[val].reindex(
                        index=new_index, method=self._method,
                        limit=self._limit)
            return pd.concat(data, axis=1).swaplevel(0, 1, axis=1)
        if len(self._agg_df):
            data_frame = data_frame.reindex(
                index=new_index, method=self._method, limit=self._limit)
        return pd.concat({StatConf.GRAMMAR_DEFAULT_PIVOT:
                          data_frame[[column]]},
                         axis=1).swaplevel(0, 1, axis=1)

    def _pre_process(self, tokens):
        """Pre-process accessors for super-indexing"""
        params = tokens[0]
        if params[1] in self._agg_df.columns:
            return self._agg_df[params[1]]

        event = params[0]
        column = params[1]
        # resolve the event name: alias, registered event, or dotted path
        if event in self._pvars:
            cls = self._pvars[event]
        elif event in self.data.class_definitions:
            cls = self.data.class_definitions[event]
        else:
            try:
                cls = str_to_attr(event)
            except KeyError:
                raise ValueError(
                    "Can't find parser class for event {}".format(event))
        data_frame = self._pivot(cls, column)
        self._agg_df = pd.concat([self._agg_df, data_frame], axis=1)
        if self._fill:
            self._agg_df = self._agg_df.fillna(method="pad")
        return self._agg_df[params[1]]

    def _parse_for_info(self, tokens):
        """Parse Action for inspecting data accessors"""
        params = tokens[0]
        cls = params[0]
        column = params[1]
        info = {}
        info["pivot"] = None
        info["pivot_values"] = None
        if cls in self._pvars:
            cls = self._pvars[cls]
        elif cls in self.data.class_definitions:
            cls = self.data.class_definitions[cls]
        else:
            cls = str_to_attr(cls)
        data_frame = self._get_data_frame(cls)
        info["class"] = cls
        info["length"] = len(data_frame)
        if hasattr(cls, "pivot") and cls.pivot:
            info["pivot"] = cls.pivot
            info["pivot_values"] = list(np.unique(data_frame[cls.pivot]))
        info["column"] = column
        info["column_present"] = column in data_frame.columns
        return info

    def _parse_var_id(self, tokens):
        """A function to parse a variable identifier"""
        params = tokens[0]
        try:
            return float(params)
        except (ValueError, TypeError):
            try:
                return self._pvars[params]
            except KeyError:
                return self._agg_df[params[1]]

    def _parse_func(self, tokens):
        """A function to parse a function string"""
        params = tokens[0]
        func_name = params[0]
        if func_name in self._pvars and isinstance(
                self._pvars[func_name], types.FunctionType):
            func = self._pvars[func_name]
        else:
            func = str_to_attr(params[0])
        return func(*params[1])

    def _get_data_frame(self, cls):
        """Get the data frame from the BareTrace object, applying the
        window and the filters"""
        data_frame = getattr(self.data, cls.name).data_frame

        if data_frame.empty:
            return data_frame
        elif self._window[1] is None:
            data_frame = data_frame.loc[self._window[0]:]
        else:
            data_frame = data_frame.loc[self._window[0]:self._window[1]]

        if self._filters:
            criterion = pd.Series([True] * len(data_frame),
                                  index=data_frame.index)
            # BUG FIX (portability): .iteritems() is Python-2-only;
            # .items() behaves the same here on both Python 2 and 3.
            for filter_col, wanted_vals in self._filters.items():
                try:
                    dfr_col = data_frame[filter_col]
                except KeyError:
                    continue
                criterion &= dfr_col.isin(listify(wanted_vals))
            data_frame = data_frame[criterion]
        return data_frame

    def ref(self, mask):
        """Reference super indexed data with a boolean mask

        :param mask: A boolean :mod:`pandas.Series` that
            can be used to reference the aggregated data in the parser
        :type mask: :mod:`pandas.Series`

        :return: aggregated_data[mask]
        """
        return self._agg_df[mask]

    def inspect(self, accessor):
        """A function to inspect the accessor for information

        :param accessor: A data accessor of the format <event>:<column>
        :type accessor: str

        :return: A dictionary of information
        """
        return self._inspect.parseString(accessor)[0]
class DefinitionParser(object):
    """Parses 4lang-style lexical definitions with a pyparsing grammar and
    builds ``Machine`` graphs from the parse trees.

    Unary concepts are lowercase words, binary relations uppercase; ``[]``
    embeds definitions, ``()`` attaches a unary to a machine's 0th
    partition, ``<>`` marks defaults and ``'`` refers back to the defendum.
    """
    _str = set([str, unicode])

    lb = "["
    rb = "]"
    lp = "("
    rp = ")"
    left_defa = '<'
    right_defa = '>'

    clause_sep = ","
    part_sep = ";"
    prime = "'"
    hyphen = "-"
    langspec_pre = "$"  # starts langspec deep case
    # raw strings: the originals contained the invalid escape "\-"
    unary_p = re.compile(r"^[a-z_#\-/0-9]+(/[0-9]+)?$")
    binary_p = re.compile(r"^[A-Z_0-9]+(/[0-9]+)?$")

    def __init__(self, plur_dict):
        self.plur_dict = plur_dict
        self.init_parser()

    @classmethod
    def _is_binary(cls, s):
        # uppercase token, or the ["=", "REL"] deep-relation list
        return ((type(s) in cls._str and cls.binary_p.match(s)) or
                (type(s) is list and s[0] == deep_pre and s[1] == "REL"))

    @classmethod
    def _is_unary(cls, s):
        # lowercase token, or a prefixed/defaulted group
        return ((type(s) in cls._str and
                 cls.unary_p.match(s) is not None) or
                (type(s) is list and (
                    (s[0] == deep_pre) or
                    (s[0] == cls.langspec_pre) or
                    (s[0] == enc_pre) or
                    (s[0] == cls.left_defa))))

    @classmethod
    def _is_deep_case(cls, s):
        return s in deep_cases

    def init_parser(self):
        """Build the pyparsing grammar (stored on self)."""
        self.lb_lit = Literal(DefinitionParser.lb)
        self.rb_lit = Literal(DefinitionParser.rb)
        self.lp_lit = Literal(DefinitionParser.lp)
        self.rp_lit = Literal(DefinitionParser.rp)
        self.left_defa_lit = Literal(DefinitionParser.left_defa)
        self.right_defa_lit = Literal(DefinitionParser.right_defa)

        self.clause_sep_lit = Literal(DefinitionParser.clause_sep)
        self.part_sep_lit = Literal(DefinitionParser.part_sep)
        self.prime_lit = Literal(DefinitionParser.prime)
        self.hyphen_lit = Literal(DefinitionParser.hyphen)

        self.enc_pre_lit = Literal(enc_pre)
        self.deep_pre_lit = Literal(deep_pre)
        self.avm_pre_lit = Literal(avm_pre)
        self.langspec_pre_lit = Literal(DefinitionParser.langspec_pre)
        self.id_sep_lit = Literal(id_sep)

        self.disambig_id = self.id_sep_lit + Word(nums)

        self.deep_cases = Group(self.deep_pre_lit + Word(string.uppercase))

        self.unary = Forward()
        self.unary << (Combine(Optional("-") +
                               Word(string.lowercase + "_" + nums) +
                               Optional(self.disambig_id)) |
                       self.deep_cases |
                       Group(self.langspec_pre_lit +
                             Word(string.uppercase + "_")) |
                       Group(self.avm_pre_lit +
                             Word(string.ascii_letters + "_")) |
                       Group(self.enc_pre_lit + Word(alphanums + "_-")) |
                       Group(self.left_defa_lit + self.unary +
                             self.right_defa_lit))

        self.binary = (Combine(Word(string.uppercase + "_" + nums) +
                               Optional(self.disambig_id)) |
                       Group(self.deep_pre_lit + 'REL'))
        self.dontcare = SkipTo(LineEnd())

        # main expression
        self.expression = Forward()
        self.binexpr = Forward()
        self.unexpr = Forward()
        self.argexpr = Forward()

        # "enumerable expression"
        # D -> E | E, D
        self.definition = Group(delimitedList(
            self.expression, delim=DefinitionParser.clause_sep))
        self.expression << Group(
            # E -> UE
            (self.unexpr) ^
            # E -> BE
            (self.binexpr) ^
            # E -> U ( E )
            (self.unary + self.lp_lit + self.expression + self.rp_lit) ^
            # E -> < E >
            (self.left_defa_lit + self.expression + self.right_defa_lit)
        )
        self.binexpr << Group(
            # BE -> A B
            (self.argexpr + self.binary) ^
            # BE -> B A
            (self.binary + self.argexpr) ^
            # BE -> A B A
            (self.argexpr + self.binary + self.argexpr) ^
            # BE -> B [ E; E ]
            (self.binary + self.lb_lit + self.expression +
             self.part_sep_lit + self.expression + self.rb_lit)
        )
        self.unexpr << Group(
            # UE -> U
            (self.unary) ^
            # UE -> U [ D ]
            (self.unary + self.lb_lit + self.definition + self.rb_lit) ^
            # UE -> U ( U )
            (self.unary + self.lp_lit + self.unary + self.rp_lit)
        )
        self.argexpr << Group(
            # A -> UE
            (self.unexpr) ^
            # A -> [ D ]
            (self.lb_lit + self.definition + self.rb_lit) ^
            # A -> < A >
            (self.left_defa_lit + self.argexpr + self.right_defa_lit) ^
            # A -> '
            (self.prime_lit)
        )

        self.hu, self.pos, self.en, self.lt, self.pt = (
            Word(alphanums + "#-/_.'"),) * 5
        self.defid = Word(nums)
        self.word = Group(self.hu + self.pos + self.en + self.lt + self.pt)
        # S -> W : D | W : D % _
        #self.sen = self.definition + LineEnd()

    def parse(self, s):
        """Parse a whole definition string into nested lists."""
        return self.definition.parseString(s, parseAll=True).asList()

    def create_machine(self, name, partitions):
        """Create a Machine for *name* with *partitions* partitions,
        expanding plural entries via plur_dict."""
        # lists are accepted because of ["=", "AGT"]
        if type(name) is list:
            name = "".join(name)
        # HACK until we find a good solution for defaults
        name = name.strip('<>')
        is_plur = name in self.plur_dict
        if is_plur:
            name = self.plur_dict[name]
        m = Machine(decode_from_proszeky(name), ConceptControl(), partitions)
        if is_plur:
            m.append(self.create_machine('more', 1), 0)
        return m

    def unify(self, machine):
        """Merge machines in *machine*'s graph that share a printname
        (and 'other'-status), redirecting parent links to one survivor."""
        def __collect_machines(m, machines, is_root=False):
            # cut the recursion
            key = m.printname(), __has_other(m)
            if (key in machines and m in machines[key]):
                return
            if not is_root:
                machines[m.printname(), __has_other(m)].append(m)
            for partition in m.partitions:
                for m_ in partition:
                    __collect_machines(m_, machines)

        def __has_other(m):
            for m_ in m.partitions[0]:
                if m_.printname() == "other":
                    return True
            return False

        def __get_unified(machines, res=None):
            # if nothing to unify, don't
            if len(machines) == 1:
                return machines[0]
            # if a return machine is given, don't create a new one
            if res is None:
                prototype = machines[0]
                res = self.create_machine(prototype.printname(),
                                          len(prototype.partitions))
            for m in machines:
                # if the same machine, don't add anything
                if id(m) == id(res):
                    continue
                for p_i, p in enumerate(m.partitions):
                    for part_m in p:
                        if part_m.printname() != "other":
                            res.partitions[p_i].append(part_m)
                            part_m.del_parent_link(m, p_i)
                            part_m.add_parent_link(res, p_i)
            return res

        def __replace(where, for_what, is_other=False, visited=None):
            if visited is None:
                visited = set()
            if id(where) in visited:
                return
            visited.add(id(where))
            pn = for_what.printname()
            for p_i, p in enumerate(where.partitions):
                # change the partition machines
                for part_m_i, part_m in enumerate(p):
                    if part_m.printname() == pn and __has_other(
                            part_m) == is_other:
                        where.partitions[p_i][part_m_i] = for_what
                        for_what.add_parent_link(where, p_i)
                    # NOTE(review): recursion placed at loop level so the
                    # whole graph is traversed — confirm against the
                    # original indentation
                    __replace(where.partitions[p_i][part_m_i], for_what,
                              is_other, visited)
                # unification if there is a machine more than once on the
                # same partition
                where.partitions[p_i] = list(set(p))

        machines = defaultdict(list)
        __collect_machines(machine, machines, is_root=True)
        for k, machines_to_unify in machines.iteritems():
            if len(machines_to_unify[0].partitions) > 1:
                continue
            printname, is_other = k
            #if unification affects the root (machine),
            #be that the result machine
            if printname == machine.printname():
                unified = __get_unified(machines_to_unify, machine)
            else:
                unified = __get_unified(machines_to_unify)
            __replace(machine, unified, is_other)

    def __parse_expr(self, expr, root, loop_to_defendum=True,
                     three_parts=False):
        """creates machines from a parse node and its children
        there should be one handler for every rule"""
        logging.debug("Parsing expression: {0}".format(expr))
        # name shortening for classmethods
        cls = DefinitionParser
        is_binary = cls._is_binary
        is_unary = cls._is_unary
        is_tree = lambda r: type(r) == list

        # binary machines optionally get an extra 0th partition
        left_part = 0 + int(three_parts)
        right_part = 1 + int(three_parts)
        most_part = 2 + int(three_parts)

        if (len(expr) == 1):
            # UE -> U
            if (is_unary(expr[0])):
                logging.debug("Parsing {0} as a unary.".format(expr[0]))
                return [self.create_machine(expr[0], 1)]
            # E -> UE | BE, A -> UE
            if (is_tree(expr[0])):
                logging.debug("Parsing {0} as a tree.".format(expr[0]))
                return self.__parse_expr(expr[0], root, loop_to_defendum,
                                         three_parts)

        if (len(expr) == 2):
            # BE -> A B
            if (is_tree(expr[0]) and is_binary(expr[1])):
                m = self.create_machine(expr[1], most_part)
                if expr[0] != ["'"]:
                    m.append_all(
                        self.__parse_expr(expr[0], root, loop_to_defendum,
                                          three_parts), left_part)
                if loop_to_defendum:
                    m.append(root, right_part)
                return [m]
            # BE -> B A
            if (is_binary(expr[0]) and is_tree(expr[1])):
                m = self.create_machine(expr[0], most_part)
                if expr[1] != ["'"]:
                    m.append_all(
                        self.__parse_expr(expr[1], root, loop_to_defendum,
                                          three_parts), right_part)
                if loop_to_defendum:
                    m.append(root, left_part)
                return [m]
            # BE -> 'B
            if (expr[0] == ["'"] and is_binary(expr[1])):
                m = self.create_machine(expr[1], most_part)
                #m.append(parent, 1)
                if loop_to_defendum:
                    m.append(root, right_part)
                return [m]
            # BE -> B'
            if (is_binary(expr[0]) and expr[1] == ["'"]):
                m = self.create_machine(expr[0], most_part)
                # m.append(parent, 0)
                if loop_to_defendum:
                    m.append(root, left_part)
                return [m]
            # U -> =AGT
            if expr[0] == deep_pre:
                return [self.create_machine(deep_pre + expr[1], 1)]
            # U -> $HUN_FROM
            if (expr[0] == cls.langspec_pre):
                return [self.create_machine(cls.langspec_pre + expr[1], 1)]
            # U -> #AVM
            if (expr[0] == avm_pre):
                return [self.create_machine(avm_pre + expr[1], 1)]
            # U -> @External_url
            if (expr[0] == enc_pre):
                return [self.create_machine(enc_pre + expr[1], 1)]

        if (len(expr) == 3):
            # UB -> A B A
            if (is_tree(expr[0]) and is_binary(expr[1]) and
                    is_tree(expr[2])):
                m = self.create_machine(expr[1], most_part)
                logging.debug(expr[1])
                if expr[0] != [DefinitionParser.prime]:
                    logging.debug(expr[0])
                    m.append_all(
                        self.__parse_expr(expr[0], root, loop_to_defendum,
                                          three_parts), left_part)
                if expr[2] != [DefinitionParser.prime]:
                    m.append_all(
                        self.__parse_expr(expr[2], root, loop_to_defendum,
                                          three_parts), right_part)
                return [m]
            # A -> [ D ]
            if (expr[0] == "[" and is_tree(expr[1]) and expr[2] == "]"):
                logging.debug(
                    "Parsing expr {0} as an embedded definition".format(
                        expr))
                res = list(
                    self.__parse_definition(expr[1], root, loop_to_defendum,
                                            three_parts))
                return res
            # E -> < E >, U -> < U >
            if expr[0] == '<' and expr[2] == '>':
                logging.debug('E -> < E >' + str(expr[1]))
                return list(self.__parse_expr(expr[1], root,
                                              loop_to_defendum,
                                              three_parts))

        if (len(expr) == 4):
            # UE -> U ( U )
            # E -> U ( BE ) provisional
            if (is_unary(expr[0]) and expr[1] == "(" and expr[3] == ")"):
                logging.debug('X -> U ( Y )')
                if is_unary(expr[2]):
                    m = self.create_machine(expr[2], 1)
                else:
                    m = self.__parse_expr(expr[2], root, loop_to_defendum,
                                          three_parts)[0]
                if not three_parts:
                    logging.warning(
                        "for 0th partition of binary machines, " +
                        "set three_parts=True, "+str(expr))
                m.append(self.create_machine(expr[0], 1), 0)
                return [m]
            # UE -> U [ D ]
            if (is_unary(expr[0]) and expr[1] == "[" and
                    is_tree(expr[2]) and expr[3] == "]"):
                m = self.create_machine(expr[0], 1)
                for parsed_expr in self.__parse_definition(
                        expr[2], root, loop_to_defendum, three_parts):
                    m.append(parsed_expr, 0)
                return [m]
            logging.warning('machine cannot be built '+str(expr))

        if (len(expr) == 6):
            # BE -> B [E; E]
            if (is_binary(expr[0]) and expr[1] == "[" and
                    is_tree(expr[2]) and expr[3] == ";" and
                    is_tree(expr[4]) and expr[5] == "]"):
                m = self.create_machine(expr[0], 2)
                # BUG FIX: the original passed an extra positional argument
                # here (both `m` and `root`) although __parse_expr takes
                # only (expr, root, loop_to_defendum, three_parts), so this
                # branch raised TypeError whenever it matched. Pass `root`,
                # consistent with every other recursive call.
                m.append_all(
                    self.__parse_expr(expr[2], root, loop_to_defendum,
                                      three_parts), 0)
                m.append_all(
                    self.__parse_expr(expr[4], root, loop_to_defendum,
                                      three_parts), 1)
                return [m]

        pe = ParserException(
            "Unknown expression in definition: {0} (len={1})".format(
                expr, len(expr)))
        logging.debug(str(pe))
        logging.debug(expr)
        raise pe

    def __parse_definition(self, definition, root, loop_to_defendum=True,
                           three_parts=False):
        """Yield the first machine built for each clause of *definition*."""
        logging.debug(str(definition))
        for d in definition:
            yield self.__parse_expr(d, root, loop_to_defendum,
                                    three_parts)[0]

    def parse_into_machines(self, string, printname_index=0,
                            add_indices=False, loop_to_defendum=True,
                            three_parts=False):
        """Parse one tab-separated lexicon row into a unified Machine."""
        printname = string.split('\t')[printname_index]
        try:
            id_, urob, pos, def_, comment = string.split('\t')[4:]
        except:  # noqa — original intent: surface the malformed row
            raise Exception(string.split('\t'))
        machine = self.create_machine(printname.lower(), 1)
        #TODO =AGT -> partition 1, =PAT -> partition 2, =TO -> ?
        if add_indices:
            machine.printname_ = machine.printname() + id_sep + id_
        if def_ != '':
            logging.debug(def_)
            parsed = self.parse(def_)
            logging.debug(parsed)
            for parsed_expr in self.__parse_definition(
                    parsed[0], machine, loop_to_defendum, three_parts):
                machine.append(parsed_expr, 0)
        self.unify(machine)
        return machine
curCol = col(l,s) if not(curCol < indentStack[-1] and curCol <= indentStack[-2]): raise ParseException(s,l,"not an unindent") def doUnindent(): indentStack.pop() INDENT = lineEnd.suppress() + empty + empty.copy().setParseAction(checkSubIndent) UNDENT = FollowedBy(empty).setParseAction(checkUnindent) UNDENT.setParseAction(doUnindent) stmt = Forward() suite = Group( OneOrMore( empty + stmt.setParseAction( checkPeerIndent ) ) ) identifier = Word(alphas, alphanums) funcDecl = ("def" + identifier + Group( "(" + Optional( delimitedList(identifier) ) + ")" ) + ":") funcDef = Group( funcDecl + INDENT + suite + UNDENT ) rvalue = Forward() funcCall = Group(identifier + "(" + Optional(delimitedList(rvalue)) + ")") rvalue << (funcCall | identifier | Word(nums)) assignment = Group(identifier + "=" + rvalue) stmt << ( funcDef | assignment | identifier ) print(data) parseTree = suite.parseString(data) import pprint pprint.pprint( parseTree.asList() )
class Parser(object):
    """A parser class for solving simple data accesses and super-indexing
    data.

    :param data: Trace Object
    :type data: instance of :mod:`trappy.ftrace.BareTrace` or a child
        class (like :mod:`trappy.ftrace.FTrace`)

    :param pvars: A dictionary of variables that need to be accessed
        from within the grammar
    :type pvars: dict

    :param method: The method to be used for reindexing data
        This can be one of the standard :mod:`pandas.DataFrame`
        methods (eg. pad, bfill, nearest). The default is pad
        or use the last valid observation.
    :type method: str

    :param limit: The number of indices a value will be propagated
        when reindexing. The default is None
    :type limit: int

    :param fill: Whether to fill the NaNs in the data.
        The default value is True.
    :type fill: bool

    :param window: A tuple ``(start, end)`` of time limiting the data
        accesses to a window inside the trace (``end=None`` means end of
        trace). By default the whole trace is used.
    :type window: tuple

    - **Operators**

        +----------------+----------------------+---------------+
        | Operation      | operator             | Associativity |
        +================+======================+===============+
        | Exponentiation | \*\*                 | Left          |
        +----------------+----------------------+---------------+
        |Unary           | \-                   | Right         |
        +----------------+----------------------+---------------+
        | Multiply/Divide| \*, /, //, %         | Left          |
        +----------------+----------------------+---------------+
        | Add/Subtract   | +, \-,               | Left          |
        +----------------+----------------------+---------------+
        | Comparison     | >, <, >=, <=, ==, != | Left          |
        +----------------+----------------------+---------------+
        | Logical        | &&, ||, \|, &        | Left          |
        +----------------+----------------------+---------------+

    - **Data Accessors**

        Data is accessed as ``<event>:<column>``, where the event may be a
        statically defined class path
        (``trappy.thermal.Thermal:temp``), an alias supplied via *pvars*
        (``THERMAL:temp``), the event's registered name
        (``thermal:temp``), or a dynamically registered event class.

    .. seealso:: :mod:`trappy.dynamic.register_dynamic_ftrace`
    """

    def __init__(self, data, pvars=None, window=(0, None), **kwargs):
        if pvars is None:
            pvars = {}
        self.data = data
        self._pvars = pvars
        # accessor grammar "<event>:<column>"; side effect builds _agg_df
        self._accessor = Group(
            FUNC_NAME + COLON + IDENTIFIER).setParseAction(self._pre_process)
        self._inspect = Group(
            FUNC_NAME + COLON + IDENTIFIER).setParseAction(
                self._parse_for_info)
        self._parse_expr = get_parse_expression(
            self._parse_func, self._parse_var_id)
        self._agg_df = pd.DataFrame()
        self._pivot_set = set()
        self._limit = kwargs.get("limit", StatConf.REINDEX_LIMIT_DEFAULT)
        self._method = kwargs.get("method", StatConf.REINDEX_METHOD_DEFAULT)
        self._fill = kwargs.get("fill", StatConf.NAN_FILL_DEFAULT)
        self._window = window

    def solve(self, expr):
        """Parse and evaluate the input expression.

        Depending on *expr* the result may be a vector
        (``"thermal:temp * 2"``), a scalar
        (``"numpy.mean(thermal:temp)"``) or a boolean mask
        (``"thermal:temp > 65000"``).

        :param expr: The input expression
        :type expr: str
        """
        # BUG FIX: the original contained two additional, unreachable
        # copies of this body after the return statement; they are removed.
        # Pre-process accessors for indexing
        self._accessor.searchString(expr)
        return self._parse_expr.parseString(expr)[0]

    def _pivot(self, cls, column):
        """Pivot Data for concatenation"""
        data_frame = self._get_data_frame(cls)
        data_frame = handle_duplicate_index(data_frame)
        new_index = self._agg_df.index.union(data_frame.index)
        if hasattr(cls, "pivot") and cls.pivot:
            # one sub-frame per pivot value, reindexed onto the super-index
            pivot = cls.pivot
            pivot_vals = list(np.unique(data_frame[pivot].values))
            data = {}
            for val in pivot_vals:
                data[val] = data_frame[data_frame[pivot] == val][[column]]
                if len(self._agg_df):
                    data[val] = data[val].reindex(
                        index=new_index, method=self._method,
                        limit=self._limit)
            return pd.concat(data, axis=1).swaplevel(0, 1, axis=1)
        if len(self._agg_df):
            data_frame = data_frame.reindex(
                index=new_index, method=self._method, limit=self._limit)
        return pd.concat({StatConf.GRAMMAR_DEFAULT_PIVOT:
                          data_frame[[column]]},
                         axis=1).swaplevel(0, 1, axis=1)

    def _pre_process(self, tokens):
        """Pre-process accessors for super-indexing"""
        params = tokens[0]
        if params[1] in self._agg_df.columns:
            return self._agg_df[params[1]]

        cls = params[0]
        column = params[1]
        # resolve the event name: alias, registered event, or dotted path
        if cls in self._pvars:
            cls = self._pvars[cls]
        elif cls in self.data.class_definitions:
            cls = self.data.class_definitions[cls]
        else:
            cls = str_to_attr(cls)
        data_frame = self._pivot(cls, column)
        self._agg_df = pd.concat([self._agg_df, data_frame], axis=1)
        if self._fill:
            self._agg_df = self._agg_df.fillna(method="pad")
        return self._agg_df[params[1]]

    def _parse_for_info(self, tokens):
        """Parse Action for inspecting data accessors"""
        params = tokens[0]
        cls = params[0]
        column = params[1]
        info = {}
        info["pivot"] = None
        info["pivot_values"] = None
        if cls in self._pvars:
            cls = self._pvars[cls]
        elif cls in self.data.class_definitions:
            cls = self.data.class_definitions[cls]
        else:
            cls = str_to_attr(cls)
        data_frame = self._get_data_frame(cls)
        info["class"] = cls
        info["length"] = len(data_frame)
        if hasattr(cls, "pivot") and cls.pivot:
            info["pivot"] = cls.pivot
            info["pivot_values"] = list(np.unique(data_frame[cls.pivot]))
        info["column"] = column
        info["column_present"] = column in data_frame.columns
        return info

    def _parse_var_id(self, tokens):
        """A function to parse a variable identifier"""
        params = tokens[0]
        try:
            return float(params)
        except (ValueError, TypeError):
            try:
                return self._pvars[params]
            except KeyError:
                return self._agg_df[params[1]]

    def _parse_func(self, tokens):
        """A function to parse a function string"""
        params = tokens[0]
        func_name = params[0]
        if func_name in self._pvars and isinstance(
                self._pvars[func_name], types.FunctionType):
            func = self._pvars[func_name]
        else:
            func = str_to_attr(params[0])
        return func(*params[1])

    def _get_data_frame(self, cls):
        """Get the data frame from the BareTrace object, applying the
        window if set"""
        data_frame = getattr(self.data, cls.name).data_frame
        if self._window[1] is None:
            data_frame = data_frame.loc[self._window[0]:]
        else:
            data_frame = data_frame.loc[self._window[0]:self._window[1]]
        return data_frame

    def ref(self, mask):
        """Reference super indexed data with a boolean mask

        :param mask: A boolean :mod:`pandas.Series` that
            can be used to reference the aggregated data in the parser
        :type mask: :mod:`pandas.Series`

        :return: aggregated_data[mask]
        """
        return self._agg_df[mask]

    def inspect(self, accessor):
        """A function to inspect the accessor for information

        :param accessor: A data accessor of the format <event>:<column>
        :type accessor: str

        :return: A dictionary of information
        """
        return self._inspect.parseString(accessor)[0]
class QifParser:
    """Dirt-simple parser for QIF (Quicken Interchange Format) documents.

    Builds a pyparsing grammar once in __init__; parse() then turns a QIF
    string into a pyparsing ParseResults tree rooted at "QifStatement".
    """

    def __init__(self, debug=False):
        """Assemble the QIF grammar.

        :param debug: when true, attach the ofxtools debug actions so every
            grammar element logs its attempts/matches/failures.
        """
        # One map per record type: single-character QIF field code -> the
        # results name under which that field's value is stored.
        account_items = {
            'N': "Name",
            'T': "AccountType",
            'D': "Description",
            'L': "CreditLimit",
            'X': "UnknownField",
            'B': "Balance",
            '/': "BalanceDate",
            '$': "Balance",
        }
        noninvestment_items = {
            'D': "Date",
            'T': "Amount",
            'U': "Amount2",
            'C': "Cleared",
            'N': "Number",
            'P': "Payee",
            'M': "Memo",
            'L': "Category",
            'A': "Address",
            'S': "SplitCategory",
            'E': "SplitMemo",
            '$': "SplitAmount",
            '-': "NegativeSplitAmount",
        }
        investment_items = {
            'D': "Date",
            'N': "Action",
            'Y': "Security",
            'I': "Price",
            'Q': "Quantity",
            'T': "Amount",
            'C': "Cleared",
            'P': "Text",
            'M': "Memo",
            'O': "Commission",
            'L': "TransferAccount",
            '$': "TransferAmount",
        }
        category_items = {
            'N': "Name",
            'D': "Description",
            'T': "TaxRelated",
            'I': "IncomeCategory",
            'E': "ExpenseCategory",
            'B': "BudgetAmount",
            'R': "TaxSchedule",
        }
        class_items = {
            'N': "Name",
            'D': "Description",
        }

        # '!Option:...' directives carry no transaction data; discard them.
        options = Group(CaselessLiteral('!Option:') + restOfLine).suppress()
        banktxns = Group(CaselessLiteral('!Type:Bank').suppress() +
                         ZeroOrMore(Or([self._items(noninvestment_items),
                                        options]))
                         ).setResultsName("BankTransactions")
        cashtxns = Group(CaselessLiteral('!Type:Cash').suppress() +
                         ZeroOrMore(Or([self._items(noninvestment_items),
                                        options]))
                         ).setResultsName("CashTransactions")
        # Some exporters emit the malformed header '!Type!CCard'; accept both.
        ccardtxns = Group(Or([CaselessLiteral('!Type:CCard').suppress(),
                              CaselessLiteral('!Type!CCard').suppress()]) +
                          ZeroOrMore(Or([self._items(noninvestment_items),
                                         options]))
                          ).setResultsName("CreditCardTransactions")
        # BUG FIX: this was labelled "CreditCardTransactions", colliding with
        # ccardtxns so liability transactions were indistinguishable from
        # credit-card ones in the results.
        liabilitytxns = Group(CaselessLiteral('!Type:Oth L').suppress() +
                              ZeroOrMore(Or([self._items(noninvestment_items),
                                             options]))
                              ).setResultsName("LiabilityTransactions")
        invsttxns = Group(CaselessLiteral('!Type:Invst').suppress() +
                          ZeroOrMore(self._items(investment_items))
                          ).setResultsName("InvestmentTransactions")
        acctlist = Group(CaselessLiteral('!Account').suppress() +
                         ZeroOrMore(Or([self._items(account_items,
                                                    name="AccountInfo")]))
                         ).setResultsName("AccountList")
        category = Group(CaselessLiteral('!Type:Cat').suppress() +
                         ZeroOrMore(self._items(category_items))
                         ).setResultsName("CategoryList")
        # BUG FIX: the class list was built from category_items; class
        # records only carry the Name/Description fields in class_items.
        classlist = Group(CaselessLiteral('!Type:Class').suppress() +
                          ZeroOrMore(self._items(class_items))
                          ).setResultsName("ClassList")

        self.parser = Group(ZeroOrMore(White()).suppress() +
                            ZeroOrMore(acctlist).suppress() +
                            OneOrMore(ccardtxns | cashtxns | banktxns |
                                      liabilitytxns | invsttxns) +
                            # Consume (and drop) any trailing category/class
                            # lists; previously these grammars were built but
                            # never wired in, so files containing them failed.
                            ZeroOrMore(category | classlist).suppress() +
                            ZeroOrMore(White()).suppress()
                            ).setResultsName("QifStatement")
        if debug:
            self.parser.setDebugActions(ofxtools._ofxtoolsStartDebugAction,
                                        ofxtools._ofxtoolsSuccessDebugAction,
                                        ofxtools._ofxtoolsExceptionDebugAction)

    def _items(self, items, name="Transaction"):
        """Grammar for one record: one or more coded field lines terminated
        by a '^' (or '^EUR') line.

        :param items: field-code -> results-name map for this record type.
        :param name: results name for the whole record group.
        """
        # BUG FIX: the loop variable used to shadow the 'name' parameter, so
        # the group ended up named after whichever field was iterated last.
        item_list = [self._item(code, field)
                     for (code, field) in items.iteritems()]
        return Group(OneOrMore(Or(item_list)) +
                     oneOf('^EUR ^').setResultsName('Currency') +
                     LineEnd().suppress()
                     ).setResultsName(name)

    def _item(self, code, name):
        """Grammar for a single field line: code character + rest of line."""
        return (CaselessLiteral(code).suppress() +
                restOfLine.setResultsName(name) +
                LineEnd().suppress())

    def parse(self, qif):
        """Parse a QIF document string and return the ParseResults tree."""
        return self.parser.parseString(qif)
# '(via ...)': position, size, drill, layer list and net number sub-expressions
# (OptionalList presumably allows them to appear in any combination -- it is
# defined earlier in the file; verify there).
ViaTok = _paren_stmt(
    "via", OptionalList(AtTok, Size1DTok, DrillTok, LayersTok, NetNumberTok))
# '(segment ...)': a copper track segment with endpoints, width, layer,
# net number and timestamp sub-expressions.
SegmentTok = _paren_stmt(
    "segment", OptionalList(StartTok, EndTok, WidthTok, LayerTok,
                            NetNumberTok, TStamp))
# Any single element that may occur in the body of a kicad_pcb document.
PCBElementTok = GR_CircleTok | GR_ArcTok | ModuleTok | ViaTok | SegmentTok | NetTok | Net_ClassTok | \
    PageTok | LayerListTok | GeneralSettingsTok | SetupTok
PCBElements = ZeroOrMore(PCBElementTok)
VersionTok = _uint_param("version")
# '(host <name> <version>)': the program that wrote the file.
HostTok = _paren_stmt("host", AnystringTok("name"), AnystringTok("version"))
# Top-level grammar: '(kicad_pcb (version ...) (host ...) <elements>...)'.
KiCAD_PCBTok = _paren_stmt("kicad_pcb", VersionTok, HostTok, PCBElements)

if __name__ == "__main__":
    # Ad-hoc smoke tests exercising individual token grammars with sample
    # S-expressions; results are printed, not asserted.
    result = StartTok.parseString("(start 123.456 789)")
    print(result)
    print(result.x, result.y)
    result = FP_LineTok.parseString(
        "(fp_line (start 1 1) (end 1 2) (layer F.Cu) (width 0.1))")
    print(result)
    print(result.start, result.end, result.layer, result.width)
    test_str = """(pad 1 thru_hole rect (at -0.95 0) (size 0.7 1.3) (drill 0.3) (layers *.Cu *.Mask) (net 2 VO))"""
    result = PadTok.parseString(test_str)
    print(result)
    test_str = """(via (at 150.7 106.1) (size 0.6) (drill 0.4) (layers F.Cu B.Cu) (net 3))"""
    result = ViaTok.parseString(test_str)
    print(result)
class QifParser:
    """Dirt-simple parser for QIF (Quicken Interchange Format) documents.

    Builds a pyparsing grammar once in __init__; parse() then turns a QIF
    string into a pyparsing ParseResults tree rooted at "QifStatement".
    """

    def __init__(self, debug=False):
        """Assemble the QIF grammar.

        :param debug: when true, attach the debug actions so every grammar
            element logs its attempts/matches/failures.
        """
        # One map per record type: single-character QIF field code -> the
        # results name under which that field's value is stored.
        account_items = {
            'N': "Name",
            'T': "AccountType",
            'D': "Description",
            'L': "CreditLimit",
            'X': "UnknownField",
            'B': "Balance",
            '/': "BalanceDate",
            '$': "Balance",
        }
        noninvestment_items = {
            'D': "Date",
            'T': "Amount",
            'U': "Amount2",
            'C': "Cleared",
            'N': "Number",
            'P': "Payee",
            'M': "Memo",
            'L': "Category",
            'A': "Address",
            'S': "SplitCategory",
            'E': "SplitMemo",
            '$': "SplitAmount",
            '-': "NegativeSplitAmount",
        }
        investment_items = {
            'D': "Date",
            'N': "Action",
            'Y': "Security",
            'I': "Price",
            'Q': "Quantity",
            'T': "Amount",
            'C': "Cleared",
            'P': "Text",
            'M': "Memo",
            'O': "Commission",
            'L': "TransferAccount",
            '$': "TransferAmount",
        }
        category_items = {
            'N': "Name",
            'D': "Description",
            'T': "TaxRelated",
            'I': "IncomeCategory",
            'E': "ExpenseCategory",
            'B': "BudgetAmount",
            'R': "TaxSchedule",
        }
        class_items = {
            'N': "Name",
            'D': "Description",
        }

        # '!Option:...' directives carry no transaction data; discard them.
        options = Group(CaselessLiteral('!Option:') + restOfLine).suppress()
        banktxns = Group(CaselessLiteral('!Type:Bank').suppress() +
                         ZeroOrMore(Or([self._items(noninvestment_items),
                                        options]))
                         ).setResultsName("BankTransactions")
        cashtxns = Group(CaselessLiteral('!Type:Cash').suppress() +
                         ZeroOrMore(Or([self._items(noninvestment_items),
                                        options]))
                         ).setResultsName("CashTransactions")
        # Some exporters emit the malformed header '!Type!CCard'; accept both.
        ccardtxns = Group(Or([CaselessLiteral('!Type:CCard').suppress(),
                              CaselessLiteral('!Type!CCard').suppress()]) +
                          ZeroOrMore(Or([self._items(noninvestment_items),
                                         options]))
                          ).setResultsName("CreditCardTransactions")
        # BUG FIX: this was labelled "CreditCardTransactions", colliding with
        # ccardtxns so liability transactions were indistinguishable from
        # credit-card ones in the results.
        liabilitytxns = Group(CaselessLiteral('!Type:Oth L').suppress() +
                              ZeroOrMore(Or([self._items(noninvestment_items),
                                             options]))
                              ).setResultsName("LiabilityTransactions")
        invsttxns = Group(CaselessLiteral('!Type:Invst').suppress() +
                          ZeroOrMore(self._items(investment_items))
                          ).setResultsName("InvestmentTransactions")
        acctlist = Group(CaselessLiteral('!Account').suppress() +
                         ZeroOrMore(Or([self._items(account_items,
                                                    name="AccountInfo")]))
                         ).setResultsName("AccountList")
        category = Group(CaselessLiteral('!Type:Cat').suppress() +
                         ZeroOrMore(self._items(category_items))
                         ).setResultsName("CategoryList")
        # BUG FIX: the class list was built from category_items; class
        # records only carry the Name/Description fields in class_items.
        classlist = Group(CaselessLiteral('!Type:Class').suppress() +
                          ZeroOrMore(self._items(class_items))
                          ).setResultsName("ClassList")

        self.parser = Group(ZeroOrMore(White()).suppress() +
                            ZeroOrMore(acctlist).suppress() +
                            OneOrMore(ccardtxns | cashtxns | banktxns |
                                      liabilitytxns | invsttxns) +
                            # Trailing category/class lists are consumed and
                            # dropped so their presence cannot break parsing.
                            ZeroOrMore(category | classlist).suppress() +
                            ZeroOrMore(White()).suppress()
                            ).setResultsName("QifStatement")
        if debug:
            self.parser.setDebugActions(_ofxtoolsStartDebugAction,
                                        _ofxtoolsSuccessDebugAction,
                                        _ofxtoolsExceptionDebugAction)

    def _items(self, items, name="Transaction"):
        """Grammar for one record: one or more coded field lines terminated
        by a '^' (or '^EUR') line.

        :param items: field-code -> results-name map for this record type.
        :param name: results name for the whole record group.
        """
        # BUG FIX: the loop variable used to shadow the 'name' parameter, so
        # the group ended up named after whichever field was iterated last.
        item_list = [self._item(code, field)
                     for (code, field) in items.items()]
        return Group(OneOrMore(Or(item_list)) +
                     oneOf('^EUR ^').setResultsName('Currency') +
                     LineEnd().suppress()
                     ).setResultsName(name)

    def _item(self, code, name):
        """Grammar for a single field line: code character + rest of line."""
        return (CaselessLiteral(code).suppress() +
                restOfLine.setResultsName(name) +
                LineEnd().suppress())

    def parse(self, qif):
        """Parse a QIF document string and return the ParseResults tree."""
        return self.parser.parseString(qif)
class MyHL(object):
    """Interpreter for a tiny teaching language: a 'begin vars' declaration
    section followed by print/read/assignment statements between
    'begin statements' and the terminator line.

    The statement grammars are built with pyparsing in __init__; run()
    validates the overall structure and executes line by line.
    """

    def __init__(self):
        # Lexical elements and one grammar per statement form.
        identifier = Word(alphas + alphanums)
        identifier_list = identifier + ZeroOrMore(Word(',') + identifier)
        # Arithmetic: operand (op operand)+ with single-character operators.
        self.number_exp = Group(Word(alphanums) + OneOrMore(Word("+-/*%", exact=1) + Word(alphanums)))
        # An expression is arithmetic, a double-quoted word, or an identifier.
        expression = self.number_exp | Group(Literal('"') + Word(printables, excludeChars='"') + Literal('"')) | identifier
        datatype = Literal('number') | Literal('word')
        self.print_statement = "print" + identifier + ";"
        self.read_statement = "read" + identifier + ";"
        self.assignment_statement = identifier + "=" + expression + ";"
        self.variable_declaration = identifier_list + "use as" + datatype + ";"
        # Maps variable name -> [current value, declared type string].
        self.variable_stack = {}
        # with open('input.uy', 'r') as inputFile:
        #     code = [i.strip() for i in inputFile.readlines()]
        #     self.run(code)

    def run(self, code, parent=None):
        """Check program structure and execute each statement in order.

        :param code: list of source lines.
        :param parent: UI object supplying input (getInt/getString) and
            error-line highlighting (setLineFormat).
        """
        code = self.clean_code(code)
        self.variable_stack = {}
        if code[0] != "begin vars":
            print "Code should begin with 'begin vars' statement."
            err_line = -1
        elif code[-1] != "end vars" and code[-1] != "end statements":
            print "Code should end with 'end vars' or 'end statements' statement."
            err_line = -1
        else:
            end_vars = False    # saw "end vars"
            begin_stmt = False  # saw "begin statements"
            error = False
            for i in code[1:-1]:
                if not i.startswith('//'):
                    if i == "end vars":
                        end_vars = True
                    elif i == "begin statements":
                        if end_vars:
                            var_type = "program"  # NOTE(review): never read
                            begin_stmt = True
                        else:
                            print "Formal declaration of variables must end with 'end vars' statement."
                            error = True
                            err_line = -1
                            break
                    elif end_vars and not begin_stmt:
                        print "Program statements should begin with 'begin statements' statement."
                        error = True
                        err_line = -1
                        break
                    else:
                        # Declaration or executable statement depending on
                        # whether "begin statements" has been seen yet.
                        success, err_code = self.check_and_execute(i, begin_stmt, parent)
                        if not success:
                            error = True
                            if err_code == 1:
                                print "Syntax Error at line ", code.index(i) + 1
                                err_line = code.index(i)
                            elif err_code == 2:
                                print "Undeclared variable at line ", code.index(i) + 1
                                err_line = code.index(i)
                            elif err_code == 3:
                                print "Type mismatch error at line ", code.index(i) + 1
                                err_line = code.index(i)
                            elif err_code == 4:
                                print "Execution halted at line ", code.index(i) + 1
                                err_line = code.index(i)
                            break
            # Highlight the offending line in the UI, if any.
            # NOTE(review): this block's position was reconstructed from a
            # collapsed source; 'error' is only bound on this branch --
            # confirm against the original layout.
            if error:
                if err_line >= 0:
                    parent.setLineFormat(err_line)

    def check_and_execute(self, statement, begin_stmt, parent=None):
        """Parse and execute a single statement.

        Returns (True, 0) on success, else (False, err_code) where
        1 = syntax error, 2 = undeclared variable, 3 = type mismatch,
        4 = read aborted by the UI.
        """
        if not begin_stmt:
            # Declaration section: "a, b use as number;" registers each
            # identifier with value None and the declared type.
            try:
                variables = self.variable_declaration.parseString(statement, parseAll=True)[0::2]
                for v in variables[:-1]:
                    self.variable_stack[v] = [None, variables[-1]]
            except:
                return False, 1
        else:
            if statement.startswith('print'):
                try:
                    identifier = self.print_statement.parseString(statement, parseAll=True)[1]
                    if identifier in self.variable_stack:
                        print self.variable_stack[identifier][0]
                    else:
                        return False, 2
                except:
                    return False, 1
            elif statement.startswith('read'):
                try:
                    identifier = self.read_statement.parseString(statement, parseAll=True)[1]
                    if identifier in self.variable_stack:
                        # Ask the UI for a value matching the declared type.
                        if self.variable_stack[identifier][1] == 'number':
                            val, ok = parent.getInt(identifier)
                        else:
                            val, ok = parent.getString(identifier)
                        # if not parent:
                        #     self.variable_stack[identifier][0] = raw_input("enter value for variable %s:" % identifier )
                        # else:
                        if ok:
                            self.variable_stack[identifier][0] = val
                        else:
                            return False, 4
                    else:
                        return False, 2
                except:
                    return False, 1
            else:
                # Assignment: evaluate the right-hand side, then store it
                # subject to the declared type of the target variable.
                try:
                    ass_stmt = self.assignment_statement.parseString(statement, parseAll=True)
                    try:
                        # Arithmetic RHS: substitute stored values for
                        # identifiers, then evaluate numerically.
                        self.number_exp.parseString("".join(ass_stmt[-2]), parseAll=True)
                        for i, v in enumerate(ass_stmt[-2]):
                            if not v in ['+', '-', '/', '*', '%']:
                                try:
                                    int(v)
                                except:
                                    if v in self.variable_stack:
                                        ass_stmt[-2][i] = str(self.variable_stack[v][0])
                                    else:
                                        return False, 2
                        expression = self.arithmetic_ops("".join(ass_stmt[-2]))
                    except:
                        # Not arithmetic: keep the parsed tokens as-is
                        # (quoted word or bare identifier).
                        expression = ass_stmt[-2]
                    finally:
                        if not ass_stmt[0] in self.variable_stack:
                            return False, 2
                        if self.variable_stack[ass_stmt[0]][1] == 'number':
                            try:
                                self.variable_stack[ass_stmt[0]][0] = int(expression)
                            except:
                                return False, 3
                        else:
                            # 'word' target: RHS must be quoted; the middle
                            # token is the bare string.
                            if expression[0] == '"' and expression[-1] == '"':
                                self.variable_stack[ass_stmt[0]][0] = expression[1]
                            else:
                                return False, 3
                except:
                    return False, 1
        return True, 0

    def arithmetic_ops(self, statement):
        """Evaluate an arithmetic expression string.

        SECURITY: eval() executes arbitrary Python; tolerable only because
        the caller pre-validates the text against number_exp first.
        """
        return eval(statement)

    def clean_code(self, code):
        """Drop blank lines and strip trailing '//' comments; whole-line
        '//' comments are kept verbatim (run() skips them)."""
        ccode = []
        for c in code:
            if c != '':
                if not c.startswith('//'):
                    ccode.append(c.split('//')[0].strip())
                else:
                    ccode.append(c)
        return ccode
class OfcParser:
    """Dirt-simple OFC parser for interpreting OFC documents."""

    def __init__(self, debug=False):
        """Build the recursive tag grammar.

        :param debug: when true, attach the ofxtools debug actions.
        """
        # An aggregate is an open tag, one or more nested aggregates or leaf
        # contents, then a close tag; Forward enables the recursion.
        aggregate = Forward().setResultsName("OFC")
        aggregate_open_tag, aggregate_close_tag = self._tag()
        content_open_tag = self._tag(closed=False)
        # Leaf content: an open tag followed by text up to '<' or end of line.
        content = Group(content_open_tag + CharsNotIn("<\r\n"))
        aggregate << Group(aggregate_open_tag
                           + Dict(OneOrMore(aggregate | content))
                           + aggregate_close_tag)
        self.parser = Group(aggregate).setResultsName("document")
        if debug:
            self.parser.setDebugActions(ofxtools._ofxtoolsStartDebugAction,
                                        ofxtools._ofxtoolsSuccessDebugAction,
                                        ofxtools._ofxtoolsExceptionDebugAction)

    def _tag(self, closed=True):
        """Generate parser definitions for OFX tags.

        Returns (openTag, closeTag) when closed is true, else openTag alone.
        """
        openTag = (Literal("<").suppress() + Word(alphanums + ".")
                   + Literal(">").suppress())
        if closed:
            closeTag = Group("</" + Word(alphanums + ".") + ">"
                             + ZeroOrMore(White())).suppress()
            return openTag, closeTag
        else:
            return openTag

    def parse(self, ofc):
        """Parse a string argument and return a tree structure representing
        the parsed document."""
        # Normalize known real-world deviations before the first attempt.
        ofc = self.add_zero_to_empty_ledger_tag(ofc)
        ofc = self.remove_inline_closing_tags(ofc)
        ofc = ofxtools.util.strip_empty_tags(ofc)
        ofc = self._translate_chknum_to_checknum(ofc)
        # Deeply nested documents overflow the default recursion limit.
        # XXX: needs a better solution than raising the global limit.
        import sys
        sys.setrecursionlimit(5000)
        try:
            return self.parser.parseString(ofc).asDict()
        except ParseException:
            # First attempt failed: apply the heavier repairs and retry.
            fixed_ofc = self.fix_ofc(ofc)
            return self.parser.parseString(fixed_ofc).asDict()

    def add_zero_to_empty_ledger_tag(self, ofc):
        """Fix an OFC by adding a zero to a blank <LEDGER> tag."""
        return re.compile(r'<LEDGER>(\D*\n)', re.UNICODE).sub(r'<LEDGER>0\1', ofc)

    def remove_inline_closing_tags(self, ofc):
        """Fix an OFC by removing closing 'tags' placed inline after content."""
        return re.compile(r'(\w+.*)<\/\w+>', re.UNICODE).sub(r'\1', ofc)

    def fix_ofc(self, ofc):
        """Apply the aggressive repairs used when the first parse fails."""
        ofc = self._remove_bad_tags(ofc)
        ofc = self._fill_dummy_tags(ofc)
        return self._inject_tags(ofc)

    def _remove_bad_tags(self, ofc):
        """Strip <TRNRS> wrappers and <CLTID> values the grammar rejects."""
        ofc_without_trnrs = re.sub(r'<[/]*TRNRS>', '', ofc)
        return re.sub(r'<[/]*CLTID>\w+', '', ofc_without_trnrs)

    def _fill_dummy_tags(self, ofc):
        """Give empty <FITID> and <CHECKNUM> tags a dummy '0' value."""
        expression = r'(<%s>)[^\w+]'
        replacement = r'<%s>0\n'
        ofc = re.sub(expression % 'FITID', replacement % 'FITID', ofc)
        filled_ofc = re.sub(expression % 'CHECKNUM', replacement % 'CHECKNUM', ofc)
        return filled_ofc

    def _translate_chknum_to_checknum(self, ofc):
        """Some banks put a CHKNUM instead of CHECKNUM; translate CHKNUM to
        CHECKNUM so this information is parsed correctly."""
        return re.sub('CHKNUM', 'CHECKNUM', ofc)

    def _inject_tags(self, ofc):
        """Wrap a bare <OFC> document in the ACCTSTMT/ACCTFROM envelope the
        grammar expects; documents that already have one are returned as-is."""
        tags = "<OFC>\n<ACCTSTMT>\n<ACCTFROM>\n<BANKID>0\n<ACCTID>0\n<ACCTTYPE>0\n</ACCTFROM>\n"
        if not re.findall(r'<OFC>\w*\s*<ACCTSTMT>', ofc):
            return ofc.replace('<OFC>', tags).replace('</OFC>', '</ACCTSTMT>\n</OFC>')
        # BUG FIX: previously fell through and returned None here, so
        # fix_ofc() handed None to parseString for already-wrapped input.
        return ofc
def parse_parsers(parsers):
    """Parse a block of P4 parser declarations.

    :param parsers: source text containing the parser declarations.
    :return: the single grouped ParseResults for the whole block.
    """
    body, header = p4_parser()
    grammar = Group(header & OneOrMore(body))
    return grammar.parseString(parsers)[0]