def parse(cls, filename, logs=None):
    """
    Build an instance of ``cls`` from an INI-formatted analysis output file.

    :param filename: path of the INI file to parse
    :param logs: optional path of a log file; when truthy, its raw contents
        are stored in the ``logs`` attribute of the returned object
    :return: ``cls(states, edges, nodes)`` where ``states`` maps an address
        to a list of node ids, ``edges`` maps a source id to the list of its
        destination ids, and ``nodes`` maps a node id to its parsed State
    :raises PyBinCATException: if the file is not valid INI, or if it
        contains no section at all
    """
    states = defaultdict(list)
    edges = defaultdict(list)
    nodes = {}

    config = ConfigParser.RawConfigParser()
    try:
        config.read(filename)
    except ConfigParser.ParsingError as e:
        estr = str(e)
        # keep only the head and the tail of very long parser messages
        if len(estr) > 400:
            estr = estr[:200] + '\n...\n' + estr[-200:]
        raise PyBinCATException(
            "Invalid INI format for parsed output file %s.\n%s" %
            (filename, estr))
    if not config.sections():
        raise PyBinCATException(
            "Parsing error: no sections in %s, check analysis logs" %
            filename)

    cls.arch = config.get('loader', 'architecture')
    node_prefix = 'node = '
    for section in config.sections():
        if section == 'edges':
            # each value looks like "<src> -> <dst>"; the key is not used
            for _edgename, edge in config.items(section):
                src, dst = edge.split(' -> ')
                edges[src].append(dst)
        elif section.startswith(node_prefix):
            node_id = section[len(node_prefix):]
            state = State.parse(node_id, dict(config.items(section)))
            if state.final:
                # final states are listed first for their address
                states[state.address].insert(0, state.node_id)
            else:
                states[state.address].append(state.node_id)
            nodes[state.node_id] = state
        # any other section (including 'loader') holds no node/edge data

    # start from an empty value cache for the newly parsed file
    CFA._valcache = dict()
    cfa = cls(states, edges, nodes)
    if logs:
        # context manager: the handle is closed even if read() fails
        with open(logs, 'rb') as logfile:
            cfa.logs = logfile.read()
    return cfa
def regtypes(self):
    """
    Return ``self._regtypes``, parsing it on first access.

    When ``self._regtypes`` is None, ``self.parse_regaddrs()`` is called to
    populate it before it is returned.

    :raises PyBinCATException: if ``parse_regaddrs`` raises; the original
        traceback is printed first, and the new message carries
        ``self.address`` and the original error text
    """
    if self._regtypes is None:
        try:
            self.parse_regaddrs()
        except Exception as e:
            import traceback
            # print_exc() reports the exception being handled; its first
            # positional parameter is a frame limit, not the exception, so
            # the exception object must not be passed to it.
            traceback.print_exc()
            raise PyBinCATException(
                "Cannot parse taint or type data at address %s\n%s" %
                (self.address, e))
    return self._regtypes
def parse_regaddrs(self):
    """
    Parses entries containing taint & type data

    Walks the key/value pairs of ``self._outputkv`` and fills:

    * ``self._regaddrs``: location -> list of parsed values
    * ``self._regtypes``: location -> type string
    * ``self.description``: set when a "description" entry is present

    A location is whatever ``Value.parse(region, addr, '0', 0)`` returns.
    ``self._outputkv`` is deleted once it has been processed.

    :raises PyBinCATException: if a key or a value cannot be parsed
    """
    self._regaddrs = {}
    self._regtypes = {}
    for k, v in self._outputkv.iteritems():
        if k == "description":
            # keep the text of the entry, not its key
            self.description = v
            continue
        # keys prefixed with "t-" carry type data instead of values
        typedata = k.startswith("t-")
        if typedata:
            k = k[2:]
        m = RE_REGION_ADDR.match(k)
        if not m:
            raise PyBinCATException("Parsing error (key=%r)" % (k,))
        region = m.group("region")
        addr = m.group("addr")
        if region == "mem":
            # use memreg as region instead of 'mem'
            # ex. "s0xabcd, s0xabce" "g0x24*32"
            if '*' in addr:
                # single repeated value
                regaddr, repeat = addr.split('*')
                m = RE_VALTAINT.match(regaddr)
                if m is None:
                    raise PyBinCATException(
                        "Parsing error (key=%r)" % (k,))
                region, addr = m.group('memreg'), m.group('value')
                v = ', '.join([v] * int(repeat))
            else:
                regaddr1, regaddr2 = addr.split(', ')
                first = RE_VALTAINT.match(regaddr1)
                last = RE_VALTAINT.match(regaddr2)
                if first is None or last is None:
                    raise PyBinCATException(
                        "Parsing error (key=%r)" % (k,))
                region, addr = first.group('memreg'), first.group('value')
                # both ends of the range must be in the same region
                if region != last.group('memreg'):
                    raise PyBinCATException(
                        "Parsing error (key=%r)" % (k,))
            length = 8
            # XXX allow non-aligned access (current: assume no overlap)
        elif region and region[0] == "h":
            # ignore for now -- indicates whether this Heap region has been
            # allocated or freed
            continue
        elif region == "reg":
            length = reg_len(addr)

        regaddr = Value.parse(region, addr, '0', 0)
        if typedata:
            # several type entries for one location are joined with " -- "
            if regaddr in self._regtypes:
                self._regtypes[regaddr] += " -- " + v
            else:
                self._regtypes[regaddr] = v
            continue

        if (v, length) not in CFA._valcache:
            # add to cache
            # NOTE(review): the region annotations below are only recorded
            # on a cache miss; confirm that skipping them on a cache hit is
            # intended.
            concat_value = []
            for idx, val in enumerate(v.split(', ')):
                m = RE_VALTAINT.match(val)
                if not m:
                    raise PyBinCATException(
                        "Parsing error (value=%r)" % (v,))
                memreg = m.group("memreg")
                strval = m.group("value")
                taint = m.group("taint")
                new_value = Value.parse(memreg, strval, taint, length)
                if new_value.region:
                    # record the region of the value as type information
                    # for the idx-th location after regaddr
                    curregaddr = regaddr + idx
                    regstr = "region " + new_value.region
                    if curregaddr in self._regtypes:
                        self._regtypes[curregaddr] = (
                            regstr + " - " + self._regtypes[curregaddr])
                    else:
                        self._regtypes[curregaddr] = regstr
                # concatenate
                concat_value.append(new_value)
            # the cache entry is a one-element list holding the value list
            CFA._valcache[(v, length)] = [concat_value]
        for val in CFA._valcache[(v, length)]:
            self._regaddrs[regaddr] = val
    del self._outputkv
def parse(cls, filename, logs=None):
    """
    Build an instance of ``cls`` from an INI-formatted analysis output file.

    The instance is created up front as
    ``cls(addr_nodes, edges, nodes, taintsrcs)`` and the four containers are
    then filled in place while the sections are parsed.

    :param filename: path of the INI file to parse
    :param logs: optional path of a log file; when truthy, its raw contents
        are stored in the ``logs`` attribute of the returned object
    :return: the populated ``cls`` instance
    :raises PyBinCATException: if the file is not valid INI, if it contains
        no section at all, or if a "node ..." section name is not matched
        by RE_NODE_UNREL
    """
    addr_nodes = defaultdict(list)
    edges = defaultdict(list)
    nodes = {}
    taintsrcs = {}
    cfa = cls(addr_nodes, edges, nodes, taintsrcs)

    config = ConfigParser.RawConfigParser()
    try:
        config.read(filename)
    except ConfigParser.ParsingError as e:
        estr = str(e)
        # keep only the head and the tail of very long parser messages
        if len(estr) > 400:
            estr = estr[:200] + '\n...\n' + estr[-200:]
        raise PyBinCATException(
            "Invalid INI format for parsed output file %s.\n%s" %
            (filename, estr))
    if not config.sections():
        raise PyBinCATException(
            "Parsing error: no sections in %s, check analysis logs" %
            filename)

    cls.arch = config.get('program', 'architecture')
    cls.mem_sz = config.get('program', 'mem_sz')

    # parse taint sources first -- will be used when parsing Node
    # sorting ensures that a node will be parsed before its unrels
    sections = sorted(config.sections(), reverse=True)
    if 'taint sources' in sections:
        for srcid, srcname in config.items('taint sources'):
            taintsrcs[int(srcid)] = srcname
        sections.remove('taint sources')
    # 0 when the file declares no taint source
    maxtaintsrcid = max(list(taintsrcs) + [0])

    node_prefix = 'node = '
    for section in sections:
        if section == 'edges':
            # each value looks like "<src> -> <dst>"; the key is not used
            for _edgename, edge in config.items(section):
                src, dst = edge.split(' -> ')
                edges[src].append(dst)
        elif section.startswith(node_prefix):
            node_id = section[len(node_prefix):]
            node = Node.parse(node_id, dict(config.items(section)),
                              maxtaintsrcid)
            if node.final:
                # final nodes are listed first for their address
                addr_nodes[node.address].insert(0, node.node_id)
            else:
                addr_nodes[node.address].append(node.node_id)
            nodes[node.node_id] = node
        elif section.startswith('node '):
            # unrel section attached to an already parsed node
            m = RE_NODE_UNREL.match(section)
            if m is None:
                raise PyBinCATException(
                    "Parsing error: unexpected section name %r in %s" %
                    (section, filename))
            unrel_id = m.group('unrelid')
            new_unrel = Unrel.parse(unrel_id, dict(config.items(section)))
            cfa[m.group('nodeid')].unrels[unrel_id] = new_unrel
        # any other section (e.g. 'loader') holds no node/edge data

    # start from an empty value cache for the newly parsed file
    CFA._valcache = dict()
    if logs:
        # context manager: the handle is closed even if read() fails
        with open(logs, 'rb') as logfile:
            cfa.logs = logfile.read()
    return cfa
def parse_regaddrs(self):
    """
    Parses entries containing taint & type data

    Walks the key/value pairs of ``self._outputkv`` and fills:

    * ``self._regaddrs``: location -> list of parsed values
    * ``self._regtypes``: location -> list of type strings

    A location is whatever ``Value.parse(region, addr, '0', 0)`` returns.
    ``self._outputkv`` is deleted once it has been processed.

    :raises PyBinCATException: if a key or a value cannot be parsed
    """
    self._regaddrs = {}
    self._regtypes = {}
    for k, v in self._outputkv.iteritems():
        # keys prefixed with "t-" carry type data instead of values
        typedata = k.startswith("t-")
        if typedata:
            k = k[2:]
        m = RE_REGION_ADDR.match(k)
        if not m:
            raise PyBinCATException("Parsing error (key=%r)" % (k, ))
        region = m.group("region")
        addr = m.group("addr")
        if region == "mem":
            # use memreg as region instead of 'mem'
            # ex. "s0xabcd, s0xabce" "g0x24*32"
            # region in ['s', 'g', 'h']
            if '*' in addr:
                # single repeated value
                regaddr, repeat = addr.split('*')
                m = RE_VALTAINT.match(regaddr)
                if m is None:
                    raise PyBinCATException(
                        "Parsing error (key=%r)" % (k, ))
                region, addr = m.group('memreg'), m.group('value')
                v = ', '.join([v] * int(repeat))
            else:
                regaddr1, regaddr2 = addr.split(', ')
                first = RE_VALTAINT.match(regaddr1)
                last = RE_VALTAINT.match(regaddr2)
                if first is None or last is None:
                    raise PyBinCATException(
                        "Parsing error (key=%r)" % (k, ))
                region, addr = first.group('memreg'), first.group('value')
                # both ends of the range must be in the same region
                if region != last.group('memreg'):
                    raise PyBinCATException(
                        "Parsing error (key=%r)" % (k, ))
            length = 8
            # XXX allow non-aligned access (current: assume no overlap)
        elif region == "reg":
            length = reg_len(addr)

        regaddr = Value.parse(region, addr, '0', 0)
        if typedata:
            self._regtypes[regaddr] = v.split(', ')
            continue

        if (v, length) not in CFA._valcache:
            # add to cache
            concat_value = []
            for val in v.split(', '):
                m = RE_VALTAINT.match(val)
                if not m:
                    raise PyBinCATException(
                        "Parsing error (value=%r)" % (v, ))
                memreg = m.group("memreg")
                strval = m.group("value")
                taint = m.group("taint")
                # concatenate
                concat_value.append(
                    Value.parse(memreg, strval, taint, length))
            # the cache entry is a one-element list holding the value list
            CFA._valcache[(v, length)] = [concat_value]
        for val in CFA._valcache[(v, length)]:
            self._regaddrs[regaddr] = val
    del self._outputkv