def preprocess(identifier, filenames):
    """Build cleaned callee sequences from call-graph edge files.

    Every name in ``filenames`` is opened relative to the module-level
    ``dirname`` and read line by line. A line is expected to hold exactly two
    whitespace-separated tokens, ``<caller> <callee>``. Edges whose
    lower-cased callee contains ``identifier`` are grouped by caller, in the
    order they are read, giving one callee sequence per caller.

    The collected sequences are then cleaned:

    * sequences with fewer than 3 callees are dropped;
    * callees without a ``':'`` or with a ``'$'`` in their name are removed;
    * repeated callees are collapsed with ``oset`` (presumably an ordered
      set, so the first occurrence wins -- confirm against its import);
    * sequences left with fewer than 2 callees are dropped.

    Progress and a final summary are printed to stdout.

    Parameters:
        identifier: substring looked up in the lower-cased callee name; it is
            not lower-cased here, so it must already be lower case to match.
        filenames: sized iterable of file names located under ``dirname``.

    Returns:
        list of lists of callee names.

    Raises:
        ValueError: a non-blank line has a single token, or a matching line
            does not have exactly two tokens (the file is treated as
            corrupt). Previously this surfaced as a bare IndexError or as an
            ``assert`` that disappears under ``python -O``.
    """
    total = len(filenames)
    final = []  # holds all raw sequences collected after each file
    tree = {}   # caller -> list of matching callees, shared by all files

    for count, filename in enumerate(filenames, 1):
        print(filename)
        with open(os.path.join(dirname, filename)) as f:
            for line in f:
                tokens = line.split()
                if not tokens:
                    continue  # blank line, nothing to record
                if len(tokens) < 2:
                    raise ValueError(
                        "Corrupt call graph file %r: expected 2 tokens, "
                        "got %r" % (filename, line))
                if identifier not in tokens[1].lower():
                    continue  # callee is outside the library of interest
                if len(tokens) != 2:
                    raise ValueError(
                        "Corrupt call graph file %r: expected 2 tokens, "
                        "got %r" % (filename, line))
                caller, callee = tokens
                # build the caller -> callee sequence tree
                tree.setdefault(caller, []).append(callee)

        # NOTE(review): ``tree`` is never reset, so after every file all
        # multi-callee sequences seen so far are appended again (as the same
        # list objects). Sequences therefore appear in the result once per
        # file processed since they first reached two callees. Kept as-is
        # because downstream code may depend on it -- confirm whether this
        # duplication is intended.
        final.extend(seq for seq in tree.values() if len(seq) > 1)
        print("Processed-> %s %%" % ((float(count) / float(total)) * 100))

    cleaned = []
    for seq in final:
        if len(seq) < 3:
            continue  # too short (too broad) to be useful
        tokens = [tok for tok in seq if ':' in tok and '$' not in tok]
        if not tokens:
            continue
        unique = list(oset(tokens))
        # Every surviving token contains ':', so no separate check on the
        # first element is needed.
        if len(unique) > 1:
            cleaned.append(unique)

    # max() raises on an empty sequence; report 0 when nothing survived.
    longest = max(len(seq) for seq in cleaned) if cleaned else 0
    print("Final constructed! %d %d" % (len(cleaned), longest))
    return cleaned
def rotate(pre,
           keys,
           dig,
           sn=1,
           version=Version,
           kind=Serials.json,
           sith=None,
           nxt="",
           toad=None,
           wits=None,  # prior existing wits
           cuts=None,
           adds=None,
           data=None,
           ):
    """
    Returns serder of rotation event message.
    Utility function to automate creation of rotation events.

    Parameters:
        pre is qb64 prefix
        keys is list of qb64 keys
        dig is qb64 digest of prior event
        sn is int sequence number, must be >= 1; stored as lowercase hex
        version is passed to Versify to build the version string
        kind is passed to Versify to build the version string
        sith is int threshold between 1 and len(keys) inclusive; when None
            it defaults to half the number of keys rounded up, at least 1
        nxt is hash qual Base64
        toad is int threshold checked against the resultant witness list;
            when None it defaults to half of the resultant witnesses rounded
            up (at least 1), or 0 when no witnesses remain
        wits is list of prior existing witnesses, no duplicates
        cuts is list of witnesses to remove, each must be in wits
        adds is list of witnesses to add, disjoint from wits and cuts
        data is list of seals

    Raises ValueError when any of the above constraints is violated or when
    sith is not an int (list sith is not yet supported).
    """
    vs = Versify(version=version, kind=kind, size=0)
    ilk = Ilks.rot

    if sn < 1:
        raise ValueError("Invalid sn = {} for rot.".format(sn))

    if sith is None:  # default: half of the keys rounded up, never below 1
        sith = max(1, ceil(len(keys) / 2))

    if not isinstance(sith, int):  # list sith not yet supported
        raise ValueError("invalid sith = {}.".format(sith))
    if sith < 1 or sith > len(keys):  # out of bounds sith
        raise ValueError("Invalid sith = {} for keys = {}".format(
            sith, keys))

    # prior witnesses
    if wits is None:
        wits = []
    prior = oset(wits)
    if len(prior) != len(wits):
        raise ValueError("Invalid wits = {}, has duplicates.".format(wits))

    # witnesses to remove
    if cuts is None:
        cuts = []
    removed = oset(cuts)
    if len(removed) != len(cuts):
        raise ValueError("Invalid cuts = {}, has duplicates.".format(cuts))
    if (prior & removed) != removed:  # some cuts not in wits
        raise ValueError(
            "Invalid cuts = {}, not all members in wits.".format(cuts))

    # witnesses to add
    if adds is None:
        adds = []
    added = oset(adds)
    if len(added) != len(adds):
        raise ValueError("Invalid adds = {}, has duplicates.".format(adds))
    if removed & added:  # non empty intersection
        raise ValueError("Intersecting cuts = {} and adds = {}.".format(
            cuts, adds))
    if prior & added:  # non empty intersection
        raise ValueError("Intersecting wits = {} and adds = {}.".format(
            wits, adds))

    resultant = (prior - removed) | added
    expected_size = len(wits) - len(cuts) + len(adds)
    if len(resultant) != expected_size:  # redundant?
        raise ValueError(
            "Invalid member combination among wits = {}, cuts ={}, "
            "and adds = {}.".format(wits, cuts, adds))

    if toad is None:
        toad = max(1, ceil(len(resultant) / 2)) if resultant else 0

    if resultant:
        bad_toad = toad < 1 or toad > len(resultant)  # out of bounds toad
    else:
        bad_toad = toad != 0  # no witnesses left, only zero is valid
    if bad_toad:
        raise ValueError("Invalid toad = {} for resultant wits = {}"
                         "".format(toad, list(resultant)))

    if data is None:
        data = []

    # Key order is kept identical to the original keyword order because it
    # determines the layout of the resulting event dict.
    ked = {
        "vs": vs,  # version string
        "pre": pre,  # qb64 prefix
        "sn": "{:x}".format(sn),  # hex string no leading zeros lowercase
        "ilk": ilk,
        "dig": dig,  # qb64 digest of prior event
        "sith": "{:x}".format(sith),  # hex string no leading zeros lowercase
        "keys": keys,  # list of qb64
        "nxt": nxt,  # hash qual Base64
        "toad": "{:x}".format(toad),  # hex string no leading zeros lowercase
        "cuts": cuts,  # list of qb64 may be empty
        "adds": adds,  # list of qb64 may be empty
        "data": data,  # list of seals
    }

    return Serder(ked=ked)  # return serialized ked