def preprocess(identifier, filenames):
    """Collect caller -> callee sequences from call-graph edge files.

    Every name in ``filenames`` is opened relative to ``dirname`` (a name
    defined outside this function). Each non-blank line is split on
    whitespace and read as a ``caller callee`` edge. An edge is kept only
    when ``identifier`` occurs in the lower-cased callee, so ``identifier``
    itself has to be lower case to ever match.

    Parameters:
        identifier: substring looked up in the lower-cased callee token
        filenames: sized iterable of file names located under ``dirname``

    Returns:
        list of sequences (lists of callee tokens). Sequences with fewer
        than three raw callees are dropped, tokens without ':' or with '$'
        are removed, duplicates inside a sequence are removed with ``oset``
        (defined elsewhere; presumably an order-preserving set), and
        sequences left with fewer than two tokens are dropped.

    Raises:
        ValueError: a non-blank line has a single token, or a line matching
            ``identifier`` does not have exactly two tokens (file is treated
            as corrupt).
    """
    processed = 0
    total = len(filenames)
    final = []  # holds all raw sequences after preprocessing
    tree = {}  # caller -> list of callees, shared by every file

    for filename in filenames:
        print(filename)
        with open(os.path.join(dirname, filename)) as f:
            for line in f:
                contents = line.split()
                if not contents:
                    continue  # blank line carries no edge
                if len(contents) < 2:
                    # no callee to inspect, the file cannot be trusted
                    raise ValueError(
                        "Corrupt call-graph line in {}: {!r}".format(
                            filename, line))
                if identifier not in contents[1].lower():
                    continue
                if len(contents) != 2:
                    # a matching edge must be exactly "caller callee"
                    raise ValueError(
                        "Corrupt call-graph line in {}: {!r}".format(
                            filename, line))

                # build a caller->callee sequence tree.
                tree.setdefault(contents[0], []).append(contents[1])

        # NOTE(review): the tree is never reset between files, so the same
        # callee list object is harvested again after every later file and
        # ends up in `final` several times. Kept as is because downstream
        # consumers may rely on it -- confirm whether this is intended.
        # Single item sequences are of no use.
        seqs = [callees for callees in tree.values() if len(callees) > 1]

        processed += 1
        print("Processed-> %s %%" % ((float(processed) / float(total)) * 100))

        # extend the list holding all the sequences
        final.extend(seqs)

    # drop sequences that are too short (too broad) to be useful
    final = [seq for seq in final if len(seq) >= 3]
    # keep only tokens that contain ':' and do not contain '$'
    final = [[tok for tok in seq if ':' in tok and '$' not in tok]
             for seq in final]
    # drop emptied sequences, then remove repeated tokens inside each one
    final = [list(oset(seq)) for seq in final if seq]
    final = [seq for seq in final if len(seq) > 1]
    # every surviving token contains ':', so no further check on seq[0]
    # is needed here.

    longest = max(len(seq) for seq in final) if final else 0
    print("Final constructed! %d %d" % (len(final), longest))

    return final
# Example #2
# 0
def rotate(
    pre,
    keys,
    dig,
    sn=1,
    version=Version,
    kind=Serials.json,
    sith=None,
    nxt="",
    toad=None,
    wits=None,  # prior existing wits
    cuts=None,
    adds=None,
    data=None,
):
    """
    Builds a rotation event and returns it wrapped in a Serder.

    Validates the sequence number, signing threshold and the witness
    bookkeeping (prior wits, cuts, adds, toad) before assembling the
    key event dict.

     Parameters:
        pre is qb64 prefix
        keys is list of qb64 keys
        dig is qb64 digest of prior event
        sn is int sequence number, must be at least 1
        version is passed to Versify to make the version string
        kind is serialization kind passed to Versify
        sith is int signing threshold within 1..len(keys),
            defaults to max(1, ceil(len(keys) / 2))
        nxt is hash qual Base64
        toad is int witness threshold, must be 0 when there are no
            resultant witnesses and within 1..count otherwise,
            defaults to max(1, ceil(count / 2)) or 0
        wits is list of prior existing witnesses, no duplicates
        cuts is list of qb64 witnesses to remove, each must be in wits
        adds is list of qb64 witnesses to add, none may be in wits or cuts
        data is list of seals

     Raises:
        ValueError when any of the checks above fails
    """
    vs = Versify(version=version, kind=kind, size=0)
    ilk = Ilks.rot

    if sn < 1:
        raise ValueError("Invalid sn = {} for rot.".format(sn))

    # signing threshold, majority of keys unless given
    if sith is None:
        sith = max(1, ceil(len(keys) / 2))

    if not isinstance(sith, int):  # list sith not yet supported
        raise ValueError("invalid sith = {}.".format(sith))
    if sith < 1 or sith > len(keys):  # out of bounds sith
        raise ValueError("Invalid sith = {} for keys = {}".format(
            sith, keys))

    # prior witnesses
    if wits is None:
        wits = []
    prior = oset(wits)
    if len(prior) != len(wits):
        raise ValueError("Invalid wits = {}, has duplicates.".format(wits))

    # witnesses to drop
    if cuts is None:
        cuts = []
    dropped = oset(cuts)
    if len(dropped) != len(cuts):
        raise ValueError("Invalid cuts = {}, has duplicates.".format(cuts))

    if (prior & dropped) != dropped:  # some cuts not in wits
        raise ValueError(
            "Invalid cuts = {}, not all members in wits.".format(cuts))

    # witnesses to add
    if adds is None:
        adds = []
    added = oset(adds)
    if len(added) != len(adds):
        raise ValueError("Invalid adds = {}, has duplicates.".format(adds))

    if dropped & added:  # non empty intersection
        raise ValueError("Intersecting cuts = {} and  adds = {}.".format(
            cuts, adds))

    if prior & added:  # non empty intersection
        raise ValueError("Intersecting wits = {} and  adds = {}.".format(
            wits, adds))

    # resultant witness set once cuts and adds are applied
    resultant = (prior - dropped) | added

    expected = len(wits) - len(cuts) + len(adds)
    if len(resultant) != expected:  # redundant?
        raise ValueError(
            "Invalid member combination among wits = {}, cuts ={}, "
            "and adds = {}.".format(wits, cuts, adds))

    # witness threshold, majority of resultant witnesses unless given
    if toad is None:
        toad = max(1, ceil(len(resultant) / 2)) if resultant else 0

    if resultant:
        if toad < 1 or toad > len(resultant):  # out of bounds toad
            raise ValueError("Invalid toad = {} for resultant wits = {}"
                             "".format(toad, list(resultant)))
    elif toad != 0:  # invalid toad
        raise ValueError("Invalid toad = {} for resultant wits = {}"
                         "".format(toad, list(resultant)))

    if data is None:
        data = []

    # sn, sith and toad are hex strings, lowercase, no leading zeros
    ked = {
        "vs": vs,  # version string
        "pre": pre,  # qb64 prefix
        "sn": "{:x}".format(sn),
        "ilk": ilk,
        "dig": dig,  # qb64 digest of prior event
        "sith": "{:x}".format(sith),
        "keys": keys,  # list of qb64
        "nxt": nxt,  # hash qual Base64
        "toad": "{:x}".format(toad),
        "cuts": cuts,  # list of qb64 may be empty
        "adds": adds,  # list of qb64 may be empty
        "data": data,  # list of seals
    }

    return Serder(ked=ked)  # return serialized ked