Example #1
def functionMap():
    functions = {
        'Shannon': lambda x: '{0:10.8f}'.format(lstats.graphEntropy(x)),
        'enum': linkoToEnum
    }

    functionHelp = ('Shannon (Shannon entropy applied to the'
                    ' linkograph), '
                    'enum (The enumeration for the linkograph).')

    return functions, functionHelp
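
# A minimal usage sketch (the linkograph variable `linko` is
# hypothetical, not from the original listing): the returned dict maps
# a metric name to a callable, and the help string is intended for
# command-line help text.
#
#     functions, functionHelp = functionMap()
#     print(functions['Shannon'](linko))
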
def regressionTest():
    # Figure 1 from linkography.pdf
    test_linkograph = llinkoCreate.Linkograph()
    test_linkograph.append((set(), set(), {1, 2, 3}))
    test_linkograph.append((set(), {0}, {2}))
    test_linkograph.append((set(), {0, 1}, set()))
    test_linkograph.append((set(), {0}, set()))
    expected_cartesian = (1.125, 4.5, 1.0, 1.75, 7, 2)
    tolerance = 1e-16
    if (lstats.links(test_linkograph) != 4
            or lstats.calculateCartesianStatistics(test_linkograph)
            != expected_cartesian
            or abs(lstats.percentageOfLinks(test_linkograph)
                   - 0.6666666666666666) > tolerance
            or abs(lstats.graphEntropy(test_linkograph)
                   - 0.9182958340544896) > tolerance):
        print("error calculating statistics for training data")

def processSession(session_filename, writer):
    #####################
    # generate linkograph
    #####################

    # label commands
    with open("abstraction.json", "r") as label_rules:
        labeler = llabels.Labeler(json.load(label_rules))
    with open(session_filename, "r") as commands:
        json_commands = json.load(commands)

    if len(json_commands) < 2:
        print("can't process", session_filename,
              "because session files must have at least two commands")
        return

    last_command = json_commands.pop()

    # access_next, look_next, transfer_next, move_next, execute_next, cleanup_next
    last_command_labels = labeler.labelCommands([last_command], "NoLabel")
    access_next = 0
    look_next = 0
    transfer_next = 0
    move_next = 0
    execute_next = 0
    cleanup_next = 0
    if "Access" in last_command_labels:
        access_next = 1
    if "Look" in last_command_labels:
        look_next = 1
    if "Transfer" in last_command_labels:
        transfer_next = 1
    if "Move" in last_command_labels:
        move_next = 1
    if "Execute" in last_command_labels:
        execute_next = 1
    if "Cleanup" in last_command_labels:
        cleanup_next = 1

    labeled = labeler.labelCommands(json_commands, "NoLabel")

    # @todo clean up labeled.json when it's safe
    llabels.writeLabelsToJsonFile(labeled, "labeled.json")

    # link commands
    with open("ontology.json", "r") as ontology, \
            open("labeled.json", "r") as inv_labeling:
        lg = llinkoCreate.createLinko(json.load(inv_labeling),
                                      json.load(ontology))
    os.remove("labeled.json")

    ##################
    # extract features
    ##################

    # node_count
    node_count = len(lg)

    # critical_node_count
    #     @todo: pick something real for critical_threshold
    critical_threshold = node_count / 2
    critical_node_count = lstats.countCriticalNodes(lg, critical_threshold)

    # x_bar, Sigma_x, range_x, y_bar, Sigma_y, range_y
    x_bar, Sigma_x, range_x, y_bar, Sigma_y, range_y = lstats.calculateCartesianStatistics(
        lg)

    # percentage_of_links
    percentage_of_links = lstats.percentageOfLinks(lg)

    # entropy
    entropy = lstats.graphEntropy(lg)

    # T-Complexity
    encoded_lg = lstats.linkographToString(lg)
    t_complexity = lstats.tComplexity(encoded_lg)

    # link_index
    link_index = lstats.links(lg) / len(lg)

    # graph differences
    graph_differences = lstats.summaryDifference(lg)

    # entropy deviation
    entropy_deviation = lstats.entropyDeviation(lg)

    # mean link coverage
    mean_link_coverage = lstats.meanLinkCoverage(lg)

    # top cover
    top_cover = lstats.topCover(lg)

    # session_start_time
    first_command = json_commands[0]
    first_datetime = utils.stringToDatetime(first_command['ts'])
    session_start_time = (first_datetime.hour * 3600
                          + first_datetime.minute * 60
                          + first_datetime.second)

    # session_length_seconds, mean_delay_seconds
    last_command = json_commands[-1]
    last_datetime = utils.stringToDatetime(last_command['ts'])
    session_length_timedelta = last_datetime - first_datetime
    session_length_seconds = session_length_timedelta.total_seconds()
    if len(lg) > 1:
        mean_delay_seconds = session_length_seconds / (len(lg) - 1)
    else:
        mean_delay_seconds = None

    # access_ratio, look_ratio, transfer_ratio, move_ratio, execute_ratio, cleanup_ratio
    access_ratio = look_ratio = transfer_ratio = move_ratio = execute_ratio = cleanup_ratio = 0
    if "Access" in labeled.keys():
        access_ratio = len(labeled['Access']) / len(lg)
    if "Look" in labeled.keys():
        look_ratio = len(labeled['Look']) / len(lg)
    if "Transfer" in labeled.keys():
        transfer_ratio = len(labeled['Transfer']) / len(lg)
    if "Move" in labeled.keys():
        move_ratio = len(labeled['Move']) / len(lg)
    if "Execute" in labeled.keys():
        execute_ratio = len(labeled['Execute']) / len(lg)
    if "Cleanup" in labeled.keys():
        cleanup_ratio = len(labeled['Cleanup']) / len(lg)

    #################
    # persist in .csv
    #################
    writer.writerow([
        node_count, critical_node_count, x_bar, Sigma_x, range_x, y_bar,
        Sigma_y, range_y, percentage_of_links, entropy, t_complexity,
        link_index, graph_differences, entropy_deviation, mean_link_coverage,
        top_cover, session_start_time, session_length_seconds,
        mean_delay_seconds, access_ratio, look_ratio, transfer_ratio,
        move_ratio, execute_ratio, cleanup_ratio, access_next, look_next,
        transfer_next, move_next, execute_next, cleanup_next
    ])
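
A minimal driver sketch for processSession. The output file name and
the session file list are illustrative assumptions, not part of the
original listing:

import csv

# Hypothetical driver: write one feature row per session file.
with open("features.csv", "w", newline="") as out_file:
    writer = csv.writer(out_file)
    for session_filename in ["session1.json", "session2.json"]:
        processSession(session_filename, writer)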
Example #4
        sys.path.append(args.path)

    # Import the linkograph packages. They are loaded here to provide
    # the ability to add their path on the command line.
    import linkograph.linkoCreate as llc # For manipulating linkographs
    import linkograph.stats as ls # For linkograph statistics

    # Set the max and min subgraph size to the specified size.
    minSize = maxSize = args.graphSize

    # Get the linkograph
    linko = llc.readLinkoJson(args.linko)

    # Calculate the graph Shannon entropy for the whole linkograph.
    totalEntropy = ls.graphEntropy(linko)

    # Calculate the graph Shannon entropy for subgraphs.
    subEntropies = ls.subgraphMetric(linkograph=linko,
                                     metric=ls.graphEntropy,
                                     lowerThreshold=None,
                                     upperThreshold=None,
                                     minSize=minSize,
                                     maxSize=maxSize,
                                     step=args.step,
                                     lowerBound=args.lowerBound,
                                     upperBound=args.upperBound)

    # ls.subgraphMetric returns tuples that give the lower and upper
    # index for each subgraph. We just want to graph the entropy
    # portion, which is the third entry of each tuple.
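    # A plausible continuation (not part of the original fragment):
    # keep just the entropy values for plotting.
    entropies = [entry[2] for entry in subEntropies]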
Example #5
def genSingleOntologyStats(model,
                           ontLink,
                           minLinkoSize,
                           maxLinkoSize,
                           stepLinkoSize,
                           runNum,
                           precision=2,
                           seeds=None):
    """Generate the stats on link models for a given ontology.

    inputs:

    model: the Markov model used to generate the linkographs.

    ontLink: ontology used for constructing linkographs.

    minLinkoSize: the minimum number of nodes in the linkographs to
    consider.

    maxLinkoSize: the maximum number of nodes in the linkographs to
    consider. Note that the max is not included, to match Python's
    conventions for lists and ranges.

    stepLinkoSize: the step size from minLinkoSize to maxLinkoSize
    for the linkograph sizes to consider.

    runNum: the number of linkographs to consider for each linkograph
    size.

    precision: the number of decimal places to use for the Markov
    models.

    seeds: optional list of seeds; accepted here but not used by this
    variant.

    output:

    a number_of_linkographs x 2 array. The (i, 0) entry provides the
    average Shannon entropy for the i-th linkograph size considered,
    and the (i, 1) entry provides the standard deviation of the
    Shannon entropy for the i-th linkograph size considered.

    """

    linkoSizes = range(minLinkoSize, maxLinkoSize, stepLinkoSize)

    results = np.zeros((len(linkoSizes), 2))

    # For each linkograph size, generate runNum linkographs and
    # calculate the needed statistics.
    for size in linkoSizes:

        print('size: {0}'.format(size))

        # Collect entropy.
        metrics = np.zeros(runNum)

        for i in range(runNum):

            # Randomize the initial state
            model.state = model.random.randint(1, len(model.absClasses)) - 1

            linko = model.genLinkograph(size, ontology=ontLink)

            entropy = lstats.graphEntropy(linko)

            metrics[i] = entropy

        # Find the mean and standard deviation of the entropy.
        index = (size - minLinkoSize) // stepLinkoSize
        results[index, 0] = np.mean(metrics)
        results[index, 1] = np.std(metrics)

    return results
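
A hedged usage sketch for this variant. The ontology file name, the
seed, and the size parameters are illustrative assumptions, and the
markel module is assumed importable as in Example #6:

import json

with open("ontology.json", "r") as f:
    ont = json.load(f)  # assumption: same JSON ontology format as above

# Build one generating model; markel usage mirrors Example #6.
model = markel.genModelFromOntology(ontology=ont, precision=2, seed=42)

# Each row of `stats` holds [mean entropy, std deviation] for one size.
stats = genSingleOntologyStats(model, ont,
                               minLinkoSize=10, maxLinkoSize=60,
                               stepLinkoSize=10, runNum=50)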
Example #6
def genSingleOntologyStats(ontNext,
                           ontLink,
                           minLinkoSize,
                           maxLinkoSize,
                           stepLinkoSize,
                           modelNum,
                           runNum,
                           precision=2,
                           seeds=None):
    """Generate the stats on link models for a given ontology.

    inputs:

    ontNext: ontology used to generate Markov model that create the
    next state.

    ontLink: ontology used for constructing linkographs.

    minLinkoSize: the minimum number of nodes in the linkographs to
    consider.

    maxLinkoSize: the maximum number of nodes in the linkographs to
    consider. Note that the max is not included, to match Python's
    conventions for lists and ranges.

    stepLinkoSize: the step size from minLinkoSize to maxLinkoSize
    for the linkograph sizes to consider.

    modelNum: the number of models.

    runNum: the number of linkographs to consider for each linkograph
    size.

    precision: the number of decimal places to use for the Markov
    models.

    seeds: a list of seeds to use for the generated next Markov
    models. The size of the list should be the same as the number of
    models (modelNum).

    output:

    a modelNum x number_of_linkographs array. The (i, j) entry
    provides the average Shannon entropy for the i-th model and the
    j-th linkograph size considered.

    """

    linkoSizes = range(minLinkoSize, maxLinkoSize, stepLinkoSize)

    ontSize = len(ontNext)
    absClasses = list(ontNext.keys())
    absClasses.sort()

    results = np.zeros((modelNum, len(linkoSizes)))

    if seeds is None:
        seeds = [time.time() * i for i in range(modelNum)]

    models = []
    # Create the generating models
    for i in range(modelNum):
        m = markel.genModelFromOntology(ontology=ontNext,
                                        precision=precision,
                                        seed=seeds[i])

        # Store the generated model.
        models.append(m)

    # For each linkograph size, generate runNum linkographs and
    # calculate the needed statistics.
    for size in linkoSizes:

        print('size: {0}'.format(size))

        for modelIndex, m in enumerate(models):

            # Collect entropy for each run.
            metrics = np.zeros(runNum)

            for i in range(runNum):

                # Randomize the initial state
                m.state = m.random.randint(1, len(m.absClasses)) - 1

                linko = m.genLinkograph(size, ontology=ontLink)

                entropy = lstats.graphEntropy(linko)

                metrics[i] = entropy

            # Find the mean across the different runs.
            index = (size - minLinkoSize) // stepLinkoSize
            results[modelIndex][index] = np.mean(metrics)

    return results
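
A hedged usage sketch for the multi-model variant (the ontology file
name and all parameters are illustrative; note that seeds needs one
entry per model):

import json

with open("ontology.json", "r") as f:
    ont = json.load(f)  # assumption: one ontology reused for both roles

# results[i][j] is the mean entropy of model i at the j-th linkograph size.
results = genSingleOntologyStats(ontNext=ont, ontLink=ont,
                                 minLinkoSize=10, maxLinkoSize=60,
                                 stepLinkoSize=10, modelNum=5,
                                 runNum=20, seeds=[1, 2, 3, 4, 5])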