Esempio n. 1
0

if __name__ == "__main__":
    """Crude, prototypical approach to fake data table generation."""
    # smoke test
    import candidate_test_pruners

    nodes, head = buildRandomModel(
        50,
        4,
        1,
        0.5,
        0.3,
        2,
        ["U:\\mercurial\\fake-data-generator\\src\\ModelBehaviors"],
        candidate_test_pruners.bigDelta(),
    )
    with open("E:\\debris\\whatever.gv", "w") as gvfile:
        gvfile.write(graphvizEntireThing(head))
        gvfile.flush()
    import csv

    with open("E:\\debris\\fakeData.tsv", "wb") as tsvfile:
        cleanWriter = csv.writer(tsvfile, dialect="excel-tab")
        cleanWriter.writerow(["{0}:{1}".format(node.name, node.genName(4)) for node in nodes])
        for x in range(200):
            key = object()
            cleanWriter.writerow([str(node.calculate(key)) for node in nodes])
            if not x % 100:
                print x, "rows done"
        tsvfile.flush()
Esempio n. 2
0
Created on Mar 7, 2012

@author: anorberg
"""

import os.path
import sys
import candidate_test_pruners
import optparse
import ConfigParser

# Each name holds the object returned by calling the corresponding
# candidate_test_pruners entry once, at import time. NULL, UNIFOUR and MINFRAC
# are the values that the PRUNER_LUT alias table below maps names onto.
NULL = candidate_test_pruners.nullPruner()
UNIFOUR = candidate_test_pruners.uniformThroughFour()
GLOBALCUT = candidate_test_pruners.globalCutoff()
MINFRAC = candidate_test_pruners.minimalistFraction()
BIGDELTA = candidate_test_pruners.bigDelta()

PRUNER_LUT = {
    "null": NULL,
    "nullpruner": NULL,
    "n": NULL,
    "uniform": UNIFOUR,
    "uniformthroughfour": UNIFOUR,
    "u": UNIFOUR,
    "minfrac": MINFRAC,
    "minimalist": MINFRAC,
    "m": MINFRAC,
    "fraction": MINFRAC,
    "fractional": MINFRAC,
    "minimalistfraction": MINFRAC,
    "f": MINFRAC,
class Config(object):
    '''
    A struct-like class that holds the configuration for the fake data generator.
    Designed to build itself from a command line and/or a config file.

    The class attributes below are the built-in defaults. A config file
    (--config) overrides the defaults, and explicit command-line options
    override both.
    '''

    # Output file name without extension.
    outputRoot = os.path.join(os.path.curdir, "generatedData")
    # Number of separate graphs to generate.
    nGraphs = 1
    # Number of nodes per graph (including seeds).
    graphSize = 50
    # Number of seeds per graph.
    nSeeds = 4
    # Depth of recursion for procedural name generation in the Graphviz diagram.
    gvRecursion = 1
    # Depth of recursion for name generation in the generated data file.
    tsvRecursion = 3
    # Fraction of nodes to place in the output file.
    tsvColRate = 1.0
    # Directories searched for behavior plugins.
    behaviorPaths = [
        os.path.join(os.path.dirname(__file__), "..", "ModelBehaviors")
    ]
    # Graph pruning algorithm object.
    pruner = candidate_test_pruners.bigDelta()
    # Number of rows of data to output.
    samples = 500
    # Number of times to add the identity function to the pool of noise functions.
    addIdentity = 3

    def __init__(self, relevant_argv=None):
        '''
        Constructor. Read settings out of relevant_argv with optparse, gating to ConfigParser in case of the --config option

        relevant_argv -- list of command-line arguments without the program
                         name; defaults to sys.argv[1:] when None.

        Exits the process (sys.exit) after printing help when -? is given.
        Raises KeyError when --pruner names something not in PRUNER_LUT.
        '''
        if relevant_argv is None:
            relevant_argv = sys.argv[1:]

        parser = self._build_option_parser()
        options, _ = parser.parse_args(relevant_argv)

        if options.stopAndHelp:
            parser.print_help()
            sys.exit()

        if options.configFile:
            self._parse_config_file(options.configFile)
        #all settings on the CLI override settings in the file, so just blindly write from here on out

        if options.graphs:
            self.nGraphs = options.graphs

        if options.graphSize:
            self.graphSize = options.graphSize

        if options.seeds:
            self.nSeeds = options.seeds

        if options.samples:
            self.samples = options.samples

        if options.gvRecursion is not None:
            #because zero is a legal value
            self.gvRecursion = options.gvRecursion

        if options.tsvRecursion is not None:
            self.tsvRecursion = options.tsvRecursion

        if options.pickRate is not None:
            self.tsvColRate = options.pickRate

        if options.behaviors:
            self.behaviorPaths = options.behaviors.split(os.path.pathsep)

        if options.pruner:
            self.pruner = PRUNER_LUT[options.pruner.lower()]

        if options.outputRoot:
            self.outputRoot = options.outputRoot

        if options.unNoisiness is not None:
            #zero extra identity functions is a legal request, so test against None
            self.addIdentity = options.unNoisiness

    def _build_option_parser(self):
        '''
        Create the optparse parser describing every command-line option.
        No option declares a default, so an option the user did not supply
        comes back as None.
        '''
        parser = optparse.OptionParser()
        parser.add_option("-?",
                          dest="stopAndHelp",
                          action="store_true",
                          help="View this help")
        parser.add_option("-c",
                          "--config",
                          dest="configFile",
                          help="Load a configuration file")
        parser.add_option("-g",
                          "--graphs",
                          dest="graphs",
                          type="int",
                          help="Number of separate graphs to generate")
        parser.add_option("-n",
                          "--graphSize",
                          dest="graphSize",
                          type="int",
                          help="Number of nodes per graph (including seeds)")
        parser.add_option("-s",
                          "--seeds",
                          dest="seeds",
                          type="int",
                          help="Number of seeds per graph")
        parser.add_option(
            "-r",
            "--graphvizRecursion",
            dest="gvRecursion",
            type="int",
            help=
            "Depth of recursion for procedural name generation in Graphviz diagram"
        )
        parser.add_option(
            "-t",
            "--tsvRecursion",
            dest="tsvRecursion",
            type="int",
            help="Depth of recursion for name generation in generated data file"
        )
        parser.add_option(
            "-p",
            "--pickRate",
            dest="pickRate",
            type="float",
            help=
            "Fraction of nodes to place in the output file; selected randomly")
        parser.add_option(
            "-b",
            "--behaviors",
            dest="behaviors",
            help="Paths to search (use OS path separator) for behavior plugins"
        )
        parser.add_option("-x",
                          "--pruner",
                          dest="pruner",
                          help="Name of graph pruning algorithm to use")
        parser.add_option("-m",
                          "--samples",
                          dest="samples",
                          type="int",
                          help="Number of rows of data to output")
        parser.add_option(
            "-o",
            "--output",
            dest="outputRoot",
            help=
            "Output file name without extension; .gv or .txt will be appended")
        # type="int" keeps addIdentity an integer, matching the class default
        # and the getint() used for the config file.
        parser.add_option(
            "-u",
            "--unnoisiness",
            dest="unNoisiness",
            type="int",
            help=
            "Number of times to add the identity function to the pool of noise functions"
        )
        return parser

    def _parse_config_file(self, filePath):
        """
        Use a ConfigParser to load settings.

        Only options actually present in the file (or in its [DEFAULT]
        section) are applied; every other setting keeps its current value.

        filePath -- path of the configuration file. ConfigParser.read()
                    silently ignores a file it cannot open, in which case
                    nothing changes.

        Raises KeyError when the Pruner option names something not in
        PRUNER_LUT, and ValueError when a numeric option cannot be converted.
        """
        parser = ConfigParser.SafeConfigParser()
        # Pre-create the sections so options given only under [DEFAULT] are
        # still found when the file leaves out a section header.
        for section in ("Output", "Model", "Generation"):
            parser.add_section(section)

        parser.read(filePath)

        def setting(getter, section, option, current):
            # Fall back to the value already held when the file is silent.
            if parser.has_option(section, option):
                return getter(section, option)
            return current

        self.outputRoot = setting(parser.get, "Output", "File",
                                  self.outputRoot)
        self.tsvColRate = setting(parser.getfloat, "Output", "PickRate",
                                  self.tsvColRate)
        self.tsvRecursion = setting(parser.getint, "Output", "TsvRecursion",
                                    self.tsvRecursion)
        self.gvRecursion = setting(parser.getint, "Output",
                                   "GraphvizRecursion", self.gvRecursion)
        self.samples = setting(parser.getint, "Output", "Samples",
                               self.samples)

        if parser.has_option("Model", "Behaviors"):
            self.behaviorPaths = parser.get("Model",
                                            "Behaviors").split(os.path.pathsep)
        if parser.has_option("Model", "Pruner"):
            self.pruner = PRUNER_LUT[parser.get("Model", "Pruner").lower()]
        self.addIdentity = setting(parser.getint, "Model", "UnNoisiness",
                                   self.addIdentity)

        self.nGraphs = setting(parser.getint, "Generation", "Graphs",
                               self.nGraphs)
        self.graphSize = setting(parser.getint, "Generation", "GraphSize",
                                 self.graphSize)
        self.nSeeds = setting(parser.getint, "Generation", "Seeds",
                              self.nSeeds)
        for pluginInfo in candidatePruners:
            if pluginInfo.name == pruner:
                pruner = pluginInfo.plugin_object
                pruner.activate()
                break
        raise ValueError("No pruner by name {0} found in specified paths.".format(pruner))
    trimmedGraph = pruner.prune(rawCompleteGraph)
    
    function_plugins =modelBehaviorImplementations(behaviorPaths)
    functions = [plugin.plugin_object for plugin in function_plugins]
    return workingModelFromPygraph(trimmedGraph, functions, bonus_identity)


if __name__ == "__main__":
    """Crude, prototypical approach to fake data table generation."""
    #smoke test
    import candidate_test_pruners
    nodes, head = buildRandomModel(50, 4, 1, 0.5, 0.3, 2, ['U:\\mercurial\\fake-data-generator\\src\\ModelBehaviors'], candidate_test_pruners.bigDelta())
    with open("E:\\debris\\whatever.gv", "w") as gvfile:
        gvfile.write(graphvizEntireThing(head))
        gvfile.flush()
    import csv
    with open("E:\\debris\\fakeData.tsv", "wb") as tsvfile:
        cleanWriter = csv.writer(tsvfile, dialect='excel-tab')
        cleanWriter.writerow(["{0}:{1}".format(node.name, node.genName(4)) for node in nodes])
        for x in range(200):
            key = object()
            cleanWriter.writerow([str(node.calculate(key)) for node in nodes])
            if not x % 100:
                print x, "rows done"
        tsvfile.flush()
Created on Mar 7, 2012

@author: anorberg
'''

import os.path
import sys
import candidate_test_pruners
import optparse
import ConfigParser

# Each name holds the object returned by calling the corresponding
# candidate_test_pruners entry once, at import time. NULL, UNIFOUR and MINFRAC
# are the values that the PRUNER_LUT alias table below maps names onto.
NULL = candidate_test_pruners.nullPruner()
UNIFOUR = candidate_test_pruners.uniformThroughFour()
GLOBALCUT = candidate_test_pruners.globalCutoff()
MINFRAC = candidate_test_pruners.minimalistFraction()
BIGDELTA = candidate_test_pruners.bigDelta()

PRUNER_LUT = {
    "null": NULL,
    "nullpruner": NULL,
    "n": NULL,
    "uniform": UNIFOUR,
    "uniformthroughfour": UNIFOUR,
    "u": UNIFOUR,
    "minfrac": MINFRAC,
    "minimalist": MINFRAC,
    "m": MINFRAC,
    "fraction": MINFRAC,
    "fractional": MINFRAC,
    "minimalistfraction": MINFRAC,
    "f": MINFRAC,