Esempio n. 1
0
def swarmModel(basedir, column):
    """Run a swarm for ``column`` and create a model from its result.

    The swarm description is read from the ``data/swarm_params`` file located
    under ``_FILE_PATH``. The value returned by
    ``permutations_runner.runWithJsonFile`` is handed to
    ``ModelFactory.create``.

    Parameters:
    ----------------------------------------------------------------------
    basedir:        Working directory passed to the swarm runner.
    column:         Name used to build the swarm's output label
                    (``column + "_swarm"``).
    retval:         The object returned by ``ModelFactory.create``.
    """
    # Build the path from components instead of embedding Windows-style
    # backslashes in a literal: the old literal contained an invalid escape
    # sequence and only resolved to the intended file on Windows.
    filename = os.path.join(_FILE_PATH, "data", "swarm_params")

    swarm_options = {
        'maxWorkers': 1,
        'overwrite': True,
        'verbosityCount': 3,
    }
    model_params = permutations_runner.runWithJsonFile(
        filename, swarm_options, column + "_swarm", basedir)

    # Hand the model back to the caller; previously it was built and dropped.
    return ModelFactory.create(model_params)
Esempio n. 2
0
def runPermutations(args):
  """
  The main function of the RunPermutations utility.

  This utility will automatically generate and run multiple prediction framework
  experiments that are permutations of a base experiment via the Grok engine.
  For example, if you have an experiment that you want to test with 3 possible
  values of variable A and 2 possible values of variable B, this utility will
  automatically generate the experiment directories and description files for
  each of the 6 different experiments.

  Below is an example of the permutations file read by this script. The
  permutations file must be in the same directory as the description.py for the
  base experiment that you want to permute. It contains a permutations dict, an
  optional list of the result items to report on for each experiment, and an
  optional result item to optimize for.

  When an 'optimize' entry is provided, this tool will attempt to prioritize the
  order in which the various permutations are run in order to improve the odds
  of running the best permutations sooner. It does this by watching the results
  for various parameter values and putting parameter values that give generally
  better results at the head of the queue.

  In addition, when the optimize key is provided, we periodically update the UI
  with the best results obtained so far on that metric.

  ---------------------------------------------------------------------------
  permutations = dict(
                  iterationCount = [1000, 5000],
                  coincCount = [50, 100],
                  trainTP = [False],
                  )

  report = ['.*reconstructErrAvg',
            '.*inputPredScore.*',
            ]

  optimize = 'postProc_gym1_baseline:inputPredScore'

  Parameters:
  ----------------------------------------------------------------------
  args:           Command-line args; the equivalent of sys.argv[1:]
  retval:         for the actions 'run', 'pickup', and 'dryRun', returns the
                  Hypersearch job ID (in ClientJobs table); otherwise returns
                  None
  """

  helpString = (
      "\n\n%prog [options] permutationsScript\n"
      "%prog [options] expDescription.json\n\n"
      "This script runs permutations of an experiment via Grok engine, as "
      "defined in a\npermutations.py script or an expGenerator experiment "
      "description json file.\nIn the expDescription.json form, the json file "
      "MUST have the file extension\n'.json' and MUST conform to "
      "expGenerator/experimentDescriptionSchema.json.")

  parser = optparse.OptionParser(usage=helpString)

  parser.add_option(
    "--replaceReport", dest="replaceReport", action="store_true",
    default=DEFAULT_OPTIONS["replaceReport"],
    help="Replace existing csv report file if it exists. Default is to "
         "append to the existing file. [default: %default].")

  # The help text lists exactly the values accepted by `choices`.
  parser.add_option(
    "--action", dest="action", default=DEFAULT_OPTIONS["action"],
    choices=["run", "pickup", "report", "dryRun"],
    help="Which action to perform. Possible actions are run, pickup, "
         "report, dryRun. "
         "run: run a new HyperSearch via Grok. "
         "pickup: pick up the latest run of a HyperSearch job. "
         "dryRun: run a single HypersearchWorker inline within the application "
         "process without the Grok infrastructure to flush out bugs in "
         "description and permutations scripts; defaults to "
         "maxPermutations=1: use --maxPermutations to change this; "
         "report: just print results from the last or current run. "
         "[default: %default].")

  parser.add_option(
    "--maxPermutations", dest="maxPermutations",
    default=DEFAULT_OPTIONS["maxPermutations"], type="int",
    help="Maximum number of models to search. Applies only to the 'run' and "
         "'dryRun' actions. [default: %default].")

  # NOTE: adjacent string literals are concatenated verbatim, so every
  # continued fragment below carries its own separating space.
  parser.add_option(
    "--exports", dest="exports", default=DEFAULT_OPTIONS["exports"],
    type="string",
    help="json dump of environment variable settings that should be applied "
         "for the job before running. [default: %default].")

  parser.add_option(
    "--useTerminators", dest="useTerminators", action="store_true",
    default=DEFAULT_OPTIONS["useTerminators"],
    help="Use early model terminators in HyperSearch. "
         "[default: %default].")

  parser.add_option(
    "--maxWorkers", dest="maxWorkers", default=DEFAULT_OPTIONS["maxWorkers"],
    type="int",
    help="Maximum number of concurrent workers to launch. Applies only to "
         "the 'run' action. [default: %default].")

  parser.add_option(
    "-v", dest="verbosityCount", action="count", default=0,
    help="Increase verbosity of the output.  Specify multiple times for "
         "increased verbosity. e.g., -vv is more verbose than -v.")

  parser.add_option(
    "--timeout", dest="timeout", default=DEFAULT_OPTIONS["timeout"], type="int",
    help="Time out for this search in minutes. "
         "[default: %default].")

  parser.add_option(
    "--overwrite", default=DEFAULT_OPTIONS["overwrite"], action="store_true",
    help="If 'yes', overwrite existing description.py and permutations.py"
         " (in the same directory as the <expDescription.json> file) if they"
         " already exist. [default: %default].")

  parser.add_option(
    "--genTopNDescriptions", dest="genTopNDescriptions",
    default=DEFAULT_OPTIONS["genTopNDescriptions"], type="int",
    help="Generate description files for the top N models. Each one will be"
         " placed into its own subdirectory under the base description file."
         " [default: %default].")

  (options, positionalArgs) = parser.parse_args(args)

  # Get the permutations script's filepath
  if len(positionalArgs) != 1:
    parser.error("You must supply the name of exactly one permutations script "
                 "or JSON description file.")

  # Normalize the argument to an absolute path: expand "~", then environment
  # variables, then resolve relative to the current directory.
  fileArgPath = os.path.expanduser(positionalArgs[0])
  fileArgPath = os.path.expandvars(fileArgPath)
  fileArgPath = os.path.abspath(fileArgPath)

  permWorkDir = os.path.dirname(fileArgPath)

  # The file's base name (without extension) labels the output; the extension
  # selects which runner entry point is used.
  outputLabel, fileExtension = os.path.splitext(os.path.basename(fileArgPath))

  # The runner is given the parsed options as a plain dict.
  optionsDict = vars(options)

  if fileExtension == ".json":
    returnValue = permutations_runner.runWithJsonFile(
      fileArgPath, optionsDict, outputLabel, permWorkDir)
  else:
    returnValue = permutations_runner.runWithPermutationsScript(
      fileArgPath, optionsDict, outputLabel, permWorkDir)

  return returnValue
Esempio n. 3
0
def runPermutations(args):
    """
    The main function of the RunPermutations utility.

    This utility will automatically generate and run multiple prediction
    framework experiments that are permutations of a base experiment via the
    Grok engine. For example, if you have an experiment that you want to test
    with 3 possible values of variable A and 2 possible values of variable B,
    this utility will automatically generate the experiment directories and
    description files for each of the 6 different experiments.

    Below is an example of the permutations file read by this script. The
    permutations file must be in the same directory as the description.py for
    the base experiment that you want to permute. It contains a permutations
    dict, an optional list of the result items to report on for each
    experiment, and an optional result item to optimize for.

    When an 'optimize' entry is provided, this tool will attempt to prioritize
    the order in which the various permutations are run in order to improve the
    odds of running the best permutations sooner. It does this by watching the
    results for various parameter values and putting parameter values that give
    generally better results at the head of the queue.

    In addition, when the optimize key is provided, we periodically update the
    UI with the best results obtained so far on that metric.

    ---------------------------------------------------------------------------
    permutations = dict(
                    iterationCount = [1000, 5000],
                    coincCount = [50, 100],
                    trainTP = [False],
                    )

    report = ['.*reconstructErrAvg',
              '.*inputPredScore.*',
              ]

    optimize = 'postProc_gym1_baseline:inputPredScore'

    Parameters:
    ----------------------------------------------------------------------
    args:           Command-line args; the equivalent of sys.argv[1:]
    retval:         for the actions 'run', 'pickup', and 'dryRun', returns the
                    Hypersearch job ID (in ClientJobs table); otherwise returns
                    None
    """

    helpString = (
        "\n\n%prog [options] permutationsScript\n"
        "%prog [options] expDescription.json\n\n"
        "This script runs permutations of an experiment via Grok engine, as "
        "defined in a\npermutations.py script or an expGenerator experiment "
        "description json file.\nIn the expDescription.json form, the json file "
        "MUST have the file extension\n'.json' and MUST conform to "
        "expGenerator/experimentDescriptionSchema.json.")

    parser = optparse.OptionParser(usage=helpString)

    parser.add_option(
        "--replaceReport",
        dest="replaceReport",
        action="store_true",
        default=DEFAULT_OPTIONS["replaceReport"],
        help="Replace existing csv report file if it exists. Default is to "
        "append to the existing file. [default: %default].")

    # The help text lists exactly the values accepted by `choices`.
    parser.add_option(
        "--action",
        dest="action",
        default=DEFAULT_OPTIONS["action"],
        choices=["run", "pickup", "report", "dryRun"],
        help="Which action to perform. Possible actions are run, pickup, "
        "report, dryRun. "
        "run: run a new HyperSearch via Grok. "
        "pickup: pick up the latest run of a HyperSearch job. "
        "dryRun: run a single HypersearchWorker inline within the application "
        "process without the Grok infrastructure to flush out bugs in "
        "description and permutations scripts; defaults to "
        "maxPermutations=1: use --maxPermutations to change this; "
        "report: just print results from the last or current run. "
        "[default: %default].")

    parser.add_option(
        "--maxPermutations",
        dest="maxPermutations",
        default=DEFAULT_OPTIONS["maxPermutations"],
        type="int",
        help="Maximum number of models to search. Applies only to the 'run' and "
        "'dryRun' actions. [default: %default].")

    # NOTE: adjacent string literals are concatenated verbatim, so every
    # continued fragment below carries its own separating space.
    parser.add_option(
        "--exports",
        dest="exports",
        default=DEFAULT_OPTIONS["exports"],
        type="string",
        help="json dump of environment variable settings that should be applied "
        "for the job before running. [default: %default].")

    parser.add_option(
        "--useTerminators",
        dest="useTerminators",
        action="store_true",
        default=DEFAULT_OPTIONS["useTerminators"],
        help="Use early model terminators in HyperSearch. "
        "[default: %default].")

    parser.add_option(
        "--maxWorkers",
        dest="maxWorkers",
        default=DEFAULT_OPTIONS["maxWorkers"],
        type="int",
        help="Maximum number of concurrent workers to launch. Applies only to "
        "the 'run' action. [default: %default].")

    parser.add_option(
        "-v",
        dest="verbosityCount",
        action="count",
        default=0,
        help="Increase verbosity of the output.  Specify multiple times for "
        "increased verbosity. e.g., -vv is more verbose than -v.")

    parser.add_option(
        "--timeout",
        dest="timeout",
        default=DEFAULT_OPTIONS["timeout"],
        type="int",
        help="Time out for this search in minutes. "
        "[default: %default].")

    parser.add_option(
        "--overwrite",
        default=DEFAULT_OPTIONS["overwrite"],
        action="store_true",
        help="If 'yes', overwrite existing description.py and permutations.py"
        " (in the same directory as the <expDescription.json> file) if they"
        " already exist. [default: %default].")

    parser.add_option(
        "--genTopNDescriptions",
        dest="genTopNDescriptions",
        default=DEFAULT_OPTIONS["genTopNDescriptions"],
        type="int",
        help="Generate description files for the top N models. Each one will be"
        " placed into its own subdirectory under the base description file."
        " [default: %default].")

    (options, positionalArgs) = parser.parse_args(args)

    # Get the permutations script's filepath
    if len(positionalArgs) != 1:
        parser.error(
            "You must supply the name of exactly one permutations script "
            "or JSON description file.")

    # Normalize the argument to an absolute path: expand "~", then environment
    # variables, then resolve relative to the current directory.
    fileArgPath = os.path.expanduser(positionalArgs[0])
    fileArgPath = os.path.expandvars(fileArgPath)
    fileArgPath = os.path.abspath(fileArgPath)

    permWorkDir = os.path.dirname(fileArgPath)

    # The file's base name (without extension) labels the output; the
    # extension selects which runner entry point is used.
    outputLabel, fileExtension = os.path.splitext(
        os.path.basename(fileArgPath))

    # Pass the options as a plain dict rather than the optparse Values object,
    # matching the other callers of permutations_runner, which supply dicts.
    # NOTE(review): assumes the runner expects a dict here -- confirm against
    # the permutations_runner version in use.
    optionsDict = vars(options)

    if fileExtension == ".json":
        returnValue = permutations_runner.runWithJsonFile(
            fileArgPath, optionsDict, outputLabel, permWorkDir)
    else:
        returnValue = permutations_runner.runWithPermutationsScript(
            fileArgPath, optionsDict, outputLabel, permWorkDir)

    return returnValue
Esempio n. 4
0
def run_swarm():
    """Run a swarm described by ``search_def.json`` in the current directory.

    ``convert_csv()`` is called first (presumably to prepare the swarm's input
    data -- confirm against its definition). The swarm is then started with
    up to 8 workers, overwriting any existing generated files, using the
    output label ``"test"`` and the current working directory as the work
    directory.

    Returns:
        Whatever ``permutations_runner.runWithJsonFile`` returns.
    """
    convert_csv()

    # Resolve the working directory once, after the conversion step, and use
    # it for both the description file location and the swarm work directory.
    work_dir = os.getcwd()
    search_def_path = os.path.join(work_dir, "search_def.json")

    swarm_options = {'maxWorkers': 8, 'overwrite': True}
    return permutations_runner.runWithJsonFile(
        search_def_path, swarm_options, "test", work_dir)