Beispiel #1
0
def main(args=sys.argv[1:]):
    """Train a classifier for one student, or monitor the Condor jobs.

    Two flags are consumed here before the remaining arguments are handed
    to ``addArgs``/``parseArgs``:

    * ``--suffix VALUE`` -- suffix passed on to ``addArgs`` (and to
      ``monitorCondor`` in monitor mode); defaults to the empty string.
    * ``--monitor`` -- delegate to ``monitorCondor`` instead of training.

    Returns whatever ``monitorCondor`` returns in monitor mode, otherwise
    ``None``.  Nothing is trained when ``options.saveFile`` already exists.
    """
    # Work on a private copy: the default list is built once, when the
    # function is defined, so stripping flags in place would leak into later
    # calls and would also modify a list owned by the caller.
    args = list(args)

    if '--suffix' in args:
        ind = args.index('--suffix')
        suffix = args[ind + 1]
        # drop both the flag and its value
        del args[ind:ind + 2]
    else:
        suffix = ''

    if '--monitor' in args:
        args.remove('--monitor')
        return monitorCondor(args, suffix)

    # Options as they would look without any suffix; used below to locate a
    # file that can seed the suffixed run.
    optionsNoSuffix, _ = parseArgs(addArgs(args))

    args = addArgs(args, suffix)
    options, _ = parseArgs(args)

    if os.path.exists(options.saveFile):
        return

    # If the addSuffix'ed save file for this run is missing but the one
    # computed without a suffix exists, start from a copy of the latter.
    if not (os.path.exists(addSuffix(options.saveFile))) and os.path.exists(
            addSuffix(optionsNoSuffix.saveFile)):
        import shutil
        shutil.copy(addSuffix(optionsNoSuffix.saveFile),
                    addSuffix(options.saveFile))

    def repl(x):
        """Fill in the ``$(...)`` placeholders of the text ``x``."""
        numStudentsToAdd = 1
        filename = ''
        saveFile = addSuffix(options.saveFile)
        # Only name a file when the addSuffix'ed save file is already there.
        if os.path.exists(saveFile):
            filename = options.saveFile

        x = x.replace(
            '$(EVAL_PATH)',
            'data/dt/studentsNew29-unperturbed-%i/weighted/only-$(EVAL_STUDENT).weka'
            % options.numSource)
        x = x.replace(
            '$(SOURCE_DATA_PATH)',
            'data/dt/studentsNew29-unperturbed-%i/train/$(DATA_STUDENT).arff' %
            options.numSource)
        x = x.replace('$(TARGET_STUDENT)', options.student)
        x = x.replace('$(NUM_STUDENTS_TO_ADD)', str(numStudentsToAdd))
        x = x.replace('$(FILENAME)', filename)
        return x

    trainClassifierMain(options, repl)
Beispiel #2
0
def monitorCondor(args, suffix):
    """Submit and re-submit Condor jobs until every save file exists.

    One job is tracked per student index in ``range(len(getUniqueStudents()))``.
    Each round the output of ``condor_q sbarrett`` is inspected; a student
    whose job is no longer listed is finished if its save file exists and is
    (re)submitted through ``submit`` otherwise.

    ``args`` and ``suffix`` are passed on to ``addArgs`` and ``submit``.
    Returns ``None`` once no student is left unfinished.
    """
    import re  # local import: only needed for the job-id matching below

    numStudents = len(getUniqueStudents())
    jobs = [-1] * numStudents  # -1 marks "never submitted"
    # list(...) so that .remove() also works where range() is lazy
    unfinished = list(range(numStudents))
    saveFiles = []
    for studentInd in range(numStudents):
        options, _ = parseArgs(addArgs(args, suffix, studentInd))
        saveFiles.append(options.saveFile)

    while unfinished:
        needToRun = []
        # check what's running and still needs to run
        p = subprocess.Popen(['condor_q', 'sbarrett'], stdout=subprocess.PIPE)
        out, _ = p.communicate()
        for studentInd in list(unfinished):
            jobId = jobs[studentInd]
            # Match the id as a whole number: a plain substring search would
            # treat job 12 as queued whenever e.g. 123 shows up in the
            # listing.  A never-submitted job (-1) is never queued.
            if jobId >= 0 and re.search(r'(?<!\d)%d(?!\d)' % jobId, out):
                continue
            if os.path.exists(saveFiles[studentInd]):
                unfinished.remove(studentInd)
            else:
                needToRun.append(studentInd)
        # submit new jobs as needed
        for studentInd in needToRun:
            jobs[studentInd] = submit([str(studentInd)] + args, suffix)
        # wait before polling again, but not once everything is done
        if unfinished:
            time.sleep(20)
def monitorCondor(args,suffix):
  """Submit and re-submit Condor jobs until every save file exists.

  One job is tracked per student index in ``range(len(getUniqueStudents()))``.
  Each round the output of ``condor_q sbarrett`` is inspected; a student
  whose job is no longer listed is finished if its save file exists and is
  (re)submitted through ``submit`` otherwise.

  ``args`` and ``suffix`` are passed on to ``addArgs`` and ``submit``.
  Returns ``None`` once no student is left unfinished.
  """
  import re  # local import: only needed for the job-id matching below

  numStudents = len(getUniqueStudents())
  jobs = [-1] * numStudents  # -1 marks "never submitted"
  # list(...) so that .remove() also works where range() is lazy
  unfinished = list(range(numStudents))
  saveFiles = []
  for studentInd in range(numStudents):
    options,_ = parseArgs(addArgs(args,suffix,studentInd))
    saveFiles.append(options.saveFile)

  while unfinished:
    needToRun = []
    # check what's running and still needs to run
    p = subprocess.Popen(['condor_q','sbarrett'],stdout=subprocess.PIPE)
    out,_ = p.communicate()
    for studentInd in list(unfinished):
      jobId = jobs[studentInd]
      # Match the id as a whole number: a plain substring search would treat
      # job 12 as queued whenever e.g. 123 shows up in the listing.
      # A never-submitted job (-1) is never queued.
      if jobId >= 0 and re.search(r'(?<!\d)%d(?!\d)' % jobId,out):
        continue
      if os.path.exists(saveFiles[studentInd]):
        unfinished.remove(studentInd)
      else:
        needToRun.append(studentInd)
    # submit new jobs as needed
    for studentInd in needToRun:
      jobs[studentInd] = submit([str(studentInd)] + args,suffix)
    # wait before polling again, but not once everything is done
    if unfinished:
      time.sleep(20)
def main(args=sys.argv[1:]):
    """Run one partial two-stage training job, or combine their results.

    ``'twostagetradaboost-partial'`` is prepended to ``args`` before they
    are parsed with ``parseArgs``.  Result files live in
    ``configs/learners/saved/twostage-partial/<baseLearner>/<student>/`` and
    are named ``<t>.txt``; each holds a single error value.

    * With ``--combine``: read the files for ``t`` in
      ``range(options.partialMax)``, pick the ``t`` with the smallest error
      (the first one on ties), store it in ``options.partialInd`` and call
      ``trainClassifierMain``.
    * Otherwise: run ``trainClassifierMain`` with ``--no-save`` and
      ``--catchOutput``, extract the ``BEST T`` / ``BEST ERROR`` lines from
      its output and write the error to ``<BEST T>.txt``.

    Raises ``ValueError`` if the captured output lacks those two lines, and
    ``OSError`` if the result directory cannot be created.
    """
    args = ['twostagetradaboost-partial'] + args
    combine = False
    if '--combine' in args:
        args.remove('--combine')
        combine = True
        args = args + ['--ignorePartialMax']

    options, _ = parseArgs(args)
    directory = os.path.join('configs/learners/saved/twostage-partial',
                             options.baseLearner, options.student)
    pathBase = os.path.join(directory, '%i.txt')
    if combine:
        print('%s %s' % (options.student, options.partialMax))
        bestT = None
        bestError = numpy.inf
        for t in range(options.partialMax):
            path = pathBase % t
            with open(path, 'r') as f:
                error = float(f.read().strip())
            if error < bestError:
                bestError = error
                bestT = t
        options.partialInd = bestT
        print('bestT: %s' % bestT)
        print('bestError: %s' % bestError)
        trainClassifierMain(options)
    else:
        args = args + ['--no-save', '--catchOutput']
        options, _ = parseArgs(args)
        output, _ = trainClassifierMain(options)
        res = re.findall(r'BEST T: (\d+)\nBEST ERROR: ([.0-9]+)', output)
        if not res:
            raise ValueError(
                'no BEST T / BEST ERROR lines found in the training output')
        bestT = int(res[0][0])
        bestError = float(res[0][1])

        try:
            os.makedirs(directory)
        except OSError:
            # An already existing directory is fine; anything else
            # (permissions, a file in the way, ...) is a real failure.
            if not os.path.isdir(directory):
                raise
        filename = pathBase % bestT
        with open(filename, 'w') as f:
            f.write('%f\n' % bestError)
def main(args=sys.argv[1:]):
  """Run one partial two-stage training job, or combine their results.

  ``'twostagetradaboost-partial'`` is prepended to ``args`` before they are
  parsed with ``parseArgs``.  Result files live in
  ``configs/learners/saved/twostage-partial/<baseLearner>/<student>/`` and
  are named ``<t>.txt``; each holds a single error value.

  * With ``--combine``: read the files for ``t`` in
    ``range(options.partialMax)``, pick the ``t`` with the smallest error
    (the first one on ties), store it in ``options.partialInd`` and call
    ``trainClassifierMain``.
  * Otherwise: run ``trainClassifierMain`` with ``--no-save`` and
    ``--catchOutput``, extract the ``BEST T`` / ``BEST ERROR`` lines from its
    output and write the error to ``<BEST T>.txt``.

  Raises ``ValueError`` if the captured output lacks those two lines, and
  ``OSError`` if the result directory cannot be created.
  """
  args = ['twostagetradaboost-partial'] + args
  combine = False
  if '--combine' in args:
    args.remove('--combine')
    combine = True
    args = args + ['--ignorePartialMax']

  options,_ = parseArgs(args)
  directory = os.path.join('configs/learners/saved/twostage-partial',options.baseLearner,options.student)
  pathBase = os.path.join(directory,'%i.txt')
  if combine:
    print('%s %s' % (options.student,options.partialMax))
    bestT = None
    bestError = numpy.inf
    for t in range(options.partialMax):
      path = pathBase % t
      with open(path,'r') as f:
        error = float(f.read().strip())
      if error < bestError:
        bestError = error
        bestT = t
    options.partialInd = bestT
    print('bestT: %s' % bestT)
    print('bestError: %s' % bestError)
    trainClassifierMain(options)
  else:
    args = args + ['--no-save','--catchOutput']
    options,_ = parseArgs(args)
    output,_ = trainClassifierMain(options)
    res = re.findall(r'BEST T: (\d+)\nBEST ERROR: ([.0-9]+)',output)
    if not res:
      raise ValueError('no BEST T / BEST ERROR lines found in the training output')
    bestT = int(res[0][0])
    bestError = float(res[0][1])

    try:
      os.makedirs(directory)
    except OSError:
      # An already existing directory is fine; anything else (permissions,
      # a file in the way, ...) is a real failure.
      if not os.path.isdir(directory):
        raise
    filename = pathBase % bestT
    with open(filename,'w') as f:
      f.write('%f\n'%bestError)
def main(args=sys.argv[1:]):
  """Train a classifier for one student, or monitor the Condor jobs.

  Two flags are consumed here before the remaining arguments are handed to
  ``addArgs``/``parseArgs``:

  * ``--suffix VALUE`` -- suffix passed on to ``addArgs`` (and to
    ``monitorCondor`` in monitor mode); defaults to the empty string.
  * ``--monitor`` -- delegate to ``monitorCondor`` instead of training.

  Returns whatever ``monitorCondor`` returns in monitor mode, otherwise
  ``None``.  Nothing is trained when ``options.saveFile`` already exists.
  """
  # Work on a private copy: the default list is built once, when the function
  # is defined, so stripping flags in place would leak into later calls and
  # would also modify a list owned by the caller.
  args = list(args)

  if '--suffix' in args:
    ind = args.index('--suffix')
    suffix = args[ind+1]
    # drop both the flag and its value
    del args[ind:ind+2]
  else:
    suffix = ''

  if '--monitor' in args:
    args.remove('--monitor')
    return monitorCondor(args,suffix)

  # Options as they would look without any suffix; used below to locate a
  # file that can seed the suffixed run.
  optionsNoSuffix,_ = parseArgs(addArgs(args))

  args = addArgs(args,suffix)
  options,_ = parseArgs(args)

  if os.path.exists(options.saveFile):
    return

  # If the addSuffix'ed save file for this run is missing but the one
  # computed without a suffix exists, start from a copy of the latter.
  if not(os.path.exists(addSuffix(options.saveFile))) and os.path.exists(addSuffix(optionsNoSuffix.saveFile)):
    import shutil
    shutil.copy(addSuffix(optionsNoSuffix.saveFile),addSuffix(options.saveFile))

  def repl(x):
    """Fill in the ``$(...)`` placeholders of the text ``x``."""
    numStudentsToAdd = 1
    filename = ''
    saveFile = addSuffix(options.saveFile)
    # Only name a file when the addSuffix'ed save file is already there.
    if os.path.exists(saveFile):
      filename = options.saveFile

    x = x.replace('$(EVAL_PATH)','data/dt/studentsNew29-unperturbed-%i/weighted/only-$(EVAL_STUDENT).weka' % options.numSource)
    x = x.replace('$(SOURCE_DATA_PATH)','data/dt/studentsNew29-unperturbed-%i/train/$(DATA_STUDENT).arff' % options.numSource)
    x = x.replace('$(TARGET_STUDENT)',options.student)
    x = x.replace('$(NUM_STUDENTS_TO_ADD)',str(numStudentsToAdd))
    x = x.replace('$(FILENAME)',filename)
    return x

  trainClassifierMain(options,repl)
def submit(args,suffix):
  """Write the Condor job file for one student and submit it.

  The template ``condor/createTwoStageTransfer/base.condor`` is read, its
  ``$(Process)`` placeholder is replaced by ``<studentInd><suffix>`` and its
  ``$(ARGS)`` placeholder by the space-joined ``args`` (plus
  ``--suffix <suffix>`` when the suffix is not blank).  The result is written
  to ``condor/createTwoStageTransfer/jobs/<studentInd><suffix>.condor`` and
  handed to ``condor_submit``.

  Returns the cluster number found in the ``condor_submit`` output as an
  ``int``.  Raises ``RuntimeError`` if that output contains no cluster number.
  """
  options,_ = parseArgs(addArgs(args,suffix))
  base = 'condor/createTwoStageTransfer'
  orig = os.path.join(base,'base.condor')
  path = os.path.join(base,'jobs/%i%s.condor' % (options.studentInd,suffix))
  with open(orig,'r') as f:
    contents = f.read()
  contents = contents.replace('$(Process)','%i%s' % (options.studentInd,suffix))
  argStr = ' '.join(args)
  if suffix.strip() != '':
    argStr += ' --suffix %s' % suffix
  contents = contents.replace('$(ARGS)',argStr)
  with open(path,'w') as f:
    f.write(contents)

  print('SUBMITTING  %s' % options.studentInd)
  p = subprocess.Popen(['condor_submit',path],stdout=subprocess.PIPE)
  out,_ = p.communicate()
  clusters = re.findall(r'cluster (\d+)',out)
  if not clusters:
    # Fail with the tool's own output instead of an opaque IndexError.
    raise RuntimeError('condor_submit reported no cluster number for %s:\n%s' % (path,out))
  return int(clusters[0])
Beispiel #8
0
def submit(args, suffix):
    """Write the Condor job file for one student and submit it.

    The template ``condor/createTwoStageTransfer/base.condor`` is read, its
    ``$(Process)`` placeholder is replaced by ``<studentInd><suffix>`` and
    its ``$(ARGS)`` placeholder by the space-joined ``args`` (plus
    ``--suffix <suffix>`` when the suffix is not blank).  The result is
    written to ``condor/createTwoStageTransfer/jobs/<studentInd><suffix>.condor``
    and handed to ``condor_submit``.

    Returns the cluster number found in the ``condor_submit`` output as an
    ``int``.  Raises ``RuntimeError`` if that output contains no cluster
    number.
    """
    options, _ = parseArgs(addArgs(args, suffix))
    base = 'condor/createTwoStageTransfer'
    orig = os.path.join(base, 'base.condor')
    path = os.path.join(base,
                        'jobs/%i%s.condor' % (options.studentInd, suffix))
    with open(orig, 'r') as f:
        contents = f.read()
    contents = contents.replace('$(Process)',
                                '%i%s' % (options.studentInd, suffix))
    argStr = ' '.join(args)
    if suffix.strip() != '':
        argStr += ' --suffix %s' % suffix
    contents = contents.replace('$(ARGS)', argStr)
    with open(path, 'w') as f:
        f.write(contents)

    print('SUBMITTING  %s' % options.studentInd)
    p = subprocess.Popen(['condor_submit', path], stdout=subprocess.PIPE)
    out, _ = p.communicate()
    clusters = re.findall(r'cluster (\d+)', out)
    if not clusters:
        # Fail with the tool's own output instead of an opaque IndexError.
        raise RuntimeError(
            'condor_submit reported no cluster number for %s:\n%s'
            % (path, out))
    return int(clusters[0])