def mainOptions(options, convertFunction=lambda x: x):
    """Run the external ``trainClassifier`` binary for one target student.

    A combined config file is produced by ``combineConfigs`` and handed to the
    binary together with the save file, the target student's training data
    and, when enabled, the training data of the other (source) students.

    Args:
        options: parsed options object. Attributes read here: ``saveFile``,
            ``targetBase``, ``student``, ``numSource``, ``useSource``,
            ``sourceBase``, ``otherStudents``, ``saveConfigFilename``,
            ``fracSourceData``, ``debug`` and ``catchOutput``.
        convertFunction: passed through unchanged to ``combineConfigs``.

    Returns:
        ``(output, error)`` as returned by ``Popen.communicate()`` when
        ``options.catchOutput`` is true (``error`` is ``None`` because stderr
        is not piped); otherwise ``None``.

    Raises:
        subprocess.CalledProcessError: when output is not captured and the
            binary exits with a non-zero status.
    """
    # Bound before the try so the cleanup below never reads an unbound name
    # when combineConfigs itself fails.
    filename = None
    try:
        filename = combineConfigs(options.saveFile, options, convertFunction)
        targetData = getFilename(options.targetBase, options.student, TRAIN)
        if (options.numSource == 0) or not options.useSource:
            sourceData = []
        else:
            sourceData = [
                getFilename(options.sourceBase, s, TRAIN)
                for s in options.otherStudents
            ]

        # run the cmd
        print('save config will be at %s' % (options.saveConfigFilename,))
        cmd = [
            'bin/%s/trainClassifier' % getArch(),
            filename,
            options.saveFile,
            targetData,
        ] + sourceData
        if options.fracSourceData is not None:
            cmd += ['--fracSourceData', str(options.fracSourceData)]
        if options.debug:
            print(' '.join(cmd))

        if options.catchOutput:
            # Note: the exit status is not checked on this path.
            p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
            output, error = p.communicate()
        else:
            subprocess.check_call(cmd)
    finally:
        # The combined config is left on disk in debug mode for inspection.
        if not options.debug and filename is not None:
            try:
                os.remove(filename)
            except OSError:
                # Best-effort cleanup: a missing or undeletable temp file must
                # not hide the outcome of the training run.
                pass
    if options.catchOutput:
        return output, error
def mainOptions(options, convertFunction=lambda x: x):
    """Train a classifier by invoking the ``trainClassifier`` executable.

    ``combineConfigs`` writes the config file that is passed to the
    executable, followed by the save file, the target student's training data
    and (optionally) one training file per source student.

    Args:
        options: parsed options object. This function reads ``saveFile``,
            ``targetBase``, ``student``, ``numSource``, ``useSource``,
            ``sourceBase``, ``otherStudents``, ``saveConfigFilename``,
            ``fracSourceData``, ``debug`` and ``catchOutput``.
        convertFunction: forwarded as-is to ``combineConfigs``.

    Returns:
        The ``(output, error)`` pair from ``Popen.communicate()`` if
        ``options.catchOutput`` is set (stderr is not piped, so ``error`` is
        ``None``); ``None`` otherwise.

    Raises:
        subprocess.CalledProcessError: if the executable fails while its
            output is not being captured.
    """
    # Pre-bind so the finally clause is safe even if combineConfigs raises.
    filename = None
    try:
        filename = combineConfigs(options.saveFile, options, convertFunction)
        targetData = getFilename(options.targetBase, options.student, TRAIN)
        useSources = not ((options.numSource == 0) or not options.useSource)
        sourceData = []
        if useSources:
            sourceData = [getFilename(options.sourceBase, s, TRAIN) for s in options.otherStudents]

        # run the cmd
        print("save config will be at %s" % (options.saveConfigFilename,))
        cmd = ["bin/%s/trainClassifier" % getArch(), filename, options.saveFile, targetData] + sourceData
        if options.fracSourceData is not None:
            cmd += ["--fracSourceData", str(options.fracSourceData)]
        if options.debug:
            print(" ".join(cmd))

        if options.catchOutput:
            # The return code is not inspected when output is captured.
            p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
            output, error = p.communicate()
        else:
            subprocess.check_call(cmd)
    finally:
        # Debug runs keep the generated config file around.
        if filename is not None and not options.debug:
            try:
                os.remove(filename)
            except OSError:
                # Cleanup is best effort; never mask the real result/error.
                pass
    if options.catchOutput:
        return output, error
def main(args=sys.argv[1:]):
    """Run the external ``runClassifier`` binary with ``--notrain``.

    Args:
        args: command-line arguments handed to ``parseArgs``; defaults to the
            process arguments captured when this function was defined.

    Raises:
        subprocess.CalledProcessError: if the binary exits non-zero.
    """
    opts = parseArgs(args)
    # The data file is looked up with the TRAIN kind constant.
    dataFile = getFilename(opts.testBase, opts.student, TRAIN)
    runner = 'bin/%s/runClassifier' % getArch()
    subprocess.check_call(
        [runner, opts.saveConfigFilename, dataFile, str(opts.numTest), '--notrain']
    )
def main(basename, dataDir, stayWeight, treeOptions, options):
    """Call ``createDT`` once per student, using only that student's data.

    Args:
        basename: forwarded to ``createDT`` as the output base.
        dataDir: base passed to ``getFilename`` to locate each data file.
        stayWeight: forwarded to ``createDT``.
        treeOptions: forwarded to ``createDT``.
        options: parsed options; ``studentInd`` (when not ``None``) restricts
            the run to the student at that index, and the remaining
            attributes read here are forwarded to ``createDT``.
    """
    banner = '-------------------'
    for idx, student in enumerate(['gr', 'ta', 'gp', 'pd']):
        selected = (options.studentInd is None) or (idx == options.studentInd)
        if not selected:
            continue
        print(banner)
        print(student)
        print(banner)
        dataFile = getFilename(dataDir, student, TRAIN)
        createDT(
            dataFile,
            basename,
            'only-%s' % student,
            stayWeight,
            treeOptions,
            options.useWeka,
            options.numInstances,
            options.randomTree,
            options.numRandomTrees,
            options.featureFrac,
            options.resampleFrac,
            options.randomTreeInd,
        )
def makeTree(data, useWeka, stayWeight, base, name, treeOptions, randomTree, featureFrac):
    """Build a weighted decision tree file from ``data``.

    Args:
        data: path of the data file; rewritten in place by
            ``addARFFWeights`` when stay weights are applied.
        useWeka: if true, build with ``createTree`` and then ``extractTree``;
            otherwise build directly with ``buildDT``.
        stayWeight: weight passed to ``addARFFWeights``; skipped when ``None``
            or within 0.0001 of 1.0.
        base: base passed to ``getFilename`` for the output files.
        name: name passed to ``getFilename`` for the output files.
        treeOptions: forwarded to the tree builder.
        randomTree: forwarded to the tree builder.
        featureFrac: forwarded to ``createTree`` (weka path only).
    """
    descFile = getFilename(base, name, DESC)
    weightedFile = getFilename(base, name, WEIGHTED)

    applyStayWeight = stayWeight is not None and abs(stayWeight - 1.0) > 0.0001
    if applyStayWeight:
        print('Adding stay weights')
        addARFFWeights(data, data, stayWeight)

    if not useWeka:
        print('Running buildDT to create a weighted tree')
        buildDT(data, weightedFile, treeOptions, randomTree)
        return

    print('Running weka to create initial tree')
    createTree(data, descFile, treeOptions, randomTree, featureFrac)
    print('Extracting tree from weka output')
    # NOTE: weka was changed to output class distributions, so the extracted
    # tree is written straight to the weighted file with no extra weighting.
    extractTree(data, descFile, weightedFile)
def makeTree(data, useWeka, stayWeight, base, name, treeOptions, randomTree, featureFrac):
    """Create the weighted tree file for ``name`` under ``base``.

    Stay weights are first added to ``data`` in place (via
    ``addARFFWeights``) unless ``stayWeight`` is ``None`` or within 0.0001 of
    1.0. The tree is then produced either by ``createTree`` followed by
    ``extractTree`` (when ``useWeka`` is true) or by ``buildDT``.

    Args:
        data: path of the input data file.
        useWeka: selects the weka path over ``buildDT``.
        stayWeight: optional weight for ``addARFFWeights``.
        base: base passed to ``getFilename``.
        name: name passed to ``getFilename``.
        treeOptions: forwarded to ``createTree`` / ``buildDT``.
        randomTree: forwarded to ``createTree`` / ``buildDT``.
        featureFrac: forwarded to ``createTree`` only.
    """
    descriptionPath = getFilename(base, name, DESC)
    weightedPath = getFilename(base, name, WEIGHTED)

    if stayWeight is not None:
        if abs(stayWeight - 1.0) > 0.0001:
            print('Adding stay weights')
            addARFFWeights(data, data, stayWeight)

    if useWeka:
        print('Running weka to create initial tree')
        createTree(data, descriptionPath, treeOptions, randomTree, featureFrac)
        print('Extracting tree from weka output')
        # NOTE: weka now outputs class distributions, so no separate
        # class-weighting pass is needed after extraction.
        extractTree(data, descriptionPath, weightedPath)
    else:
        print('Running buildDT to create a weighted tree')
        buildDT(data, weightedPath, treeOptions, randomTree)
def main(basename, dataDir, stayWeight, treeOptions, options):
    """Run ``createDT`` for each student on that student's own data file.

    Args:
        basename: output base forwarded to ``createDT``.
        dataDir: base used with ``getFilename`` to find each data file.
        stayWeight: forwarded to ``createDT``.
        treeOptions: forwarded to ``createDT``.
        options: parsed options. If ``studentInd`` is not ``None`` only the
            student at that index is processed; the other attributes read
            here are passed on to ``createDT``.
    """
    students = ['gr', 'ta', 'gp', 'pd']
    rule = '-------------------'
    for position, student in enumerate(students):
        if options.studentInd is not None:
            if position != options.studentInd:
                continue
        for headerLine in (rule, student, rule):
            print(headerLine)
        treeName = 'only-%s' % student
        dataFile = getFilename(dataDir, student, TRAIN)
        createDT(dataFile, basename, treeName, stayWeight, treeOptions,
                 options.useWeka, options.numInstances, options.randomTree,
                 options.numRandomTrees, options.featureFrac,
                 options.resampleFrac, options.randomTreeInd)
def main(args=sys.argv[1:]):
    """Create 'trBagg' decision trees from resampled student data.

    Usage: ``createTrBagg [--balanced] numTarget numSource jobStart [numJobs]``

    Each job number maps to a (student, classifier) pair:
    ``studentInd = jobNum // 1000`` and ``classifierInd = jobNum % 1000``.
    For each job, ``numTarget`` draws are made over the students according to
    the probabilities from ``calcProbs`` (or ``calcBalancedProbs`` with
    ``--balanced``). Every student's data file is then resampled in
    proportion to its draw count, the results are concatenated into one ARFF
    file, and ``makeTree`` builds the tree from it.

    Args:
        args: command-line arguments (without the program name). The list is
            copied, so neither the caller's list nor the default is modified.

    Returns:
        ``None``. Returns early once a job's student index is past the end of
        the student list. Exits with status 1 on a bad argument count.
    """
    usage = 'createTrBagg [--balanced] numTarget numSource jobStart [numJobs]'
    # Work on a copy: removing the flag in place would alter the caller's list
    # and the shared default, which is evaluated only once at definition time.
    args = list(args)
    balanced = '--balanced' in args
    if balanced:
        args.remove('--balanced')
    if (len(args) < 3) or (len(args) > 4):
        sys.stderr.write(usage + '\n')
        sys.exit(1)

    numClassifiers = 1000
    numTarget = int(args[0])
    numSource = int(args[1])
    jobStart = int(args[2])
    numJobs = int(args[3]) if len(args) >= 4 else 1
    # jobStart is given in units of numJobs-sized batches.
    jobStart *= numJobs

    base = 'studentsNew29-unperturbed-%i'
    outputBase = 'studentsNew29-unperturbed-transfer/target%i-source%i' % (
        numTarget, numSource)
    for subDir in ('/desc', '/weighted'):
        if not os.path.exists('data/dt/' + outputBase + subDir):
            os.makedirs('data/dt/' + outputBase + subDir)

    for jobOffset in range(numJobs):
        jobNum = jobStart + jobOffset
        # Floor division keeps the index an int on every Python version.
        studentInd = jobNum // numClassifiers
        classifierInd = jobNum % numClassifiers
        print('job,student,classifier: %s %s %s'
              % (jobNum, studentInd, classifierInd))
        students = getUniqueStudents()
        if studentInd >= len(students):
            return

        if balanced:
            probs = calcBalancedProbs(students, numSource, numTarget, studentInd)
        else:
            probs = calcProbs(students, numSource, numTarget, studentInd)
        print('probs: %s %s' % (probs, sum(probs)))

        # Draw numTarget samples over the students according to probs.
        counts = [0 for s in students]
        for _ in range(numTarget):
            r = random.random()
            total = 0
            # Fallback to the last student if r is never below the running sum.
            ind = len(probs) - 1
            for j, p in enumerate(probs):
                total += p
                if r < total:
                    ind = j
                    break
            counts[ind] += 1

        # eps keeps every resampling proportion strictly positive.
        eps = 1e-10
        props = [
            eps + float(c) / (numTarget if i == studentInd else numSource)
            for i, c in enumerate(counts)
        ]

        # Pre-bind everything the cleanup touches so a failure part-way
        # through cannot raise NameError from the finally clause.
        arffFilename = None
        tempFile = None
        name = None
        try:
            arffFilename = makeTemp('.arff')
            tempFile = makeTemp('.arff')
            with open(arffFilename, 'w') as arffFile:
                for i, (student, prop) in enumerate(zip(students, props)):
                    inFile = getFilename(
                        base % (numTarget if student == students[studentInd]
                                else numSource),
                        student, TRAIN)
                    print('resampling %s' % (student,))
                    resample(inFile, tempFile, prop)
                    with open(tempFile, 'r') as f:
                        if i != 0:
                            # Only the first file contributes its header; for
                            # the rest skip up to and including '@data'.
                            for line in f:
                                if line.strip() == '@data':
                                    break
                        for line in f:
                            arffFile.write(line)
            print('removing trial step')
            if balanced:
                name = 'trBagg-balanced-%s-%i' % (students[studentInd], classifierInd)
            else:
                name = 'trBagg-%s-%i' % (students[studentInd], classifierInd)
            makeTree(arffFilename, True, None, outputBase, name, [], False, 1.0)
        finally:
            leftovers = [arffFilename, tempFile]
            if name is not None:
                leftovers.append(getFilename(outputBase, name, DESC))
            for path in leftovers:
                if path is None:
                    continue
                try:
                    os.remove(path)
                except OSError:
                    # Best-effort cleanup: a file that was never created must
                    # not mask the exception that is already propagating.
                    pass
def main(args=sys.argv[1:]):
    """Build 'trBagg' trees for a range of jobs from resampled student data.

    Usage: ``createTrBagg [--balanced] numTarget numSource jobStart [numJobs]``

    A job number encodes both the student and the classifier index
    (``jobNum // 1000`` and ``jobNum % 1000``). Per job, ``numTarget`` random
    draws are distributed over the students using ``calcProbs`` (or
    ``calcBalancedProbs`` when ``--balanced`` is given); each student's data
    is resampled by the resulting proportion, concatenated into a single ARFF
    file and passed to ``makeTree``.

    Args:
        args: command-line arguments without the program name. A copy is
            used internally, so the passed list is left untouched.

    Returns:
        ``None``. Stops early when a job's student index exceeds the number
        of students; exits with status 1 if the argument count is wrong.
    """
    usage = 'createTrBagg [--balanced] numTarget numSource jobStart [numJobs]'
    # Copy first: args.remove on the original would mutate the caller's list
    # (or the default list, which persists across calls).
    args = list(args)
    balanced = False
    if '--balanced' in args:
        args.remove('--balanced')
        balanced = True
    if not (3 <= len(args) <= 4):
        sys.stderr.write(usage + '\n')
        sys.exit(1)

    numClassifiers = 1000
    numTarget = int(args[0])
    numSource = int(args[1])
    jobStart = int(args[2])
    if len(args) >= 4:
        numJobs = int(args[3])
    else:
        numJobs = 1
    # jobStart counts batches of numJobs jobs.
    jobStart *= numJobs

    base = 'studentsNew29-unperturbed-%i'
    outputBase = 'studentsNew29-unperturbed-transfer/target%i-source%i' % (numTarget, numSource)
    descDir = 'data/dt/' + outputBase + '/desc'
    weightedDir = 'data/dt/' + outputBase + '/weighted'
    if not os.path.exists(descDir):
        os.makedirs(descDir)
    if not os.path.exists(weightedDir):
        os.makedirs(weightedDir)

    for jobOffset in range(numJobs):
        jobNum = jobStart + jobOffset
        # '//' gives an integer index regardless of the Python version.
        studentInd = jobNum // numClassifiers
        classifierInd = jobNum % numClassifiers
        print('job,student,classifier: %s %s %s' % (jobNum, studentInd, classifierInd))
        students = getUniqueStudents()
        if studentInd >= len(students):
            return

        if balanced:
            probs = calcBalancedProbs(students, numSource, numTarget, studentInd)
        else:
            probs = calcProbs(students, numSource, numTarget, studentInd)
        print('probs: %s %s' % (probs, sum(probs)))

        # Sample numTarget student indices from the distribution in probs.
        counts = [0 for s in students]
        for _ in range(numTarget):
            r = random.random()
            total = 0
            for j, p in enumerate(probs):
                total += p
                if r < total:
                    ind = j
                    break
            else:
                # r was never below the cumulative sum: use the last student.
                ind = len(probs) - 1
            counts[ind] += 1

        # eps ensures no proportion is exactly zero.
        eps = 1e-10
        props = [eps + float(c) / (numTarget if i == studentInd else numSource) for i, c in enumerate(counts)]

        # Names used by the cleanup are bound up front so the finally clause
        # cannot fail with NameError after an early error.
        arffFilename = None
        tempFile = None
        name = None
        try:
            arffFilename = makeTemp('.arff')
            tempFile = makeTemp('.arff')
            with open(arffFilename, 'w') as arffFile:
                for i, (student, prop) in enumerate(zip(students, props)):
                    size = numTarget if student == students[studentInd] else numSource
                    inFile = getFilename(base % size, student, TRAIN)
                    print('resampling %s' % (student,))
                    resample(inFile, tempFile, prop)
                    with open(tempFile, 'r') as f:
                        if i != 0:
                            # Keep the header of the first file only: skip the
                            # lines up to and including '@data' for the rest.
                            for line in f:
                                if line.strip() == '@data':
                                    break
                        for line in f:
                            arffFile.write(line)
            print('removing trial step')
            if balanced:
                name = 'trBagg-balanced-%s-%i' % (students[studentInd], classifierInd)
            else:
                name = 'trBagg-%s-%i' % (students[studentInd], classifierInd)
            makeTree(arffFilename, True, None, outputBase, name, [], False, 1.0)
        finally:
            toRemove = [arffFilename, tempFile]
            if name is not None:
                toRemove.append(getFilename(outputBase, name, DESC))
            for path in toRemove:
                if path is not None:
                    try:
                        os.remove(path)
                    except OSError:
                        # Cleanup is best effort so that a missing file does
                        # not replace the exception already being raised.
                        pass
def main(args = sys.argv[1:]):
    """Invoke the ``runClassifier`` executable in ``--notrain`` mode.

    Args:
        args: arguments given to ``parseArgs``; the default is the process
            argument list captured at definition time.

    Raises:
        subprocess.CalledProcessError: when the executable returns non-zero.
    """
    parsed = parseArgs(args)
    # Note that the file is resolved using the TRAIN kind constant.
    testFile = getFilename(parsed.testBase, parsed.student, TRAIN)
    cmd = ['bin/%s/runClassifier' % getArch()]
    cmd.extend([parsed.saveConfigFilename, testFile, str(parsed.numTest)])
    cmd.append('--notrain')
    subprocess.check_call(cmd)
def newSubject(self, picturePath):
    """Look up a new subject based on the file name of their picture.

    The name and email are decoded from the picture's file name with
    ``decodeSubjectPictureName``; the name is passed to
    ``doBuscarCUITSearch`` and the email to ``doPiplSearch``.

    Args:
        picturePath: path of the subject's picture.
    """
    pictureName = getFilename(picturePath)
    decoded = decodeSubjectPictureName(pictureName)
    name, email = decoded
    # The doFullContactSearch(email) lookup is currently disabled.
    doBuscarCUITSearch(name)
    doPiplSearch(email)