        # else: clusters = self.posclusters
        # debug output: data points currently assigned to cluster c
        print map(str, self.cluster2data[c])
        for d in list(self.cluster2data[c]):
            print d
            # remove cluster c from the cluster set of data point d
            print map(str, self.clusters[d]), '->',
            self.clusters[d].remove(c)
            print map(str, self.clusters[d])
        del self.cluster2data[c]

    def strcluster(self, c):
        # string representations of all data points assigned to cluster c
        return map(str, self.cluster2data[c])


if __name__ == '__main__':
#     s = ['otto', 'otte', 'obama', 'markov logic network', 'markov logic', 'otta', 'markov random field']
#     print SAHN(s)
    from mln.mln import readMLNFromFile
    from mln.database import readDBFromFile

    # cluster the 'text' domain of an example MLN
    mln = readMLNFromFile('/home/nyga/code/pracmln/models/object-detection.mln')
    dbs = readDBFromFile(mln, '/home/nyga/code/pracmln/models/scenes.db')
    mln.materializeFormulaTemplates(dbs, verbose=True)
    print mln.domains['text']
    clusters = SAHN(mln.domains['text'])
    for c in clusters:
        print c
import logging
import math
import os
import sys
import time
import traceback
from logging import FileHandler
from multiprocessing import Pool
from random import sample, shuffle

# Note: project-specific names used below (praclog, readMLNFromFile,
# readDBFromFile, XValFoldParams, XValFold, runFold, ConfusionMatrix,
# prepareResults) are assumed to be defined or imported elsewhere in
# this module.


def doXVal(folds, percent, verbose, multicore, noisy, predName, domain,
           mlnfile, dbfiles, logicLearn, logicInfer, inverse=False,
           testSetCount=1):
    startTime = time.time()

    # create the output directory and one subdirectory per logic
    directory = time.strftime("%a_%d_%b_%Y_%H:%M:%S_K=" + str(folds) +
                              "_TSC=" + str(testSetCount), time.localtime())
    os.mkdir(directory)
    os.mkdir(os.path.join(directory, 'FOL'))
    os.mkdir(os.path.join(directory, 'FUZZY'))

    # set up the logger
    log = logging.getLogger('xval')
    fileLogger = FileHandler(os.path.join(directory, 'xval.log'))
    fileLogger.setFormatter(praclog.formatter)
    log.addHandler(fileLogger)
    log.info('Results will be written into %s' % directory)

    # preparations: read the MLN and the databases
    mln_ = readMLNFromFile(mlnfile, verbose=verbose, logic='FuzzyLogic',
                           grammar='PRACGrammar')
    log.info('Read MLN %s.' % mlnfile)
    dbs = []
    for dbfile in dbfiles:
        db = readDBFromFile(mln_, dbfile)
        if type(db) is list:
            dbs.extend(db)
        else:
            dbs.append(db)
    log.info('Read %d databases.' % len(dbs))

    cwpreds = [pred for pred in mln_.predicates if pred != predName]

    # create the partition of the data
    subsetLen = int(math.ceil(len(dbs) * percent / 100.0))
    if subsetLen < len(dbs):
        log.info('Using only %d of %d DBs' % (subsetLen, len(dbs)))
    dbs = sample(dbs, subsetLen)

    if len(dbs) < folds:
        log.error('Cannot do %d-fold cross-validation with only %d databases.'
                  % (folds, len(dbs)))
        exit(0)

    shuffle(dbs)
    partSize = int(math.ceil(len(dbs) / float(folds)))
    partition = []
    for i in range(folds):
        partition.append(dbs[i * partSize:(i + 1) * partSize])

    # assemble one runnable per fold
    foldRunnables = []
    for foldIdx in range(folds):
        partition_ = list(partition)
        params = XValFoldParams()
        params.mln = mln_.duplicate()
        params.testDBs = []
        params.learnDBs = []

        # take testSetCount parts as the test set, the remainder for learning
        for i in range(0, testSetCount):
            if foldIdx >= len(partition_):
                params.testDBs.extend(partition_[0])
                del partition_[0]
            else:
                params.testDBs.extend(partition_[foldIdx])
                del partition_[foldIdx]

        for part in partition_:
            params.learnDBs.extend(part)
        print 'LEARN DBS :' + str(len(params.learnDBs))
        print 'TEST DBS :' + str(len(params.testDBs))

        params.foldIdx = foldIdx
        params.foldCount = folds
        params.noisyStringDomains = noisy
        params.directory = directory
        params.queryPred = predName
        params.queryDom = domain
        params.logicInfer = logicInfer
        foldRunnables.append(XValFold(params))

    if multicore:
        # set up a pool of worker processes
        try:
            workerPool = Pool()
            log.info('Starting %d-fold Cross-Validation in %d processes.'
                     % (folds, workerPool._processes))
            result = workerPool.map_async(runFold, foldRunnables).get()
            workerPool.close()
            workerPool.join()
            cm = ConfusionMatrix()
            for r in result:
                cm.combine(r.confMatrix)
            elapsedTimeMP = time.time() - startTime
            prepareResults(directory, 'FOL')
            prepareResults(directory, 'FUZZY')
        except (KeyboardInterrupt, SystemExit, SystemError):
            log.critical("Caught KeyboardInterrupt, terminating workers")
            workerPool.terminate()
            workerPool.join()
            exit(1)
        except:
            log.error('\n' + ''.join(traceback.format_exception(*sys.exc_info())))
            exit(1)
    else:
        log.info('Starting %d-fold Cross-Validation in 1 process.' % folds)
        for fold in foldRunnables:
            runFold(fold)
        prepareResults(directory, 'FOL')
        prepareResults(directory, 'FUZZY')
    elapsedTimeSP = time.time() - startTime

    if multicore:
        log.info('%d-fold cross-validation (MP) took %.2f min'
                 % (folds, elapsedTimeMP / 60.0))
    else:
        log.info('%d-fold cross-validation (SP) took %.2f min'
                 % (folds, elapsedTimeSP / 60.0))
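# A minimal sketch of how doXVal might be invoked. The predicate name, query
# domain and logic settings below are hypothetical placeholders; the model and
# database paths are the example files used elsewhere in this repository.
#
# doXVal(folds=10, percent=100, verbose=True, multicore=True,
#        noisy=['text'], predName='object', domain='cluster',
#        mlnfile='/home/nyga/code/pracmln/models/object-detection.mln',
#        dbfiles=['/home/nyga/code/pracmln/models/scenes.db'],
#        logicLearn='FirstOrderLogic', logicInfer='FirstOrderLogic')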