def getIdSets(self, classIds=None, featureIds=None, allowNewIds=True):
    """Load the predefined class and feature id sets, where available.

    Each of ``classIds`` and ``featureIds`` is handled the same way: if it
    is not None and names an existing path, an ``IdSet`` is created with the
    given ``allowNewIds`` flag and loaded from that path; otherwise None is
    used for that slot. A status line is written to stderr for each of the
    two, class names first.

    Args:
        classIds: Path to the class-name file, or None.
        featureIds: Path to the feature-name file, or None.
        allowNewIds: Passed through unchanged to the ``IdSet`` constructor.

    Returns:
        A ``(classSet, featureSet)`` tuple; each element is a loaded
        ``IdSet`` or None.
    """
    def loadIdSet(path, kind):
        # Identity test against None: equality would dispatch to a
        # user-defined __ne__/__eq__ on the argument.
        if path is None or not os.path.exists(path):
            sys.stderr.write("No predefined %s names\n" % kind)
            return None
        # sys.stderr.write behaves the same on Python 2 and 3, unlike the
        # "print >>" statement form.
        sys.stderr.write("Using predefined %s names from %s\n" % (kind, path))
        idSet = IdSet(allowNewIds=allowNewIds)
        idSet.load(path)
        return idSet

    # Class ids are resolved before feature ids so the stderr messages keep
    # their original order.
    classSet = loadIdSet(classIds, "class")
    featureSet = loadIdSet(featureIds, "feature")
    return classSet, featureSet
# Enables Psyco when it can be imported, then parses the -i/--invariant and -v/--variant options, which are used below as directories. Loads the invariant id sets ("feature_names.txt", "class_names.txt") and the variant examples plus their "test-triggers.examples.*" id sets, then remaps each variant example by name: example[1] (class id) and the keys of example[2] (feature ids) are translated from the variant id space into the invariant one, keeping feature values unchanged. NOTE(review): the usage text and both option help strings mention an html visualization / analysis-format corpus, which does not match the remapping done here - presumably copied from another tool; confirm. defaultAnalysisFilename is not referenced in the code shown here.
psyco.full() print >> sys.stderr, "Found Psyco, using" except ImportError: print >> sys.stderr, "Psyco not installed" defaultAnalysisFilename = "/usr/share/biotext/ComplexPPI/BioInferForComplexPPIVisible.xml" optparser = OptionParser(usage="%prog [options]\nCreate an html visualization for a corpus.") optparser.add_option("-i", "--invariant", default=None, dest="invariant", help="Corpus in analysis format", metavar="FILE") optparser.add_option("-v", "--variant", default=None, dest="variant", help="Corpus in analysis format", metavar="FILE") (options, args) = optparser.parse_args() #invariantExamples = ExampleUtils.readExamples(os.path.join(options.invariant, "examples.txt")) variantExamples = ExampleUtils.readExamples(os.path.join(options.variant, "test-triggers.examples")) invariantFeatureSet = IdSet() invariantFeatureSet.load(os.path.join(options.invariant, "feature_names.txt")) invariantClassSet = IdSet() invariantClassSet.load(os.path.join(options.invariant, "class_names.txt")) variantFeatureSet = IdSet() variantFeatureSet.load(os.path.join(options.variant, "test-triggers.examples.feature_names")) variantClassSet = IdSet() variantClassSet.load(os.path.join(options.variant, "test-triggers.examples.class_names")) counter = ProgressCounter(len(variantExamples)) for example in variantExamples: counter.update() example[1] = invariantClassSet.getId(variantClassSet.getName(example[1])) newFeatures = {} for k,v in example[2].iteritems(): newFeatures[ invariantFeatureSet.getId(variantFeatureSet.getName(k)) ] = v
# Chooses between parameter optimization and a predefined model. Optimization path: exampleSets[0] is divided with Example.divideExamples and classifier.optimize is called on the two resulting sets, passing the parsed options.parameters when given. Predefined path: classifierParamDict is copied into bestResults[2], feature names (and class names, if that file exists) are loaded from the path in classifierParamDict["predefined"][0], and exampleBuilder is rebuilt around those id sets. When options.output is set, the training examples, the optimization splits (only when no predefined model is used) and bestResults[2] are written under that directory. NOTE(review): optimizationSets[0] is saved as "examplesOptimizationTest.txt" and optimizationSets[1] as "examplesOptimizationTrain.txt" - confirm this matches the argument order classifier.optimize expects.
# Optimize optimizationSets = Example.divideExamples(exampleSets[0]) evaluationArgs = {"classSet":exampleBuilder.classSet} if options.parameters != None: paramDict = splitParameters(options.parameters) bestResults = classifier.optimize([optimizationSets[0]], [optimizationSets[1]], paramDict, Evaluation, evaluationArgs) else: bestResults = classifier.optimize([optimizationSets[0]], [optimizationSets[1]], evaluationClass=Evaluation, evaluationArgs=evaluationArgs) else: print >> sys.stderr, "Using predefined model" bestResults = [None,None,{}] for k,v in classifierParamDict.iteritems(): bestResults[2][k] = v featureSet = IdSet() featureSet.load(os.path.join(classifierParamDict["predefined"][0], "feature_names.txt")) classSet = None if os.path.exists(os.path.join(classifierParamDict["predefined"][0], "class_names.txt")): classSet = IdSet() classSet.load(os.path.join(classifierParamDict["predefined"][0], "class_names.txt")) exampleBuilder = ExampleBuilder(featureSet=featureSet, classSet=classSet, **splitParameters(options.exampleBuilderParameters)) # Save training sets if options.output != None: print >> sys.stderr, "Saving example sets to", options.output Example.writeExamples(exampleSets[0], options.output + "/examplesTrain.txt") if not classifierParamDict.has_key("predefined"): Example.writeExamples(optimizationSets[0], options.output + "/examplesOptimizationTest.txt") Example.writeExamples(optimizationSets[1], options.output + "/examplesOptimizationTrain.txt") TableUtils.writeCSV(bestResults[2], options.output +"/best_parameters.csv") # Optimize and train
# bestResults comes either from classifier.optimize on the two optimization sets (with paramDict when one was parsed, otherwise with only the evaluation class and arguments) or, for a predefined model, from a [None, None, {}] placeholder whose dict is filled from classifierParamDict. In the predefined case the feature names, and the class names if that file exists, are loaded from the path in classifierParamDict["predefined"][0] and exampleBuilder is rebuilt around those id sets; classSet stays None when "class_names.txt" is absent.
bestResults = classifier.optimize([optimizationSets[0]], [optimizationSets[1]], paramDict, Evaluation, evaluationArgs) else: bestResults = classifier.optimize([optimizationSets[0]], [optimizationSets[1]], evaluationClass=Evaluation, evaluationArgs=evaluationArgs) else: print >> sys.stderr, "Using predefined model" bestResults = [None, None, {}] for k, v in classifierParamDict.iteritems(): bestResults[2][k] = v featureSet = IdSet() featureSet.load( os.path.join(classifierParamDict["predefined"][0], "feature_names.txt")) classSet = None if os.path.exists( os.path.join(classifierParamDict["predefined"][0], "class_names.txt")): classSet = IdSet() classSet.load( os.path.join(classifierParamDict["predefined"][0], "class_names.txt")) exampleBuilder = ExampleBuilder(featureSet=featureSet, classSet=classSet, **splitParameters( options.exampleBuilderParameters)) # Save training sets if options.output != None:
# Defines the -v/--variant option and parses the command line; options.invariant and options.variant are then used as directories. The variant examples and their "test-triggers.examples.*" id sets are loaded alongside the invariant "feature_names.txt" / "class_names.txt" id sets, and each variant example's class id (example[1]) is translated by name from the variant id space into the invariant one. NOTE(review): the help text "Corpus in analysis format" looks copied from another tool - confirm.
help="Corpus in analysis format", metavar="FILE") optparser.add_option("-v", "--variant", default=None, dest="variant", help="Corpus in analysis format", metavar="FILE") (options, args) = optparser.parse_args() #invariantExamples = ExampleUtils.readExamples(os.path.join(options.invariant, "examples.txt")) variantExamples = ExampleUtils.readExamples( os.path.join(options.variant, "test-triggers.examples")) invariantFeatureSet = IdSet() invariantFeatureSet.load( os.path.join(options.invariant, "feature_names.txt")) invariantClassSet = IdSet() invariantClassSet.load(os.path.join(options.invariant, "class_names.txt")) variantFeatureSet = IdSet() variantFeatureSet.load( os.path.join(options.variant, "test-triggers.examples.feature_names")) variantClassSet = IdSet() variantClassSet.load( os.path.join(options.variant, "test-triggers.examples.class_names")) counter = ProgressCounter(len(variantExamples)) for example in variantExamples: counter.update() example[1] = invariantClassSet.getId( variantClassSet.getName(example[1]))