def __init__(self, style=None, length=None, types=[], featureSet=None, classSet=None):
    """Initialise the id sets, style parameters and feature builders.

    Parameters:
        style: style definition, passed unchanged to self.getParameters().
        length: must be None (asserted); stored as self.pathLengths.
        types: stored as self.types. NOTE(review): the default list is
            created once and shared by every instance that omits the
            argument, so self.types should not be mutated in place.
        featureSet: id set for features; a new IdSet() is made when None.
        classSet: id set for classes; a new IdSet(1) is made when None.

    Raises:
        AssertionError: if "neg" does not map to id 1 in classSet, or if
            length is not None.
    """
    # A hard-coded style override used to sit here but is disabled, so the
    # caller's style is what gets parsed below:
    #style="trigger_features:typed:directed:no_linear:entities:genia_limits:noMasking:maxFeatures"
    if featureSet is None:
        featureSet = IdSet()
    if classSet is None:
        classSet = IdSet(1)
    # Do the lookup outside the assert so it still runs when asserts are
    # stripped (python -O). Presumably getId() registers the name on first
    # use; a freshly created IdSet(1) could not pass the check otherwise.
    negId = classSet.getId("neg")
    assert negId == 1

    ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)

    # Every switch below defaults to None; the two keep_* switches have
    # explicit boolean defaults.
    defaultParameters = dict.fromkeys([
        "binary", "trigger_features", "typed", "directed", "no_linear",
        "entities", "genia_limits", "noAnnType", "noMasking", "maxFeatures",
        "no_merge", "disable_entity_features",
        "disable_single_element_features", "disable_ngram_features",
        "disable_path_edge_features"])
    defaultParameters["keep_intersentence"] = False
    defaultParameters["keep_intersentence_gold"] = True
    # The first assignment is immediately replaced by the parsed style; it
    # is kept so the attribute is written in the same order as before.
    self.styles = self._setDefaultParameters(defaultParameters)
    self.styles = self.getParameters(style)

    self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
    self.multiEdgeFeatureBuilder.noAnnType = self.styles["noAnnType"]
    self.multiEdgeFeatureBuilder.maskNamedEntities = not self.styles["noMasking"]
    self.multiEdgeFeatureBuilder.maximum = self.styles["maxFeatures"]
    #self.tokenFeatureBuilder = TokenFeatureBuilder(self.featureSet)

    self.pathLengths = length
    assert self.pathLengths is None
    self.types = types

    self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
    self.triggerFeatureBuilder.useNonNameEntities = True
def __init__(self, style=["typed", "directed"], length=None, types=[], featureSet=None, classSet=None):
    """Initialise the id sets, parse the style list and create feature builders.

    Parameters:
        style: either a list of style names or a comma-separated string
            of them. Entries containing "negFrac" or "posPairGaz" carry a
            value after an underscore (see the loop below).
        length: must be None (asserted); stored as self.pathLengths.
        types: stored as self.types. NOTE(review): the default list is
            shared between instances that omit the argument.
        featureSet: id set for features; a new IdSet() is made when None.
        classSet: id set for classes; a new IdSet(1) is made when None.

    Raises:
        AssertionError: if "neg" does not map to id 1 in classSet, or if
            length is not None.
    """
    if featureSet is None:
        featureSet = IdSet()
    if classSet is None:
        classSet = IdSet(1)
    # Do the lookup outside the assert so it still runs when asserts are
    # stripped (python -O). Presumably getId() registers the name on first
    # use; a freshly created IdSet(1) could not pass the check otherwise.
    negId = classSet.getId("neg")
    assert negId == 1

    ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)

    # Only strings are split. Calling .find() unconditionally crashed on
    # the list default, and a comma-less string used to be iterated
    # character by character, so its options were silently ignored.
    if hasattr(style, "split"):
        style = style.split(",")
    self.styles = style

    self.negFrac = None
    self.posPairGaz = POSPairGazetteer()
    for s in style:
        if "negFrac" in s:
            # The fraction is whatever follows the last underscore.
            self.negFrac = float(s.split("_")[-1])
            print >> sys.stderr, "Downsampling negatives to", self.negFrac
            self.negRand = random.Random(15)  # fixed seed
        elif "posPairGaz" in s:
            # Everything after the first underscore is passed as loadFrom.
            self.posPairGaz = POSPairGazetteer(loadFrom=s.split("_", 1)[-1])

    self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
    self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
    if "graph_kernel" in self.styles:
        from FeatureBuilders.GraphKernelFeatureBuilder import GraphKernelFeatureBuilder
        self.graphKernelFeatureBuilder = GraphKernelFeatureBuilder(self.featureSet)
    if "noAnnType" in self.styles:
        self.multiEdgeFeatureBuilder.noAnnType = True
    if "noMasking" in self.styles:
        self.multiEdgeFeatureBuilder.maskNamedEntities = False
    if "maxFeatures" in self.styles:
        self.multiEdgeFeatureBuilder.maximum = True
    self.tokenFeatureBuilder = TokenFeatureBuilder(self.featureSet)
    if "ontology" in self.styles:
        self.multiEdgeFeatureBuilder.ontologyFeatureBuilder = BioInferOntologyFeatureBuilder(self.featureSet)
    if "nodalida" in self.styles:
        self.nodalidaFeatureBuilder = NodalidaFeatureBuilder(self.featureSet)
    #IF LOCAL
    if "bioinfer_limits" in self.styles:
        self.bioinferOntologies = OntologyUtils.getBioInferTempOntology()
        #self.bioinferOntologies = OntologyUtils.loadOntologies(OntologyUtils.g_bioInferFileName)
    #ENDIF

    self.pathLengths = length
    assert self.pathLengths is None
    self.types = types
    if "random" in self.styles:
        from FeatureBuilders.RandomFeatureBuilder import RandomFeatureBuilder
        self.randomFeatureBuilder = RandomFeatureBuilder(self.featureSet)
def __init__(self, style=["typed", "directed", "headsOnly"], length=None, types=[], featureSet=None, classSet=None):
    """Initialise the id sets and the multi-edge feature builder.

    Parameters:
        style: collection of style names, stored as self.styles and
            tested with "in". NOTE(review): the default list is shared
            between instances that omit the argument.
        length: must be None (asserted); stored as self.pathLengths.
        types: stored as self.types (default list is shared as well).
        featureSet: id set for features; a new IdSet() is made when None.
        classSet: id set for classes; a new IdSet(1) is made when None.

    Raises:
        AssertionError: if "neg" does not map to id 1 in classSet, or if
            length is not None.
    """
    if featureSet is None:
        featureSet = IdSet()
    if classSet is None:
        classSet = IdSet(1)
    # Do the lookup outside the assert so it still runs when asserts are
    # stripped (python -O). Presumably getId() registers the name on first
    # use; a freshly created IdSet(1) could not pass the check otherwise.
    negId = classSet.getId("neg")
    assert negId == 1

    ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
    self.styles = style

    self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
    if "noAnnType" in self.styles:
        self.multiEdgeFeatureBuilder.noAnnType = True
    if "noMasking" in self.styles:
        self.multiEdgeFeatureBuilder.maskNamedEntities = False
    if "maxFeatures" in self.styles:
        self.multiEdgeFeatureBuilder.maximum = True
    #self.tokenFeatureBuilder = TokenFeatureBuilder(self.featureSet)
    #if "ontology" in self.styles:
    #    self.multiEdgeFeatureBuilder.ontologyFeatureBuilder = BioInferOntologyFeatureBuilder(self.featureSet)

    self.pathLengths = length
    assert self.pathLengths is None
    self.types = types
def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None, skiplist=None):
    """Initialise the id sets, optional gazetteer, skiplist and feature builders.

    Parameters:
        style: accepted but has no effect; self.styles is always set to
            the fixed list below.
        classSet: id set for classes; a new IdSet(1) is made when None.
        featureSet: id set for features; a new IdSet() is made when None.
        gazetteerFileName: when given, loaded with Gazetteer.loadGztr()
            into self.gazetteer; otherwise self.gazetteer is None.
        skiplist: optional path of a text file; each stripped line becomes
            one entry of the set self.skiplist.

    Raises:
        AssertionError: if "neg" does not map to id 1 in classSet.
    """
    if classSet is None:
        classSet = IdSet(1)
    # Do the lookup outside the assert so it still runs when asserts are
    # stripped (python -O). Presumably getId() registers the name on first
    # use; a freshly created IdSet(1) could not pass the check otherwise.
    negId = classSet.getId("neg")
    assert negId == 1
    if featureSet is None:
        featureSet = IdSet()

    ExampleBuilder.__init__(self, classSet, featureSet)
    #gazetteerFileName="/usr/share/biotext/GeniaChallenge/SharedTaskTriggerTest/gazetteer-train"
    if gazetteerFileName is not None:
        self.gazetteer = Gazetteer.loadGztr(gazetteerFileName)
        print >> sys.stderr, "Loaded gazetteer from", gazetteerFileName
    else:
        print >> sys.stderr, "No gazetteer loaded"
        self.gazetteer = None

    self.skiplist = set()
    if skiplist is not None:
        # The context manager closes the file even if reading fails.
        with open(skiplist, "rt") as f:
            for line in f:
                self.skiplist.add(line.strip())

    # The style is fixed regardless of the style argument.
    self.styles = [
        "trigger_features", "typed", "directed", "no_linear", "entities",
        "genia_limits", "noMasking", "maxFeatures"
    ]

    self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
    if "graph_kernel" in self.styles:
        from FeatureBuilders.GraphKernelFeatureBuilder import GraphKernelFeatureBuilder
        self.graphKernelFeatureBuilder = GraphKernelFeatureBuilder(self.featureSet)
    if "noAnnType" in self.styles:
        self.multiEdgeFeatureBuilder.noAnnType = True
    if "noMasking" in self.styles:
        self.multiEdgeFeatureBuilder.maskNamedEntities = False
    if "maxFeatures" in self.styles:
        self.multiEdgeFeatureBuilder.maximum = True

    self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
def __init__(self, style=["typed","directed","headsOnly"], featureSet=None, classSet=None):
    """Initialise the id sets and the edge and trigger feature builders.

    Parameters:
        style: stored as self.styles; not consulted in this method.
            NOTE(review): the default list is shared between instances
            that omit the argument.
        featureSet: id set for features; a new IdSet() is made when None.
        classSet: id set for classes; a new IdSet(1) is made when None.

    Raises:
        AssertionError: if "neg" does not map to id 1 in classSet.
    """
    if featureSet is None:
        featureSet = IdSet()
    if classSet is None:
        classSet = IdSet(1)
    # Do the lookup outside the assert so it still runs when asserts are
    # stripped (python -O). Presumably getId() registers the name on first
    # use; a freshly created IdSet(1) could not pass the check otherwise.
    negId = classSet.getId("neg")
    assert negId == 1

    ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
    self.styles = style

    self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
    # NOTE(review): the style checks that guarded these three settings are
    # commented out in the source, so the settings are applied
    # unconditionally here. Confirm against the original file layout.
    #if "noAnnType" in self.styles:
    self.multiEdgeFeatureBuilder.noAnnType = True
    #if "noMasking" in self.styles:
    self.multiEdgeFeatureBuilder.maskNamedEntities = False
    #if "maxFeatures" in self.styles:
    self.multiEdgeFeatureBuilder.maximum = True

    self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
    self.triggerFeatureBuilder.useNonNameEntities = False
def __init__(self, style=["typed", "directed", "headsOnly"], length=None, types=[], featureSet=None, classSet=None):
    """Initialise the id sets, the edge feature builder and empty counters.

    Parameters:
        style: stored as self.styles; not consulted in this method.
            NOTE(review): the default list is shared between instances
            that omit the argument.
        length: accepted but unused in this method.
        types: accepted but unused in this method.
        featureSet: id set for features; a new IdSet() is made when None.
        classSet: id set for classes; a new IdSet(1) is made when None.

    Raises:
        AssertionError: if "neg" does not map to id 1 in classSet.
    """
    if featureSet is None:
        featureSet = IdSet()
    if classSet is None:
        classSet = IdSet(1)
    # Do the lookup outside the assert so it still runs when asserts are
    # stripped (python -O). Presumably getId() registers the name on first
    # use; a freshly created IdSet(1) could not pass the check otherwise.
    negId = classSet.getId("neg")
    assert negId == 1

    ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
    self.styles = style

    self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)

    # Counting dictionaries, all empty at construction time.
    self.counts = {}
    self.countsPerType = {}
    self.untypedCounts = {}
    self.tokenCounts = {}
def __init__(self, style=None, types=[], featureSet=None, classSet=None):
    """Initialise the id sets, register the style switches and create feature builders.

    Parameters:
        style: style definition, passed unchanged to self.getParameters();
            the result is stored as self.styles and indexed by switch name.
        types: stored as self.types. NOTE(review): the default list is
            shared between instances that omit the argument.
        featureSet: id set for features; a new IdSet() is made when None.
        classSet: id set for classes; a new IdSet(1) is made when None.

    Raises:
        AssertionError: unless "neg" maps to id 1, or classSet.Ids has
            exactly two entries and "neg" maps to -1.
    """
    if featureSet is None:
        featureSet = IdSet()
    if classSet is None:
        classSet = IdSet(1)

    ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
    # Do the lookup outside the assert so it still runs when asserts are
    # stripped (python -O). Presumably getId() registers the name on first
    # use; a freshly created IdSet(1) could not pass the check otherwise.
    negId = classSet.getId("neg")
    assert negId == 1 or (len(classSet.Ids) == 2 and negId == -1)

    # Basic style = trigger_features:typed:directed:no_linear:entities:auto_limits:noMasking:maxFeatures
    # NOTE(review): only "genia_limits" is treated as commented out of the
    # list below; verify against the original file layout.
    self._setDefaultParameters([
        "directed", "undirected", "headsOnly", "graph_kernel", "noAnnType", "mask_nodes", "limit_features",
        "no_auto_limits", "co_features", "genia_features", "bi_features", #"genia_limits",
        "epi_limits", "id_limits", "rel_limits", "bb_limits", "bi_limits", "co_limits",
        "genia_task1", "ontology", "nodalida", "bacteria_renaming", "no_trigger_features", "rel_features",
        "drugbank_features", "ddi_mtmx", "evex", "giuliano", "random", "themeOnly", "causeOnly", "no_path", "token_nodes",
        "skip_extra_triggers", "headsOnly", "graph_kernel", "no_task", "no_dependency",
        "disable_entity_features", "disable_terminus_features", "disable_single_element_features",
        "disable_ngram_features", "disable_path_edge_features", "linear_features", "subset", "binary", "pos_only",
        "entity_type", "filter_shortest_path", "maskTypeAsProtein", "keep_neg", "metamap",
        "sdb_merge", "sdb_features", "ontobiotope_features", "no_self_loops", "full_entities",
        "no_features", "wordnet", "wordvector", "se10t8_undirected", "filter_types", "doc_extra",
        "entity_extra"])
    self.styles = self.getParameters(style)
    #if style == None: # no parameters given
    #    style["typed"] = style["directed"] = style["headsOnly"] = True

    self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet, self.styles)
    # NOTE Temporarily re-enabling predicted range
    #self.multiEdgeFeatureBuilder.definePredictedValueRange([], None)
    if self.styles["graph_kernel"]:
        from FeatureBuilders.GraphKernelFeatureBuilder import GraphKernelFeatureBuilder
        self.graphKernelFeatureBuilder = GraphKernelFeatureBuilder(self.featureSet)
    if self.styles["noAnnType"]:
        self.multiEdgeFeatureBuilder.noAnnType = True
    # Masking is set explicitly in both directions.
    if self.styles["mask_nodes"]:
        self.multiEdgeFeatureBuilder.maskNamedEntities = True
    else:
        self.multiEdgeFeatureBuilder.maskNamedEntities = False
    if not self.styles["limit_features"]:
        self.multiEdgeFeatureBuilder.maximum = True
    if self.styles["genia_task1"]:
        self.multiEdgeFeatureBuilder.filterAnnTypes.add("Entity")

    self.tokenFeatureBuilder = TokenFeatureBuilder(self.featureSet)
    if self.styles["ontology"]:
        self.multiEdgeFeatureBuilder.ontologyFeatureBuilder = BioInferOntologyFeatureBuilder(self.featureSet)
    if self.styles["ontobiotope_features"]:
        self.ontobiotopeFeatureBuilder = OntoBiotopeFeatureBuilder(self.featureSet)
    if self.styles["nodalida"]:
        self.nodalidaFeatureBuilder = NodalidaFeatureBuilder(self.featureSet)
    if self.styles["bacteria_renaming"]:
        self.bacteriaRenamingFeatureBuilder = BacteriaRenamingFeatureBuilder(self.featureSet)
    if not self.styles["no_trigger_features"]:
        # The trigger builder exists only inside this branch, so its
        # configuration has to stay nested here.
        self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet, self.styles)
        self.triggerFeatureBuilder.useNonNameEntities = True
        if self.styles["noAnnType"]:
            self.triggerFeatureBuilder.noAnnType = True
        if self.styles["genia_task1"]:
            self.triggerFeatureBuilder.filterAnnTypes.add("Entity")
            #self.bioinferOntologies = OntologyUtils.loadOntologies(OntologyUtils.g_bioInferFileName)
    if self.styles["rel_features"]:
        self.relFeatureBuilder = RELFeatureBuilder(featureSet)
    if self.styles["drugbank_features"]:
        self.drugFeatureBuilder = DrugFeatureBuilder(featureSet)
    if self.styles["evex"]:
        self.evexFeatureBuilder = EVEXFeatureBuilder(featureSet)
    if self.styles["wordnet"]:
        self.wordNetFeatureBuilder = WordNetFeatureBuilder(featureSet)
    if self.styles["wordvector"]:
        self.wordVectorFeatureBuilder = WordVectorFeatureBuilder(featureSet, self.styles)
    if self.styles["giuliano"]:
        self.giulianoFeatureBuilder = GiulianoFeatureBuilder(featureSet)

    self.types = types
    if self.styles["random"]:
        from FeatureBuilders.RandomFeatureBuilder import RandomFeatureBuilder
        self.randomFeatureBuilder = RandomFeatureBuilder(self.featureSet)
def __init__(self, includeNeg=False):
    """Create the edge feature builder and an empty gazetteer.

    Parameters:
        includeNeg: stored unchanged as self.includeNeg.
    """
    # The feature builder is constructed with None as its only argument.
    edgeBuilder = MultiEdgeFeatureBuilder(None)
    self.multiEdgeFeatureBuilder = edgeBuilder
    self.gazetteer = dict()
    self.includeNeg = includeNeg
def __init__(self, style=["typed", "directed", "headsOnly"], length=None, types=[], featureSet=None, classSet=None, gazetteer=None, pathGazetteer=None, negFrac=None):
    """Initialise the id sets, optional gazetteers, feature builders and bookkeeping.

    Parameters:
        style: collection of style names, stored as self.styles and
            tested with "in". NOTE(review): the default list is shared
            between instances that omit the argument.
        length: must be None (asserted); stored as self.pathLengths.
        types: stored as self.types (default list is shared as well).
        featureSet: id set for features; a new IdSet() is made when None.
        classSet: id set for classes; a new IdSet(1) is made when None.
        gazetteer: when given, passed to Gazetteer.loadGztr(); otherwise
            self.gazetteer is None.
        pathGazetteer: when given, passed to PathGazetteer.load(), and the
            dependency and pair views are derived from the result.
        negFrac: stored as self.negFrac and printed to stderr, even when
            it is None.

    Raises:
        AssertionError: if "neg" does not map to id 1 in classSet, or if
            length is not None.
    """
    if featureSet is None:
        featureSet = IdSet()
    if classSet is None:
        classSet = IdSet(1)
    # Do the lookup outside the assert so it still runs when asserts are
    # stripped (python -O). Presumably getId() registers the name on first
    # use; a freshly created IdSet(1) could not pass the check otherwise.
    negId = classSet.getId("neg")
    assert negId == 1

    if gazetteer is not None:
        print >> sys.stderr, "Loading gazetteer from", gazetteer
        self.gazetteer = Gazetteer.loadGztr(gazetteer)
    else:
        print >> sys.stderr, "No gazetteer loaded"
        self.gazetteer = None

    self.pathGazetteer = None
    self.pathGazetteerDependencies = None
    self.pathGazetteerPairs = None
    if pathGazetteer is not None:
        print >> sys.stderr, "Loading path gazetteer from", pathGazetteer
        self.pathGazetteer = PathGazetteer.load(pathGazetteer)
        self.pathGazetteerDependencies = PathGazetteer.getDependencies(self.pathGazetteer)
        self.pathGazetteerPairs = PathGazetteer.getPairs(self.pathGazetteer)
    else:
        print >> sys.stderr, "No path gazetteer loaded"

    ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
    self.styles = style
    self.negFrac = negFrac
    print >> sys.stderr, "Downsampling negatives to", negFrac
    self.negRand = random.Random()  # unseeded

    self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
    # Always on: the '"noAnnType" in self.styles' check is bypassed.
    self.multiEdgeFeatureBuilder.noAnnType = True
    if "noMasking" in self.styles:
        self.multiEdgeFeatureBuilder.maskNamedEntities = False
    if "maxFeatures" in self.styles:
        self.multiEdgeFeatureBuilder.maximum = True

    self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
    #self.tokenFeatureBuilder = TokenFeatureBuilder(self.featureSet)
    #if "ontology" in self.styles:
    #    self.multiEdgeFeatureBuilder.ontologyFeatureBuilder = BioInferOntologyFeatureBuilder(self.featureSet)

    self.pathLengths = length
    assert self.pathLengths is None
    self.types = types

    # Bookkeeping containers, all empty at construction time.
    self.eventsByOrigId = {}
    self.headTokensByOrigId = {}
    self.interSentenceEvents = set()
    self.examplesByEventOrigId = {}
    self.skippedByType = {}
    self.skippedByTypeAndReason = {}
    self.builtByType = {}

    self.gazMatchCache = {}
def __init__(self, style=None, length=None, types=[], featureSet=None, classSet=None):
    """Initialise the id sets, parse the style switches and create feature builders.

    Parameters:
        style: style definition, passed to self.getParameters() together
            with the list of valid switch names. When None, the "typed",
            "directed" and "headsOnly" switches are turned on.
        length: must be None (asserted); stored as self.pathLengths.
        types: stored as self.types. NOTE(review): the default list is
            shared between instances that omit the argument.
        featureSet: id set for features; a new IdSet() is made when None.
        classSet: id set for classes; a new IdSet(1) is made when None.

    Raises:
        AssertionError: unless "neg" maps to id 1, or classSet.Ids has
            exactly two entries and "neg" maps to -1; also if length is
            not None.
    """
    if featureSet is None:
        featureSet = IdSet()
    if classSet is None:
        classSet = IdSet(1)
    # Do the lookup outside the assert so it still runs when asserts are
    # stripped (python -O). Presumably getId() registers the name on first
    # use; a freshly created IdSet(1) could not pass the check otherwise.
    negId = classSet.getId("neg")
    assert negId == 1 or (len(classSet.Ids) == 2 and negId == -1)

    ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
    self.styles = self.getParameters(style, [
        "typed", "directed", "headsOnly", "graph_kernel", "noAnnType",
        "noMasking", "maxFeatures", "genia_limits", "epi_limits",
        "id_limits", "rel_limits", "bb_limits", "bi_limits", "co_limits",
        "genia_task1", "ontology", "nodalida", "bacteria_renaming",
        "trigger_features", "rel_features", "ddi_features", "evex",
        "giuliano", "random", "themeOnly", "causeOnly", "no_path",
        "entities", "skip_extra_triggers", "headsOnly", "graph_kernel",
        "trigger_features", "no_task", "no_dependency",
        "disable_entity_features", "disable_terminus_features",
        "disable_single_element_features", "disable_ngram_features",
        "disable_path_edge_features", "no_linear", "subset", "binary",
        "pos_only", "entity_type"
    ])
    if style is None:  # no parameters given
        # The defaults go on the parsed switches; indexing the None
        # argument itself raised TypeError on every default construction.
        # Assumes getParameters() returns a mutable mapping (it is indexed
        # by switch name below) -- TODO confirm.
        self.styles["typed"] = self.styles["directed"] = self.styles["headsOnly"] = True

    self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
    # NOTE Temporarily re-enabling predicted range
    #self.multiEdgeFeatureBuilder.definePredictedValueRange([], None)
    if self.styles["graph_kernel"]:
        from FeatureBuilders.GraphKernelFeatureBuilder import GraphKernelFeatureBuilder
        self.graphKernelFeatureBuilder = GraphKernelFeatureBuilder(self.featureSet)
    if self.styles["noAnnType"]:
        self.multiEdgeFeatureBuilder.noAnnType = True
    if self.styles["noMasking"]:
        self.multiEdgeFeatureBuilder.maskNamedEntities = False
    if self.styles["maxFeatures"]:
        self.multiEdgeFeatureBuilder.maximum = True
    if self.styles["genia_task1"]:
        self.multiEdgeFeatureBuilder.filterAnnTypes.add("Entity")

    self.tokenFeatureBuilder = TokenFeatureBuilder(self.featureSet)
    if self.styles["ontology"]:
        self.multiEdgeFeatureBuilder.ontologyFeatureBuilder = BioInferOntologyFeatureBuilder(self.featureSet)
    if self.styles["nodalida"]:
        self.nodalidaFeatureBuilder = NodalidaFeatureBuilder(self.featureSet)
    if self.styles["bacteria_renaming"]:
        self.bacteriaRenamingFeatureBuilder = BacteriaRenamingFeatureBuilder(self.featureSet)
    if self.styles["trigger_features"]:
        # The trigger builder exists only inside this branch, so its
        # configuration has to stay nested here.
        self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
        self.triggerFeatureBuilder.useNonNameEntities = True
        if self.styles["genia_task1"]:
            self.triggerFeatureBuilder.filterAnnTypes.add("Entity")
            #self.bioinferOntologies = OntologyUtils.loadOntologies(OntologyUtils.g_bioInferFileName)
    if self.styles["rel_features"]:
        self.relFeatureBuilder = RELFeatureBuilder(featureSet)
    if self.styles["ddi_features"]:
        self.drugFeatureBuilder = DrugFeatureBuilder(featureSet)
    if self.styles["evex"]:
        self.evexFeatureBuilder = EVEXFeatureBuilder(featureSet)
    if self.styles["giuliano"]:
        self.giulianoFeatureBuilder = GiulianoFeatureBuilder(featureSet)

    self.pathLengths = length
    assert self.pathLengths is None
    self.types = types
    if self.styles["random"]:
        from FeatureBuilders.RandomFeatureBuilder import RandomFeatureBuilder
        self.randomFeatureBuilder = RandomFeatureBuilder(self.featureSet)