def __init__(self, style=None, length=None, types=[], featureSet=None, classSet=None):
        # reset style regardless of input
        #style="trigger_features:typed:directed:no_linear:entities:genia_limits:noMasking:maxFeatures"
        if featureSet == None:
            featureSet = IdSet()
        if classSet == None:
            classSet = IdSet(1)
        else:
            classSet = classSet
        assert( classSet.getId("neg") == 1 )
        
        ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
        
        defaultNone = ["binary", "trigger_features","typed","directed","no_linear","entities","genia_limits",
            "noAnnType", "noMasking", "maxFeatures", "no_merge", "disable_entity_features", 
            "disable_single_element_features", "disable_ngram_features", "disable_path_edge_features"]
        defaultParameters = {}
        for name in defaultNone:
            defaultParameters[name] = None
        defaultParameters["keep_intersentence"] = False
        defaultParameters["keep_intersentence_gold"] = True
        self.styles = self._setDefaultParameters(defaultParameters)
        self.styles = self.getParameters(style)
        self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
        self.multiEdgeFeatureBuilder.noAnnType = self.styles["noAnnType"]
        self.multiEdgeFeatureBuilder.maskNamedEntities = not self.styles["noMasking"]
        self.multiEdgeFeatureBuilder.maximum = self.styles["maxFeatures"]
        #self.tokenFeatureBuilder = TokenFeatureBuilder(self.featureSet)
        self.pathLengths = length
        assert(self.pathLengths == None)
        self.types = types

        self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
        self.triggerFeatureBuilder.useNonNameEntities = True
예제 #2
0
    def __init__(self,
                 style=["typed", "directed"],
                 length=None,
                 types=[],
                 featureSet=None,
                 classSet=None):
        if featureSet == None:
            featureSet = IdSet()
        if classSet == None:
            classSet = IdSet(1)
        else:
            classSet = classSet
        assert (classSet.getId("neg") == 1)

        ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
        if style.find(",") != -1:
            style = style.split(",")
        self.styles = style

        self.negFrac = None
        self.posPairGaz = POSPairGazetteer()
        for s in style:
            if s.find("negFrac") != -1:
                self.negFrac = float(s.split("_")[-1])
                print >> sys.stderr, "Downsampling negatives to", self.negFrac
                self.negRand = random.Random(15)
            elif s.find("posPairGaz") != -1:
                self.posPairGaz = POSPairGazetteer(
                    loadFrom=s.split("_", 1)[-1])

        self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
        self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
        if "graph_kernel" in self.styles:
            from FeatureBuilders.GraphKernelFeatureBuilder import GraphKernelFeatureBuilder
            self.graphKernelFeatureBuilder = GraphKernelFeatureBuilder(
                self.featureSet)
        if "noAnnType" in self.styles:
            self.multiEdgeFeatureBuilder.noAnnType = True
        if "noMasking" in self.styles:
            self.multiEdgeFeatureBuilder.maskNamedEntities = False
        if "maxFeatures" in self.styles:
            self.multiEdgeFeatureBuilder.maximum = True
        self.tokenFeatureBuilder = TokenFeatureBuilder(self.featureSet)
        if "ontology" in self.styles:
            self.multiEdgeFeatureBuilder.ontologyFeatureBuilder = BioInferOntologyFeatureBuilder(
                self.featureSet)
        if "nodalida" in self.styles:
            self.nodalidaFeatureBuilder = NodalidaFeatureBuilder(
                self.featureSet)
        #IF LOCAL
        if "bioinfer_limits" in self.styles:
            self.bioinferOntologies = OntologyUtils.getBioInferTempOntology()
            #self.bioinferOntologies = OntologyUtils.loadOntologies(OntologyUtils.g_bioInferFileName)
        #ENDIF
        self.pathLengths = length
        assert (self.pathLengths == None)
        self.types = types
        if "random" in self.styles:
            from FeatureBuilders.RandomFeatureBuilder import RandomFeatureBuilder
            self.randomFeatureBuilder = RandomFeatureBuilder(self.featureSet)
예제 #3
0
    def __init__(self,
                 style=["typed", "directed", "headsOnly"],
                 length=None,
                 types=[],
                 featureSet=None,
                 classSet=None):
        if featureSet == None:
            featureSet = IdSet()
        if classSet == None:
            classSet = IdSet(1)
        else:
            classSet = classSet
        assert (classSet.getId("neg") == 1)

        ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
        self.styles = style

        self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
        if "noAnnType" in self.styles:
            self.multiEdgeFeatureBuilder.noAnnType = True
        if "noMasking" in self.styles:
            self.multiEdgeFeatureBuilder.maskNamedEntities = False
        if "maxFeatures" in self.styles:
            self.multiEdgeFeatureBuilder.maximum = True
        #self.tokenFeatureBuilder = TokenFeatureBuilder(self.featureSet)
        #if "ontology" in self.styles:
        #    self.multiEdgeFeatureBuilder.ontologyFeatureBuilder = BioInferOntologyFeatureBuilder(self.featureSet)
        self.pathLengths = length
        assert (self.pathLengths == None)
        self.types = types
예제 #4
0
    def __init__(self,
                 style=None,
                 classSet=None,
                 featureSet=None,
                 gazetteerFileName=None,
                 skiplist=None):
        if classSet == None:
            classSet = IdSet(1)
        assert (classSet.getId("neg") == 1)
        if featureSet == None:
            featureSet = IdSet()

        ExampleBuilder.__init__(self, classSet, featureSet)
        #gazetteerFileName="/usr/share/biotext/GeniaChallenge/SharedTaskTriggerTest/gazetteer-train"
        if gazetteerFileName != None:
            self.gazetteer = Gazetteer.loadGztr(gazetteerFileName)
            print >> sys.stderr, "Loaded gazetteer from", gazetteerFileName
        else:
            print >> sys.stderr, "No gazetteer loaded"
            self.gazetteer = None
        self.styles = style

        self.skiplist = set()
        if skiplist != None:
            f = open(skiplist, "rt")
            for line in f.readlines():
                self.skiplist.add(line.strip())
            f.close()

        self.styles = [
            "trigger_features", "typed", "directed", "no_linear", "entities",
            "genia_limits", "noMasking", "maxFeatures"
        ]
        self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
        if "graph_kernel" in self.styles:
            from FeatureBuilders.GraphKernelFeatureBuilder import GraphKernelFeatureBuilder
            self.graphKernelFeatureBuilder = GraphKernelFeatureBuilder(
                self.featureSet)
        if "noAnnType" in self.styles:
            self.multiEdgeFeatureBuilder.noAnnType = True
        if "noMasking" in self.styles:
            self.multiEdgeFeatureBuilder.maskNamedEntities = False
        if "maxFeatures" in self.styles:
            self.multiEdgeFeatureBuilder.maximum = True

        self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
 def __init__(self, style=["typed","directed","headsOnly"], featureSet=None, classSet=None):
     if featureSet == None:
         featureSet = IdSet()
     if classSet == None:
         classSet = IdSet(1)
     else:
         classSet = classSet
     assert( classSet.getId("neg") == 1 )
     
     ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
     self.styles = style
     
     self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
     #if "noAnnType" in self.styles:
     self.multiEdgeFeatureBuilder.noAnnType = True
     #if "noMasking" in self.styles:
     self.multiEdgeFeatureBuilder.maskNamedEntities = False
     #if "maxFeatures" in self.styles:
     self.multiEdgeFeatureBuilder.maximum = True
     self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
     self.triggerFeatureBuilder.useNonNameEntities = False
예제 #6
0
    def __init__(self,
                 style=["typed", "directed", "headsOnly"],
                 length=None,
                 types=[],
                 featureSet=None,
                 classSet=None):
        if featureSet == None:
            featureSet = IdSet()
        if classSet == None:
            classSet = IdSet(1)
        else:
            classSet = classSet
        assert (classSet.getId("neg") == 1)

        ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
        self.styles = style

        self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)

        self.counts = {}
        self.countsPerType = {}
        self.untypedCounts = {}
        self.tokenCounts = {}
예제 #7
0
 def __init__(self, style=None, types=[], featureSet=None, classSet=None):
     if featureSet == None:
         featureSet = IdSet()
     if classSet == None:
         classSet = IdSet(1)
     else:
         classSet = classSet
     
     ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
     assert( classSet.getId("neg") == 1 or (len(classSet.Ids)== 2 and classSet.getId("neg") == -1) )
     
     # Basic style = trigger_features:typed:directed:no_linear:entities:auto_limits:noMasking:maxFeatures
     self._setDefaultParameters([
         "directed", "undirected", "headsOnly", "graph_kernel", "noAnnType", "mask_nodes", "limit_features",
         "no_auto_limits", "co_features", "genia_features", "bi_features", #"genia_limits", "epi_limits", "id_limits", "rel_limits", "bb_limits", "bi_limits", "co_limits",
         "genia_task1", "ontology", "nodalida", "bacteria_renaming", "no_trigger_features", "rel_features",
         "drugbank_features", "ddi_mtmx", "evex", "giuliano", "random", "themeOnly", "causeOnly", "no_path", "token_nodes", 
         "skip_extra_triggers", "headsOnly", "graph_kernel", "no_task", "no_dependency", 
         "disable_entity_features", "disable_terminus_features", "disable_single_element_features", 
         "disable_ngram_features", "disable_path_edge_features", "linear_features", "subset", "binary", "pos_only",
         "entity_type", "filter_shortest_path", "maskTypeAsProtein", "keep_neg", "metamap", 
         "sdb_merge", "sdb_features", "ontobiotope_features", "no_self_loops", "full_entities",
         "no_features", "wordnet", "wordvector", "se10t8_undirected", "filter_types", "doc_extra",
         "entity_extra"])
     self.styles = self.getParameters(style)
     #if style == None: # no parameters given
     #    style["typed"] = style["directed"] = style["headsOnly"] = True
     
     self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet, self.styles)
     # NOTE Temporarily re-enabling predicted range
     #self.multiEdgeFeatureBuilder.definePredictedValueRange([], None)
     if self.styles["graph_kernel"]:
         from FeatureBuilders.GraphKernelFeatureBuilder import GraphKernelFeatureBuilder
         self.graphKernelFeatureBuilder = GraphKernelFeatureBuilder(self.featureSet)
     if self.styles["noAnnType"]:
         self.multiEdgeFeatureBuilder.noAnnType = True
     if self.styles["mask_nodes"]:
         self.multiEdgeFeatureBuilder.maskNamedEntities = True
     else:
         self.multiEdgeFeatureBuilder.maskNamedEntities = False
     if not self.styles["limit_features"]:
         self.multiEdgeFeatureBuilder.maximum = True
     if self.styles["genia_task1"]:
         self.multiEdgeFeatureBuilder.filterAnnTypes.add("Entity")
     self.tokenFeatureBuilder = TokenFeatureBuilder(self.featureSet)
     if self.styles["ontology"]:
         self.multiEdgeFeatureBuilder.ontologyFeatureBuilder = BioInferOntologyFeatureBuilder(self.featureSet)
     if self.styles["ontobiotope_features"]:
         self.ontobiotopeFeatureBuilder = OntoBiotopeFeatureBuilder(self.featureSet)
     if self.styles["nodalida"]:
         self.nodalidaFeatureBuilder = NodalidaFeatureBuilder(self.featureSet)
     if self.styles["bacteria_renaming"]:
         self.bacteriaRenamingFeatureBuilder = BacteriaRenamingFeatureBuilder(self.featureSet)
     if not self.styles["no_trigger_features"]:
         self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet, self.styles)
         self.triggerFeatureBuilder.useNonNameEntities = True
         if self.styles["noAnnType"]:
             self.triggerFeatureBuilder.noAnnType = True
         if self.styles["genia_task1"]:
             self.triggerFeatureBuilder.filterAnnTypes.add("Entity")
         #self.bioinferOntologies = OntologyUtils.loadOntologies(OntologyUtils.g_bioInferFileName)
     if self.styles["rel_features"]:
         self.relFeatureBuilder = RELFeatureBuilder(featureSet)
     if self.styles["drugbank_features"]:
         self.drugFeatureBuilder = DrugFeatureBuilder(featureSet)
     if self.styles["evex"]:
         self.evexFeatureBuilder = EVEXFeatureBuilder(featureSet)
     if self.styles["wordnet"]:
         self.wordNetFeatureBuilder = WordNetFeatureBuilder(featureSet)
     if self.styles["wordvector"]:
         self.wordVectorFeatureBuilder = WordVectorFeatureBuilder(featureSet, self.styles)
     if self.styles["giuliano"]:
         self.giulianoFeatureBuilder = GiulianoFeatureBuilder(featureSet)
     self.types = types
     if self.styles["random"]:
         from FeatureBuilders.RandomFeatureBuilder import RandomFeatureBuilder
         self.randomFeatureBuilder = RandomFeatureBuilder(self.featureSet)
예제 #8
0
 def __init__(self, includeNeg=False):
     self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(None)
     self.gazetteer = {}
     self.includeNeg = includeNeg
예제 #9
0
    def __init__(self,
                 style=["typed", "directed", "headsOnly"],
                 length=None,
                 types=[],
                 featureSet=None,
                 classSet=None,
                 gazetteer=None,
                 pathGazetteer=None,
                 negFrac=None):
        if featureSet == None:
            featureSet = IdSet()
        if classSet == None:
            classSet = IdSet(1)
        else:
            classSet = classSet
        assert (classSet.getId("neg") == 1)

        if gazetteer != None:
            print >> sys.stderr, "Loading gazetteer from", gazetteer
            self.gazetteer = Gazetteer.loadGztr(gazetteer)
        else:
            print >> sys.stderr, "No gazetteer loaded"
            self.gazetteer = None

        self.pathGazetteer = None
        self.pathGazetteerDependencies = None
        self.pathGazetteerPairs = None
        if pathGazetteer != None:
            print >> sys.stderr, "Loading path gazetteer from", pathGazetteer
            self.pathGazetteer = PathGazetteer.load(pathGazetteer)
            self.pathGazetteerDependencies = PathGazetteer.getDependencies(
                self.pathGazetteer)
            self.pathGazetteerPairs = PathGazetteer.getPairs(
                self.pathGazetteer)
        else:
            print >> sys.stderr, "No path gazetteer loaded"

        ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)
        self.styles = style
        self.negFrac = negFrac
        print >> sys.stderr, "Downsampling negatives to", negFrac
        self.negRand = random.Random()

        self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
        if True:  #"noAnnType" in self.styles:
            self.multiEdgeFeatureBuilder.noAnnType = True
        if "noMasking" in self.styles:
            self.multiEdgeFeatureBuilder.maskNamedEntities = False
        if "maxFeatures" in self.styles:
            self.multiEdgeFeatureBuilder.maximum = True

        self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
        #self.tokenFeatureBuilder = TokenFeatureBuilder(self.featureSet)
        #if "ontology" in self.styles:
        #    self.multiEdgeFeatureBuilder.ontologyFeatureBuilder = BioInferOntologyFeatureBuilder(self.featureSet)
        self.pathLengths = length
        assert (self.pathLengths == None)
        self.types = types

        self.eventsByOrigId = {}
        self.headTokensByOrigId = {}
        self.interSentenceEvents = set()

        self.examplesByEventOrigId = {}
        self.skippedByType = {}
        self.skippedByTypeAndReason = {}
        self.builtByType = {}

        self.gazMatchCache = {}
예제 #10
0
    def __init__(self,
                 style=None,
                 length=None,
                 types=[],
                 featureSet=None,
                 classSet=None):
        if featureSet == None:
            featureSet = IdSet()
        if classSet == None:
            classSet = IdSet(1)
        else:
            classSet = classSet
        assert (classSet.getId("neg") == 1
                or (len(classSet.Ids) == 2 and classSet.getId("neg") == -1))

        ExampleBuilder.__init__(self, classSet=classSet, featureSet=featureSet)

        self.styles = self.getParameters(style, [
            "typed", "directed", "headsOnly", "graph_kernel", "noAnnType",
            "noMasking", "maxFeatures", "genia_limits", "epi_limits",
            "id_limits", "rel_limits", "bb_limits", "bi_limits", "co_limits",
            "genia_task1", "ontology", "nodalida", "bacteria_renaming",
            "trigger_features", "rel_features", "ddi_features", "evex",
            "giuliano", "random", "themeOnly", "causeOnly", "no_path",
            "entities", "skip_extra_triggers", "headsOnly", "graph_kernel",
            "trigger_features", "no_task", "no_dependency",
            "disable_entity_features", "disable_terminus_features",
            "disable_single_element_features", "disable_ngram_features",
            "disable_path_edge_features", "no_linear", "subset", "binary",
            "pos_only", "entity_type"
        ])
        if style == None:  # no parameters given
            style["typed"] = style["directed"] = style["headsOnly"] = True
#        self.styles = style
#        if "selftrain_group" in self.styles:
#            self.selfTrainGroups = set()
#            if "selftrain_group-1" in self.styles:
#                self.selfTrainGroups.add("-1")
#            if "selftrain_group0" in self.styles:
#                self.selfTrainGroups.add("0")
#            if "selftrain_group1" in self.styles:
#                self.selfTrainGroups.add("1")
#            if "selftrain_group2" in self.styles:
#                self.selfTrainGroups.add("2")
#            if "selftrain_group3" in self.styles:
#                self.selfTrainGroups.add("3")
#            print >> sys.stderr, "Self-train-groups:", self.selfTrainGroups

        self.multiEdgeFeatureBuilder = MultiEdgeFeatureBuilder(self.featureSet)
        # NOTE Temporarily re-enabling predicted range
        #self.multiEdgeFeatureBuilder.definePredictedValueRange([], None)
        if self.styles["graph_kernel"]:
            from FeatureBuilders.GraphKernelFeatureBuilder import GraphKernelFeatureBuilder
            self.graphKernelFeatureBuilder = GraphKernelFeatureBuilder(
                self.featureSet)
        if self.styles["noAnnType"]:
            self.multiEdgeFeatureBuilder.noAnnType = True
        if self.styles["noMasking"]:
            self.multiEdgeFeatureBuilder.maskNamedEntities = False
        if self.styles["maxFeatures"]:
            self.multiEdgeFeatureBuilder.maximum = True
        if self.styles["genia_task1"]:
            self.multiEdgeFeatureBuilder.filterAnnTypes.add("Entity")
        self.tokenFeatureBuilder = TokenFeatureBuilder(self.featureSet)
        if self.styles["ontology"]:
            self.multiEdgeFeatureBuilder.ontologyFeatureBuilder = BioInferOntologyFeatureBuilder(
                self.featureSet)
        if self.styles["nodalida"]:
            self.nodalidaFeatureBuilder = NodalidaFeatureBuilder(
                self.featureSet)
        if self.styles["bacteria_renaming"]:
            self.bacteriaRenamingFeatureBuilder = BacteriaRenamingFeatureBuilder(
                self.featureSet)
        if self.styles["trigger_features"]:
            self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
            self.triggerFeatureBuilder.useNonNameEntities = True
            if self.styles["genia_task1"]:
                self.triggerFeatureBuilder.filterAnnTypes.add("Entity")
            #self.bioinferOntologies = OntologyUtils.loadOntologies(OntologyUtils.g_bioInferFileName)
        if self.styles["rel_features"]:
            self.relFeatureBuilder = RELFeatureBuilder(featureSet)
        if self.styles["ddi_features"]:
            self.drugFeatureBuilder = DrugFeatureBuilder(featureSet)
        if self.styles["evex"]:
            self.evexFeatureBuilder = EVEXFeatureBuilder(featureSet)
        if self.styles["giuliano"]:
            self.giulianoFeatureBuilder = GiulianoFeatureBuilder(featureSet)
        self.pathLengths = length
        assert (self.pathLengths == None)
        self.types = types
        if self.styles["random"]:
            from FeatureBuilders.RandomFeatureBuilder import RandomFeatureBuilder
            self.randomFeatureBuilder = RandomFeatureBuilder(self.featureSet)