コード例 #1
0
    def __init__(self,
                 style=None,
                 classSet=None,
                 featureSet=None,
                 gazetteerFileName=None):
        """Initialise id sets, speculation word lists, gazetteer and styles.

        Args:
            style: Style specification, passed unchanged to
                self.getParameters().
            classSet: Class id set. When None, a new IdSet(1) is created.
            featureSet: Feature id set. When None, a new IdSet() is created.
            gazetteerFileName: Optional file handed to Gazetteer.loadGztr().
                When None, self.gazetteer is set to None.

        Raises:
            AssertionError: If classSet.getId("neg") does not return 1.
        """
        if classSet is None:
            classSet = IdSet(1)
        # The "neg" class must map to id 1.
        assert classSet.getId("neg") == 1
        if featureSet is None:
            featureSet = IdSet()

        # speculationWords is a module-level name that is only read here, so
        # no "global" declaration is required.
        self.specWords, self.specWordStems = readWords(speculationWords)

        ExampleBuilder.__init__(self, classSet, featureSet)
        if gazetteerFileName is not None:
            self.gazetteer = Gazetteer.loadGztr(gazetteerFileName)
            print >> sys.stderr, "Loaded gazetteer from", gazetteerFileName
        else:
            self.gazetteer = None

        # First mapping: default parameter values. The second mapping
        # presumably lists the allowed values per parameter -- confirm against
        # _setDefaultParameters().
        self._setDefaultParameters(
            {
                "classification": "multiclass",
                "speculation_words": True
            },
            {"classification": ("multiclass", "speculation", "negation")})
        self.styles = self.getParameters(style)
コード例 #2
0
ファイル: KerasExampleBuilder.py プロジェクト: jbjorne/TEES
 def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None, skiplist=None):
     """Initialise id sets, style parameters and the empty matrix buffers.

     Args:
         style: Style specification, passed unchanged to
             self.getParameters().
         classSet: Label id set. When None, a new IdSet(0) is created.
         featureSet: Feature id set. When None, a new IdSet(0) is created.
         gazetteerFileName: Accepted but not used by this constructor.
         skiplist: Accepted but not used by this constructor.
     """
     if classSet is None:
         classSet = IdSet(0)
     if featureSet is None:
         featureSet = IdSet(0)

     ExampleBuilder.__init__(self, classSet, featureSet)

     # Aliases for the id sets; self.featureSet / self.classSet are presumably
     # assigned by ExampleBuilder.__init__ -- confirm.
     self.featureIds = self.featureSet
     self.labelIds = self.classSet

     self._setDefaultParameters(["directed", "undirected", "cutoff", "annotated_only", "all_positive", "wv",
                                 "epochs", "html", "autoencode", "lr", "patience"])
     self.styles = self.getParameters(style)
     # A truthy cutoff is normalised to an integer.
     if self.styles["cutoff"]:
         self.styles["cutoff"] = int(self.styles["cutoff"])

     self.wvIndices = None
     self.embeddingMatrices = None
     wvName = self.styles.get("wv")
     if wvName is not None:
         indexPath = wvName + "-indices.json.gz"
         # Fall back to the shared data directory when the path does not
         # exist as given.
         if not os.path.exists(indexPath):
             indexPath = os.path.join(Settings.DATAPATH, "wv", indexPath)
         print >> sys.stderr, "Loading word vector indices from", indexPath
         with gzip.open(indexPath, "rt") as f:
             self.wvIndices = json.load(f)["indices"]
         self.embeddingMatrices = []

     self.dimMatrix = 32
     self.rangeMatrix = range(self.dimMatrix)
     self.featureMatrices = []
     self.labelMatrices = []
     self.tokenLists = []
コード例 #3
0
    def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None, skiplist=None):
        """Initialise id sets, gazetteer, styles, skiplist and feature builders.

        Args:
            style: Style specification, passed unchanged to
                self.getParameters().
            classSet: Class id set. When None, a new IdSet(1) is created.
            featureSet: Feature id set. When None, a new IdSet() is created.
            gazetteerFileName: Optional file handed to Gazetteer.loadGztr().
                When None, self.gazetteer is set to None.
            skiplist: Optional path of a text file; each line, stripped of
                surrounding whitespace, is added to self.skiplist.

        Raises:
            AssertionError: If classSet.getId("neg") does not return 1.
        """
        if classSet is None:
            classSet = IdSet(1)
        if featureSet is None:
            featureSet = IdSet()

        ExampleBuilder.__init__(self, classSet, featureSet)
        # The "neg" class must map to id 1.
        assert classSet.getId("neg") == 1
        if gazetteerFileName is not None:
            self.gazetteer = Gazetteer.loadGztr(gazetteerFileName)
            print >> sys.stderr, "Loaded gazetteer from", gazetteerFileName
        else:
            print >> sys.stderr, "No gazetteer loaded"
            self.gazetteer = None
        self._setDefaultParameters(["rel_features", "wordnet", "bb_features", "giuliano",
                                    "epi_merge_negated", "limit_merged_types", "genia_task1",
                                    "names", "build_for_nameless", "skip_for_nameless",
                                    "pos_only", "all_tokens", "pos_pairs", "linear_ngrams",
                                    "phospho", "drugbank_features", "ddi13_features", "metamap",
                                    "only_types", "ontobiotope_features", "bb_spans", "w2v",
                                    "no_context"])
        self.styles = self.getParameters(style)

        self.skiplist = set()
        if skiplist is not None:
            # The context manager closes the file even if reading fails.
            with open(skiplist, "rt") as f:
                self.skiplist.update(line.strip() for line in f)

        # Optional feature builders are created only when their style is on.
        if self.styles["rel_features"]:
            self.relFeatureBuilder = RELFeatureBuilder(featureSet)
        if self.styles["wordnet"]:
            self.wordNetFeatureBuilder = WordNetFeatureBuilder(featureSet)
        if self.styles["bb_features"]:
            self.bacteriaTokens = PhraseTriggerExampleBuilder.getBacteriaTokens()
        if self.styles["giuliano"]:
            self.giulianoFeatureBuilder = GiulianoFeatureBuilder(featureSet)
        if self.styles["drugbank_features"]:
            self.drugFeatureBuilder = DrugFeatureBuilder(featureSet)
        if self.styles["ontobiotope_features"]:
            self.ontobiotopeFeatureBuilder = OntoBiotopeFeatureBuilder(self.featureSet)
        if self.styles["w2v"]:
            self.wordVectorFeatureBuilder = WordVectorFeatureBuilder(featureSet)
コード例 #4
0
    def __init__(self,
                 style=None,
                 classSet=None,
                 featureSet=None,
                 gazetteerFileName=None,
                 skiplist=None):
        """Initialise id sets, gazetteer, styles, skiplist and feature builders.

        Args:
            style: Style specification, passed unchanged to
                self.getParameters().
            classSet: Class id set. When None, a new IdSet(1) is created.
            featureSet: Feature id set. When None, a new IdSet() is created.
            gazetteerFileName: Optional file handed to Gazetteer.loadGztr().
                When None, self.gazetteer is set to None.
            skiplist: Optional path of a text file; each line, stripped of
                surrounding whitespace, is added to self.skiplist.

        Raises:
            AssertionError: If classSet.getId("neg") does not return 1.
        """
        if classSet is None:
            classSet = IdSet(1)
        if featureSet is None:
            featureSet = IdSet()

        ExampleBuilder.__init__(self, classSet, featureSet)
        # The "neg" class must map to id 1.
        assert classSet.getId("neg") == 1
        if gazetteerFileName is not None:
            self.gazetteer = Gazetteer.loadGztr(gazetteerFileName)
            print >> sys.stderr, "Loaded gazetteer from", gazetteerFileName
        else:
            print >> sys.stderr, "No gazetteer loaded"
            self.gazetteer = None
        self._setDefaultParameters([
            "rel_features", "wordnet", "bb_features", "giuliano",
            "epi_merge_negated", "limit_merged_types", "genia_task1",
            "build_for_nameless", "pos_only", "all_tokens", "names",
            "pos_pairs", "linear_ngrams", "phospho"
        ])
        self.styles = self.getParameters(style)

        self.skiplist = set()
        if skiplist is not None:
            # The context manager closes the file even if reading fails.
            with open(skiplist, "rt") as f:
                self.skiplist.update(line.strip() for line in f)

        # Optional feature builders are created only when their style is on.
        if self.styles["rel_features"]:
            self.relFeatureBuilder = RELFeatureBuilder(featureSet)
        if self.styles["wordnet"]:
            self.wordNetFeatureBuilder = WordNetFeatureBuilder(featureSet)
        if self.styles["bb_features"]:
            self.bacteriaTokens = PhraseTriggerExampleBuilder.getBacteriaTokens()
        if self.styles["giuliano"]:
            self.giulianoFeatureBuilder = GiulianoFeatureBuilder(featureSet)
コード例 #5
0
    def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None):
        """Initialise id sets, styles and the trigger feature builder.

        Args:
            style: Style specification, passed unchanged to
                self.getParameters().
            classSet: Class id set. When None, a new IdSet(1) is created.
            featureSet: Feature id set. When None, a new IdSet() is created.
            gazetteerFileName: Accepted but not used by this constructor.

        Raises:
            AssertionError: If classSet.getId("neg") does not return 1.
        """
        if classSet is None:
            classSet = IdSet(1)
        # The "neg" class must map to id 1.
        assert classSet.getId("neg") == 1
        if featureSet is None:
            featureSet = IdSet()
        ExampleBuilder.__init__(self, classSet, featureSet)

        self._setDefaultParameters(["co_limits"])
        self.styles = self.getParameters(style)
        self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
        self.triggerFeatureBuilder.useNonNameEntities = False
        self.phraseTypeCounts = {}
コード例 #6
0
    def __init__(self,
                 style=None,
                 classSet=None,
                 featureSet=None,
                 gazetteerFileName=None):
        """Initialise id sets, styles and the trigger feature builder.

        Args:
            style: Style specification, passed unchanged to
                self.getParameters().
            classSet: Class id set. When None, a new IdSet(1) is created.
            featureSet: Feature id set. When None, a new IdSet() is created.
            gazetteerFileName: Accepted but not used by this constructor.

        Raises:
            AssertionError: If classSet.getId("neg") does not return 1.
        """
        if classSet is None:
            classSet = IdSet(1)
        # The "neg" class must map to id 1.
        assert classSet.getId("neg") == 1
        if featureSet is None:
            featureSet = IdSet()
        ExampleBuilder.__init__(self, classSet, featureSet)

        self._setDefaultParameters(["co_limits"])
        self.styles = self.getParameters(style)
        self.triggerFeatureBuilder = TriggerFeatureBuilder(self.featureSet)
        self.triggerFeatureBuilder.useNonNameEntities = False
        self.phraseTypeCounts = {}
コード例 #7
0
 def __init__(self, style=None, classSet=None, featureSet=None, gazetteerFileName=None):
     """Initialise id sets, speculation word lists, gazetteer and styles.

     Args:
         style: Style specification, passed unchanged to
             self.getParameters().
         classSet: Class id set. When None, a new IdSet(1) is created.
         featureSet: Feature id set. When None, a new IdSet() is created.
         gazetteerFileName: Optional file handed to Gazetteer.loadGztr().
             When None, self.gazetteer is set to None.

     Raises:
         AssertionError: If classSet.getId("neg") does not return 1.
     """
     if classSet is None:
         classSet = IdSet(1)
     # The "neg" class must map to id 1.
     assert classSet.getId("neg") == 1
     if featureSet is None:
         featureSet = IdSet()

     # speculationWords is a module-level name that is only read here, so no
     # "global" declaration is required.
     self.specWords, self.specWordStems = readWords(speculationWords)

     ExampleBuilder.__init__(self, classSet, featureSet)
     if gazetteerFileName is not None:
         self.gazetteer = Gazetteer.loadGztr(gazetteerFileName)
         print >> sys.stderr, "Loaded gazetteer from", gazetteerFileName
     else:
         self.gazetteer = None

     # First mapping: default parameter values. The second mapping presumably
     # lists the allowed values per parameter -- confirm against
     # _setDefaultParameters().
     self._setDefaultParameters(
         {"classification": "multiclass", "speculation_words": True},
         {"classification": ("multiclass", "speculation", "negation")})
     self.styles = self.getParameters(style)
コード例 #8
0
    def __init__(self,
                 style=None,
                 classSet=None,
                 featureSet=None,
                 gazetteerFileName=None,
                 skiplist=None):
        """Initialise id sets, style parameters and the empty matrix buffers.

        Args:
            style: Style specification, passed unchanged to
                self.getParameters().
            classSet: Label id set. When None, a new IdSet(0) is created.
            featureSet: Feature id set. When None, a new IdSet(0) is created.
            gazetteerFileName: Accepted but not used by this constructor.
            skiplist: Accepted but not used by this constructor.
        """
        if classSet is None:
            classSet = IdSet(0)
        if featureSet is None:
            featureSet = IdSet(0)

        ExampleBuilder.__init__(self, classSet, featureSet)

        # Aliases for the id sets; self.featureSet / self.classSet are
        # presumably assigned by ExampleBuilder.__init__ -- confirm.
        self.featureIds = self.featureSet
        self.labelIds = self.classSet

        self._setDefaultParameters([
            "directed", "undirected", "cutoff", "annotated_only",
            "all_positive", "wv", "epochs", "html", "autoencode", "lr",
            "patience"
        ])
        self.styles = self.getParameters(style)
        # A truthy cutoff is normalised to an integer.
        if self.styles["cutoff"]:
            self.styles["cutoff"] = int(self.styles["cutoff"])

        self.wvIndices = None
        self.embeddingMatrices = None
        wvName = self.styles.get("wv")
        if wvName is not None:
            indexPath = wvName + "-indices.json.gz"
            # Fall back to the shared data directory when the path does not
            # exist as given.
            if not os.path.exists(indexPath):
                indexPath = os.path.join(Settings.DATAPATH, "wv", indexPath)
            print >> sys.stderr, "Loading word vector indices from", indexPath
            with gzip.open(indexPath, "rt") as f:
                self.wvIndices = json.load(f)["indices"]
            self.embeddingMatrices = []

        self.dimMatrix = 32
        self.rangeMatrix = range(self.dimMatrix)
        self.featureMatrices = []
        self.labelMatrices = []
        self.tokenLists = []